From f6998dec4b45c46aa78a63d2a18ca897a8411858 Mon Sep 17 00:00:00 2001 From: rolv Date: Sun, 2 Mar 2025 17:42:24 +0000 Subject: [PATCH 01/25] feat: use data annotations for Markdown files --- src/cli/check.rs | 8 +- src/parsers/html.rs | 86 +++++++++++++++++ src/parsers/markdown.rs | 139 +++++++++++++++++++++++++++ src/parsers/mod.rs | 11 +++ src/{parsers.rs => parsers/typst.rs} | 107 +-------------------- 5 files changed, 241 insertions(+), 110 deletions(-) create mode 100644 src/parsers/html.rs create mode 100644 src/parsers/markdown.rs create mode 100644 src/parsers/mod.rs rename src/{parsers.rs => parsers/typst.rs} (51%) diff --git a/src/cli/check.rs b/src/cli/check.rs index 7176995..21db7cc 100644 --- a/src/cli/check.rs +++ b/src/cli/check.rs @@ -20,7 +20,7 @@ use crate::{ server::ServerClient, }, error::{Error, Result}, - parsers::{parse_html, parse_markdown, parse_typst}, + parsers::{html::parse_html, markdown::parse_markdown, typst::parse_typst}, }; use super::ExecuteSubcommand; @@ -151,6 +151,7 @@ impl ExecuteSubcommand for Command { }; let file_content = std::fs::read_to_string(filename)?; + let (response, text): (check::Response, String) = match &file_type { FileType::Auto => unreachable!(), FileType::Raw => { @@ -166,10 +167,7 @@ impl ExecuteSubcommand for Command { let text = parse_html(&file_content); Data::from_iter([DataAnnotation::new_text(text)]) }, - FileType::Markdown => { - let text = parse_markdown(&file_content); - Data::from_iter([DataAnnotation::new_text(text)]) - }, + FileType::Markdown => parse_markdown(&file_content), _ => unreachable!(), }; let response = server_client diff --git a/src/parsers/html.rs b/src/parsers/html.rs new file mode 100644 index 0000000..31bfd9a --- /dev/null +++ b/src/parsers/html.rs @@ -0,0 +1,86 @@ +//! Parse the contents of HTML files into a format parseable by the LanguageTool +//! API. + +/// Parse the contents of an HTML file into a text format to be sent to the +/// LanguageTool API. 
+pub fn parse_html(file_content: impl AsRef<str>) -> String { + use html_parser::Node; + + let mut txt = String::new(); + + let html = html_parser::Dom::parse(file_content.as_ref()).unwrap_or_default(); + let mut children: Vec<Node> = html.children.into_iter().rev().collect(); + + fn handle_node(txt: &mut String, node: Node) { + if let Some(e) = node.element() { + match e.name.as_str() { + "head" | "script" | "style" => { + return; + }, + "code" => { + txt.push_str("_code_"); + return; + }, + "a" => { + txt.push_str("_link_"); + return; + }, + "pre" => { + txt.push_str("_pre_"); + txt.push_str("\n\n"); + return; + }, + s => { + let add_children = |txt: &mut String| { + if !e.children.is_empty() { + // Recursively handle children + e.children.clone().into_iter().for_each(|n| { + handle_node(txt, n); + }); + }; + }; + + match s { + "p" | "h1" | "h2" | "h3" | "h4" | "h5" | "h6" | "li" | "td" | "th" => { + while txt.chars().last().is_some_and(|c| c.is_whitespace()) { + txt.pop(); + } + txt.push_str("\n\n"); + add_children(txt); + txt.push_str("\n\n"); + }, + _ => { + add_children(txt); + }, + } + }, + } + } + + if let Some(t) = node.text() { + let mut text = t.trim().to_owned(); + if !text.is_empty() { + let mut chars = t.chars(); + + // Maintain leading/trailing white space, but only a single space + if chars.next().is_some_and(|c| c.is_whitespace()) { + while txt.chars().last().is_some_and(|c| c.is_whitespace()) { + txt.pop(); + } + text.insert(0, ' '); + } + if chars.last().is_some_and(|c| c.is_whitespace()) { + text.push(' '); + } + + txt.push_str(&text); + } + } + } + + while let Some(node) = children.pop() { + handle_node(&mut txt, node); + } + + txt +} diff --git a/src/parsers/markdown.rs b/src/parsers/markdown.rs new file mode 100644 index 0000000..c96f749 --- /dev/null +++ b/src/parsers/markdown.rs @@ -0,0 +1,139 @@ +//! Parse the contents of Markdown files into a format parseable by the +//! LanguageTool API.
+ +use crate::api::check::{Data, DataAnnotation}; + +/// Parse the contents of a Markdown file into a text format to be sent to the +/// LanguageTool API. +#[must_use] +pub fn parse_markdown(file_content: &str) -> Data<'_> { + use pulldown_cmark::{Event, Options, Parser, Tag, TagEnd}; + + let mut annotations: Vec<DataAnnotation> = vec![]; + + // Stack to keep track of the current "tag" context + let mut tags = vec![]; + + Parser::new_ext(file_content, Options::all()).for_each(|event| { + match event { + Event::Start(tag) => { + match tag { + // Start list items + Tag::List(_) | Tag::Item => { + annotations.push(DataAnnotation::new_text("- ")); + }, + _ => {}, + } + + tags.push(tag); + }, + Event::End(tag) => { + match tag { + // Separate list items and table cells + TagEnd::List(_) | TagEnd::Item | TagEnd::TableRow | TagEnd::TableHead => { + annotations.push(DataAnnotation::new_text("\n")); + }, + TagEnd::TableCell => { + annotations.push(DataAnnotation::new_text(" | ")); + }, + _ => {}, + }; + + if tags + .last() + .is_some_and(|t| TagEnd::from(t.to_owned()) == tag) + { + tags.pop(); + }; + }, + + Event::Html(s) | Event::InlineHtml(s) => { + let data = super::html::parse_html(s.into_string()); + annotations.push(DataAnnotation::new_text(data)); + }, + + Event::Text(mut s) => { + // Add space between sentences + if s.chars() + .last() + .is_some_and(|c| matches!(c, '.' | '!' | '?')) + { + s = pulldown_cmark::CowStr::from(s.to_string() + " "); + } + + let Some(tag) = tags.last() else { + annotations.push(DataAnnotation::new_text(s.to_owned())); + return; + }; + + match tag { + Tag::Heading { level, ..
} => { + let s = format!("{s}\n"); + annotations.push(DataAnnotation::new_text(format!( + "{} {s}\n", + "#".repeat(*level as usize) + ))); + }, + + Tag::Emphasis => { + annotations + .push(DataAnnotation::new_interpreted_markup(format!("_{s}_"), s)) + }, + Tag::Strong => { + annotations.push(DataAnnotation::new_interpreted_markup( + format!("**{s}**"), + s, + )) + }, + Tag::Strikethrough => { + annotations + .push(DataAnnotation::new_interpreted_markup(format!("~{s}~"), s)) + }, + + // No changes necessary + Tag::Paragraph + | Tag::List(_) + | Tag::Item + | Tag::BlockQuote + | Tag::TableCell => { + annotations.push(DataAnnotation::new_text(s)); + }, + + // Ignored + Tag::CodeBlock(_) | Tag::Link { .. } | Tag::Image { .. } => { + annotations.push(DataAnnotation::new_interpreted_markup(s, "_")); + }, + _ => {}, + } + }, + Event::Code(s) => { + annotations.push(DataAnnotation::new_interpreted_markup(s, "_")); + }, + + Event::HardBreak => { + annotations.push(DataAnnotation::new_text("\n\n")); + }, + Event::SoftBreak => { + if let Some(last) = annotations.last() { + // Don't add space if the last text already ends with a space + if last + .text + .as_ref() + .is_some_and(|t| t.chars().last().is_some_and(|c| c.is_ascii_whitespace())) + || last.interpret_as.as_ref().is_some_and(|t| { + t.chars().last().is_some_and(|c| c.is_ascii_whitespace()) + }) + { + return; + }; + } + + annotations.push(DataAnnotation::new_text(" ")); + }, + + Event::FootnoteReference(_) | Event::TaskListMarker(_) | Event::Rule => {}, + }; + }); + + Data::from_iter(annotations) +} diff --git a/src/parsers/mod.rs b/src/parsers/mod.rs new file mode 100644 index 0000000..dda4aed --- /dev/null +++ b/src/parsers/mod.rs @@ -0,0 +1,11 @@ +//! Utilities for parsing the contents of different file types into a format +//! representation that can be parsed by the LanguageTool API. 
+ +#![cfg(feature = "html")] +pub mod html; + +#[cfg(feature = "markdown")] +pub mod markdown; + +#[cfg(feature = "typst")] +pub mod typst; diff --git a/src/parsers.rs b/src/parsers/typst.rs similarity index 51% rename from src/parsers.rs rename to src/parsers/typst.rs index b1f4c7c..c1df1d9 100644 --- a/src/parsers.rs +++ b/src/parsers/typst.rs @@ -1,113 +1,10 @@ -//! Utilities for parsing the contents of different file types into a text -//! representation that can be sent to the LanguageTool API. +//! Parse the contents of Typst files into a format parseable by the +//! LanguageTool API. use crate::api::check::{Data, DataAnnotation}; -/// Parse the contents of an HTML file into a text format to be sent to the -/// LanguageTool API. -#[cfg(feature = "html")] -pub fn parse_html(file_content: impl AsRef<str>) -> String { - use html_parser::Node; - - let mut txt = String::new(); - - let html = html_parser::Dom::parse(file_content.as_ref()).unwrap_or_default(); - let mut children: Vec<Node> = html.children.into_iter().rev().collect(); - - fn handle_node(txt: &mut String, node: Node) { - if let Some(e) = node.element() { - match e.name.as_str() { - "head" | "script" | "style" => { - return; - }, - "code" => { - txt.push_str("_code_"); - return; - }, - "a" => { - txt.push_str("_link_"); - return; - }, - "pre" => { - txt.push_str("_pre_"); - txt.push_str("\n\n"); - return; - }, - s => { - let add_children = |txt: &mut String| { - if !e.children.is_empty() { - // Recursively handle children - e.children.clone().into_iter().for_each(|n| { - handle_node(txt, n); - }); - }; - }; - - match s { - "p" | "h1" | "h2" | "h3" | "h4" | "h5" | "h6" | "li" | "td" | "th" => { - while txt.chars().last().is_some_and(|c| c.is_whitespace()) { - txt.pop(); - } - txt.push_str("\n\n"); - add_children(txt); - txt.push_str("\n\n"); - }, - _ => { - add_children(txt); - }, - } - }, - } - } - - if let Some(t) = node.text() { - let mut text = t.trim().to_owned(); - if !text.is_empty() { - let mut chars =
t.chars(); - - // Maintain leading/trailing white space, but only a single space - if chars.next().is_some_and(|c| c.is_whitespace()) { - while txt.chars().last().is_some_and(|c| c.is_whitespace()) { - txt.pop(); - } - text.insert(0, ' '); - } - if chars.last().is_some_and(|c| c.is_whitespace()) { - text.push(' '); - } - - txt.push_str(&text); - } - } - } - - while let Some(node) = children.pop() { - handle_node(&mut txt, node); - } - - txt -} - -/// Parse the contents of a Markdown file into a text format to be sent to the -/// LanguageTool API. -#[cfg(feature = "markdown")] -pub fn parse_markdown(file_content: impl AsRef<str>) -> String { - use pulldown_cmark::{html, Options, Parser}; - - let mut options = Options::empty(); - options.insert(Options::ENABLE_STRIKETHROUGH); - options.insert(Options::ENABLE_TABLES); - - let parser = Parser::new_ext(file_content.as_ref(), options); - let mut html = String::new(); - html::push_html(&mut html, parser); - - parse_html(html) -} - /// Parse the contents of a Typst file into a text format to be sent to the /// LanguageTool API. -#[cfg(feature = "typst")] pub fn parse_typst(file_content: impl AsRef<str>) -> Data<'static> { use typst_syntax::{parse, SyntaxKind, SyntaxNode}; From c19b15b961d8156f1deb89bfe1c23f6efd548690 Mon Sep 17 00:00:00 2001 From: rolv Date: Sat, 8 Mar 2025 15:40:51 +0000 Subject: [PATCH 02/25] refactor: use shared ignore pattern --- src/parsers/markdown.rs | 9 ++++++--- src/parsers/mod.rs | 3 +++ src/parsers/typst.rs | 7 +++++-- 3 files changed, 14 insertions(+), 5 deletions(-) diff --git a/src/parsers/markdown.rs b/src/parsers/markdown.rs index c96f749..d327d3f 100644 --- a/src/parsers/markdown.rs +++ b/src/parsers/markdown.rs @@ -1,7 +1,10 @@ //! Parse the contents of Markdown files into a format parseable by the //! LanguageTool API.
-use crate::api::check::{Data, DataAnnotation}; +use crate::{ + api::check::{Data, DataAnnotation}, + parsers::IGNORE, +}; /// Parse the contents of a Markdown file into a text format to be sent to the /// LanguageTool API. @@ -101,13 +104,13 @@ pub fn parse_markdown(file_content: &str) -> Data<'_> { // Ignored Tag::CodeBlock(_) | Tag::Link { .. } | Tag::Image { .. } => { - annotations.push(DataAnnotation::new_interpreted_markup(s, "_")); + annotations.push(DataAnnotation::new_interpreted_markup(s, IGNORE)); }, _ => {}, } }, Event::Code(s) => { - annotations.push(DataAnnotation::new_interpreted_markup(s, "_")); + annotations.push(DataAnnotation::new_interpreted_markup(s, IGNORE)); }, Event::HardBreak => { diff --git a/src/parsers/mod.rs b/src/parsers/mod.rs index dda4aed..bc4f51a 100644 --- a/src/parsers/mod.rs +++ b/src/parsers/mod.rs @@ -9,3 +9,6 @@ pub mod markdown; #[cfg(feature = "typst")] pub mod typst; + +/// Pattern that is ignored by the LanguageTool API. +const IGNORE: &str = "_"; diff --git a/src/parsers/typst.rs b/src/parsers/typst.rs index c1df1d9..7267f6c 100644 --- a/src/parsers/typst.rs +++ b/src/parsers/typst.rs @@ -1,7 +1,10 @@ //! Parse the contents of Typst files into a format parseable by the //! LanguageTool API. -use crate::api::check::{Data, DataAnnotation}; +use crate::{ + api::check::{Data, DataAnnotation}, + parsers::IGNORE, +}; /// Parse the contents of a Typst file into a text format to be sent to the /// LanguageTool API. @@ -50,7 +53,7 @@ pub fn parse_typst(file_content: impl AsRef) -> Data<'static> { // issues. 
The following sentence would give an error for // repeated whitespace otherwise: This has ``` `backticks` // ``` in it - "_ignore_".to_string(), + IGNORE, )); continue; }, From 7d276f1b0216a00e1f7b185e8d4de4c7e71dc657 Mon Sep 17 00:00:00 2001 From: rolv Date: Sat, 8 Mar 2025 15:42:37 +0000 Subject: [PATCH 03/25] feat: use data annotations for HTML files --- Cargo.lock | 409 +++++++++++++++++++++++++++++----------- Cargo.toml | 5 +- src/cli/check.rs | 5 +- src/parsers/html.rs | 151 ++++++++------- src/parsers/markdown.rs | 4 +- 5 files changed, 385 insertions(+), 189 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 3bcf389..142de05 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -153,15 +153,6 @@ version = "2.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8f68f53c83ab957f72c32642f3868eec03eb974d1fb82e453128456482613d36" -[[package]] -name = "block-buffer" -version = "0.10.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3078c7629b62d3f0439517fa394996acacc5cbc91c5a20d8c658e77abd503a71" -dependencies = [ - "generic-array", -] - [[package]] name = "bstr" version = "1.11.3" @@ -179,6 +170,12 @@ version = "3.17.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1628fb46dfa0b37568d12e5edd512553eccf6a22a78e8bde00bb4aed84d5bdbf" +[[package]] +name = "byteorder" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" + [[package]] name = "bytes" version = "1.10.0" @@ -347,15 +344,6 @@ version = "0.8.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" -[[package]] -name = "cpufeatures" -version = "0.2.17" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "59ed5838eebb26a2bb2e58f6d5b5316989ae9d08bab10e0e6d103e656d1b0280" -dependencies = [ - "libc", -] - 
[[package]] name = "criterion" version = "0.5.1" @@ -424,31 +412,45 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "43da5946c66ffcc7745f48db692ffbb10a83bfe0afd96235c5c2a4fb23994929" [[package]] -name = "crypto-common" -version = "0.1.6" +name = "cssparser" +version = "0.34.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1bfb12502f3fc46cca1bb51ac28df9d618d813cdc3d2f25b9fe775a34af26bb3" +checksum = "b7c66d1cd8ed61bf80b38432613a7a2f09401ab8d0501110655f8b341484a3e3" dependencies = [ - "generic-array", - "typenum", + "cssparser-macros", + "dtoa-short", + "itoa", + "phf", + "smallvec", ] [[package]] -name = "difflib" -version = "0.4.0" +name = "cssparser-macros" +version = "0.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6184e33543162437515c2e2b48714794e37845ec9851711914eec9d308f6ebe8" +checksum = "13b588ba4ac1a99f7f2964d24b3d896ddc6bf847ee3855dbd4366f058cfcd331" +dependencies = [ + "quote", + "syn 2.0.98", +] [[package]] -name = "digest" -version = "0.10.7" +name = "derive_more" +version = "0.99.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292" +checksum = "3da29a38df43d6f156149c9b43ded5e018ddff2a855cf2cfd62e8cd7d079c69f" dependencies = [ - "block-buffer", - "crypto-common", + "proc-macro2", + "quote", + "syn 2.0.98", ] +[[package]] +name = "difflib" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6184e33543162437515c2e2b48714794e37845ec9851711914eec9d308f6ebe8" + [[package]] name = "displaydoc" version = "0.2.5" @@ -466,6 +468,21 @@ version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fea41bba32d969b513997752735605054bc0dfa92b4c56bf1189f2e174be7a10" +[[package]] +name = "dtoa" +version = "1.0.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"d6add3b8cff394282be81f3fc1a0605db594ed69890078ca6e2cab1c408bcf04" + +[[package]] +name = "dtoa-short" +version = "0.3.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cd1511a7b6a56299bd043a9c167a6d2bfb37bf84a6dfceaba651168adfb43c87" +dependencies = [ + "dtoa", +] + [[package]] name = "ecow" version = "0.2.3" @@ -475,6 +492,12 @@ dependencies = [ "serde", ] +[[package]] +name = "ego-tree" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b2972feb8dffe7bc8c5463b1dacda1b0dfbed3710e50f977d965429692d74cd8" + [[package]] name = "either" version = "1.13.0" @@ -576,6 +599,16 @@ dependencies = [ "percent-encoding", ] +[[package]] +name = "futf" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df420e2e84819663797d1ec6544b13c5be84629e7bb00dc960d6917db2987843" +dependencies = [ + "mac", + "new_debug_unreachable", +] + [[package]] name = "futures" version = "0.3.31" @@ -666,13 +699,12 @@ dependencies = [ ] [[package]] -name = "generic-array" -version = "0.14.7" +name = "fxhash" +version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a" +checksum = "c31b6d751ae2c7f11320402d34e41349dd1016f8d5d45e48c4312bc8625af50c" dependencies = [ - "typenum", - "version_check", + "byteorder", ] [[package]] @@ -750,18 +782,15 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fbf6a919d6cf397374f7dfeeea91d974c7c0a7221d0d0f4f20d859d329e53fcc" [[package]] -name = "html_parser" -version = "0.7.0" +name = "html5ever" +version = "0.29.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f6f56db07b6612644f6f7719f8ef944f75fff9d6378fdf3d316fd32194184abd" +checksum = "3b7410cae13cbc75623c98ac4cbfd1f0bedddf3227afc24f370cf0f50a44a11c" dependencies = [ - "doc-comment", - "pest", - "pest_derive", - "serde", - 
"serde_derive", - "serde_json", - "thiserror 1.0.69", + "log", + "mac", + "markup5ever", + "match_token", ] [[package]] @@ -1050,9 +1079,9 @@ dependencies = [ "clap_complete", "codspeed-criterion-compat", "criterion", + "ego-tree", "enum_dispatch", "futures", - "html_parser", "is-terminal", "lifetime", "log", @@ -1060,11 +1089,12 @@ dependencies = [ "pretty_env_logger", "pulldown-cmark", "reqwest", + "scraper", "serde", "serde_json", "tempfile", "termcolor", - "thiserror 1.0.69", + "thiserror", "tokio", "typst-syntax", ] @@ -1123,12 +1153,53 @@ version = "0.7.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4ee93343901ab17bd981295f2cf0026d4ad018c7c31ba84549a4ddbb47a45104" +[[package]] +name = "lock_api" +version = "0.4.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "07af8b9cdd281b7915f413fa73f29ebd5d55d0d3f0155584dade1ff18cea1b17" +dependencies = [ + "autocfg", + "scopeguard", +] + [[package]] name = "log" version = "0.4.25" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "04cbf5b083de1c7e0222a7a51dbfdba1cbe1c6ab0b15e29fff3f6c077fd9cd9f" +[[package]] +name = "mac" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c41e0c4fef86961ac6d6f8a82609f55f31b05e4fce149ac5710e439df7619ba4" + +[[package]] +name = "markup5ever" +version = "0.14.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c7a7213d12e1864c0f002f52c2923d4556935a43dec5e71355c2760e0f6e7a18" +dependencies = [ + "log", + "phf", + "phf_codegen", + "string_cache", + "string_cache_codegen", + "tendril", +] + +[[package]] +name = "match_token" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "88a9689d8d44bf9964484516275f5cd4c9b59457a6940c1d5d0ecbb94510a36b" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.98", +] + [[package]] name = "memchr" version = "2.7.4" @@ -1178,6 +1249,12 @@ 
dependencies = [ "tempfile", ] +[[package]] +name = "new_debug_unreachable" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "650eef8c711430f1a879fdd01d4745a7deea475becfb90269c06775983bbf086" + [[package]] name = "normalize-line-endings" version = "0.3.0" @@ -1268,6 +1345,29 @@ dependencies = [ "vcpkg", ] +[[package]] +name = "parking_lot" +version = "0.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f1bf18183cf54e8d6059647fc3063646a1801cf30896933ec2311622cc4b9a27" +dependencies = [ + "lock_api", + "parking_lot_core", +] + +[[package]] +name = "parking_lot_core" +version = "0.9.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e401f977ab385c9e4e3ab30627d6f26d00e2c73eef317493c4ec6d468726cf8" +dependencies = [ + "cfg-if", + "libc", + "redox_syscall", + "smallvec", + "windows-targets 0.52.6", +] + [[package]] name = "percent-encoding" version = "2.3.1" @@ -1275,48 +1375,55 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e" [[package]] -name = "pest" -version = "2.7.15" +name = "phf" +version = "0.11.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8b7cafe60d6cf8e62e1b9b2ea516a089c008945bb5a275416789e7db0bc199dc" +checksum = "1fd6780a80ae0c52cc120a26a1a42c1ae51b247a253e4e06113d23d2c2edd078" dependencies = [ - "memchr", - "thiserror 2.0.11", - "ucd-trie", + "phf_macros", + "phf_shared", ] [[package]] -name = "pest_derive" -version = "2.7.15" +name = "phf_codegen" +version = "0.11.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "816518421cfc6887a0d62bf441b6ffb4536fcc926395a69e1a85852d4363f57e" +checksum = "aef8048c789fa5e851558d709946d6d79a8ff88c0440c587967f8e94bfb1216a" dependencies = [ - "pest", - "pest_generator", + "phf_generator", + "phf_shared", ] [[package]] -name = "pest_generator" -version 
= "2.7.15" +name = "phf_generator" +version = "0.11.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7d1396fd3a870fc7838768d171b4616d5c91f6cc25e377b673d714567d99377b" +checksum = "3c80231409c20246a13fddb31776fb942c38553c51e871f8cbd687a4cfb5843d" dependencies = [ - "pest", - "pest_meta", + "phf_shared", + "rand", +] + +[[package]] +name = "phf_macros" +version = "0.11.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f84ac04429c13a7ff43785d75ad27569f2951ce0ffd30a3321230db2fc727216" +dependencies = [ + "phf_generator", + "phf_shared", "proc-macro2", "quote", "syn 2.0.98", ] [[package]] -name = "pest_meta" -version = "2.7.15" +name = "phf_shared" +version = "0.11.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e1e58089ea25d717bfd31fb534e4f3afcc2cc569c70de3e239778991ea3b7dea" +checksum = "67eabc2ef2a60eb7faa00097bd1ffdb5bd28e62bf39990626a582201b7a754e5" dependencies = [ - "once_cell", - "pest", - "sha2", + "siphasher", ] [[package]] @@ -1371,6 +1478,12 @@ version = "1.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "280dc24453071f1b63954171985a0b0d30058d287960968b9b2aca264c8d4ee6" +[[package]] +name = "precomputed-hash" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "925383efa346730478fb4838dbe9137d2a47675ad789c546d150a6e1dd4ab31c" + [[package]] name = "predicates" version = "3.1.3" @@ -1448,6 +1561,21 @@ dependencies = [ "proc-macro2", ] +[[package]] +name = "rand" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" +dependencies = [ + "rand_core", +] + +[[package]] +name = "rand_core" +version = "0.6.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" + [[package]] name = "rayon" 
version = "1.10.0" @@ -1468,6 +1596,15 @@ dependencies = [ "crossbeam-utils", ] +[[package]] +name = "redox_syscall" +version = "0.5.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b8c0c260b63a8219631167be35e6a988e9554dbd323f8bd08439c8ed1302bd1" +dependencies = [ + "bitflags 2.8.0", +] + [[package]] name = "regex" version = "1.11.1" @@ -1595,6 +1732,27 @@ dependencies = [ "windows-sys 0.59.0", ] +[[package]] +name = "scopeguard" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" + +[[package]] +name = "scraper" +version = "0.23.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "527e65d9d888567588db4c12da1087598d0f6f8b346cc2c5abc91f05fc2dffe2" +dependencies = [ + "cssparser", + "ego-tree", + "getopts", + "html5ever", + "precomputed-hash", + "selectors", + "tendril", +] + [[package]] name = "security-framework" version = "2.11.1" @@ -1618,6 +1776,25 @@ dependencies = [ "libc", ] +[[package]] +name = "selectors" +version = "0.26.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fd568a4c9bb598e291a08244a5c1f5a8a6650bee243b5b0f8dbb3d9cc1d87fe8" +dependencies = [ + "bitflags 2.8.0", + "cssparser", + "derive_more", + "fxhash", + "log", + "new_debug_unreachable", + "phf", + "phf_codegen", + "precomputed-hash", + "servo_arc", + "smallvec", +] + [[package]] name = "serde" version = "1.0.217" @@ -1672,14 +1849,12 @@ dependencies = [ ] [[package]] -name = "sha2" -version = "0.10.8" +name = "servo_arc" +version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "793db75ad2bcafc3ffa7c68b215fee268f537982cd901d132f89c6343f3a3dc8" +checksum = "ae65c4249478a2647db249fb43e23cec56a2c8974a427e7bd8cb5a1d0964921a" dependencies = [ - "cfg-if", - "cpufeatures", - "digest", + "stable_deref_trait", ] [[package]] @@ -1725,6 +1900,31 @@ version = 
"1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3" +[[package]] +name = "string_cache" +version = "0.8.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "938d512196766101d333398efde81bc1f37b00cb42c2f8350e5df639f040bbbe" +dependencies = [ + "new_debug_unreachable", + "parking_lot", + "phf_shared", + "precomputed-hash", + "serde", +] + +[[package]] +name = "string_cache_codegen" +version = "0.5.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c711928715f1fe0fe509c53b43e993a9a557babc2d0a3567d0a3006f1ac931a0" +dependencies = [ + "phf_generator", + "phf_shared", + "proc-macro2", + "quote", +] + [[package]] name = "strsim" version = "0.11.1" @@ -1805,6 +2005,17 @@ dependencies = [ "windows-sys 0.59.0", ] +[[package]] +name = "tendril" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d24a120c5fc464a3458240ee02c299ebcb9d67b5249c8848b09d639dca8d7bb0" +dependencies = [ + "futf", + "mac", + "utf-8", +] + [[package]] name = "termcolor" version = "1.4.1" @@ -1842,16 +2053,7 @@ version = "1.0.69" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b6aaf5339b578ea85b50e080feb250a3e8ae8cfcdff9a461c9ec2904bc923f52" dependencies = [ - "thiserror-impl 1.0.69", -] - -[[package]] -name = "thiserror" -version = "2.0.11" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d452f284b73e6d76dd36758a0c8684b1d5be31f92b89d07fd5822175732206fc" -dependencies = [ - "thiserror-impl 2.0.11", + "thiserror-impl", ] [[package]] @@ -1865,17 +2067,6 @@ dependencies = [ "syn 2.0.98", ] -[[package]] -name = "thiserror-impl" -version = "2.0.11" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "26afc1baea8a989337eeb52b6e72a039780ce45c3edfcc9c5b9d112feeb173c2" -dependencies = [ - "proc-macro2", - "quote", - "syn 
2.0.98", -] - [[package]] name = "tinystr" version = "0.7.6" @@ -2011,12 +2202,6 @@ version = "0.2.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e421abadd41a4225275504ea4d6566923418b7f05506fbc9c0fe86ba7396114b" -[[package]] -name = "typenum" -version = "1.17.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "42ff0bf0c66b8238c6f3b578df37d0b7848e55df8577b3f74f92a69acceeb825" - [[package]] name = "typst-syntax" version = "0.12.0" @@ -2048,12 +2233,6 @@ dependencies = [ "thin-vec", ] -[[package]] -name = "ucd-trie" -version = "0.1.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2896d95c02a80c6d6a5d6e953d479f5ddf2dfdb6a244441010e373ac0fb88971" - [[package]] name = "unicase" version = "2.8.1" @@ -2107,6 +2286,12 @@ dependencies = [ "percent-encoding", ] +[[package]] +name = "utf-8" +version = "0.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09cc8ee72d2a9becf2f2febe0205bbed8fc6615b7cb429ad062dc7b7ddd036a9" + [[package]] name = "utf16_iter" version = "1.0.5" @@ -2131,12 +2316,6 @@ version = "0.2.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426" -[[package]] -name = "version_check" -version = "0.9.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" - [[package]] name = "wait-timeout" version = "0.2.1" diff --git a/Cargo.toml b/Cargo.toml index 6738bb6..ce73917 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -14,7 +14,8 @@ clap = {version = "^4.5.18", features = ["cargo", "derive", "env", "wrap_help"], clap-verbosity-flag = { version = "3.0.2", optional = true } clap_complete = {version = "^4.5.2", optional = true} enum_dispatch = {version = "0.3.13", optional = true} -html_parser = {version = "0.7.0", optional = true} +scraper = {version = "0.23.1", 
optional = true} +ego-tree = {version = "0.10.0", optional = true} is-terminal = {version = "0.4.3", optional = true} lifetime = {version = "0.1.0", features = ["macros"]} log = { version = "0.4.25", optional = true } @@ -46,7 +47,7 @@ color = ["annotate-snippets?/color", "dep:termcolor"] default = ["cli", "native-tls"] docker = [] full = ["cli-complete", "docker", "unstable"] -html = ["dep:html_parser"] +html = ["dep:ego-tree", "dep:scraper"] markdown = ["dep:pulldown-cmark", "html"] multithreaded = ["dep:tokio"] native-tls = ["reqwest/native-tls"] diff --git a/src/cli/check.rs b/src/cli/check.rs index 21db7cc..ec65059 100644 --- a/src/cli/check.rs +++ b/src/cli/check.rs @@ -163,10 +163,7 @@ impl ExecuteSubcommand for Command { FileType::Typst | FileType::Markdown | FileType::Html => { let data = match file_type { FileType::Typst => parse_typst(&file_content), - FileType::Html => { - let text = parse_html(&file_content); - Data::from_iter([DataAnnotation::new_text(text)]) - }, + FileType::Html => parse_html(&file_content), FileType::Markdown => parse_markdown(&file_content), _ => unreachable!(), }; diff --git a/src/parsers/html.rs b/src/parsers/html.rs index 31bfd9a..2230777 100644 --- a/src/parsers/html.rs +++ b/src/parsers/html.rs @@ -1,86 +1,105 @@ //! Parse the contents of HTML files into a format parseable by the LanguageTool //! API. +use ego_tree::NodeRef; +use scraper::{Html, Node}; + +use crate::{ + api::check::{Data, DataAnnotation}, + parsers::IGNORE, +}; + /// Parse the contents of an HTML file into a text format to be sent to the /// LanguageTool API. 
-pub fn parse_html(file_content: impl AsRef) -> String { - use html_parser::Node; +#[must_use] +pub fn parse_html(file_content: &str) -> Data<'static> { + let mut annotations: Vec = vec![]; - let mut txt = String::new(); + fn handle_node(annotations: &mut Vec, node: NodeRef<'_, Node>) { + let n = node.value(); + match n { + Node::Element(el) => { + match el.name() { + "head" | "script" | "style" => {}, - let html = html_parser::Dom::parse(file_content.as_ref()).unwrap_or_default(); - let mut children: Vec = html.children.into_iter().rev().collect(); + "code" => { + annotations.push(DataAnnotation::new_interpreted_markup( + "...", + IGNORE, + )); + }, - fn handle_node(txt: &mut String, node: Node) { - if let Some(e) = node.element() { - match e.name.as_str() { - "head" | "script" | "style" => { - return; - }, - "code" => { - txt.push_str("_code_"); - return; - }, - "a" => { - txt.push_str("_link_"); - return; - }, - "pre" => { - txt.push_str("_pre_"); - txt.push_str("\n\n"); - return; - }, - s => { - let add_children = |txt: &mut String| { - if !e.children.is_empty() { - // Recursively handle children - e.children.clone().into_iter().for_each(|n| { - handle_node(txt, n); - }); - }; - }; + "img" => { + annotations.push(DataAnnotation::new_interpreted_markup("", IGNORE)); + }, - match s { - "p" | "h1" | "h2" | "h3" | "h4" | "h5" | "h6" | "li" | "td" | "th" => { - while txt.chars().last().is_some_and(|c| c.is_whitespace()) { - txt.pop(); - } - txt.push_str("\n\n"); - add_children(txt); - txt.push_str("\n\n"); - }, - _ => { - add_children(txt); - }, - } - }, - } - } + s => { + match s { + "p" | "h1" | "h2" | "h3" | "h4" | "h5" | "h6" | "li" | "td" | "th" + | "div" => { + annotations.push(DataAnnotation::new_interpreted_markup( + format!("<{s}>"), + "\n\n", + )); + for node in node.children() { + handle_node(annotations, node); + } + annotations.push(DataAnnotation::new_interpreted_markup( + format!(""), + "\n\n", + )); + }, + _ => { + 
annotations.push(DataAnnotation::new_markup(format!("<{s}>"))); + for node in node.children() { + handle_node(annotations, node); + } + annotations.push(DataAnnotation::new_markup(format!(""))); + }, + } + }, + } + }, - if let Some(t) = node.text() { - let mut text = t.trim().to_owned(); - if !text.is_empty() { - let mut chars = t.chars(); + Node::Text(t) => { + let mut text = t.trim().to_owned(); + if !text.is_empty() { + let mut chars = t.chars(); - // Maintain leading/trailing white space, but only a single space - if chars.next().is_some_and(|c| c.is_whitespace()) { - while txt.chars().last().is_some_and(|c| c.is_whitespace()) { - txt.pop(); + // Maintain leading/trailing white space, but only a single space + if chars.next().is_some_and(|c| c.is_whitespace()) { + while text.chars().last().is_some_and(|c| c.is_whitespace()) { + text.pop(); + } + text.insert(0, ' '); } - text.insert(0, ' '); - } - if chars.last().is_some_and(|c| c.is_whitespace()) { - text.push(' '); + if chars.last().is_some_and(|c| c.is_whitespace()) { + text.push(' '); + } + + annotations.push(DataAnnotation::new_text(text)) + } else { + annotations.push(DataAnnotation::new_text("\n\n")); } + }, + + Node::Comment(c) => { + let comment = c.to_string(); + + annotations.push(DataAnnotation::new_interpreted_markup( + format!("",), + format!("\n\n{comment}\n\n"), + )); + }, - txt.push_str(&text); - } + _ => {}, } } - while let Some(node) = children.pop() { - handle_node(&mut txt, node); + let document = Html::parse_document(file_content); + for node in document.root_element().children() { + handle_node(&mut annotations, node); } - txt + Data::from_iter(annotations) } diff --git a/src/parsers/markdown.rs b/src/parsers/markdown.rs index d327d3f..d3686c7 100644 --- a/src/parsers/markdown.rs +++ b/src/parsers/markdown.rs @@ -51,8 +51,8 @@ pub fn parse_markdown(file_content: &str) -> Data<'_> { }, Event::Html(s) | Event::InlineHtml(s) => { - let data = super::html::parse_html(s.into_string()); - 
annotations.push(DataAnnotation::new_text(data)); + let data = super::html::parse_html(s.as_ref()).annotation.into_iter(); + annotations.extend(data); }, Event::Text(mut s) => { From 433db83b2f7b10b4cf51fcf44fe3aae031395627 Mon Sep 17 00:00:00 2001 From: rolv Date: Sat, 8 Mar 2025 20:33:33 +0000 Subject: [PATCH 04/25] feat: splitting for data annotation requests --- src/api/check.rs | 102 +++++++++++++++++++++++++++++++++++++++++++++- src/api/server.rs | 51 +++++++++++++++++++---- src/cli/check.rs | 4 +- 3 files changed, 147 insertions(+), 10 deletions(-) diff --git a/src/api/check.rs b/src/api/check.rs index b87fd11..53dd1ce 100644 --- a/src/api/check.rs +++ b/src/api/check.rs @@ -239,6 +239,62 @@ pub struct Data<'source> { pub annotation: Vec>, } +impl Data<'_> { + /// Split data into as few fragments as possible, where each fragment + /// contains (if possible) a maximum of `n` characters in it's + /// annotations' markup and text fields. + /// + /// Pattern str `pat` is used for splitting. 
+ #[must_use] + pub fn split(self, n: usize, pat: &str) -> Vec { + // Build vec of breakpoints and the length of the text + markup at that + // potential breakpoint + let mut break_point_lengths = vec![]; + let mut len = 0; + for (i, ann) in self.annotation.iter().enumerate() { + len += + ann.text.as_deref().unwrap_or("").len() + ann.markup.as_deref().unwrap_or("").len(); + if ann.text.as_ref().is_some_and(|t| t.contains(pat)) { + break_point_lengths.push((i, len)); + } + } + + // Decide which breakpoints to split the annotations at + let mut break_points: Vec = vec![]; + if break_point_lengths.len() > 1 { + let (mut i, mut ii) = (0, 1); + let (mut base, mut curr) = (0, 0); + while ii < break_point_lengths.len() { + curr += break_point_lengths[i].1 - base; + + if break_point_lengths[ii].1 - base + curr > n { + break_points.push(break_point_lengths[i].0); + base = break_point_lengths[i].1; + curr = 0; + } + + i += 1; + ii += 1; + } + } + + // Split annotations based on calculated break points + let mut split = Vec::with_capacity(break_points.len()); + let mut iter = self.into_iter(); + let mut taken = 0; + let mut annotations = vec![]; + for break_point in break_points { + while taken != break_point + 1 { + annotations.push(iter.next().unwrap()); + taken += 1; + } + split.push(Data::from_iter(mem::take(&mut annotations))); + } + + split + } +} + impl IntoStatic for Data<'_> { type Static = Data<'static>; fn into_static(self) -> Self::Static { @@ -259,6 +315,15 @@ impl<'source, T: Into>> FromIterator for Data<'source } } +impl<'source> IntoIterator for Data<'source> { + type Item = DataAnnotation<'source>; + type IntoIter = std::vec::IntoIter; + + fn into_iter(self) -> Self::IntoIter { + self.annotation.into_iter() + } +} + impl Serialize for Data<'_> { fn serialize(&self, serializer: S) -> std::result::Result where @@ -601,10 +666,20 @@ impl<'source> Request<'source> { /// /// # Errors /// - /// If `self.text` is none. 
+ /// If `self.text` is [`None`] and `self.data` is [`None`]. pub fn try_split(mut self, n: usize, pat: &str) -> Result> { + // DATA ANNOTATIONS + if let Some(data) = mem::take(&mut self.data) { + return Ok(data + .split(n, pat) + .into_iter() + .map(|d| self.clone().with_data(d)) + .collect()); + } + + // TEXT let text = mem::take(&mut self.text) - .ok_or_else(|| Error::InvalidRequest("missing text field".to_string()))?; + .ok_or_else(|| Error::InvalidRequest("missing text or data field".to_string()))?; let string: &str = match &text { Cow::Owned(s) => s.as_str(), Cow::Borrowed(s) => s, @@ -944,6 +1019,29 @@ impl Response { } annotation } + + /// Joins the given [`Request`] to the current one. + /// + /// This is especially useful when a request was split into multiple + /// requests. + #[must_use] + pub fn append(mut self, mut other: Self) -> Self { + #[cfg(feature = "unstable")] + if let Some(ref mut sr_other) = other.sentence_ranges { + match self.sentence_ranges { + Some(ref mut sr_self) => { + sr_self.append(sr_other); + }, + None => { + std::mem::swap(&mut self.sentence_ranges, &mut other.sentence_ranges); + }, + } + } + + self.matches.append(&mut other.matches); + + self + } } /// Check response with additional context. 
diff --git a/src/api/server.rs b/src/api/server.rs index 1b88a96..7fdd74a 100644 --- a/src/api/server.rs +++ b/src/api/server.rs @@ -413,15 +413,15 @@ impl ServerClient { ) -> Result> { use std::borrow::Cow; - let mut tasks = Vec::with_capacity(requests.len()); + let mut response_with_context: Option = None; - requests + let tasks = requests .into_iter() .map(|r| r.into_static()) - .for_each(|request| { + .map(|request| { let server_client = self.clone(); - tasks.push(tokio::spawn(async move { + tokio::spawn(async move { let response = server_client.check(&request).await?; let text = request.text.ok_or_else(|| { Error::InvalidRequest( @@ -430,11 +430,9 @@ impl ServerClient { ) })?; Result::<(Cow<'static, str>, Response)>::Ok((text, response)) - })); + }) }); - let mut response_with_context: Option = None; - for task in tasks { let (text, response) = task.await.unwrap()?; @@ -447,6 +445,45 @@ impl ServerClient { Ok(response_with_context.unwrap()) } + /// Send multiple check requests and join them into a single response, + /// without any context. + /// + /// # Error + /// + /// If any of the requests has `self.text` or `self.data` field which is + /// [`None`]. + #[cfg(feature = "multithreaded")] + pub async fn check_multiple_and_join_without_context<'source>( + &self, + requests: Vec>, + ) -> Result { + let mut response: Option = None; + + let tasks = requests + .into_iter() + .map(|r| r.into_static()) + .map(|request| { + let server_client = self.clone(); + + tokio::spawn(async move { + let response = server_client.check(&request).await?; + Result::::Ok(response) + }) + }); + + // Make requests in sequence + for task in tasks { + let resp = task.await.unwrap()?; + + response = Some(match response { + Some(r) => r.append(resp), + None => resp, + }) + } + + Ok(response.unwrap()) + } + /// Send a check request to the server, await for the response and annotate /// it. 
#[cfg(feature = "annotate")] diff --git a/src/cli/check.rs b/src/cli/check.rs index ec65059..66965b8 100644 --- a/src/cli/check.rs +++ b/src/cli/check.rs @@ -167,8 +167,10 @@ impl ExecuteSubcommand for Command { FileType::Markdown => parse_markdown(&file_content), _ => unreachable!(), }; + let requests = (request.clone().with_data(data)) + .split(self.max_length, self.split_pattern.as_str()); let response = server_client - .check(&request.clone().with_data(data)) + .check_multiple_and_join_without_context(requests) .await?; (response, file_content) }, From 40c35a9e030bdd7713d6676a0f4586118c63218b Mon Sep 17 00:00:00 2001 From: rolv Date: Sat, 8 Mar 2025 23:25:20 +0000 Subject: [PATCH 05/25] fix: pass `cargo check` for different features combinations --- src/api/server.rs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/api/server.rs b/src/api/server.rs index 7fdd74a..4c1aa53 100644 --- a/src/api/server.rs +++ b/src/api/server.rs @@ -1,14 +1,17 @@ //! Structure to communicate with some `LanguageTool` server through the API. 
+#[cfg(feature = "multithreaded")] +use crate::api::check; use crate::{ api::{ - check::{self, Request, Response}, + check::{Request, Response}, languages, words, }, error::{Error, Result}, }; #[cfg(feature = "cli")] use clap::Args; +#[cfg(feature = "multithreaded")] use lifetime::IntoStatic; use reqwest::Client; use serde::{Deserialize, Serialize}; From 1ec8f829e94400ea5880acdd136c5cee512df35f Mon Sep 17 00:00:00 2001 From: rolv Date: Sat, 8 Mar 2025 23:31:03 +0000 Subject: [PATCH 06/25] fix: satisfy toml pre-commit hook --- Cargo.toml | 43 ++++++++++++++++++++++++++++++++----------- 1 file changed, 32 insertions(+), 11 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index ce73917..2ad45c3 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -10,23 +10,31 @@ required-features = ["cli"] [dependencies] annotate-snippets = {version = "^0.9.1", optional = true} -clap = {version = "^4.5.18", features = ["cargo", "derive", "env", "wrap_help"], optional = true} -clap-verbosity-flag = { version = "3.0.2", optional = true } +clap = {version = "^4.5.18", features = [ + "cargo", + "derive", + "env", + "wrap_help", +], optional = true} +clap-verbosity-flag = {version = "3.0.2", optional = true} clap_complete = {version = "^4.5.2", optional = true} -enum_dispatch = {version = "0.3.13", optional = true} -scraper = {version = "0.23.1", optional = true} ego-tree = {version = "0.10.0", optional = true} +enum_dispatch = {version = "0.3.13", optional = true} is-terminal = {version = "0.4.3", optional = true} lifetime = {version = "0.1.0", features = ["macros"]} -log = { version = "0.4.25", optional = true } -pretty_env_logger = { version = "0.5.0", optional = true } +log = {version = "0.4.25", optional = true} +pretty_env_logger = {version = "0.5.0", optional = true} pulldown-cmark = {version = "0.10.2", optional = true} reqwest = {version = "^0.11", default-features = false, features = ["json"]} +scraper = {version = "0.23.1", optional = true} serde = {version = "^1.0", features = 
["derive"]} serde_json = "^1.0" termcolor = {version = "1.2.0", optional = true} thiserror = "^1.0" -tokio = {version = "^1.0", features = ["macros", "rt-multi-thread"], optional = true} +tokio = {version = "^1.0", features = [ + "macros", + "rt-multi-thread", +], optional = true} typst-syntax = {version = "^0.12.0", optional = true} [dev-dependencies] @@ -41,23 +49,36 @@ tokio = {version = "^1.0", features = ["macros"]} [features] annotate = ["dep:annotate-snippets"] -cli = ["annotate", "color", "dep:clap", "dep:clap-verbosity-flag", "dep:enum_dispatch", "dep:is-terminal", "dep:log", "dep:pretty_env_logger", "multithreaded", "typst", "markdown", "html"] +clap-verbosity-flag = ["dep:clap-verbosity-flag"] +cli = [ + "annotate", + "color", + "dep:clap", + "dep:clap-verbosity-flag", + "dep:enum_dispatch", + "dep:is-terminal", + "dep:log", + "dep:pretty_env_logger", + "multithreaded", + "typst", + "markdown", + "html", +] cli-complete = ["cli", "clap_complete"] color = ["annotate-snippets?/color", "dep:termcolor"] default = ["cli", "native-tls"] docker = [] full = ["cli-complete", "docker", "unstable"] html = ["dep:ego-tree", "dep:scraper"] +log = ["dep:log"] markdown = ["dep:pulldown-cmark", "html"] multithreaded = ["dep:tokio"] native-tls = ["reqwest/native-tls"] native-tls-vendored = ["reqwest/native-tls-vendored"] +pretty_env_logger = ["dep:pretty_env_logger"] pulldown-cmark = ["dep:pulldown-cmark"] typst = ["dep:typst-syntax"] unstable = [] -log = ["dep:log"] -pretty_env_logger = ["dep:pretty_env_logger"] -clap-verbosity-flag = ["dep:clap-verbosity-flag"] [lib] name = "languagetool_rust" From 2c51dd4442d373559ed51d655f8e64d823596603 Mon Sep 17 00:00:00 2001 From: rolv Date: Tue, 11 Mar 2025 22:43:48 +0000 Subject: [PATCH 07/25] feat(tests): snapshot tests for file parsers --- Cargo.lock | 65 ++++++++ Cargo.toml | 3 +- tests/cli.rs | 48 ++++++ tests/sample_files/example.html | 123 +++++++++++++++ tests/sample_files/example.typ | 148 ++++++++++++++++++ 
.../snapshots/cli__autodetect_html_file.snap | 25 +++ .../cli__autodetect_markdown_file.snap | 60 +++++++ .../snapshots/cli__autodetect_typst_file.snap | 18 +++ 8 files changed, 489 insertions(+), 1 deletion(-) create mode 100644 tests/sample_files/example.html create mode 100644 tests/sample_files/example.typ create mode 100644 tests/snapshots/cli__autodetect_html_file.snap create mode 100644 tests/snapshots/cli__autodetect_markdown_file.snap create mode 100644 tests/snapshots/cli__autodetect_typst_file.snap diff --git a/Cargo.lock b/Cargo.lock index 142de05..347ab40 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -328,6 +328,18 @@ dependencies = [ "windows-sys 0.59.0", ] +[[package]] +name = "console" +version = "0.15.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "054ccb5b10f9f2cbf51eb355ca1d05c2d279ce1804688d0db74b4733a5aeafd8" +dependencies = [ + "encode_unicode", + "libc", + "once_cell", + "windows-sys 0.59.0", +] + [[package]] name = "core-foundation" version = "0.9.4" @@ -504,6 +516,12 @@ version = "1.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "60b1af1c220855b6ceac025d3f6ecdd2b7c4894bfe9cd9bda4fbb4bc7c0d4cf0" +[[package]] +name = "encode_unicode" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34aa73646ffb006b8f5147f3dc182bd4bcb190227ce861fc4a4844bf8e3cb2c0" + [[package]] name = "encoding_rs" version = "0.8.35" @@ -1019,6 +1037,20 @@ dependencies = [ "hashbrown", ] +[[package]] +name = "insta" +version = "1.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "50259abbaa67d11d2bcafc7ba1d094ed7a0c70e3ce893f0d0997f73558cb3084" +dependencies = [ + "console", + "linked-hash-map", + "once_cell", + "pin-project", + "serde", + "similar", +] + [[package]] name = "ipnet" version = "2.11.0" @@ -1082,6 +1114,7 @@ dependencies = [ "ego-tree", "enum_dispatch", "futures", + "insta", "is-terminal", "lifetime", "log", @@ 
-1141,6 +1174,12 @@ dependencies = [ "syn 1.0.109", ] +[[package]] +name = "linked-hash-map" +version = "0.5.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0717cef1bc8b636c6e1c1bbdefc09e6322da8a9321966e8928ef80d20f7f770f" + [[package]] name = "linux-raw-sys" version = "0.4.15" @@ -1426,6 +1465,26 @@ dependencies = [ "siphasher", ] +[[package]] +name = "pin-project" +version = "1.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "677f1add503faace112b9f1373e43e9e054bfdd22ff1a63c1bc485eaec6a6a8a" +dependencies = [ + "pin-project-internal", +] + +[[package]] +name = "pin-project-internal" +version = "1.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6e918e4ff8c4549eb882f14b3a4bc8c8bc93de829416eacf579f1207a8fbf861" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.98", +] + [[package]] name = "pin-project-lite" version = "0.2.16" @@ -1863,6 +1922,12 @@ version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" +[[package]] +name = "similar" +version = "2.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbbb5d9659141646ae647b42fe094daf6c6192d1620870b449d9557f748b2daa" + [[package]] name = "siphasher" version = "1.0.1" diff --git a/Cargo.toml b/Cargo.toml index a531d8a..027df03 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -43,6 +43,7 @@ assert_matches = "1.5.0" codspeed-criterion-compat = "2.7.0" criterion = "0.5" futures = "0.3" +insta = {version = "1.41.1", features = ["yaml"]} predicates = "3.0.3" tempfile = "3.5.0" tokio = {version = "^1.0", features = ["macros"]} @@ -94,7 +95,7 @@ license = "MIT" name = "languagetool-rust" readme = "README.md" repository = "https://github.com/jeertmans/languagetool-rust" -rust-version = "1.77.2" +rust-version = "1.80.0" version = "2.1.4" [package.metadata.release] diff --git a/tests/cli.rs 
b/tests/cli.rs index 314ef24..d16973c 100644 --- a/tests/cli.rs +++ b/tests/cli.rs @@ -1,6 +1,12 @@ +use std::{path::PathBuf, sync::LazyLock}; + use assert_cmd::Command; use predicates::{boolean::OrPredicate, str::contains}; +static PATH_ROOT: LazyLock = LazyLock::new(|| PathBuf::from(env!("CARGO_MANIFEST_DIR"))); +static PATH_SAMPLE_FILES: LazyLock = + LazyLock::new(|| PATH_ROOT.join("tests").join("sample_files")); + #[test] fn test_basic_check_text() { let mut cmd = Command::cargo_bin("ltrs").unwrap(); @@ -503,3 +509,45 @@ fn test_words_delete() { contains("invalid request"), )); } + +#[test] +fn test_check_file_typst() { + let mut cmd = Command::cargo_bin("ltrs").unwrap(); + let output = cmd + .arg("check") + .arg(PATH_SAMPLE_FILES.join("example.typ")) + .output() + .unwrap(); + insta::assert_snapshot!( + "autodetect_typst_file", + String::from_utf8(output.stdout).unwrap() + ); +} + +#[test] +fn test_check_file_html() { + let mut cmd = Command::cargo_bin("ltrs").unwrap(); + let output = cmd + .arg("check") + .arg(PATH_SAMPLE_FILES.join("example.html")) + .output() + .unwrap(); + insta::assert_snapshot!( + "autodetect_html_file", + String::from_utf8(output.stdout).unwrap() + ); +} + +#[test] +fn test_check_file_markdown() { + let mut cmd = Command::cargo_bin("ltrs").unwrap(); + let output = cmd + .arg("check") + .arg(PATH_ROOT.join("README.md")) + .output() + .unwrap(); + insta::assert_snapshot!( + "autodetect_markdown_file", + String::from_utf8(output.stdout).unwrap() + ); +} diff --git a/tests/sample_files/example.html b/tests/sample_files/example.html new file mode 100644 index 0000000..d0de15a --- /dev/null +++ b/tests/sample_files/example.html @@ -0,0 +1,123 @@ + + + + +/home/rolv/Documents/zk/program/i4BJbPUbyR_stremio.md + + + + + +al + + + + + +

+ Fork me? Fork you, @octocat! Here is a link: makarainen.net +

+ + +#[cfg(feature = "html")] +pub fn parse_html(file_content: impl AsRef<str>) -> String { + use html_parser::{ElementVariant, Node}; + use select::{document::Document, node::Data, predicate}; + + let mut txt = String::new(); + + + +

Hello world

+ + +
+
+ Task +
+
+
+
+ Test an incorect spling +
+
+
+
+ +
+ +

+ Sean made a change +

+
+ + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Header 1Header 2Header 3Header 4
Row 1 Col 1Row 1 Col 2Row 1 Col 3Row 1 Col 4
Row 2 Col 1Row 2 Col 2Row 2 Col 3Row 2 Col 4
Row 3 Col 1Row 3 Col 2Row 3 Col 3Row 3 Col 4
+
+ + + diff --git a/tests/sample_files/example.typ b/tests/sample_files/example.typ new file mode 100644 index 0000000..a83a174 --- /dev/null +++ b/tests/sample_files/example.typ @@ -0,0 +1,148 @@ +#set page(width: 10cm, height: auto) + += Introduction +In this report, we will explore the +various factors that influence _fluid +dynamics_ in glaciers and how they +contribute to the formation and +behaviour of these natural structures. + ++ The climate + - Temperatre + - Precipitation ++ The topography ++ The geology + +Glaciers as the one shown in +@glaciers will cease to exist if +we don't take action soon! + +#figure( + image("glacier.jpg", width: 70%), + caption: [ + _Glaciers_ form an important part + of the earth's climate system. + ], +) + + += Methods +We follow the glacier melting models +established in @glacier-melt. + +#bibliography("works.bib") + +The flow rate of a glacier is given +by the following equation: + +$ Q = rho A v + "time offset" $ + +Total displaced soil by glacial flow: + +$ 7.32 beta + + sum_(i=0)^nabla + (Q_i (a_i - epsilon)) / 2 $ + += Tables + +/* Text in a comment +* block. */ +// Text in a regular comment. 
+ +#table( + columns: (1fr, auto, auto), + inset: 10pt, + align: horizon, + table.header( + [], [*Volume*], [*Parameters*], + ), + image("cylinder.svg"), + $ pi h (D^2 - d^2) / 4 $, + [ + $h$: height \ + $D$: outer radius \ + $d$: inner radius + ], + image("tetrahedron.svg"), + $ sqrt(2) / 12 a^3 $, + [$a$: edge length] +) + +#set table( + stroke: none, + gutter: 0.2em, + fill: (x, y) => + if x == 0 or y == 0 { gray }, + inset: (right: 1.5em), +) + +#show table.cell: it => { + if it.x == 0 or it.y == 0 { + set text(white) + strong(it) + } else if it.body == [] { + // Replace empty cells with 'N/A' + pad(..it.inset)[_N/A_] + } else { + it + } +} + +#let a = table.cell( + fill: green.lighten(60%), +)[A] +#let b = table.cell( + fill: aqua.lighten(60%), +)[B] + +#table( + columns: 4, + [], [Exam 1], [Exam 2], [Exam 3], + + [John], [], a, [], + [Mary], [], a, a, + [Robert], b, a, b, +) + += Code blocks + +Adding `rbx` to `rcx` gives +the desired result. + +What is ```rust fn main()``` in Rust +would be ```c int main()``` in C. + +```rust +fn main() { + println!("Hello World!"); +} +``` + +This has ``` `backticks` ``` in it +(but the spaces are trimmed). And +``` here``` the leading space is +also trimmed. + += Fibonacci sequence +The Fibonacci sequence is defined through the +recurrence relation $F_n = F_(n-1) + F_(n-2)$. 
+It can also be expressed in _closed form:_ + +$ F_n = round(1 / sqrt(5) phi.alt^n), quad + phi.alt = (1 + sqrt(5)) / 2 $ + +#let count = 8 +#let nums = range(1, count + 1) +#let fib(n) = ( + if n <= 2 { 1 } + else { fib(n - 1) + fib(n - 2) } +) + +The first #count numbers of the sequence are: + +#align(center, table( + columns: count, + ..nums.map(n => $F_#n$), + ..nums.map(n => str(fib(n))), +)) + diff --git a/tests/snapshots/cli__autodetect_html_file.snap b/tests/snapshots/cli__autodetect_html_file.snap new file mode 100644 index 0000000..6731a66 --- /dev/null +++ b/tests/snapshots/cli__autodetect_html_file.snap @@ -0,0 +1,25 @@ +--- +source: tests/cli.rs +expression: "String::from_utf8(output.stdout).unwrap()" +--- +error[MORFOLOGIK_RULE_EN_US]: Possible spelling mistake found. + --> /home/rolv/repos/languagetool-rust/tests/sample_files/example.html:4:38 + | +4 | /home/rolv/repos/languagetool-rust/tests/sample_files/example.html:5:44 + | +5 | ...ask
Test an incorect spling
... + | ^^^^^^^^ Possible spelling mistake + | -------- help: incorrect + | +error[MORFOLOGIK_RULE_EN_US]: Possible spelling mistake found. + --> /home/rolv/repos/languagetool-rust/tests/sample_files/example.html:5:44 + | +5 | ...>
Test an incorect spling
... + | ^^^^^^ Possible spelling mistake + | ------ help: spring, spying, sling, spline, splint, ... (2 not shown) + | diff --git a/tests/snapshots/cli__autodetect_markdown_file.snap b/tests/snapshots/cli__autodetect_markdown_file.snap new file mode 100644 index 0000000..6300d2e --- /dev/null +++ b/tests/snapshots/cli__autodetect_markdown_file.snap @@ -0,0 +1,60 @@ +--- +source: tests/cli.rs +expression: "String::from_utf8(output.stdout).unwrap()" +--- +error[MORFOLOGIK_RULE_EN_US]: Possible spelling mistake found. + --> /home/rolv/repos/languagetool-rust/README.md:5:44 + | +5 | ...piling LTRS. #### Default Features - - **cli**: Adds command-line related methods for ... + | ^^^^^^^ Possible spelling mistake + | ------- help: CLI, Clip, CGI, CPI, CSI, ... (1504 not shown) + | +error[MORFOLOGIK_RULE_EN_US]: Possible spelling mistake found. + --> /home/rolv/repos/languagetool-rust/README.md:9:44 + | +9 | ...ate**, **color**, **multithreaded**. - **native-tls**: Enables TLS functionality provided by ... + | ^^^^^^^^^^^^^^ Possible spelling mistake + | -------------- help: natives, natively, nativists, nativeness, naivetes + | +error[MORFOLOGIK_RULE_EN_US]: Possible spelling mistake found. + --> /home/rolv/repos/languagetool-rust/README.md:3:44 + | +3 | ...annotate results from check request. - **cli-complete**: Adds commands to generate completion f... + | ^^^^^^^^^^^^^^^^ Possible spelling mistake + | ---------------- help: incomplete + | +error[MORFOLOGIK_RULE_EN_US]: Possible spelling mistake found. + --> /home/rolv/repos/languagetool-rust/README.md:6:44 + | +6 | ...shells. This feature also activates the **cli** feature. Enter ltrs completions --help ... + | ^^^^^^^ Possible spelling mistake + | ------- help: CLI, clip, CGI, CPI, CSI, ... (1541 not shown) + | +error[MORFOLOGIK_RULE_EN_US]: Possible spelling mistake found. + --> /home/rolv/repos/languagetool-rust/README.md:10:44 + | +10 | ...es color outputting in the terminal. 
If **cli** feature is also enabled, the --color= /home/rolv/repos/languagetool-rust/README.md:13:44 + | +13 | ...d**: Enables multithreaded requests. - **native-tls-vendored**: Enables the vendored feature of native... + | ^^^^^^^^^^^^^^^^^^^^^^^ Possible spelling mistake + | ----------------------- help: native-TLS-vendored + | +error[MORFOLOGIK_RULE_EN_US]: Possible spelling mistake found. + --> /home/rolv/repos/languagetool-rust/README.md:14:44 + | +14 | ...u are planning to use HTTPS servers. - **undoc**: Adds more fields to JSON responses tha... + | ^^^^^^^^^ Possible spelling mistake + | --------- help: undo, undock, undos + | +error[MORFOLOGIK_RULE_EN_US]: Possible spelling mistake found. + --> /home/rolv/repos/languagetool-rust/README.md:19:44 + | +19 | ...cases. All added fields are stored in a hashmap as JSON values. ## Related Projects ... + | ^^^^^^^ Possible spelling mistake + | ------- help: hash map + | diff --git a/tests/snapshots/cli__autodetect_typst_file.snap b/tests/snapshots/cli__autodetect_typst_file.snap new file mode 100644 index 0000000..323e9f9 --- /dev/null +++ b/tests/snapshots/cli__autodetect_typst_file.snap @@ -0,0 +1,18 @@ +--- +source: tests/cli.rs +expression: "String::from_utf8(output.stdout).unwrap()" +--- +error[MORFOLOGIK_RULE_EN_US]: Possible spelling mistake. ‘behaviour’ is British English. + --> /home/rolv/repos/languagetool-rust/tests/sample_files/example.typ:7:44 + | +7 | ...ow they contribute to the formation and behaviour of these natural structures. + The cli... + | ^^^^^^^^^ Possible spelling mistake + | --------- help: behavior + | +error[MORFOLOGIK_RULE_EN_US]: Possible spelling mistake found. + --> /home/rolv/repos/languagetool-rust/tests/sample_files/example.typ:9:44 + | +9 | ...behaviour of these natural structures. + The climate - Temperatre - Precipitation + The topography + The... 
+ | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Possible spelling mistake + | ----------------------------- help: Temperature, Temperate + | From 11de4b203f3ef83d38ad5f9f1e30806d057901e8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A9rome=20Eertmans?= Date: Wed, 12 Mar 2025 12:31:27 +0100 Subject: [PATCH 08/25] remove dup --- src/api/server.rs | 6 ------ 1 file changed, 6 deletions(-) diff --git a/src/api/server.rs b/src/api/server.rs index 3c0643b..4bb56ff 100644 --- a/src/api/server.rs +++ b/src/api/server.rs @@ -420,12 +420,6 @@ impl ServerClient { requests: Vec>, ) -> Result> { use std::borrow::Cow; - - if requests.is_empty() { - return Err(Error::InvalidRequest( - "no request; cannot join zero request".to_string(), - )); - } if requests.is_empty() { return Err(Error::InvalidRequest( From 4fa1244584765fe41ad93b04935b08b7452ab543 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A9rome=20Eertmans?= Date: Wed, 12 Mar 2025 12:52:41 +0100 Subject: [PATCH 09/25] fix(tests): go for `lazy_static` --- Cargo.lock | 1 + Cargo.toml | 3 ++- tests/cli.rs | 9 +++++---- 3 files changed, 8 insertions(+), 5 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 347ab40..423a544 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1116,6 +1116,7 @@ dependencies = [ "futures", "insta", "is-terminal", + "lazy_static", "lifetime", "log", "predicates", diff --git a/Cargo.toml b/Cargo.toml index 027df03..3e5f825 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -44,6 +44,7 @@ codspeed-criterion-compat = "2.7.0" criterion = "0.5" futures = "0.3" insta = {version = "1.41.1", features = ["yaml"]} +lazy_static = "1.5.0" predicates = "3.0.3" tempfile = "3.5.0" tokio = {version = "^1.0", features = ["macros"]} @@ -95,7 +96,7 @@ license = "MIT" name = "languagetool-rust" readme = "README.md" repository = "https://github.com/jeertmans/languagetool-rust" -rust-version = "1.80.0" +rust-version = "1.77.2" version = "2.1.4" [package.metadata.release] diff --git a/tests/cli.rs b/tests/cli.rs index 863f026..6d45368 100644 --- 
a/tests/cli.rs +++ b/tests/cli.rs @@ -1,4 +1,4 @@ -use std::{path::PathBuf, sync::LazyLock}; +use std::path::PathBuf; use assert_cmd::Command; use predicates::{ @@ -6,9 +6,10 @@ use predicates::{ str::{contains, is_empty, is_match}, }; -static PATH_ROOT: LazyLock = LazyLock::new(|| PathBuf::from(env!("CARGO_MANIFEST_DIR"))); -static PATH_SAMPLE_FILES: LazyLock = - LazyLock::new(|| PATH_ROOT.join("tests").join("sample_files")); +lazy_static::lazy_static! { + static ref PATH_ROOT: PathBuf = PathBuf::from(env!("CARGO_MANIFEST_DIR")); + static ref PATH_SAMPLE_FILES: PathBuf = PATH_ROOT.join("tests").join("sample_files"); +} #[test] fn test_basic_check_text() { From 44b10d2446f86ed64b3758146ee1a2afa8bdca92 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A9rome=20Eertmans?= Date: Wed, 12 Mar 2025 12:53:22 +0100 Subject: [PATCH 10/25] fmt --- src/api/server.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/api/server.rs b/src/api/server.rs index 4bb56ff..cb63393 100644 --- a/src/api/server.rs +++ b/src/api/server.rs @@ -444,7 +444,7 @@ impl ServerClient { Result::<(Cow<'static, str>, Response)>::Ok((text, response)) }) }); - + let mut response_with_context: Option = None; for task in tasks { From 5b95ba4a0a8ee4d8ab20844e24f6c5136d238c0e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A9rome=20Eertmans?= Date: Wed, 12 Mar 2025 12:57:18 +0100 Subject: [PATCH 11/25] setup MSRV to minimal --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 3e5f825..3c8b6af 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -96,7 +96,7 @@ license = "MIT" name = "languagetool-rust" readme = "README.md" repository = "https://github.com/jeertmans/languagetool-rust" -rust-version = "1.77.2" +rust-version = "1.77.0" version = "2.1.4" [package.metadata.release] From 3c560b936ddd4fa32738fa8129e082a44b59cd34 Mon Sep 17 00:00:00 2001 From: rolv Date: Wed, 12 Mar 2025 22:16:34 +0000 Subject: [PATCH 12/25] fix(tests): filter out paths 
from snapshots --- Cargo.lock | 2 +- Cargo.toml | 6 +++++- tests/cli.rs | 18 +++++++++++++++--- tests/snapshots/cli__autodetect_html_file.snap | 6 +++--- .../cli__autodetect_markdown_file.snap | 16 ++++++++-------- .../snapshots/cli__autodetect_typst_file.snap | 4 ++-- 6 files changed, 34 insertions(+), 18 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 423a544..bacbb34 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1047,7 +1047,7 @@ dependencies = [ "linked-hash-map", "once_cell", "pin-project", - "serde", + "regex", "similar", ] diff --git a/Cargo.toml b/Cargo.toml index 3c8b6af..1def06d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -43,7 +43,7 @@ assert_matches = "1.5.0" codspeed-criterion-compat = "2.7.0" criterion = "0.5" futures = "0.3" -insta = {version = "1.41.1", features = ["yaml"]} +insta = {version = "1.41.1", features = ["filters"]} lazy_static = "1.5.0" predicates = "3.0.3" tempfile = "3.5.0" @@ -120,3 +120,7 @@ required-features = ["cli"] [[test]] name = "match-positions" path = "tests/match_positions.rs" + +[profile.dev.package] +insta.opt-level = 3 +similar.opt-level = 3 diff --git a/tests/cli.rs b/tests/cli.rs index 6d45368..288ca8f 100644 --- a/tests/cli.rs +++ b/tests/cli.rs @@ -11,6 +11,18 @@ lazy_static::lazy_static! { static ref PATH_SAMPLE_FILES: PathBuf = PATH_ROOT.join("tests").join("sample_files"); } +const PATH_FILTERS: [(&str, &str); 1] = [(r" --> .*[\/].*\n", " --> [path]\n")]; +macro_rules! 
assert_snapshot { + ($label: expr, $snap: expr) => { + insta::with_settings!({filters => PATH_FILTERS}, { + insta::assert_snapshot!( + $label, + $snap + ); + }); + }; +} + #[test] fn test_basic_check_text() { let mut cmd = Command::cargo_bin("ltrs").unwrap(); @@ -575,7 +587,7 @@ fn test_check_file_typst() { .arg(PATH_SAMPLE_FILES.join("example.typ")) .output() .unwrap(); - insta::assert_snapshot!( + assert_snapshot!( "autodetect_typst_file", String::from_utf8(output.stdout).unwrap() ); @@ -589,7 +601,7 @@ fn test_check_file_html() { .arg(PATH_SAMPLE_FILES.join("example.html")) .output() .unwrap(); - insta::assert_snapshot!( + assert_snapshot!( "autodetect_html_file", String::from_utf8(output.stdout).unwrap() ); @@ -603,7 +615,7 @@ fn test_check_file_markdown() { .arg(PATH_ROOT.join("README.md")) .output() .unwrap(); - insta::assert_snapshot!( + assert_snapshot!( "autodetect_markdown_file", String::from_utf8(output.stdout).unwrap() ); diff --git a/tests/snapshots/cli__autodetect_html_file.snap b/tests/snapshots/cli__autodetect_html_file.snap index 6731a66..33fc1bd 100644 --- a/tests/snapshots/cli__autodetect_html_file.snap +++ b/tests/snapshots/cli__autodetect_html_file.snap @@ -3,21 +3,21 @@ source: tests/cli.rs expression: "String::from_utf8(output.stdout).unwrap()" --- error[MORFOLOGIK_RULE_EN_US]: Possible spelling mistake found. - --> /home/rolv/repos/languagetool-rust/tests/sample_files/example.html:4:38 + --> [path] | 4 | /home/rolv/repos/languagetool-rust/tests/sample_files/example.html:5:44 + --> [path] | 5 | ...ask
Test an incorect spling
... | ^^^^^^^^ Possible spelling mistake | -------- help: incorrect | error[MORFOLOGIK_RULE_EN_US]: Possible spelling mistake found. - --> /home/rolv/repos/languagetool-rust/tests/sample_files/example.html:5:44 + --> [path] | 5 | ...>
Test an incorect spling
... | ^^^^^^ Possible spelling mistake diff --git a/tests/snapshots/cli__autodetect_markdown_file.snap b/tests/snapshots/cli__autodetect_markdown_file.snap index 6300d2e..1859f59 100644 --- a/tests/snapshots/cli__autodetect_markdown_file.snap +++ b/tests/snapshots/cli__autodetect_markdown_file.snap @@ -3,56 +3,56 @@ source: tests/cli.rs expression: "String::from_utf8(output.stdout).unwrap()" --- error[MORFOLOGIK_RULE_EN_US]: Possible spelling mistake found. - --> /home/rolv/repos/languagetool-rust/README.md:5:44 + --> [path] | 5 | ...piling LTRS. #### Default Features - - **cli**: Adds command-line related methods for ... | ^^^^^^^ Possible spelling mistake | ------- help: CLI, Clip, CGI, CPI, CSI, ... (1504 not shown) | error[MORFOLOGIK_RULE_EN_US]: Possible spelling mistake found. - --> /home/rolv/repos/languagetool-rust/README.md:9:44 + --> [path] | 9 | ...ate**, **color**, **multithreaded**. - **native-tls**: Enables TLS functionality provided by ... | ^^^^^^^^^^^^^^ Possible spelling mistake | -------------- help: natives, natively, nativists, nativeness, naivetes | error[MORFOLOGIK_RULE_EN_US]: Possible spelling mistake found. - --> /home/rolv/repos/languagetool-rust/README.md:3:44 + --> [path] | 3 | ...annotate results from check request. - **cli-complete**: Adds commands to generate completion f... | ^^^^^^^^^^^^^^^^ Possible spelling mistake | ---------------- help: incomplete | error[MORFOLOGIK_RULE_EN_US]: Possible spelling mistake found. - --> /home/rolv/repos/languagetool-rust/README.md:6:44 + --> [path] | 6 | ...shells. This feature also activates the **cli** feature. Enter ltrs completions --help ... | ^^^^^^^ Possible spelling mistake | ------- help: CLI, clip, CGI, CPI, CSI, ... (1541 not shown) | error[MORFOLOGIK_RULE_EN_US]: Possible spelling mistake found. - --> /home/rolv/repos/languagetool-rust/README.md:10:44 + --> [path] | 10 | ...es color outputting in the terminal. 
If **cli** feature is also enabled, the --color= /home/rolv/repos/languagetool-rust/README.md:13:44 + --> [path] | 13 | ...d**: Enables multithreaded requests. - **native-tls-vendored**: Enables the vendored feature of native... | ^^^^^^^^^^^^^^^^^^^^^^^ Possible spelling mistake | ----------------------- help: native-TLS-vendored | error[MORFOLOGIK_RULE_EN_US]: Possible spelling mistake found. - --> /home/rolv/repos/languagetool-rust/README.md:14:44 + --> [path] | 14 | ...u are planning to use HTTPS servers. - **undoc**: Adds more fields to JSON responses tha... | ^^^^^^^^^ Possible spelling mistake | --------- help: undo, undock, undos | error[MORFOLOGIK_RULE_EN_US]: Possible spelling mistake found. - --> /home/rolv/repos/languagetool-rust/README.md:19:44 + --> [path] | 19 | ...cases. All added fields are stored in a hashmap as JSON values. ## Related Projects ... | ^^^^^^^ Possible spelling mistake diff --git a/tests/snapshots/cli__autodetect_typst_file.snap b/tests/snapshots/cli__autodetect_typst_file.snap index 323e9f9..3cb1f44 100644 --- a/tests/snapshots/cli__autodetect_typst_file.snap +++ b/tests/snapshots/cli__autodetect_typst_file.snap @@ -3,14 +3,14 @@ source: tests/cli.rs expression: "String::from_utf8(output.stdout).unwrap()" --- error[MORFOLOGIK_RULE_EN_US]: Possible spelling mistake. ‘behaviour’ is British English. - --> /home/rolv/repos/languagetool-rust/tests/sample_files/example.typ:7:44 + --> [path] | 7 | ...ow they contribute to the formation and behaviour of these natural structures. + The cli... | ^^^^^^^^^ Possible spelling mistake | --------- help: behavior | error[MORFOLOGIK_RULE_EN_US]: Possible spelling mistake found. - --> /home/rolv/repos/languagetool-rust/tests/sample_files/example.typ:9:44 + --> [path] | 9 | ...behaviour of these natural structures. + The climate - Temperatre - Precipitation + The topography + The... 
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Possible spelling mistake From 07235cd65fdabb3fbbb86759c65685fb6ac948a2 Mon Sep 17 00:00:00 2001 From: rolv Date: Wed, 12 Mar 2025 22:30:50 +0000 Subject: [PATCH 13/25] refactor(tests): use util function for building cmd --- tests/cli.rs | 146 ++++++++++++++++++++------------------------------- 1 file changed, 56 insertions(+), 90 deletions(-) diff --git a/tests/cli.rs b/tests/cli.rs index 288ca8f..ba2b68c 100644 --- a/tests/cli.rs +++ b/tests/cli.rs @@ -23,10 +23,13 @@ macro_rules! assert_snapshot { }; } +fn get_cmd() -> Command { + Command::cargo_bin("ltrs").unwrap() +} + #[test] fn test_basic_check_text() { - let mut cmd = Command::cargo_bin("ltrs").unwrap(); - let assert = cmd + let assert = get_cmd() .arg("check") .arg("-t") .arg("\"some text that is given as text\"") @@ -36,8 +39,11 @@ fn test_basic_check_text() { #[test] fn test_basic_check_no_errors() { - let mut cmd = Command::cargo_bin("ltrs").unwrap(); - let assert = cmd.arg("check").arg("-t").arg("\"I am a star.\"").assert(); + let assert = get_cmd() + .arg("check") + .arg("-t") + .arg("\"I am a star.\"") + .assert(); assert .success() .stdout(contains("No errors were found in provided text")); @@ -45,8 +51,7 @@ fn test_basic_check_no_errors() { #[test] fn test_basic_check_empty_text() { - let mut cmd = Command::cargo_bin("ltrs").unwrap(); - let assert = cmd.arg("check").arg("--text=").assert(); + let assert = get_cmd().arg("check").arg("--text=").assert(); assert .success() .stderr(is_match(r".*WARN.* No input text was provided, skipping.").unwrap()); @@ -54,8 +59,7 @@ fn test_basic_check_empty_text() { #[test] fn test_basic_check_data() { - let mut cmd = Command::cargo_bin("ltrs").unwrap(); - let assert = cmd + let assert = get_cmd() .arg("check") .arg("-d") .arg( @@ -68,8 +72,7 @@ fn test_basic_check_data() { #[test] fn test_basic_check_wrong_data_1() { - let mut cmd = Command::cargo_bin("ltrs").unwrap(); - let assert = cmd + let assert = get_cmd() .arg("check") 
.arg("-d") .arg("\"some text that is given as text\"") @@ -79,15 +82,13 @@ fn test_basic_check_wrong_data_1() { #[test] fn test_basic_check_wrong_data_2() { - let mut cmd = Command::cargo_bin("ltrs").unwrap(); - let assert = cmd.arg("check").arg("-d").arg("\"{}\"").assert(); + let assert = get_cmd().arg("check").arg("-d").arg("\"{}\"").assert(); assert.failure().stderr(contains("invalid value")); } #[test] fn test_basic_check_wrong_data_3() { - let mut cmd = Command::cargo_bin("ltrs").unwrap(); - let assert = cmd + let assert = get_cmd() .arg("check") .arg("-d") .arg("\"some text that is given as text\"") @@ -97,8 +98,7 @@ fn test_basic_check_wrong_data_3() { #[test] fn test_basic_check_piped() { - let mut cmd = Command::cargo_bin("ltrs").unwrap(); - let assert = cmd + let assert = get_cmd() .arg("check") .write_stdin("some text that is written to stdin") .assert(); @@ -107,8 +107,7 @@ fn test_basic_check_piped() { #[test] fn test_basic_check_stdin_verbose() { - let mut cmd = Command::cargo_bin("ltrs").unwrap(); - let assert = cmd + let assert = get_cmd() .arg("check") .arg("-v") .arg("-l") @@ -126,8 +125,10 @@ fn test_basic_check_file() { let mut file = tempfile::NamedTempFile::new().unwrap(); writeln!(file, "Some text with a error inside.").unwrap(); - let mut cmd = Command::cargo_bin("ltrs").unwrap(); - let assert = cmd.arg("check").arg(file.path().to_str().unwrap()).assert(); + let assert = get_cmd() + .arg("check") + .arg(file.path().to_str().unwrap()) + .assert(); assert.success(); } @@ -141,8 +142,7 @@ fn test_basic_check_files() { let mut file2 = tempfile::NamedTempFile::new().unwrap(); writeln!(file2, "Another text with an eror.").unwrap(); - let mut cmd = Command::cargo_bin("ltrs").unwrap(); - let assert = cmd + let assert = get_cmd() .arg("check") .arg(file1.path().to_str().unwrap()) .arg(file2.path().to_str().unwrap()) @@ -159,8 +159,7 @@ fn test_basic_check_files_with_empty_file() { let file2 = tempfile::NamedTempFile::new().unwrap(); - let mut cmd = 
Command::cargo_bin("ltrs").unwrap(); - let assert = cmd + let assert = get_cmd() .arg("check") .arg("-v") .arg(file1.path().to_str().unwrap()) @@ -173,8 +172,7 @@ fn test_basic_check_files_with_empty_file() { #[test] fn test_basic_check_unexisting_file() { - let mut cmd = Command::cargo_bin("ltrs").unwrap(); - let assert = cmd + let assert = get_cmd() .arg("check") .arg("some_file_path_that_should_not_exist.txt") .assert(); @@ -183,8 +181,7 @@ fn test_basic_check_unexisting_file() { #[test] fn test_check_with_language() { - let mut cmd = Command::cargo_bin("ltrs").unwrap(); - let assert = cmd + let assert = get_cmd() .arg("check") .arg("-t") .arg("\"some text that is given as text\"") @@ -196,8 +193,7 @@ fn test_check_with_language() { #[test] fn test_check_with_wrong_language() { - let mut cmd = Command::cargo_bin("ltrs").unwrap(); - let assert = cmd + let assert = get_cmd() .arg("check") .arg("-t") .arg("\"some text that is given as text\"") @@ -209,8 +205,7 @@ fn test_check_with_wrong_language() { #[test] fn test_check_with_unexisting_language() { - let mut cmd = Command::cargo_bin("ltrs").unwrap(); - let assert = cmd + let assert = get_cmd() .arg("check") .arg("-t") .arg("\"some text that is given as text\"") @@ -225,8 +220,7 @@ fn test_check_with_unexisting_language() { #[test] fn test_check_with_username_and_key() { // TODO: remove the "invalid request" predicate as of LT 6.0 - let mut cmd = Command::cargo_bin("ltrs").unwrap(); - let assert = cmd + let assert = get_cmd() .arg("check") .arg("-t") .arg("\"some text that is given as text\"") @@ -243,8 +237,7 @@ fn test_check_with_username_and_key() { #[test] fn test_check_with_username_only() { - let mut cmd = Command::cargo_bin("ltrs").unwrap(); - let assert = cmd + let assert = get_cmd() .arg("check") .arg("-t") .arg("\"some text that is given as text\"") @@ -258,8 +251,7 @@ fn test_check_with_username_only() { #[test] fn test_check_with_key_only() { - let mut cmd = Command::cargo_bin("ltrs").unwrap(); - let 
assert = cmd + let assert = get_cmd() .arg("check") .arg("-t") .arg("\"some text that is given as text\"") @@ -273,8 +265,7 @@ fn test_check_with_key_only() { #[test] fn test_check_with_dict() { - let mut cmd = Command::cargo_bin("ltrs").unwrap(); - let assert = cmd + let assert = get_cmd() .arg("check") .arg("-t") .arg("\"some text that is given as text\"") @@ -286,8 +277,7 @@ fn test_check_with_dict() { #[test] fn test_check_with_dicts() { - let mut cmd = Command::cargo_bin("ltrs").unwrap(); - let assert = cmd + let assert = get_cmd() .arg("check") .arg("-t") .arg("\"some text that is given as text\"") @@ -299,8 +289,7 @@ fn test_check_with_dicts() { #[test] fn test_check_with_preferred_variant() { - let mut cmd = Command::cargo_bin("ltrs").unwrap(); - let assert = cmd + let assert = get_cmd() .arg("check") .arg("-t") .arg("\"some text that is given as text\"") @@ -312,8 +301,7 @@ fn test_check_with_preferred_variant() { #[test] fn test_check_with_preferred_variants() { - let mut cmd = Command::cargo_bin("ltrs").unwrap(); - let assert = cmd + let assert = get_cmd() .arg("check") .arg("-t") .arg("\"some text that is given as text\"") @@ -325,8 +313,7 @@ fn test_check_with_preferred_variants() { #[test] fn test_check_with_language_and_preferred_variant() { - let mut cmd = Command::cargo_bin("ltrs").unwrap(); - let assert = cmd + let assert = get_cmd() .arg("check") .arg("-t") .arg("\"some text that is given as text\"") @@ -343,8 +330,7 @@ fn test_check_with_language_and_preferred_variant() { #[test] fn test_check_with_enabled_rule() { - let mut cmd = Command::cargo_bin("ltrs").unwrap(); - let assert = cmd + let assert = get_cmd() .arg("check") .arg("-t") .arg("\"some text that is given as text\"") @@ -356,8 +342,7 @@ fn test_check_with_enabled_rule() { #[test] fn test_check_with_enabled_rules() { - let mut cmd = Command::cargo_bin("ltrs").unwrap(); - let assert = cmd + let assert = get_cmd() .arg("check") .arg("-t") .arg("\"some text that is given as text\"") @@ 
-369,8 +354,7 @@ fn test_check_with_enabled_rules() { #[test] fn test_check_with_disabled_rule() { - let mut cmd = Command::cargo_bin("ltrs").unwrap(); - let assert = cmd + let assert = get_cmd() .arg("check") .arg("-t") .arg("\"some text that is given as text\"") @@ -382,8 +366,7 @@ fn test_check_with_disabled_rule() { #[test] fn test_check_with_disabled_rules() { - let mut cmd = Command::cargo_bin("ltrs").unwrap(); - let assert = cmd + let assert = get_cmd() .arg("check") .arg("-t") .arg("\"some text that is given as text\"") @@ -395,8 +378,7 @@ fn test_check_with_disabled_rules() { #[test] fn test_check_with_enabled_category() { - let mut cmd = Command::cargo_bin("ltrs").unwrap(); - let assert = cmd + let assert = get_cmd() .arg("check") .arg("-t") .arg("\"some text that is given as text\"") @@ -408,8 +390,7 @@ fn test_check_with_enabled_category() { #[test] fn test_check_with_enabled_categories() { - let mut cmd = Command::cargo_bin("ltrs").unwrap(); - let assert = cmd + let assert = get_cmd() .arg("check") .arg("-t") .arg("\"some text that is given as text\"") @@ -421,8 +402,7 @@ fn test_check_with_enabled_categories() { #[test] fn test_check_with_disabled_category() { - let mut cmd = Command::cargo_bin("ltrs").unwrap(); - let assert = cmd + let assert = get_cmd() .arg("check") .arg("-t") .arg("\"some text that is given as text\"") @@ -434,8 +414,7 @@ fn test_check_with_disabled_category() { #[test] fn test_check_with_disabled_categories() { - let mut cmd = Command::cargo_bin("ltrs").unwrap(); - let assert = cmd + let assert = get_cmd() .arg("check") .arg("-t") .arg("\"some text that is given as text\"") @@ -447,8 +426,7 @@ fn test_check_with_disabled_categories() { #[test] fn test_check_with_enabled_only_rule() { - let mut cmd = Command::cargo_bin("ltrs").unwrap(); - let assert = cmd + let assert = get_cmd() .arg("check") .arg("-t") .arg("\"some text that is given as text\"") @@ -461,8 +439,7 @@ fn test_check_with_enabled_only_rule() { #[test] fn 
test_check_with_enabled_only_category() { - let mut cmd = Command::cargo_bin("ltrs").unwrap(); - let assert = cmd + let assert = get_cmd() .arg("check") .arg("-t") .arg("\"some text that is given as text\"") @@ -475,8 +452,7 @@ fn test_check_with_enabled_only_category() { #[test] fn test_check_with_enabled_only_without_enabled() { - let mut cmd = Command::cargo_bin("ltrs").unwrap(); - let assert = cmd + let assert = get_cmd() .arg("check") .arg("-t") .arg("\"some text that is given as text\"") @@ -487,8 +463,7 @@ fn test_check_with_enabled_only_without_enabled() { #[test] fn test_check_with_picky_level() { - let mut cmd = Command::cargo_bin("ltrs").unwrap(); - let assert = cmd + let assert = get_cmd() .arg("check") .arg("-t") .arg("\"some text that is given as text\"") @@ -500,8 +475,7 @@ fn test_check_with_picky_level() { #[test] fn test_check_with_unexisting_level() { - let mut cmd = Command::cargo_bin("ltrs").unwrap(); - let assert = cmd + let assert = get_cmd() .arg("check") .arg("-t") .arg("\"some text that is given as text\"") @@ -513,23 +487,20 @@ fn test_check_with_unexisting_level() { #[test] fn test_languages() { - let mut cmd = Command::cargo_bin("ltrs").unwrap(); - let assert = cmd.arg("languages").assert(); + let assert = get_cmd().arg("languages").assert(); assert.success(); } #[test] fn test_ping() { - let mut cmd = Command::cargo_bin("ltrs").unwrap(); - let assert = cmd.arg("ping").assert(); + let assert = get_cmd().arg("ping").assert(); assert.success().stdout(contains("PONG! 
Delay: ")); } #[test] fn test_words() { // TODO: remove the "invalid request" predicate as of LT 6.0 - let mut cmd = Command::cargo_bin("ltrs").unwrap(); - let assert = cmd + let assert = get_cmd() .arg("words") .arg("--username") .arg("user") @@ -545,8 +516,7 @@ fn test_words() { #[test] fn test_words_add() { // TODO: remove the "invalid request" predicate as of LT 6.0 - let mut cmd = Command::cargo_bin("ltrs").unwrap(); - let assert = cmd + let assert = get_cmd() .arg("words") .arg("add") .arg("--username") @@ -563,8 +533,7 @@ fn test_words_add() { #[test] fn test_words_delete() { - let mut cmd = Command::cargo_bin("ltrs").unwrap(); - let assert = cmd + let assert = get_cmd() .arg("words") .arg("delete") .arg("--username") @@ -581,8 +550,7 @@ fn test_words_delete() { #[test] fn test_check_file_typst() { - let mut cmd = Command::cargo_bin("ltrs").unwrap(); - let output = cmd + let output = get_cmd() .arg("check") .arg(PATH_SAMPLE_FILES.join("example.typ")) .output() @@ -595,8 +563,7 @@ fn test_check_file_typst() { #[test] fn test_check_file_html() { - let mut cmd = Command::cargo_bin("ltrs").unwrap(); - let output = cmd + let output = get_cmd() .arg("check") .arg(PATH_SAMPLE_FILES.join("example.html")) .output() @@ -609,8 +576,7 @@ fn test_check_file_html() { #[test] fn test_check_file_markdown() { - let mut cmd = Command::cargo_bin("ltrs").unwrap(); - let output = cmd + let output = get_cmd() .arg("check") .arg(PATH_ROOT.join("README.md")) .output() From 83494051c961a2bc12b4ad560c27ffd47370dc75 Mon Sep 17 00:00:00 2001 From: rolv Date: Wed, 12 Mar 2025 23:12:31 +0000 Subject: [PATCH 14/25] fix(tests): adjust snapshots to results from running against docker-hosted LanguageTool API --- tests/snapshots/cli__autodetect_html_file.snap | 2 +- tests/snapshots/cli__autodetect_markdown_file.snap | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/snapshots/cli__autodetect_html_file.snap b/tests/snapshots/cli__autodetect_html_file.snap index 
33fc1bd..43fb0bb 100644 --- a/tests/snapshots/cli__autodetect_html_file.snap +++ b/tests/snapshots/cli__autodetect_html_file.snap @@ -7,7 +7,7 @@ error[MORFOLOGIK_RULE_EN_US]: Possible spelling mistake found. | 4 | [path] diff --git a/tests/snapshots/cli__autodetect_markdown_file.snap b/tests/snapshots/cli__autodetect_markdown_file.snap index 1859f59..b80961f 100644 --- a/tests/snapshots/cli__autodetect_markdown_file.snap +++ b/tests/snapshots/cli__autodetect_markdown_file.snap @@ -7,14 +7,14 @@ error[MORFOLOGIK_RULE_EN_US]: Possible spelling mistake found. | 5 | ...piling LTRS. #### Default Features - - **cli**: Adds command-line related methods for ... | ^^^^^^^ Possible spelling mistake - | ------- help: CLI, Clip, CGI, CPI, CSI, ... (1504 not shown) + | ------- help: CLI, Clip, CGI, CPI, CSI, ... (1428 not shown) | error[MORFOLOGIK_RULE_EN_US]: Possible spelling mistake found. --> [path] | 9 | ...ate**, **color**, **multithreaded**. - **native-tls**: Enables TLS functionality provided by ... | ^^^^^^^^^^^^^^ Possible spelling mistake - | -------------- help: natives, natively, nativists, nativeness, naivetes + | -------------- help: natives, natively, nativists | error[MORFOLOGIK_RULE_EN_US]: Possible spelling mistake found. --> [path] @@ -28,14 +28,14 @@ error[MORFOLOGIK_RULE_EN_US]: Possible spelling mistake found. | 6 | ...shells. This feature also activates the **cli** feature. Enter ltrs completions --help ... | ^^^^^^^ Possible spelling mistake - | ------- help: CLI, clip, CGI, CPI, CSI, ... (1541 not shown) + | ------- help: CLI, clip, CGI, CPI, CSI, ... (1461 not shown) | error[MORFOLOGIK_RULE_EN_US]: Possible spelling mistake found. --> [path] | 10 | ...es color outputting in the terminal. 
If **cli** feature is also enabled, the --color= [path] From 64078cac56dc44f931e1befe7a8de17f3073d9ad Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A9rome=20Eertmans?= Date: Thu, 13 Mar 2025 11:31:52 +0100 Subject: [PATCH 15/25] chore(tests): only test snapshots on latest --- .github/workflows/rustlib.yml | 6 +++++- Cargo.toml | 3 +++ src/error.rs | 7 ++----- tests/cli.rs | 3 +++ 4 files changed, 13 insertions(+), 6 deletions(-) diff --git a/.github/workflows/rustlib.yml b/.github/workflows/rustlib.yml index e44e52d..c140f0b 100644 --- a/.github/workflows/rustlib.yml +++ b/.github/workflows/rustlib.yml @@ -31,6 +31,10 @@ jobs: fail-fast: false matrix: tag: [latest, '5.5', '5.6', '5.7', '5.8', '5.9', '6.0', '6.1', '6.2', '6.3', '6.4', '6.5'] + features: [] + include: + - tag: latest + features: --all-features runs-on: ubuntu-latest services: languagetool: @@ -53,4 +57,4 @@ jobs: uses: Swatinem/rust-cache@v2 - run: curl -LsSf https://get.nexte.st/latest/linux | tar zxf - -C ${CARGO_HOME:-~/.cargo}/bin - - run: cargo nextest run --all-features --no-capture + - run: cargo nextest run ${{ matrix.features }} --no-capture diff --git a/Cargo.toml b/Cargo.toml index 1def06d..bf3355a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -82,6 +82,9 @@ pulldown-cmark = ["dep:pulldown-cmark"] typst = ["dep:typst-syntax"] unstable = [] +# Test features +snapshots = [] + [lib] name = "languagetool_rust" path = "src/lib.rs" diff --git a/src/error.rs b/src/error.rs index b1f258c..2af3644 100644 --- a/src/error.rs +++ b/src/error.rs @@ -125,10 +125,9 @@ mod tests { assert_matches!(error, Error::IO(_)); } - #[ignore] #[test] fn test_error_invalid_request() { - let result = std::fs::read_to_string(""); // TODO + let result = crate::api::check::Request::new().try_get_text(); assert!(result.is_err()); let error: Error = result.unwrap_err().into(); @@ -136,10 +135,9 @@ mod tests { assert_matches!(error, Error::InvalidRequest(_)); } - #[ignore] #[test] fn test_error_invalid_value() { - let result 
= std::fs::read_to_string(""); // TODO + let result = crate::api::server::parse_port("test"); assert!(result.is_err()); let error: Error = result.unwrap_err().into(); @@ -147,7 +145,6 @@ mod tests { assert_matches!(error, Error::InvalidValue(_)); } - #[ignore] #[tokio::test] async fn test_error_reqwest() { let result = reqwest::get("").await; diff --git a/tests/cli.rs b/tests/cli.rs index 3ed6101..3ca3e98 100644 --- a/tests/cli.rs +++ b/tests/cli.rs @@ -550,6 +550,7 @@ fn test_words_delete() { )); } +#[cfg_attr(not(feature = "snapshots"), ignore)] #[test] fn test_check_file_typst() { let output = get_cmd() @@ -563,6 +564,7 @@ fn test_check_file_typst() { ); } +#[cfg_attr(not(feature = "snapshots"), ignore)] #[test] fn test_check_file_html() { let output = get_cmd() @@ -576,6 +578,7 @@ fn test_check_file_html() { ); } +#[cfg_attr(not(feature = "snapshots"), ignore)] #[test] fn test_check_file_markdown() { let output = get_cmd() From 7209411ea5867b5e3bc56b916f27376853f96505 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A9rome=20Eertmans?= Date: Thu, 13 Mar 2025 11:33:18 +0100 Subject: [PATCH 16/25] chore(ci): fix when CI runs --- .github/workflows/rustdoc.yml | 2 +- .github/workflows/rustlib.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/rustdoc.yml b/.github/workflows/rustdoc.yml index 4264261..babfee1 100644 --- a/.github/workflows/rustdoc.yml +++ b/.github/workflows/rustdoc.yml @@ -1,7 +1,7 @@ on: pull_request: paths: - - src/lib/**.rs + - src/**.rs - Cargo.toml workflow_dispatch: diff --git a/.github/workflows/rustlib.yml b/.github/workflows/rustlib.yml index c140f0b..18d1580 100644 --- a/.github/workflows/rustlib.yml +++ b/.github/workflows/rustlib.yml @@ -1,7 +1,7 @@ on: pull_request: paths: - - src/lib/**.rs + - '**.rs' - .github/workflows/rustlib.yml - Cargo.toml workflow_dispatch: From 727ca6ab49cd38d8920d7d4cc84cf0c128b3e93b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A9rome=20Eertmans?= Date: Thu, 13 Mar 2025 
11:34:03 +0100 Subject: [PATCH 17/25] chore(tests): only test snapshots on latest chore(ci): fix when CI runs --- .github/workflows/rustdoc.yml | 2 +- .github/workflows/rustlib.yml | 8 ++++++-- Cargo.toml | 3 +++ src/error.rs | 7 ++----- tests/cli.rs | 3 +++ 5 files changed, 15 insertions(+), 8 deletions(-) diff --git a/.github/workflows/rustdoc.yml b/.github/workflows/rustdoc.yml index 4264261..babfee1 100644 --- a/.github/workflows/rustdoc.yml +++ b/.github/workflows/rustdoc.yml @@ -1,7 +1,7 @@ on: pull_request: paths: - - src/lib/**.rs + - src/**.rs - Cargo.toml workflow_dispatch: diff --git a/.github/workflows/rustlib.yml b/.github/workflows/rustlib.yml index e44e52d..18d1580 100644 --- a/.github/workflows/rustlib.yml +++ b/.github/workflows/rustlib.yml @@ -1,7 +1,7 @@ on: pull_request: paths: - - src/lib/**.rs + - '**.rs' - .github/workflows/rustlib.yml - Cargo.toml workflow_dispatch: @@ -31,6 +31,10 @@ jobs: fail-fast: false matrix: tag: [latest, '5.5', '5.6', '5.7', '5.8', '5.9', '6.0', '6.1', '6.2', '6.3', '6.4', '6.5'] + features: [] + include: + - tag: latest + features: --all-features runs-on: ubuntu-latest services: languagetool: @@ -53,4 +57,4 @@ jobs: uses: Swatinem/rust-cache@v2 - run: curl -LsSf https://get.nexte.st/latest/linux | tar zxf - -C ${CARGO_HOME:-~/.cargo}/bin - - run: cargo nextest run --all-features --no-capture + - run: cargo nextest run ${{ matrix.features }} --no-capture diff --git a/Cargo.toml b/Cargo.toml index 1def06d..bf3355a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -82,6 +82,9 @@ pulldown-cmark = ["dep:pulldown-cmark"] typst = ["dep:typst-syntax"] unstable = [] +# Test features +snapshots = [] + [lib] name = "languagetool_rust" path = "src/lib.rs" diff --git a/src/error.rs b/src/error.rs index b1f258c..2af3644 100644 --- a/src/error.rs +++ b/src/error.rs @@ -125,10 +125,9 @@ mod tests { assert_matches!(error, Error::IO(_)); } - #[ignore] #[test] fn test_error_invalid_request() { - let result = std::fs::read_to_string(""); // 
TODO + let result = crate::api::check::Request::new().try_get_text(); assert!(result.is_err()); let error: Error = result.unwrap_err().into(); @@ -136,10 +135,9 @@ mod tests { assert_matches!(error, Error::InvalidRequest(_)); } - #[ignore] #[test] fn test_error_invalid_value() { - let result = std::fs::read_to_string(""); // TODO + let result = crate::api::server::parse_port("test"); assert!(result.is_err()); let error: Error = result.unwrap_err().into(); @@ -147,7 +145,6 @@ mod tests { assert_matches!(error, Error::InvalidValue(_)); } - #[ignore] #[tokio::test] async fn test_error_reqwest() { let result = reqwest::get("").await; diff --git a/tests/cli.rs b/tests/cli.rs index 3ed6101..3ca3e98 100644 --- a/tests/cli.rs +++ b/tests/cli.rs @@ -550,6 +550,7 @@ fn test_words_delete() { )); } +#[cfg_attr(not(feature = "snapshots"), ignore)] #[test] fn test_check_file_typst() { let output = get_cmd() @@ -563,6 +564,7 @@ fn test_check_file_typst() { ); } +#[cfg_attr(not(feature = "snapshots"), ignore)] #[test] fn test_check_file_html() { let output = get_cmd() @@ -576,6 +578,7 @@ fn test_check_file_html() { ); } +#[cfg_attr(not(feature = "snapshots"), ignore)] #[test] fn test_check_file_markdown() { let output = get_cmd() From 75a6a0351cf775245b74475341bd6b71f752b236 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A9rome=20Eertmans?= Date: Thu, 13 Mar 2025 11:38:06 +0100 Subject: [PATCH 18/25] fmt --- CHANGELOG.md | 2 +- Cargo.toml | 26 ++++++++++++-------------- RELEASE-PROCESS.md | 2 ++ tests/sample_files/example.typ | 1 - 4 files changed, 15 insertions(+), 16 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 36aff8f..921f3c0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -247,7 +247,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Fix compilation error in `src/lib/error.rs` when `feature = "cli"` was not set. - Remove unused print in `src/lib/server.rs`. 
-> **_NOTE:_** Pre v1.0.0, the changelog was generated using the `cliff` tool that is based on commits. +> ***NOTE:*** Pre v1.0.0, the changelog was generated using the `cliff` tool that is based on commits. ## [0.0.18](https://github.com/jeertmans/languagetool-rust/compare/v0.0.17...v0.0.18) - 2022-06-22 diff --git a/Cargo.toml b/Cargo.toml index bf3355a..81078a2 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -79,12 +79,10 @@ native-tls = ["reqwest/native-tls"] native-tls-vendored = ["reqwest/native-tls-vendored"] pretty_env_logger = ["dep:pretty_env_logger"] pulldown-cmark = ["dep:pulldown-cmark"] +snapshots = [] # Only for testing typst = ["dep:typst-syntax"] unstable = [] -# Test features -snapshots = [] - [lib] name = "languagetool_rust" path = "src/lib.rs" @@ -102,18 +100,22 @@ repository = "https://github.com/jeertmans/languagetool-rust" rust-version = "1.77.0" version = "2.1.4" +[package.metadata.docs.rs] +all-features = true +rustdoc-args = ["--cfg", "docsrs"] + [package.metadata.release] pre-release-replacements = [ - {file="CHANGELOG.md", search="Unreleased", replace="{{version}}", min=1}, - {file="CHANGELOG.md", search="\\.\\.\\.HEAD\\)", replace="...{{version}}) {{date}}", exactly=1}, - {file="CHANGELOG.md", search="", replace="\n\n## [Unreleased](https://github.com/jeertmans/languagetool-rust/compare/v{{version}}...HEAD)", exactly=1}, + {file = "CHANGELOG.md", search = "Unreleased", replace = "{{version}}", min = 1}, + {file = "CHANGELOG.md", search = "\\.\\.\\.HEAD\\)", replace = "...{{version}}) {{date}}", exactly = 1}, + {file = "CHANGELOG.md", search = "", replace = "\n\n## [Unreleased](https://github.com/jeertmans/languagetool-rust/compare/v{{version}}...HEAD)", exactly = 1}, ] -tag = false publish = false +tag = false -[package.metadata.docs.rs] -all-features = true -rustdoc-args = ["--cfg", "docsrs"] +[profile.dev.package] +insta.opt-level = 3 +similar.opt-level = 3 [[test]] name = "cli" @@ -123,7 +125,3 @@ required-features = ["cli"] [[test]] 
name = "match-positions" path = "tests/match_positions.rs" - -[profile.dev.package] -insta.opt-level = 3 -similar.opt-level = 3 diff --git a/RELEASE-PROCESS.md b/RELEASE-PROCESS.md index 87f21fc..45d3fa9 100644 --- a/RELEASE-PROCESS.md +++ b/RELEASE-PROCESS.md @@ -9,9 +9,11 @@ Note that, by default, every command runs in *dry mode*, and you need to append to actually perform the action. Here are the the following steps to install `cargo-release`: + ```bash cargo install cargo-release ``` + Here are the following steps to release a new version: 1. create a branch `release-x.y.z` from the main branch; diff --git a/tests/sample_files/example.typ b/tests/sample_files/example.typ index a83a174..8aae2ad 100644 --- a/tests/sample_files/example.typ +++ b/tests/sample_files/example.typ @@ -145,4 +145,3 @@ The first #count numbers of the sequence are: ..nums.map(n => $F_#n$), ..nums.map(n => str(fib(n))), )) - From c0dea8c851076babba7e4f5a7c4f6031f08fcd23 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A9rome=20Eertmans?= Date: Thu, 13 Mar 2025 11:39:54 +0100 Subject: [PATCH 19/25] fix(ci): add dummy value --- .github/workflows/rustlib.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/rustlib.yml b/.github/workflows/rustlib.yml index 18d1580..89b3643 100644 --- a/.github/workflows/rustlib.yml +++ b/.github/workflows/rustlib.yml @@ -31,7 +31,7 @@ jobs: fail-fast: false matrix: tag: [latest, '5.5', '5.6', '5.7', '5.8', '5.9', '6.0', '6.1', '6.2', '6.3', '6.4', '6.5'] - features: [] + features: [''] include: - tag: latest features: --all-features From d7a81c7b0dd478c90ea873710a87fba1454384b1 Mon Sep 17 00:00:00 2001 From: rolv Date: Mon, 17 Mar 2025 10:11:53 +0000 Subject: [PATCH 20/25] fix(ci): clear artifacts after each run of `cargo hack` to avoid exhausting all storage on the CI container --- .github/workflows/rustcheck.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/rustcheck.yml 
b/.github/workflows/rustcheck.yml index d50fd8d..d0c2786 100644 --- a/.github/workflows/rustcheck.yml +++ b/.github/workflows/rustcheck.yml @@ -13,4 +13,4 @@ jobs: steps: - uses: actions/checkout@v3 - uses: taiki-e/install-action@cargo-hack - - run: cargo hack check --feature-powerset --no-dev-deps + - run: cargo hack check --feature-powerset --no-dev-deps --clean-per-run From de79ac7ff5db09cebac1bfb3d3bb77760efb0dd1 Mon Sep 17 00:00:00 2001 From: rolv Date: Mon, 17 Mar 2025 11:35:01 +0000 Subject: [PATCH 21/25] refactor: adjust `cli` feature combination --- Cargo.toml | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 81078a2..dba346f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -54,17 +54,17 @@ annotate = ["dep:annotate-snippets"] clap-verbosity-flag = ["dep:clap-verbosity-flag"] cli = [ "annotate", + "clap-verbosity-flag", "color", "dep:clap", - "dep:clap-verbosity-flag", "dep:enum_dispatch", "dep:is-terminal", - "dep:log", - "dep:pretty_env_logger", + "html", + "log", + "markdown", "multithreaded", + "pretty_env_logger", "typst", - "markdown", - "html", ] cli-complete = ["cli", "clap_complete"] color = ["annotate-snippets?/color", "dep:termcolor"] @@ -78,7 +78,6 @@ multithreaded = ["dep:tokio"] native-tls = ["reqwest/native-tls"] native-tls-vendored = ["reqwest/native-tls-vendored"] pretty_env_logger = ["dep:pretty_env_logger"] -pulldown-cmark = ["dep:pulldown-cmark"] snapshots = [] # Only for testing typst = ["dep:typst-syntax"] unstable = [] From 29b0ce893f40f0c7a3804529b01c67b10514b128 Mon Sep 17 00:00:00 2001 From: rolv Date: Mon, 17 Mar 2025 11:35:29 +0000 Subject: [PATCH 22/25] fix(ci): significantly reduce `cargo hack` runtime by reducing possible combinations --- .github/workflows/rustcheck.yml | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/.github/workflows/rustcheck.yml b/.github/workflows/rustcheck.yml index d0c2786..94eba66 100644 --- 
a/.github/workflows/rustcheck.yml +++ b/.github/workflows/rustcheck.yml @@ -13,4 +13,13 @@ jobs: steps: - uses: actions/checkout@v3 - uses: taiki-e/install-action@cargo-hack - - run: cargo hack check --feature-powerset --no-dev-deps --clean-per-run + - run: > + cargo hack check + --feature-powerset + --no-dev-deps + --clean-per-run + --group-features cli,log,pretty_env_logger,color,clap-verbosity-flag + --group-features cli-complete,docker + --group-features typst,html,markdown + --mutually-exclusive-features native-tls,native-tls-vendored + --exclude-features snapshots From 413d99d7f6aabcf0dd9d885825283c25664ab1b5 Mon Sep 17 00:00:00 2001 From: rolv Date: Tue, 18 Mar 2025 21:48:58 +0000 Subject: [PATCH 23/25] refactor: use plain markup instead of interpreted markup --- src/parsers/markdown.rs | 15 ++++++++++++--- src/parsers/typst.rs | 14 ++------------ 2 files changed, 14 insertions(+), 15 deletions(-) diff --git a/src/parsers/markdown.rs b/src/parsers/markdown.rs index d3686c7..88ddd61 100644 --- a/src/parsers/markdown.rs +++ b/src/parsers/markdown.rs @@ -93,6 +93,15 @@ pub fn parse_markdown(file_content: &str) -> Data<'_> { .push(DataAnnotation::new_interpreted_markup(format!("~{s}~"), s)) }, + Tag::Link { + title, dest_url, .. + } => { + annotations.push(DataAnnotation::new_interpreted_markup( + format!("[{title}]({dest_url})"), + title.to_string(), + )); + }, + // No changes necessary Tag::Paragraph | Tag::List(_) @@ -102,9 +111,9 @@ pub fn parse_markdown(file_content: &str) -> Data<'_> { annotations.push(DataAnnotation::new_text(s)); }, - // Ignored - Tag::CodeBlock(_) | Tag::Link { .. } | Tag::Image { .. } => { - annotations.push(DataAnnotation::new_interpreted_markup(s, IGNORE)); + // Just markup + Tag::CodeBlock(_) | Tag::Image { .. 
} => { + annotations.push(DataAnnotation::new_markup(s)); }, _ => {}, } diff --git a/src/parsers/typst.rs b/src/parsers/typst.rs index 7267f6c..812bf24 100644 --- a/src/parsers/typst.rs +++ b/src/parsers/typst.rs @@ -1,10 +1,7 @@ //! Parse the contents of Typst files into a format parseable by the //! LanguageTool API. -use crate::{ - api::check::{Data, DataAnnotation}, - parsers::IGNORE, -}; +use crate::api::check::{Data, DataAnnotation}; /// Parse the contents of a Typst file into a text format to be sent to the /// LanguageTool API. @@ -47,14 +44,7 @@ pub fn parse_typst(file_content: impl AsRef) -> Data<'static> { } } - annotations.push(DataAnnotation::new_interpreted_markup( - markup, - // This pattern is ignored by LanguageTool, and allows us to avoid whitespace - // issues. The following sentence would give an error for - // repeated whitespace otherwise: This has ``` `backticks` - // ``` in it - IGNORE, - )); + annotations.push(DataAnnotation::new_markup(markup)); continue; }, // Markup with valid text interpretations From b58470fcd002936bd98f53dc78745846b006c667 Mon Sep 17 00:00:00 2001 From: rolv Date: Tue, 1 Apr 2025 22:20:34 +0100 Subject: [PATCH 24/25] accept snapshot changes for false positives --- .../cli__autodetect_markdown_file.snap | 51 ++++++++++++++++- .../snapshots/cli__autodetect_typst_file.snap | 56 +++++++++++++++++++ 2 files changed, 106 insertions(+), 1 deletion(-) diff --git a/tests/snapshots/cli__autodetect_markdown_file.snap b/tests/snapshots/cli__autodetect_markdown_file.snap index b80961f..56f083c 100644 --- a/tests/snapshots/cli__autodetect_markdown_file.snap +++ b/tests/snapshots/cli__autodetect_markdown_file.snap @@ -2,6 +2,41 @@ source: tests/cli.rs expression: "String::from_utf8(output.stdout).unwrap()" --- +error[COMMA_PARENTHESIS_WHITESPACE]: Don’t put a space before the full stop. + --> [path] + | +6 | ...ges and is free to use, more on that on_ [](https://languagetool.org/)_. __There is a public API (with a free tie... 
+ | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Use of whitespace before comma and before/after parentheses + | --------------------------------- help: . + | +error[WHITESPACE_RULE]: Possible typo: you repeated a whitespace + --> [path] + | +11 | ...rs very easily via Rust code! _Crates.io docs.rs codecov- - [](#about) - [](#cli-referenc... + | ^^^^^^^^^ Whitespace repetition (bad formatting) + | --------- help: + | +error[COMMA_PARENTHESIS_WHITESPACE]: Don’t put a space before the full stop. + --> [path] + | +13 | ...on. Installation guidelines can be found [](https://www.docker.com/get-started/). On Linux platforms, you might need to c... + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Use of whitespace before comma and before/after parentheses + | ----------------------------------------- help: . + | +error[COMMA_PARENTHESIS_WHITESPACE]: Don’t put a space before the full stop. + --> [path] + | +14 | ...vent the _sudo privilege issue_ by doing [](https://docs.docker.com/engine/install/linux-postinstall/). ## API Reference + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Use of whitespace before comma and before/after parentheses + | --------------------------------------------------------------- help: . + | +error[COMMA_PARENTHESIS_WHITESPACE]: Don’t put a space before the full stop. + --> [path] + | +5 | ... or crate, then we recommend reading the [](https://docs.rs/languagetool-rust). To use LanguageTool-Rust in your Rust p... + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Use of whitespace before comma and before/after parentheses + | --------------------------------------- help: . + | error[MORFOLOGIK_RULE_EN_US]: Possible spelling mistake found. --> [path] | @@ -51,10 +86,24 @@ error[MORFOLOGIK_RULE_EN_US]: Possible spelling mistake found. | ^^^^^^^^^ Possible spelling mistake | --------- help: undo, undock, undos | +error[THE_CC]: It appears that a noun is missing after “the”. + --> [path] + | +16 | ... 
JSON responses that are not present in the [](https://languagetool.org/http-api/sw... + | ^^^ the and + | --- + | +error[CONSECUTIVE_SPACES]: It seems like there are too many consecutive spaces here. + --> [path] + | +16 | ...ON responses that are not present in the [](https://languagetool.org/http-api/swagger-ui/#!/default/) but might be present in some cases. All ... + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Two consecutive spaces + | -------------------------------------------------------------- help: + | error[MORFOLOGIK_RULE_EN_US]: Possible spelling mistake found. --> [path] | -19 | ...cases. All added fields are stored in a hashmap as JSON values. ## Related Projects ... +20 | ...cases. All added fields are stored in a hashmap as JSON values. ## Related Projects ... | ^^^^^^^ Possible spelling mistake | ------- help: hash map | diff --git a/tests/snapshots/cli__autodetect_typst_file.snap b/tests/snapshots/cli__autodetect_typst_file.snap index 3cb1f44..f63887b 100644 --- a/tests/snapshots/cli__autodetect_typst_file.snap +++ b/tests/snapshots/cli__autodetect_typst_file.snap @@ -16,3 +16,59 @@ error[MORFOLOGIK_RULE_EN_US]: Possible spelling mistake found. | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Possible spelling mistake | ----------------------------- help: Temperature, Temperate | +error[COMMA_PARENTHESIS_WHITESPACE]: Don’t put a space before the full stop. + --> [path] + | +8 | ...he glacier melting models established in @glacier-melt. bibliography("works.bib") The flow ra... + | ^^^^^^^^^^^^^^^ Use of whitespace before comma and before/after parentheses + | --------------- help: . + | +error[CONSECUTIVE_SPACES]: It seems like there are too many consecutive spaces here. + --> [path] + | +1 | = Code blocks Adding `rbx` to `rcx` gives the desired result. What... + | ^^^^^^^ Two consecutive spaces + | ------- help: + | +error[CONSECUTIVE_SPACES]: It seems like there are too many consecutive spaces here. 
+ --> [path] + | +1 | = Code blocks Adding `rbx` to `rcx` gives the desired result. What is ```ru... + | ^^^^^^^ Two consecutive spaces + | ------- help: + | +error[TO_NON_BASE]: The verb after “to” should be in the base form as part of the to-infinitive. A verb can take many forms, but the base form is always used in the to-infinitive. + --> [path] + | +2 | = Code blocks Adding `rbx` to `rcx` gives the desired result. What is ```rust fn... + | ^^^^^ 'to' + non-base form + | ----- help: give + | +error[CONSECUTIVE_SPACES]: It seems like there are too many consecutive spaces here. + --> [path] + | +4 | ...`rcx` gives the desired result. What is ```rust fn main()``` in Rust would be ```c int main()``` in C... + | ^^^^^^^^^^^^^^^^^^^^^^ Two consecutive spaces + | ---------------------- help: + | +error[CONSECUTIVE_SPACES]: It seems like there are too many consecutive spaces here. + --> [path] + | +5 | ...is ```rust fn main()``` in Rust would be ```c int main()``` in C. ```rust fn main() { println!(... + | ^^^^^^^^^^^^^^^^^^^^ Two consecutive spaces + | -------------------- help: + | +error[CONSECUTIVE_SPACES]: It seems like there are too many consecutive spaces here. + --> [path] + | +8 | ...rintln!("Hello World!"); } ``` This has ``` `backticks` ``` in it (but the spaces are trimmed). And ... + | ^^^^^^^^^^^^^^^^^^^^^ Two consecutive spaces + | --------------------- help: + | +error[COMMA_PARENTHESIS_WHITESPACE]: Don’t put a space before the full stop. + --> [path] + | +18 | ... defined through the recurrence relation $F_n = F_(n-1) + F_(n-2)$. It can also be expressed in _closed for... + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^ Use of whitespace before comma and before/after parentheses + | --------------------------- help: . 
+ | From dc81d5cd7f917b73bbe09e2ba8d1d5e6c803345b Mon Sep 17 00:00:00 2001 From: rolv Date: Tue, 1 Apr 2025 22:37:10 +0100 Subject: [PATCH 25/25] fix: remove needless lifetime --- src/api/server.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/api/server.rs b/src/api/server.rs index cb63393..4706e63 100644 --- a/src/api/server.rs +++ b/src/api/server.rs @@ -467,9 +467,9 @@ impl ServerClient { /// If any of the requests has `self.text` or `self.data` field which is /// [`None`]. #[cfg(feature = "multithreaded")] - pub async fn check_multiple_and_join_without_context<'source>( + pub async fn check_multiple_and_join_without_context( &self, - requests: Vec>, + requests: Vec>, ) -> Result { let mut response: Option = None;