diff --git a/.github/workflows/rustcheck.yml b/.github/workflows/rustcheck.yml index d50fd8d..94eba66 100644 --- a/.github/workflows/rustcheck.yml +++ b/.github/workflows/rustcheck.yml @@ -13,4 +13,13 @@ jobs: steps: - uses: actions/checkout@v3 - uses: taiki-e/install-action@cargo-hack - - run: cargo hack check --feature-powerset --no-dev-deps + - run: > + cargo hack check + --feature-powerset + --no-dev-deps + --clean-per-run + --group-features cli,log,pretty_env_logger,color,clap-verbosity-flag + --group-features cli-complete,docker + --group-features typst,html,markdown + --mutually-exclusive-features native-tls,native-tls-vendored + --exclude-features snapshots diff --git a/.github/workflows/rustdoc.yml b/.github/workflows/rustdoc.yml index 4264261..babfee1 100644 --- a/.github/workflows/rustdoc.yml +++ b/.github/workflows/rustdoc.yml @@ -1,7 +1,7 @@ on: pull_request: paths: - - src/lib/**.rs + - src/**.rs - Cargo.toml workflow_dispatch: diff --git a/.github/workflows/rustlib.yml b/.github/workflows/rustlib.yml index e44e52d..89b3643 100644 --- a/.github/workflows/rustlib.yml +++ b/.github/workflows/rustlib.yml @@ -1,7 +1,7 @@ on: pull_request: paths: - - src/lib/**.rs + - '**.rs' - .github/workflows/rustlib.yml - Cargo.toml workflow_dispatch: @@ -31,6 +31,10 @@ jobs: fail-fast: false matrix: tag: [latest, '5.5', '5.6', '5.7', '5.8', '5.9', '6.0', '6.1', '6.2', '6.3', '6.4', '6.5'] + features: [''] + include: + - tag: latest + features: --all-features runs-on: ubuntu-latest services: languagetool: @@ -53,4 +57,4 @@ jobs: uses: Swatinem/rust-cache@v2 - run: curl -LsSf https://get.nexte.st/latest/linux | tar zxf - -C ${CARGO_HOME:-~/.cargo}/bin - - run: cargo nextest run --all-features --no-capture + - run: cargo nextest run ${{ matrix.features }} --no-capture diff --git a/CHANGELOG.md b/CHANGELOG.md index 36aff8f..921f3c0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -247,7 +247,7 @@ and this project adheres to [Semantic 
Versioning](https://semver.org/spec/v2.0.0 - Fix compilation error in `src/lib/error.rs` when `feature = "cli"` was not set. - Remove unused print in `src/lib/server.rs`. -> **_NOTE:_** Pre v1.0.0, the changelog was generated using the `cliff` tool that is based on commits. +> ***NOTE:*** Pre v1.0.0, the changelog was generated using the `cliff` tool that is based on commits. ## [0.0.18](https://github.com/jeertmans/languagetool-rust/compare/v0.0.17...v0.0.18) - 2022-06-22 diff --git a/Cargo.lock b/Cargo.lock index 3bcf389..bacbb34 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -153,15 +153,6 @@ version = "2.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8f68f53c83ab957f72c32642f3868eec03eb974d1fb82e453128456482613d36" -[[package]] -name = "block-buffer" -version = "0.10.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3078c7629b62d3f0439517fa394996acacc5cbc91c5a20d8c658e77abd503a71" -dependencies = [ - "generic-array", -] - [[package]] name = "bstr" version = "1.11.3" @@ -179,6 +170,12 @@ version = "3.17.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1628fb46dfa0b37568d12e5edd512553eccf6a22a78e8bde00bb4aed84d5bdbf" +[[package]] +name = "byteorder" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" + [[package]] name = "bytes" version = "1.10.0" @@ -331,6 +328,18 @@ dependencies = [ "windows-sys 0.59.0", ] +[[package]] +name = "console" +version = "0.15.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "054ccb5b10f9f2cbf51eb355ca1d05c2d279ce1804688d0db74b4733a5aeafd8" +dependencies = [ + "encode_unicode", + "libc", + "once_cell", + "windows-sys 0.59.0", +] + [[package]] name = "core-foundation" version = "0.9.4" @@ -347,15 +356,6 @@ version = "0.8.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" -[[package]] -name = "cpufeatures" -version = "0.2.17" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "59ed5838eebb26a2bb2e58f6d5b5316989ae9d08bab10e0e6d103e656d1b0280" -dependencies = [ - "libc", -] - [[package]] name = "criterion" version = "0.5.1" @@ -424,31 +424,45 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "43da5946c66ffcc7745f48db692ffbb10a83bfe0afd96235c5c2a4fb23994929" [[package]] -name = "crypto-common" -version = "0.1.6" +name = "cssparser" +version = "0.34.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1bfb12502f3fc46cca1bb51ac28df9d618d813cdc3d2f25b9fe775a34af26bb3" +checksum = "b7c66d1cd8ed61bf80b38432613a7a2f09401ab8d0501110655f8b341484a3e3" dependencies = [ - "generic-array", - "typenum", + "cssparser-macros", + "dtoa-short", + "itoa", + "phf", + "smallvec", ] [[package]] -name = "difflib" -version = "0.4.0" +name = "cssparser-macros" +version = "0.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6184e33543162437515c2e2b48714794e37845ec9851711914eec9d308f6ebe8" +checksum = "13b588ba4ac1a99f7f2964d24b3d896ddc6bf847ee3855dbd4366f058cfcd331" +dependencies = [ + "quote", + "syn 2.0.98", +] [[package]] -name = "digest" -version = "0.10.7" +name = "derive_more" +version = "0.99.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292" +checksum = "3da29a38df43d6f156149c9b43ded5e018ddff2a855cf2cfd62e8cd7d079c69f" dependencies = [ - "block-buffer", - "crypto-common", + "proc-macro2", + "quote", + "syn 2.0.98", ] +[[package]] +name = "difflib" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6184e33543162437515c2e2b48714794e37845ec9851711914eec9d308f6ebe8" + [[package]] name = "displaydoc" version = "0.2.5" @@ -466,6 +480,21 
@@ version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fea41bba32d969b513997752735605054bc0dfa92b4c56bf1189f2e174be7a10" +[[package]] +name = "dtoa" +version = "1.0.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d6add3b8cff394282be81f3fc1a0605db594ed69890078ca6e2cab1c408bcf04" + +[[package]] +name = "dtoa-short" +version = "0.3.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cd1511a7b6a56299bd043a9c167a6d2bfb37bf84a6dfceaba651168adfb43c87" +dependencies = [ + "dtoa", +] + [[package]] name = "ecow" version = "0.2.3" @@ -475,12 +504,24 @@ dependencies = [ "serde", ] +[[package]] +name = "ego-tree" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b2972feb8dffe7bc8c5463b1dacda1b0dfbed3710e50f977d965429692d74cd8" + [[package]] name = "either" version = "1.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "60b1af1c220855b6ceac025d3f6ecdd2b7c4894bfe9cd9bda4fbb4bc7c0d4cf0" +[[package]] +name = "encode_unicode" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34aa73646ffb006b8f5147f3dc182bd4bcb190227ce861fc4a4844bf8e3cb2c0" + [[package]] name = "encoding_rs" version = "0.8.35" @@ -576,6 +617,16 @@ dependencies = [ "percent-encoding", ] +[[package]] +name = "futf" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df420e2e84819663797d1ec6544b13c5be84629e7bb00dc960d6917db2987843" +dependencies = [ + "mac", + "new_debug_unreachable", +] + [[package]] name = "futures" version = "0.3.31" @@ -666,13 +717,12 @@ dependencies = [ ] [[package]] -name = "generic-array" -version = "0.14.7" +name = "fxhash" +version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a" +checksum = 
"c31b6d751ae2c7f11320402d34e41349dd1016f8d5d45e48c4312bc8625af50c" dependencies = [ - "typenum", - "version_check", + "byteorder", ] [[package]] @@ -750,18 +800,15 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fbf6a919d6cf397374f7dfeeea91d974c7c0a7221d0d0f4f20d859d329e53fcc" [[package]] -name = "html_parser" -version = "0.7.0" +name = "html5ever" +version = "0.29.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f6f56db07b6612644f6f7719f8ef944f75fff9d6378fdf3d316fd32194184abd" +checksum = "3b7410cae13cbc75623c98ac4cbfd1f0bedddf3227afc24f370cf0f50a44a11c" dependencies = [ - "doc-comment", - "pest", - "pest_derive", - "serde", - "serde_derive", - "serde_json", - "thiserror 1.0.69", + "log", + "mac", + "markup5ever", + "match_token", ] [[package]] @@ -990,6 +1037,20 @@ dependencies = [ "hashbrown", ] +[[package]] +name = "insta" +version = "1.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "50259abbaa67d11d2bcafc7ba1d094ed7a0c70e3ce893f0d0997f73558cb3084" +dependencies = [ + "console", + "linked-hash-map", + "once_cell", + "pin-project", + "regex", + "similar", +] + [[package]] name = "ipnet" version = "2.11.0" @@ -1050,21 +1111,24 @@ dependencies = [ "clap_complete", "codspeed-criterion-compat", "criterion", + "ego-tree", "enum_dispatch", "futures", - "html_parser", + "insta", "is-terminal", + "lazy_static", "lifetime", "log", "predicates", "pretty_env_logger", "pulldown-cmark", "reqwest", + "scraper", "serde", "serde_json", "tempfile", "termcolor", - "thiserror 1.0.69", + "thiserror", "tokio", "typst-syntax", ] @@ -1111,6 +1175,12 @@ dependencies = [ "syn 1.0.109", ] +[[package]] +name = "linked-hash-map" +version = "0.5.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0717cef1bc8b636c6e1c1bbdefc09e6322da8a9321966e8928ef80d20f7f770f" + [[package]] name = "linux-raw-sys" version = "0.4.15" @@ -1123,12 +1193,53 @@ version = "0.7.4" 
source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4ee93343901ab17bd981295f2cf0026d4ad018c7c31ba84549a4ddbb47a45104" +[[package]] +name = "lock_api" +version = "0.4.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "07af8b9cdd281b7915f413fa73f29ebd5d55d0d3f0155584dade1ff18cea1b17" +dependencies = [ + "autocfg", + "scopeguard", +] + [[package]] name = "log" version = "0.4.25" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "04cbf5b083de1c7e0222a7a51dbfdba1cbe1c6ab0b15e29fff3f6c077fd9cd9f" +[[package]] +name = "mac" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c41e0c4fef86961ac6d6f8a82609f55f31b05e4fce149ac5710e439df7619ba4" + +[[package]] +name = "markup5ever" +version = "0.14.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c7a7213d12e1864c0f002f52c2923d4556935a43dec5e71355c2760e0f6e7a18" +dependencies = [ + "log", + "phf", + "phf_codegen", + "string_cache", + "string_cache_codegen", + "tendril", +] + +[[package]] +name = "match_token" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "88a9689d8d44bf9964484516275f5cd4c9b59457a6940c1d5d0ecbb94510a36b" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.98", +] + [[package]] name = "memchr" version = "2.7.4" @@ -1178,6 +1289,12 @@ dependencies = [ "tempfile", ] +[[package]] +name = "new_debug_unreachable" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "650eef8c711430f1a879fdd01d4745a7deea475becfb90269c06775983bbf086" + [[package]] name = "normalize-line-endings" version = "0.3.0" @@ -1268,6 +1385,29 @@ dependencies = [ "vcpkg", ] +[[package]] +name = "parking_lot" +version = "0.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f1bf18183cf54e8d6059647fc3063646a1801cf30896933ec2311622cc4b9a27" +dependencies = [ + 
"lock_api", + "parking_lot_core", +] + +[[package]] +name = "parking_lot_core" +version = "0.9.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e401f977ab385c9e4e3ab30627d6f26d00e2c73eef317493c4ec6d468726cf8" +dependencies = [ + "cfg-if", + "libc", + "redox_syscall", + "smallvec", + "windows-targets 0.52.6", +] + [[package]] name = "percent-encoding" version = "2.3.1" @@ -1275,48 +1415,75 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e" [[package]] -name = "pest" -version = "2.7.15" +name = "phf" +version = "0.11.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8b7cafe60d6cf8e62e1b9b2ea516a089c008945bb5a275416789e7db0bc199dc" +checksum = "1fd6780a80ae0c52cc120a26a1a42c1ae51b247a253e4e06113d23d2c2edd078" dependencies = [ - "memchr", - "thiserror 2.0.11", - "ucd-trie", + "phf_macros", + "phf_shared", ] [[package]] -name = "pest_derive" -version = "2.7.15" +name = "phf_codegen" +version = "0.11.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "816518421cfc6887a0d62bf441b6ffb4536fcc926395a69e1a85852d4363f57e" +checksum = "aef8048c789fa5e851558d709946d6d79a8ff88c0440c587967f8e94bfb1216a" dependencies = [ - "pest", - "pest_generator", + "phf_generator", + "phf_shared", ] [[package]] -name = "pest_generator" -version = "2.7.15" +name = "phf_generator" +version = "0.11.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7d1396fd3a870fc7838768d171b4616d5c91f6cc25e377b673d714567d99377b" +checksum = "3c80231409c20246a13fddb31776fb942c38553c51e871f8cbd687a4cfb5843d" dependencies = [ - "pest", - "pest_meta", + "phf_shared", + "rand", +] + +[[package]] +name = "phf_macros" +version = "0.11.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f84ac04429c13a7ff43785d75ad27569f2951ce0ffd30a3321230db2fc727216" +dependencies = [ + 
"phf_generator", + "phf_shared", "proc-macro2", "quote", "syn 2.0.98", ] [[package]] -name = "pest_meta" -version = "2.7.15" +name = "phf_shared" +version = "0.11.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e1e58089ea25d717bfd31fb534e4f3afcc2cc569c70de3e239778991ea3b7dea" +checksum = "67eabc2ef2a60eb7faa00097bd1ffdb5bd28e62bf39990626a582201b7a754e5" dependencies = [ - "once_cell", - "pest", - "sha2", + "siphasher", +] + +[[package]] +name = "pin-project" +version = "1.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "677f1add503faace112b9f1373e43e9e054bfdd22ff1a63c1bc485eaec6a6a8a" +dependencies = [ + "pin-project-internal", +] + +[[package]] +name = "pin-project-internal" +version = "1.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6e918e4ff8c4549eb882f14b3a4bc8c8bc93de829416eacf579f1207a8fbf861" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.98", ] [[package]] @@ -1371,6 +1538,12 @@ version = "1.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "280dc24453071f1b63954171985a0b0d30058d287960968b9b2aca264c8d4ee6" +[[package]] +name = "precomputed-hash" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "925383efa346730478fb4838dbe9137d2a47675ad789c546d150a6e1dd4ab31c" + [[package]] name = "predicates" version = "3.1.3" @@ -1448,6 +1621,21 @@ dependencies = [ "proc-macro2", ] +[[package]] +name = "rand" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" +dependencies = [ + "rand_core", +] + +[[package]] +name = "rand_core" +version = "0.6.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" + [[package]] name = "rayon" version = "1.10.0" @@ -1468,6 +1656,15 @@ 
dependencies = [ "crossbeam-utils", ] +[[package]] +name = "redox_syscall" +version = "0.5.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b8c0c260b63a8219631167be35e6a988e9554dbd323f8bd08439c8ed1302bd1" +dependencies = [ + "bitflags 2.8.0", +] + [[package]] name = "regex" version = "1.11.1" @@ -1595,6 +1792,27 @@ dependencies = [ "windows-sys 0.59.0", ] +[[package]] +name = "scopeguard" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" + +[[package]] +name = "scraper" +version = "0.23.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "527e65d9d888567588db4c12da1087598d0f6f8b346cc2c5abc91f05fc2dffe2" +dependencies = [ + "cssparser", + "ego-tree", + "getopts", + "html5ever", + "precomputed-hash", + "selectors", + "tendril", +] + [[package]] name = "security-framework" version = "2.11.1" @@ -1618,6 +1836,25 @@ dependencies = [ "libc", ] +[[package]] +name = "selectors" +version = "0.26.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fd568a4c9bb598e291a08244a5c1f5a8a6650bee243b5b0f8dbb3d9cc1d87fe8" +dependencies = [ + "bitflags 2.8.0", + "cssparser", + "derive_more", + "fxhash", + "log", + "new_debug_unreachable", + "phf", + "phf_codegen", + "precomputed-hash", + "servo_arc", + "smallvec", +] + [[package]] name = "serde" version = "1.0.217" @@ -1672,14 +1909,12 @@ dependencies = [ ] [[package]] -name = "sha2" -version = "0.10.8" +name = "servo_arc" +version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "793db75ad2bcafc3ffa7c68b215fee268f537982cd901d132f89c6343f3a3dc8" +checksum = "ae65c4249478a2647db249fb43e23cec56a2c8974a427e7bd8cb5a1d0964921a" dependencies = [ - "cfg-if", - "cpufeatures", - "digest", + "stable_deref_trait", ] [[package]] @@ -1688,6 +1923,12 @@ version = "1.3.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" +[[package]] +name = "similar" +version = "2.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbbb5d9659141646ae647b42fe094daf6c6192d1620870b449d9557f748b2daa" + [[package]] name = "siphasher" version = "1.0.1" @@ -1725,6 +1966,31 @@ version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3" +[[package]] +name = "string_cache" +version = "0.8.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "938d512196766101d333398efde81bc1f37b00cb42c2f8350e5df639f040bbbe" +dependencies = [ + "new_debug_unreachable", + "parking_lot", + "phf_shared", + "precomputed-hash", + "serde", +] + +[[package]] +name = "string_cache_codegen" +version = "0.5.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c711928715f1fe0fe509c53b43e993a9a557babc2d0a3567d0a3006f1ac931a0" +dependencies = [ + "phf_generator", + "phf_shared", + "proc-macro2", + "quote", +] + [[package]] name = "strsim" version = "0.11.1" @@ -1805,6 +2071,17 @@ dependencies = [ "windows-sys 0.59.0", ] +[[package]] +name = "tendril" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d24a120c5fc464a3458240ee02c299ebcb9d67b5249c8848b09d639dca8d7bb0" +dependencies = [ + "futf", + "mac", + "utf-8", +] + [[package]] name = "termcolor" version = "1.4.1" @@ -1842,16 +2119,7 @@ version = "1.0.69" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b6aaf5339b578ea85b50e080feb250a3e8ae8cfcdff9a461c9ec2904bc923f52" dependencies = [ - "thiserror-impl 1.0.69", -] - -[[package]] -name = "thiserror" -version = "2.0.11" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"d452f284b73e6d76dd36758a0c8684b1d5be31f92b89d07fd5822175732206fc" -dependencies = [ - "thiserror-impl 2.0.11", + "thiserror-impl", ] [[package]] @@ -1865,17 +2133,6 @@ dependencies = [ "syn 2.0.98", ] -[[package]] -name = "thiserror-impl" -version = "2.0.11" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "26afc1baea8a989337eeb52b6e72a039780ce45c3edfcc9c5b9d112feeb173c2" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.98", -] - [[package]] name = "tinystr" version = "0.7.6" @@ -2011,12 +2268,6 @@ version = "0.2.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e421abadd41a4225275504ea4d6566923418b7f05506fbc9c0fe86ba7396114b" -[[package]] -name = "typenum" -version = "1.17.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "42ff0bf0c66b8238c6f3b578df37d0b7848e55df8577b3f74f92a69acceeb825" - [[package]] name = "typst-syntax" version = "0.12.0" @@ -2048,12 +2299,6 @@ dependencies = [ "thin-vec", ] -[[package]] -name = "ucd-trie" -version = "0.1.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2896d95c02a80c6d6a5d6e953d479f5ddf2dfdb6a244441010e373ac0fb88971" - [[package]] name = "unicase" version = "2.8.1" @@ -2107,6 +2352,12 @@ dependencies = [ "percent-encoding", ] +[[package]] +name = "utf-8" +version = "0.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09cc8ee72d2a9becf2f2febe0205bbed8fc6615b7cb429ad062dc7b7ddd036a9" + [[package]] name = "utf16_iter" version = "1.0.5" @@ -2131,12 +2382,6 @@ version = "0.2.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426" -[[package]] -name = "version_check" -version = "0.9.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" - [[package]] name = "wait-timeout" version = "0.2.1" 
diff --git a/Cargo.toml b/Cargo.toml index fbaf3ed..dba346f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -10,22 +10,31 @@ required-features = ["cli"] [dependencies] annotate-snippets = {version = "^0.9.1", optional = true} -clap = {version = "^4.5.18", features = ["cargo", "derive", "env", "wrap_help"], optional = true} -clap-verbosity-flag = { version = "3.0.2", optional = true } +clap = {version = "^4.5.18", features = [ + "cargo", + "derive", + "env", + "wrap_help", +], optional = true} +clap-verbosity-flag = {version = "3.0.2", optional = true} clap_complete = {version = "^4.5.2", optional = true} +ego-tree = {version = "0.10.0", optional = true} enum_dispatch = {version = "0.3.13", optional = true} -html_parser = {version = "0.7.0", optional = true} is-terminal = {version = "0.4.3", optional = true} lifetime = {version = "0.1.0", features = ["macros"]} -log = { version = "0.4.25", optional = true } -pretty_env_logger = { version = "0.5.0", optional = true } +log = {version = "0.4.25", optional = true} +pretty_env_logger = {version = "0.5.0", optional = true} pulldown-cmark = {version = "0.10.2", optional = true} reqwest = {version = "^0.11", default-features = false, features = ["json"]} +scraper = {version = "0.23.1", optional = true} serde = {version = "^1.0", features = ["derive"]} serde_json = "^1.0" termcolor = {version = "1.2.0", optional = true} thiserror = "^1.0" -tokio = {version = "^1.0", features = ["macros", "rt-multi-thread"], optional = true} +tokio = {version = "^1.0", features = [ + "macros", + "rt-multi-thread", +], optional = true} typst-syntax = {version = "^0.12.0", optional = true} [dev-dependencies] @@ -34,29 +43,44 @@ assert_matches = "1.5.0" codspeed-criterion-compat = "2.7.0" criterion = "0.5" futures = "0.3" +insta = {version = "1.41.1", features = ["filters"]} +lazy_static = "1.5.0" predicates = "3.0.3" tempfile = "3.5.0" tokio = {version = "^1.0", features = ["macros"]} [features] annotate = ["dep:annotate-snippets"] -cli = 
["annotate", "color", "dep:clap", "dep:clap-verbosity-flag", "dep:enum_dispatch", "dep:is-terminal", "dep:log", "dep:pretty_env_logger", "multithreaded", "typst", "markdown", "html"] +clap-verbosity-flag = ["dep:clap-verbosity-flag"] +cli = [ + "annotate", + "clap-verbosity-flag", + "color", + "dep:clap", + "dep:enum_dispatch", + "dep:is-terminal", + "html", + "log", + "markdown", + "multithreaded", + "pretty_env_logger", + "typst", +] cli-complete = ["cli", "clap_complete"] color = ["annotate-snippets?/color", "dep:termcolor"] default = ["cli", "native-tls"] docker = [] full = ["cli-complete", "docker", "unstable"] -html = ["dep:html_parser"] +html = ["dep:ego-tree", "dep:scraper"] +log = ["dep:log"] markdown = ["dep:pulldown-cmark", "html"] multithreaded = ["dep:tokio"] native-tls = ["reqwest/native-tls"] native-tls-vendored = ["reqwest/native-tls-vendored"] -pulldown-cmark = ["dep:pulldown-cmark"] +pretty_env_logger = ["dep:pretty_env_logger"] +snapshots = [] # Only for testing typst = ["dep:typst-syntax"] unstable = [] -log = ["dep:log"] -pretty_env_logger = ["dep:pretty_env_logger"] -clap-verbosity-flag = ["dep:clap-verbosity-flag"] [lib] name = "languagetool_rust" @@ -72,21 +96,25 @@ license = "MIT" name = "languagetool-rust" readme = "README.md" repository = "https://github.com/jeertmans/languagetool-rust" -rust-version = "1.77.2" +rust-version = "1.77.0" version = "2.1.4" +[package.metadata.docs.rs] +all-features = true +rustdoc-args = ["--cfg", "docsrs"] + [package.metadata.release] pre-release-replacements = [ - {file="CHANGELOG.md", search="Unreleased", replace="{{version}}", min=1}, - {file="CHANGELOG.md", search="\\.\\.\\.HEAD\\)", replace="...{{version}}) {{date}}", exactly=1}, - {file="CHANGELOG.md", search="", replace="\n\n## [Unreleased](https://github.com/jeertmans/languagetool-rust/compare/v{{version}}...HEAD)", exactly=1}, + {file = "CHANGELOG.md", search = "Unreleased", replace = "{{version}}", min = 1}, + {file = "CHANGELOG.md", search = 
"\\.\\.\\.HEAD\\)", replace = "...{{version}}) {{date}}", exactly = 1}, + {file = "CHANGELOG.md", search = "", replace = "\n\n## [Unreleased](https://github.com/jeertmans/languagetool-rust/compare/v{{version}}...HEAD)", exactly = 1}, ] -tag = false publish = false +tag = false -[package.metadata.docs.rs] -all-features = true -rustdoc-args = ["--cfg", "docsrs"] +[profile.dev.package] +insta.opt-level = 3 +similar.opt-level = 3 [[test]] name = "cli" diff --git a/RELEASE-PROCESS.md b/RELEASE-PROCESS.md index 87f21fc..45d3fa9 100644 --- a/RELEASE-PROCESS.md +++ b/RELEASE-PROCESS.md @@ -9,9 +9,11 @@ Note that, by default, every command runs in *dry mode*, and you need to append to actually perform the action. Here are the the following steps to install `cargo-release`: + ```bash cargo install cargo-release ``` + Here are the following steps to release a new version: 1. create a branch `release-x.y.z` from the main branch; diff --git a/src/api/check.rs b/src/api/check.rs index b87fd11..53dd1ce 100644 --- a/src/api/check.rs +++ b/src/api/check.rs @@ -239,6 +239,62 @@ pub struct Data<'source> { pub annotation: Vec>, } +impl Data<'_> { + /// Split data into as few fragments as possible, where each fragment + /// contains (if possible) a maximum of `n` characters in it's + /// annotations' markup and text fields. + /// + /// Pattern str `pat` is used for splitting. 
+ #[must_use] + pub fn split(self, n: usize, pat: &str) -> Vec { + // Build vec of breakpoints and the length of the text + markup at that + // potential breakpoint + let mut break_point_lengths = vec![]; + let mut len = 0; + for (i, ann) in self.annotation.iter().enumerate() { + len += + ann.text.as_deref().unwrap_or("").len() + ann.markup.as_deref().unwrap_or("").len(); + if ann.text.as_ref().is_some_and(|t| t.contains(pat)) { + break_point_lengths.push((i, len)); + } + } + + // Decide which breakpoints to split the annotations at + let mut break_points: Vec = vec![]; + if break_point_lengths.len() > 1 { + let (mut i, mut ii) = (0, 1); + let (mut base, mut curr) = (0, 0); + while ii < break_point_lengths.len() { + curr += break_point_lengths[i].1 - base; + + if break_point_lengths[ii].1 - base + curr > n { + break_points.push(break_point_lengths[i].0); + base = break_point_lengths[i].1; + curr = 0; + } + + i += 1; + ii += 1; + } + } + + // Split annotations based on calculated break points. NOTE(review): annotations after the last break point never seem to reach `split`, and zero break points yields an empty Vec; confirm this is intended + let mut split = Vec::with_capacity(break_points.len()); + let mut iter = self.into_iter(); + let mut taken = 0; + let mut annotations = vec![]; + for break_point in break_points { + while taken != break_point + 1 { + annotations.push(iter.next().unwrap()); + taken += 1; + } + split.push(Data::from_iter(mem::take(&mut annotations))); + } + + split + } +} + impl IntoStatic for Data<'_> { type Static = Data<'static>; fn into_static(self) -> Self::Static { @@ -259,6 +315,15 @@ impl<'source, T: Into>> FromIterator for Data<'source } } +impl<'source> IntoIterator for Data<'source> { + type Item = DataAnnotation<'source>; + type IntoIter = std::vec::IntoIter; + + fn into_iter(self) -> Self::IntoIter { + self.annotation.into_iter() + } +} + impl Serialize for Data<'_> { fn serialize(&self, serializer: S) -> std::result::Result where @@ -601,10 +666,20 @@ impl<'source> Request<'source> { /// /// # Errors /// - /// If `self.text` is none. 
+ /// If `self.text` is [`None`] and `self.data` is [`None`]. pub fn try_split(mut self, n: usize, pat: &str) -> Result> { + // DATA ANNOTATIONS + if let Some(data) = mem::take(&mut self.data) { + return Ok(data + .split(n, pat) + .into_iter() + .map(|d| self.clone().with_data(d)) + .collect()); + } + + // TEXT let text = mem::take(&mut self.text) - .ok_or_else(|| Error::InvalidRequest("missing text field".to_string()))?; + .ok_or_else(|| Error::InvalidRequest("missing text or data field".to_string()))?; let string: &str = match &text { Cow::Owned(s) => s.as_str(), Cow::Borrowed(s) => s, @@ -944,6 +1019,29 @@ impl Response { } annotation } + + /// Joins the given [`Response`] to the current one. + /// + /// This is especially useful when a request was split into multiple + /// requests. + #[must_use] + pub fn append(mut self, mut other: Self) -> Self { + #[cfg(feature = "unstable")] + if let Some(ref mut sr_other) = other.sentence_ranges { + match self.sentence_ranges { + Some(ref mut sr_self) => { + sr_self.append(sr_other); + }, + None => { + std::mem::swap(&mut self.sentence_ranges, &mut other.sentence_ranges); + }, + } + } + + self.matches.append(&mut other.matches); + + self + } } /// Check response with additional context. diff --git a/src/api/server.rs b/src/api/server.rs index 855ad7a..4706e63 100644 --- a/src/api/server.rs +++ b/src/api/server.rs @@ -1,14 +1,17 @@ //! Structure to communicate with some `LanguageTool` server through the API. 
+#[cfg(feature = "multithreaded")] +use crate::api::check; use crate::{ api::{ - check::{self, Request, Response}, + check::{Request, Response}, languages, words, }, error::{Error, Result}, }; #[cfg(feature = "cli")] use clap::Args; +#[cfg(feature = "multithreaded")] use lifetime::IntoStatic; use reqwest::{ header::{HeaderValue, ACCEPT}, @@ -423,15 +426,14 @@ impl ServerClient { "no request; cannot join zero request".to_string(), )); } - let mut tasks = Vec::with_capacity(requests.len()); - requests + let tasks = requests .into_iter() .map(|r| r.into_static()) - .for_each(|request| { + .map(|request| { let server_client = self.clone(); - tasks.push(tokio::spawn(async move { + tokio::spawn(async move { let response = server_client.check(&request).await?; let text = request.text.ok_or_else(|| { Error::InvalidRequest( @@ -440,7 +442,7 @@ impl ServerClient { ) })?; Result::<(Cow<'static, str>, Response)>::Ok((text, response)) - })); + }) }); let mut response_with_context: Option = None; @@ -457,6 +459,45 @@ impl ServerClient { Ok(response_with_context.unwrap()) } + /// Send multiple check requests and join them into a single response, + /// without any context. + /// + /// # Errors + /// + /// If any of the requests has `self.text` or `self.data` field which is + /// [`None`]. 
+ #[cfg(feature = "multithreaded")] + pub async fn check_multiple_and_join_without_context( + &self, + requests: Vec>, + ) -> Result { + let mut response: Option = None; + + let tasks = requests + .into_iter() + .map(|r| r.into_static()) + .map(|request| { + let server_client = self.clone(); + + tokio::spawn(async move { + let response = server_client.check(&request).await?; + Result::::Ok(response) + }) + }); + + // Make requests in sequence. NOTE(review): `response.unwrap()` below panics if `requests` is empty; confirm callers always pass at least one request + for task in tasks { + let resp = task.await.unwrap()?; + + response = Some(match response { + Some(r) => r.append(resp), + None => resp, + }) + } + + Ok(response.unwrap()) + } + /// Send a check request to the server, await for the response and annotate /// it. #[cfg(feature = "annotate")] diff --git a/src/cli/check.rs b/src/cli/check.rs index ae0caee..210350a 100644 --- a/src/cli/check.rs +++ b/src/cli/check.rs @@ -21,7 +21,7 @@ use crate::{ server::ServerClient, }, error::{Error, Result}, - parsers::{parse_html, parse_markdown, parse_typst}, + parsers::{html::parse_html, markdown::parse_markdown, typst::parse_typst}, }; use super::ExecuteSubcommand; @@ -175,6 +175,7 @@ impl ExecuteSubcommand for Command { }; let file_content = std::fs::read_to_string(filename)?; + let (response, text): (check::Response, String) = match &file_type { FileType::Auto => unreachable!(), FileType::Raw => { @@ -192,18 +193,14 @@ impl ExecuteSubcommand for Command { FileType::Typst | FileType::Markdown | FileType::Html => { let data = match file_type { FileType::Typst => parse_typst(&file_content), - FileType::Html => { - let text = parse_html(&file_content); - Data::from_iter([DataAnnotation::new_text(text)]) - }, - FileType::Markdown => { - let text = parse_markdown(&file_content); - Data::from_iter([DataAnnotation::new_text(text)]) - }, + FileType::Html => parse_html(&file_content), + FileType::Markdown => parse_markdown(&file_content), _ => unreachable!(), }; + let requests = (request.clone().with_data(data)) + .split(self.max_length, 
self.split_pattern.as_str()); let response = server_client - .check(&request.clone().with_data(data)) + .check_multiple_and_join_without_context(requests) .await?; (response, file_content) }, diff --git a/src/error.rs b/src/error.rs index b1f258c..2af3644 100644 --- a/src/error.rs +++ b/src/error.rs @@ -125,10 +125,9 @@ mod tests { assert_matches!(error, Error::IO(_)); } - #[ignore] #[test] fn test_error_invalid_request() { - let result = std::fs::read_to_string(""); // TODO + let result = crate::api::check::Request::new().try_get_text(); assert!(result.is_err()); let error: Error = result.unwrap_err().into(); @@ -136,10 +135,9 @@ mod tests { assert_matches!(error, Error::InvalidRequest(_)); } - #[ignore] #[test] fn test_error_invalid_value() { - let result = std::fs::read_to_string(""); // TODO + let result = crate::api::server::parse_port("test"); assert!(result.is_err()); let error: Error = result.unwrap_err().into(); @@ -147,7 +145,6 @@ mod tests { assert_matches!(error, Error::InvalidValue(_)); } - #[ignore] #[tokio::test] async fn test_error_reqwest() { let result = reqwest::get("").await; diff --git a/src/parsers.rs b/src/parsers.rs deleted file mode 100644 index b1f4c7c..0000000 --- a/src/parsers.rs +++ /dev/null @@ -1,211 +0,0 @@ -//! Utilities for parsing the contents of different file types into a text -//! representation that can be sent to the LanguageTool API. - -use crate::api::check::{Data, DataAnnotation}; - -/// Parse the contents of an HTML file into a text format to be sent to the -/// LanguageTool API. 
-#[cfg(feature = "html")] -pub fn parse_html(file_content: impl AsRef) -> String { - use html_parser::Node; - - let mut txt = String::new(); - - let html = html_parser::Dom::parse(file_content.as_ref()).unwrap_or_default(); - let mut children: Vec = html.children.into_iter().rev().collect(); - - fn handle_node(txt: &mut String, node: Node) { - if let Some(e) = node.element() { - match e.name.as_str() { - "head" | "script" | "style" => { - return; - }, - "code" => { - txt.push_str("_code_"); - return; - }, - "a" => { - txt.push_str("_link_"); - return; - }, - "pre" => { - txt.push_str("_pre_"); - txt.push_str("\n\n"); - return; - }, - s => { - let add_children = |txt: &mut String| { - if !e.children.is_empty() { - // Recursively handle children - e.children.clone().into_iter().for_each(|n| { - handle_node(txt, n); - }); - }; - }; - - match s { - "p" | "h1" | "h2" | "h3" | "h4" | "h5" | "h6" | "li" | "td" | "th" => { - while txt.chars().last().is_some_and(|c| c.is_whitespace()) { - txt.pop(); - } - txt.push_str("\n\n"); - add_children(txt); - txt.push_str("\n\n"); - }, - _ => { - add_children(txt); - }, - } - }, - } - } - - if let Some(t) = node.text() { - let mut text = t.trim().to_owned(); - if !text.is_empty() { - let mut chars = t.chars(); - - // Maintain leading/trailing white space, but only a single space - if chars.next().is_some_and(|c| c.is_whitespace()) { - while txt.chars().last().is_some_and(|c| c.is_whitespace()) { - txt.pop(); - } - text.insert(0, ' '); - } - if chars.last().is_some_and(|c| c.is_whitespace()) { - text.push(' '); - } - - txt.push_str(&text); - } - } - } - - while let Some(node) = children.pop() { - handle_node(&mut txt, node); - } - - txt -} - -/// Parse the contents of a Markdown file into a text format to be sent to the -/// LanguageTool API. 
-#[cfg(feature = "markdown")] -pub fn parse_markdown(file_content: impl AsRef) -> String { - use pulldown_cmark::{html, Options, Parser}; - - let mut options = Options::empty(); - options.insert(Options::ENABLE_STRIKETHROUGH); - options.insert(Options::ENABLE_TABLES); - - let parser = Parser::new_ext(file_content.as_ref(), options); - let mut html = String::new(); - html::push_html(&mut html, parser); - - parse_html(html) -} - -/// Parse the contents of a Typst file into a text format to be sent to the -/// LanguageTool API. -#[cfg(feature = "typst")] -pub fn parse_typst(file_content: impl AsRef) -> Data<'static> { - use typst_syntax::{parse, SyntaxKind, SyntaxNode}; - - let mut annotations: Vec = vec![]; - - let parent = parse(file_content.as_ref()); - let mut nodes: Vec<&SyntaxNode> = parent.children().rev().collect(); - - while let Some(node) = nodes.pop() { - let kind = node.kind(); - - // MARKUP NODES - match kind { - // Pure markup - SyntaxKind::SetRule - | SyntaxKind::Ident - | SyntaxKind::ShowRule - | SyntaxKind::Raw - | SyntaxKind::Code - | SyntaxKind::CodeBlock - | SyntaxKind::Math - | SyntaxKind::Equation - | SyntaxKind::Ref - | SyntaxKind::LetBinding - | SyntaxKind::FieldAccess - | SyntaxKind::FuncCall - | SyntaxKind::Args => { - let mut markup = node.text().to_string(); - if markup.is_empty() { - let mut stack: Vec<&SyntaxNode> = node.children().rev().collect(); - while let Some(n) = stack.pop() { - if n.text().is_empty() { - stack.extend(n.children().rev()); - } else { - markup += n.text(); - } - } - } - - annotations.push(DataAnnotation::new_interpreted_markup( - markup, - // This pattern is ignored by LanguageTool, and allows us to avoid whitespace - // issues. 
The following sentence would give an error for - // repeated whitespace otherwise: This has ``` `backticks` - // ``` in it - "_ignore_".to_string(), - )); - continue; - }, - // Markup with valid text interpretations - SyntaxKind::Heading - | SyntaxKind::Markup - | SyntaxKind::EnumItem - | SyntaxKind::ListItem - | SyntaxKind::Emph - | SyntaxKind::Strong => { - let (mut full_text, mut interpreted_as) = (String::new(), String::new()); - let mut stack: Vec<&SyntaxNode> = node.children().rev().collect(); - - while let Some(n) = stack.pop() { - if n.text().is_empty() { - stack.extend(n.children().rev()); - } else { - if matches!(n.kind(), SyntaxKind::Text | SyntaxKind::Space) { - interpreted_as += n.text(); - } - full_text += n.text(); - } - } - - annotations.push(DataAnnotation::new_interpreted_markup( - full_text, - interpreted_as, - )); - continue; - }, - _ => {}, - } - - // NESTED NODES - if node.children().count() > 0 && !matches!(kind, SyntaxKind::Args | SyntaxKind::FuncCall) { - nodes.extend(node.children().rev()); - continue; - } - - // TEXT - if matches!( - kind, - SyntaxKind::Text - | SyntaxKind::SmartQuote - | SyntaxKind::BlockComment - | SyntaxKind::LineComment - | SyntaxKind::Space - | SyntaxKind::Parbreak - ) { - annotations.push(DataAnnotation::new_text(node.text().to_string())); - }; - } - - Data::from_iter(annotations) -} diff --git a/src/parsers/html.rs b/src/parsers/html.rs new file mode 100644 index 0000000..2230777 --- /dev/null +++ b/src/parsers/html.rs @@ -0,0 +1,105 @@ +//! Parse the contents of HTML files into a format parseable by the LanguageTool +//! API. + +use ego_tree::NodeRef; +use scraper::{Html, Node}; + +use crate::{ + api::check::{Data, DataAnnotation}, + parsers::IGNORE, +}; + +/// Parse the contents of an HTML file into a text format to be sent to the +/// LanguageTool API. 
+#[must_use] +pub fn parse_html(file_content: &str) -> Data<'static> { + let mut annotations: Vec = vec![]; + + fn handle_node(annotations: &mut Vec, node: NodeRef<'_, Node>) { + let n = node.value(); + match n { + Node::Element(el) => { + match el.name() { + "head" | "script" | "style" => {}, + + "code" => { + annotations.push(DataAnnotation::new_interpreted_markup( + "...", + IGNORE, + )); + }, + + "img" => { + annotations.push(DataAnnotation::new_interpreted_markup("", IGNORE)); + }, + + s => { + match s { + "p" | "h1" | "h2" | "h3" | "h4" | "h5" | "h6" | "li" | "td" | "th" + | "div" => { + annotations.push(DataAnnotation::new_interpreted_markup( + format!("<{s}>"), + "\n\n", + )); + for node in node.children() { + handle_node(annotations, node); + } + annotations.push(DataAnnotation::new_interpreted_markup( + format!(""), + "\n\n", + )); + }, + _ => { + annotations.push(DataAnnotation::new_markup(format!("<{s}>"))); + for node in node.children() { + handle_node(annotations, node); + } + annotations.push(DataAnnotation::new_markup(format!(""))); + }, + } + }, + } + }, + + Node::Text(t) => { + let mut text = t.trim().to_owned(); + if !text.is_empty() { + let mut chars = t.chars(); + + // Maintain leading/trailing white space, but only a single space + if chars.next().is_some_and(|c| c.is_whitespace()) { + while text.chars().last().is_some_and(|c| c.is_whitespace()) { + text.pop(); + } + text.insert(0, ' '); + } + if chars.last().is_some_and(|c| c.is_whitespace()) { + text.push(' '); + } + + annotations.push(DataAnnotation::new_text(text)) + } else { + annotations.push(DataAnnotation::new_text("\n\n")); + } + }, + + Node::Comment(c) => { + let comment = c.to_string(); + + annotations.push(DataAnnotation::new_interpreted_markup( + format!("",), + format!("\n\n{comment}\n\n"), + )); + }, + + _ => {}, + } + } + + let document = Html::parse_document(file_content); + for node in document.root_element().children() { + handle_node(&mut annotations, node); + } + + 
Data::from_iter(annotations) +} diff --git a/src/parsers/markdown.rs b/src/parsers/markdown.rs new file mode 100644 index 0000000..88ddd61 --- /dev/null +++ b/src/parsers/markdown.rs @@ -0,0 +1,151 @@ +//! Parse the contents of Markdown files into a format parseable by the +//! LanguageTool API. + +use crate::{ + api::check::{Data, DataAnnotation}, + parsers::IGNORE, +}; + +/// Parse the contents of a Markdown file into a text format to be sent to the +/// LanguageTool API. +#[must_use] +pub fn parse_markdown(file_content: &str) -> Data<'_> { + use pulldown_cmark::{Event, Options, Parser, Tag, TagEnd}; + + let mut annotations: Vec = vec![]; + + // Stack to keep track of the current "tag" context + let mut tags = vec![]; + + Parser::new_ext(file_content, Options::all()).for_each(|event| { + match event { + Event::Start(tag) => { + match tag { + // Start list items + Tag::List(_) | Tag::Item => { + annotations.push(DataAnnotation::new_text("- ")); + }, + _ => {}, + } + + tags.push(tag); + }, + Event::End(tag) => { + match tag { + // Separate list items and table cells + TagEnd::List(_) | TagEnd::Item | TagEnd::TableRow | TagEnd::TableHead => { + annotations.push(DataAnnotation::new_text("\n")); + }, + TagEnd::TableCell => { + annotations.push(DataAnnotation::new_text(" | ")); + }, + _ => {}, + }; + + if tags + .last() + .is_some_and(|t| TagEnd::from(t.to_owned()) == tag) + { + tags.pop(); + }; + }, + + Event::Html(s) | Event::InlineHtml(s) => { + let data = super::html::parse_html(s.as_ref()).annotation.into_iter(); + annotations.extend(data); + }, + + Event::Text(mut s) => { + // Add space between sentences + if s.chars() + .last() + .is_some_and(|c| matches!(c, '.' | '!' | '?')) + { + s = pulldown_cmark::CowStr::from(s.to_string() + " "); + } + + let Some(tag) = tags.last() else { + annotations.push(DataAnnotation::new_text(s.to_owned())); + return; + }; + + match tag { + Tag::Heading { level, .. 
} => { + let s = format!("{s}\n"); + annotations.push(DataAnnotation::new_text(format!( + "{} {s}\n", + "#".repeat(*level as usize) + ))); + }, + + Tag::Emphasis => { + annotations + .push(DataAnnotation::new_interpreted_markup(format!("_{s}_"), s)) + }, + Tag::Strong => { + annotations.push(DataAnnotation::new_interpreted_markup( + format!("**{s}**"), + s, + )) + }, + Tag::Strikethrough => { + annotations + .push(DataAnnotation::new_interpreted_markup(format!("~{s}~"), s)) + }, + + Tag::Link { + title, dest_url, .. + } => { + annotations.push(DataAnnotation::new_interpreted_markup( + format!("[{title}]({dest_url})"), + title.to_string(), + )); + }, + + // No changes necessary + Tag::Paragraph + | Tag::List(_) + | Tag::Item + | Tag::BlockQuote + | Tag::TableCell => { + annotations.push(DataAnnotation::new_text(s)); + }, + + // Just markup + Tag::CodeBlock(_) | Tag::Image { .. } => { + annotations.push(DataAnnotation::new_markup(s)); + }, + _ => {}, + } + }, + Event::Code(s) => { + annotations.push(DataAnnotation::new_interpreted_markup(s, IGNORE)); + }, + + Event::HardBreak => { + annotations.push(DataAnnotation::new_text("\n\n")); + }, + Event::SoftBreak => { + if let Some(last) = annotations.last() { + // Don't add space if the last text already ends with a space + if last + .text + .as_ref() + .is_some_and(|t| t.chars().last().is_some_and(|c| c.is_ascii_whitespace())) + || last.interpret_as.as_ref().is_some_and(|t| { + t.chars().last().is_some_and(|c| c.is_ascii_whitespace()) + }) + { + return; + }; + } + + annotations.push(DataAnnotation::new_text(" ")); + }, + + Event::FootnoteReference(_) | Event::TaskListMarker(_) | Event::Rule => {}, + }; + }); + + Data::from_iter(annotations) +} diff --git a/src/parsers/mod.rs b/src/parsers/mod.rs new file mode 100644 index 0000000..bc4f51a --- /dev/null +++ b/src/parsers/mod.rs @@ -0,0 +1,14 @@ +//! Utilities for parsing the contents of different file types into a format +//! 
representation that can be parsed by the LanguageTool API. + +#![cfg(feature = "html")] +pub mod html; + +#[cfg(feature = "markdown")] +pub mod markdown; + +#[cfg(feature = "typst")] +pub mod typst; + +/// Pattern that is ignored by the LanguageTool API. +const IGNORE: &str = "_"; diff --git a/src/parsers/typst.rs b/src/parsers/typst.rs new file mode 100644 index 0000000..812bf24 --- /dev/null +++ b/src/parsers/typst.rs @@ -0,0 +1,101 @@ +//! Parse the contents of Typst files into a format parseable by the +//! LanguageTool API. + +use crate::api::check::{Data, DataAnnotation}; + +/// Parse the contents of a Typst file into a text format to be sent to the +/// LanguageTool API. +pub fn parse_typst(file_content: impl AsRef) -> Data<'static> { + use typst_syntax::{parse, SyntaxKind, SyntaxNode}; + + let mut annotations: Vec = vec![]; + + let parent = parse(file_content.as_ref()); + let mut nodes: Vec<&SyntaxNode> = parent.children().rev().collect(); + + while let Some(node) = nodes.pop() { + let kind = node.kind(); + + // MARKUP NODES + match kind { + // Pure markup + SyntaxKind::SetRule + | SyntaxKind::Ident + | SyntaxKind::ShowRule + | SyntaxKind::Raw + | SyntaxKind::Code + | SyntaxKind::CodeBlock + | SyntaxKind::Math + | SyntaxKind::Equation + | SyntaxKind::Ref + | SyntaxKind::LetBinding + | SyntaxKind::FieldAccess + | SyntaxKind::FuncCall + | SyntaxKind::Args => { + let mut markup = node.text().to_string(); + if markup.is_empty() { + let mut stack: Vec<&SyntaxNode> = node.children().rev().collect(); + while let Some(n) = stack.pop() { + if n.text().is_empty() { + stack.extend(n.children().rev()); + } else { + markup += n.text(); + } + } + } + + annotations.push(DataAnnotation::new_markup(markup)); + continue; + }, + // Markup with valid text interpretations + SyntaxKind::Heading + | SyntaxKind::Markup + | SyntaxKind::EnumItem + | SyntaxKind::ListItem + | SyntaxKind::Emph + | SyntaxKind::Strong => { + let (mut full_text, mut interpreted_as) = (String::new(), 
String::new()); + let mut stack: Vec<&SyntaxNode> = node.children().rev().collect(); + + while let Some(n) = stack.pop() { + if n.text().is_empty() { + stack.extend(n.children().rev()); + } else { + if matches!(n.kind(), SyntaxKind::Text | SyntaxKind::Space) { + interpreted_as += n.text(); + } + full_text += n.text(); + } + } + + annotations.push(DataAnnotation::new_interpreted_markup( + full_text, + interpreted_as, + )); + continue; + }, + _ => {}, + } + + // NESTED NODES + if node.children().count() > 0 && !matches!(kind, SyntaxKind::Args | SyntaxKind::FuncCall) { + nodes.extend(node.children().rev()); + continue; + } + + // TEXT + if matches!( + kind, + SyntaxKind::Text + | SyntaxKind::SmartQuote + | SyntaxKind::BlockComment + | SyntaxKind::LineComment + | SyntaxKind::Space + | SyntaxKind::Parbreak + ) { + annotations.push(DataAnnotation::new_text(node.text().to_string())); + }; + } + + Data::from_iter(annotations) +} diff --git a/tests/cli.rs b/tests/cli.rs index f385ead..3ca3e98 100644 --- a/tests/cli.rs +++ b/tests/cli.rs @@ -1,13 +1,35 @@ +use std::path::PathBuf; + use assert_cmd::Command; use predicates::{ boolean::OrPredicate, str::{contains, is_empty, is_match}, }; +lazy_static::lazy_static! { + static ref PATH_ROOT: PathBuf = PathBuf::from(env!("CARGO_MANIFEST_DIR")); + static ref PATH_SAMPLE_FILES: PathBuf = PATH_ROOT.join("tests").join("sample_files"); +} + +const PATH_FILTERS: [(&str, &str); 1] = [(r" --> .*[\/].*\n", " --> [path]\n")]; +macro_rules! 
assert_snapshot { + ($label: expr, $snap: expr) => { + insta::with_settings!({filters => PATH_FILTERS}, { + insta::assert_snapshot!( + $label, + $snap + ); + }); + }; +} + +fn get_cmd() -> Command { + Command::cargo_bin("ltrs").unwrap() +} + #[test] fn test_basic_check_text() { - let mut cmd = Command::cargo_bin("ltrs").unwrap(); - let assert = cmd + let assert = get_cmd() .arg("check") .arg("-t") .arg("\"some text that is given as text\"") @@ -17,11 +39,11 @@ fn test_basic_check_text() { #[test] fn test_basic_check_no_errors() { - let mut cmd = Command::cargo_bin("ltrs").unwrap(); - let assert = cmd + let assert = get_cmd() .arg("check") - .arg("-t") + .arg("-l") .arg("en-US") + .arg("-t") .arg("\"I am a star.\"") .assert(); assert @@ -31,8 +53,7 @@ fn test_basic_check_no_errors() { #[test] fn test_basic_check_empty_text() { - let mut cmd = Command::cargo_bin("ltrs").unwrap(); - let assert = cmd.arg("check").arg("--text=").assert(); + let assert = get_cmd().arg("check").arg("--text=").assert(); assert .success() .stderr(is_match(r".*WARN.* No input text was provided, skipping.").unwrap()); @@ -40,8 +61,7 @@ fn test_basic_check_empty_text() { #[test] fn test_basic_check_data() { - let mut cmd = Command::cargo_bin("ltrs").unwrap(); - let assert = cmd + let assert = get_cmd() .arg("check") .arg("-d") .arg( @@ -54,8 +74,7 @@ fn test_basic_check_data() { #[test] fn test_basic_check_wrong_data_1() { - let mut cmd = Command::cargo_bin("ltrs").unwrap(); - let assert = cmd + let assert = get_cmd() .arg("check") .arg("-d") .arg("\"some text that is given as text\"") @@ -65,15 +84,13 @@ fn test_basic_check_wrong_data_1() { #[test] fn test_basic_check_wrong_data_2() { - let mut cmd = Command::cargo_bin("ltrs").unwrap(); - let assert = cmd.arg("check").arg("-d").arg("\"{}\"").assert(); + let assert = get_cmd().arg("check").arg("-d").arg("\"{}\"").assert(); assert.failure().stderr(contains("invalid value")); } #[test] fn test_basic_check_wrong_data_3() { - let mut cmd = 
Command::cargo_bin("ltrs").unwrap(); - let assert = cmd + let assert = get_cmd() .arg("check") .arg("-d") .arg("\"some text that is given as text\"") @@ -83,8 +100,7 @@ fn test_basic_check_wrong_data_3() { #[test] fn test_basic_check_piped() { - let mut cmd = Command::cargo_bin("ltrs").unwrap(); - let assert = cmd + let assert = get_cmd() .arg("check") .write_stdin("some text that is written to stdin") .assert(); @@ -93,8 +109,7 @@ fn test_basic_check_piped() { #[test] fn test_basic_check_stdin_verbose() { - let mut cmd = Command::cargo_bin("ltrs").unwrap(); - let assert = cmd + let assert = get_cmd() .arg("check") .arg("-v") .arg("-l") @@ -112,8 +127,10 @@ fn test_basic_check_file() { let mut file = tempfile::NamedTempFile::new().unwrap(); writeln!(file, "Some text with a error inside.").unwrap(); - let mut cmd = Command::cargo_bin("ltrs").unwrap(); - let assert = cmd.arg("check").arg(file.path().to_str().unwrap()).assert(); + let assert = get_cmd() + .arg("check") + .arg(file.path().to_str().unwrap()) + .assert(); assert.success(); } @@ -127,8 +144,7 @@ fn test_basic_check_files() { let mut file2 = tempfile::NamedTempFile::new().unwrap(); writeln!(file2, "Another text with an eror.").unwrap(); - let mut cmd = Command::cargo_bin("ltrs").unwrap(); - let assert = cmd + let assert = get_cmd() .arg("check") .arg(file1.path().to_str().unwrap()) .arg(file2.path().to_str().unwrap()) @@ -145,8 +161,7 @@ fn test_basic_check_files_with_empty_file() { let file2 = tempfile::NamedTempFile::new().unwrap(); - let mut cmd = Command::cargo_bin("ltrs").unwrap(); - let assert = cmd + let assert = get_cmd() .arg("check") .arg("-v") .arg(file1.path().to_str().unwrap()) @@ -159,8 +174,7 @@ fn test_basic_check_files_with_empty_file() { #[test] fn test_basic_check_unexisting_file() { - let mut cmd = Command::cargo_bin("ltrs").unwrap(); - let assert = cmd + let assert = get_cmd() .arg("check") .arg("some_file_path_that_should_not_exist.txt") .assert(); @@ -169,8 +183,7 @@ fn 
test_basic_check_unexisting_file() { #[test] fn test_check_with_language() { - let mut cmd = Command::cargo_bin("ltrs").unwrap(); - let assert = cmd + let assert = get_cmd() .arg("check") .arg("-t") .arg("\"some text that is given as text\"") @@ -182,8 +195,7 @@ fn test_check_with_language() { #[test] fn test_check_with_wrong_language() { - let mut cmd = Command::cargo_bin("ltrs").unwrap(); - let assert = cmd + let assert = get_cmd() .arg("check") .arg("-t") .arg("\"some text that is given as text\"") @@ -195,8 +207,7 @@ fn test_check_with_wrong_language() { #[test] fn test_check_with_unexisting_language() { - let mut cmd = Command::cargo_bin("ltrs").unwrap(); - let assert = cmd + let assert = get_cmd() .arg("check") .arg("-t") .arg("\"some text that is given as text\"") @@ -211,8 +222,7 @@ fn test_check_with_unexisting_language() { #[test] fn test_check_with_username_and_key() { // TODO: remove the "invalid request" predicate as of LT 6.0 - let mut cmd = Command::cargo_bin("ltrs").unwrap(); - let assert = cmd + let assert = get_cmd() .arg("check") .arg("-t") .arg("\"some text that is given as text\"") @@ -229,8 +239,7 @@ fn test_check_with_username_and_key() { #[test] fn test_check_with_username_only() { - let mut cmd = Command::cargo_bin("ltrs").unwrap(); - let assert = cmd + let assert = get_cmd() .arg("check") .arg("-t") .arg("\"some text that is given as text\"") @@ -244,8 +253,7 @@ fn test_check_with_username_only() { #[test] fn test_check_with_key_only() { - let mut cmd = Command::cargo_bin("ltrs").unwrap(); - let assert = cmd + let assert = get_cmd() .arg("check") .arg("-t") .arg("\"some text that is given as text\"") @@ -259,8 +267,7 @@ fn test_check_with_key_only() { #[test] fn test_check_with_dict() { - let mut cmd = Command::cargo_bin("ltrs").unwrap(); - let assert = cmd + let assert = get_cmd() .arg("check") .arg("-t") .arg("\"some text that is given as text\"") @@ -272,8 +279,7 @@ fn test_check_with_dict() { #[test] fn test_check_with_dicts() { - let 
mut cmd = Command::cargo_bin("ltrs").unwrap(); - let assert = cmd + let assert = get_cmd() .arg("check") .arg("-t") .arg("\"some text that is given as text\"") @@ -285,8 +291,7 @@ fn test_check_with_dicts() { #[test] fn test_check_with_preferred_variant() { - let mut cmd = Command::cargo_bin("ltrs").unwrap(); - let assert = cmd + let assert = get_cmd() .arg("check") .arg("-t") .arg("\"some text that is given as text\"") @@ -298,8 +303,7 @@ fn test_check_with_preferred_variant() { #[test] fn test_check_with_preferred_variants() { - let mut cmd = Command::cargo_bin("ltrs").unwrap(); - let assert = cmd + let assert = get_cmd() .arg("check") .arg("-t") .arg("\"some text that is given as text\"") @@ -311,8 +315,7 @@ fn test_check_with_preferred_variants() { #[test] fn test_check_with_language_and_preferred_variant() { - let mut cmd = Command::cargo_bin("ltrs").unwrap(); - let assert = cmd + let assert = get_cmd() .arg("check") .arg("-t") .arg("\"some text that is given as text\"") @@ -329,8 +332,7 @@ fn test_check_with_language_and_preferred_variant() { #[test] fn test_check_with_enabled_rule() { - let mut cmd = Command::cargo_bin("ltrs").unwrap(); - let assert = cmd + let assert = get_cmd() .arg("check") .arg("-t") .arg("\"some text that is given as text\"") @@ -342,8 +344,7 @@ fn test_check_with_enabled_rule() { #[test] fn test_check_with_enabled_rules() { - let mut cmd = Command::cargo_bin("ltrs").unwrap(); - let assert = cmd + let assert = get_cmd() .arg("check") .arg("-t") .arg("\"some text that is given as text\"") @@ -355,8 +356,7 @@ fn test_check_with_enabled_rules() { #[test] fn test_check_with_disabled_rule() { - let mut cmd = Command::cargo_bin("ltrs").unwrap(); - let assert = cmd + let assert = get_cmd() .arg("check") .arg("-t") .arg("\"some text that is given as text\"") @@ -368,8 +368,7 @@ fn test_check_with_disabled_rule() { #[test] fn test_check_with_disabled_rules() { - let mut cmd = Command::cargo_bin("ltrs").unwrap(); - let assert = cmd + let assert = 
get_cmd() .arg("check") .arg("-t") .arg("\"some text that is given as text\"") @@ -381,8 +380,7 @@ fn test_check_with_disabled_rules() { #[test] fn test_check_with_enabled_category() { - let mut cmd = Command::cargo_bin("ltrs").unwrap(); - let assert = cmd + let assert = get_cmd() .arg("check") .arg("-t") .arg("\"some text that is given as text\"") @@ -394,8 +392,7 @@ fn test_check_with_enabled_category() { #[test] fn test_check_with_enabled_categories() { - let mut cmd = Command::cargo_bin("ltrs").unwrap(); - let assert = cmd + let assert = get_cmd() .arg("check") .arg("-t") .arg("\"some text that is given as text\"") @@ -407,8 +404,7 @@ fn test_check_with_enabled_categories() { #[test] fn test_check_with_disabled_category() { - let mut cmd = Command::cargo_bin("ltrs").unwrap(); - let assert = cmd + let assert = get_cmd() .arg("check") .arg("-t") .arg("\"some text that is given as text\"") @@ -420,8 +416,7 @@ fn test_check_with_disabled_category() { #[test] fn test_check_with_disabled_categories() { - let mut cmd = Command::cargo_bin("ltrs").unwrap(); - let assert = cmd + let assert = get_cmd() .arg("check") .arg("-t") .arg("\"some text that is given as text\"") @@ -433,8 +428,7 @@ fn test_check_with_disabled_categories() { #[test] fn test_check_with_enabled_only_rule() { - let mut cmd = Command::cargo_bin("ltrs").unwrap(); - let assert = cmd + let assert = get_cmd() .arg("check") .arg("-t") .arg("\"some text that is given as text\"") @@ -447,8 +441,7 @@ fn test_check_with_enabled_only_rule() { #[test] fn test_check_with_enabled_only_category() { - let mut cmd = Command::cargo_bin("ltrs").unwrap(); - let assert = cmd + let assert = get_cmd() .arg("check") .arg("-t") .arg("\"some text that is given as text\"") @@ -461,8 +454,7 @@ fn test_check_with_enabled_only_category() { #[test] fn test_check_with_enabled_only_without_enabled() { - let mut cmd = Command::cargo_bin("ltrs").unwrap(); - let assert = cmd + let assert = get_cmd() .arg("check") .arg("-t") .arg("\"some 
text that is given as text\"") @@ -473,8 +465,7 @@ fn test_check_with_enabled_only_without_enabled() { #[test] fn test_check_with_picky_level() { - let mut cmd = Command::cargo_bin("ltrs").unwrap(); - let assert = cmd + let assert = get_cmd() .arg("check") .arg("-t") .arg("\"some text that is given as text\"") @@ -486,8 +477,7 @@ fn test_check_with_picky_level() { #[test] fn test_check_with_unexisting_level() { - let mut cmd = Command::cargo_bin("ltrs").unwrap(); - let assert = cmd + let assert = get_cmd() .arg("check") .arg("-t") .arg("\"some text that is given as text\"") @@ -499,23 +489,20 @@ fn test_check_with_unexisting_level() { #[test] fn test_languages() { - let mut cmd = Command::cargo_bin("ltrs").unwrap(); - let assert = cmd.arg("languages").assert(); + let assert = get_cmd().arg("languages").assert(); assert.success(); } #[test] fn test_ping() { - let mut cmd = Command::cargo_bin("ltrs").unwrap(); - let assert = cmd.arg("ping").assert(); + let assert = get_cmd().arg("ping").assert(); assert.success().stdout(contains("PONG! 
Delay: ")); } #[test] fn test_words() { // TODO: remove the "invalid request" predicate as of LT 6.0 - let mut cmd = Command::cargo_bin("ltrs").unwrap(); - let assert = cmd + let assert = get_cmd() .arg("words") .arg("--username") .arg("user") @@ -531,8 +518,7 @@ fn test_words() { #[test] fn test_words_add() { // TODO: remove the "invalid request" predicate as of LT 6.0 - let mut cmd = Command::cargo_bin("ltrs").unwrap(); - let assert = cmd + let assert = get_cmd() .arg("words") .arg("add") .arg("--username") @@ -549,8 +535,7 @@ fn test_words_add() { #[test] fn test_words_delete() { - let mut cmd = Command::cargo_bin("ltrs").unwrap(); - let assert = cmd + let assert = get_cmd() .arg("words") .arg("delete") .arg("--username") @@ -564,3 +549,45 @@ fn test_words_delete() { contains("invalid request"), )); } + +#[cfg_attr(not(feature = "snapshots"), ignore)] +#[test] +fn test_check_file_typst() { + let output = get_cmd() + .arg("check") + .arg(PATH_SAMPLE_FILES.join("example.typ")) + .output() + .unwrap(); + assert_snapshot!( + "autodetect_typst_file", + String::from_utf8(output.stdout).unwrap() + ); +} + +#[cfg_attr(not(feature = "snapshots"), ignore)] +#[test] +fn test_check_file_html() { + let output = get_cmd() + .arg("check") + .arg(PATH_SAMPLE_FILES.join("example.html")) + .output() + .unwrap(); + assert_snapshot!( + "autodetect_html_file", + String::from_utf8(output.stdout).unwrap() + ); +} + +#[cfg_attr(not(feature = "snapshots"), ignore)] +#[test] +fn test_check_file_markdown() { + let output = get_cmd() + .arg("check") + .arg(PATH_ROOT.join("README.md")) + .output() + .unwrap(); + assert_snapshot!( + "autodetect_markdown_file", + String::from_utf8(output.stdout).unwrap() + ); +} diff --git a/tests/sample_files/example.html b/tests/sample_files/example.html new file mode 100644 index 0000000..d0de15a --- /dev/null +++ b/tests/sample_files/example.html @@ -0,0 +1,123 @@ + + + + +/home/rolv/Documents/zk/program/i4BJbPUbyR_stremio.md + + + + + +al + + + + + +

+ Fork me? Fork you, @octocat! Here is a link: makarainen.net +

+ + +#[cfg(feature = "html")] +pub fn parse_html(file_content: impl AsRef<str>) -> String { + use html_parser::{ElementVariant, Node}; + use select::{document::Document, node::Data, predicate}; + + let mut txt = String::new(); + + + +

Hello world

+ + +
+
+ Task +
+
+
+
+ Test an incorect spling +
+
+
+
+ +
+ +

+ Sean made a change +

+
+ + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Header 1Header 2Header 3Header 4
Row 1 Col 1Row 1 Col 2Row 1 Col 3Row 1 Col 4
Row 2 Col 1Row 2 Col 2Row 2 Col 3Row 2 Col 4
Row 3 Col 1Row 3 Col 2Row 3 Col 3Row 3 Col 4
+
+ + + diff --git a/tests/sample_files/example.typ b/tests/sample_files/example.typ new file mode 100644 index 0000000..8aae2ad --- /dev/null +++ b/tests/sample_files/example.typ @@ -0,0 +1,147 @@ +#set page(width: 10cm, height: auto) + += Introduction +In this report, we will explore the +various factors that influence _fluid +dynamics_ in glaciers and how they +contribute to the formation and +behaviour of these natural structures. + ++ The climate + - Temperatre + - Precipitation ++ The topography ++ The geology + +Glaciers as the one shown in +@glaciers will cease to exist if +we don't take action soon! + +#figure( + image("glacier.jpg", width: 70%), + caption: [ + _Glaciers_ form an important part + of the earth's climate system. + ], +) + + += Methods +We follow the glacier melting models +established in @glacier-melt. + +#bibliography("works.bib") + +The flow rate of a glacier is given +by the following equation: + +$ Q = rho A v + "time offset" $ + +Total displaced soil by glacial flow: + +$ 7.32 beta + + sum_(i=0)^nabla + (Q_i (a_i - epsilon)) / 2 $ + += Tables + +/* Text in a comment +* block. */ +// Text in a regular comment. 
+ +#table( + columns: (1fr, auto, auto), + inset: 10pt, + align: horizon, + table.header( + [], [*Volume*], [*Parameters*], + ), + image("cylinder.svg"), + $ pi h (D^2 - d^2) / 4 $, + [ + $h$: height \ + $D$: outer radius \ + $d$: inner radius + ], + image("tetrahedron.svg"), + $ sqrt(2) / 12 a^3 $, + [$a$: edge length] +) + +#set table( + stroke: none, + gutter: 0.2em, + fill: (x, y) => + if x == 0 or y == 0 { gray }, + inset: (right: 1.5em), +) + +#show table.cell: it => { + if it.x == 0 or it.y == 0 { + set text(white) + strong(it) + } else if it.body == [] { + // Replace empty cells with 'N/A' + pad(..it.inset)[_N/A_] + } else { + it + } +} + +#let a = table.cell( + fill: green.lighten(60%), +)[A] +#let b = table.cell( + fill: aqua.lighten(60%), +)[B] + +#table( + columns: 4, + [], [Exam 1], [Exam 2], [Exam 3], + + [John], [], a, [], + [Mary], [], a, a, + [Robert], b, a, b, +) + += Code blocks + +Adding `rbx` to `rcx` gives +the desired result. + +What is ```rust fn main()``` in Rust +would be ```c int main()``` in C. + +```rust +fn main() { + println!("Hello World!"); +} +``` + +This has ``` `backticks` ``` in it +(but the spaces are trimmed). And +``` here``` the leading space is +also trimmed. + += Fibonacci sequence +The Fibonacci sequence is defined through the +recurrence relation $F_n = F_(n-1) + F_(n-2)$. 
+It can also be expressed in _closed form:_ + +$ F_n = round(1 / sqrt(5) phi.alt^n), quad + phi.alt = (1 + sqrt(5)) / 2 $ + +#let count = 8 +#let nums = range(1, count + 1) +#let fib(n) = ( + if n <= 2 { 1 } + else { fib(n - 1) + fib(n - 2) } +) + +The first #count numbers of the sequence are: + +#align(center, table( + columns: count, + ..nums.map(n => $F_#n$), + ..nums.map(n => str(fib(n))), +)) diff --git a/tests/snapshots/cli__autodetect_html_file.snap b/tests/snapshots/cli__autodetect_html_file.snap new file mode 100644 index 0000000..43fb0bb --- /dev/null +++ b/tests/snapshots/cli__autodetect_html_file.snap @@ -0,0 +1,25 @@ +--- +source: tests/cli.rs +expression: "String::from_utf8(output.stdout).unwrap()" +--- +error[MORFOLOGIK_RULE_EN_US]: Possible spelling mistake found. + --> [path] + | +4 | [path] + | +5 | ...ask
Test an incorect spling
... + | ^^^^^^^^ Possible spelling mistake + | -------- help: incorrect + | +error[MORFOLOGIK_RULE_EN_US]: Possible spelling mistake found. + --> [path] + | +5 | ...>
Test an incorect spling
... + | ^^^^^^ Possible spelling mistake + | ------ help: spring, spying, sling, spline, splint, ... (2 not shown) + | diff --git a/tests/snapshots/cli__autodetect_markdown_file.snap b/tests/snapshots/cli__autodetect_markdown_file.snap new file mode 100644 index 0000000..56f083c --- /dev/null +++ b/tests/snapshots/cli__autodetect_markdown_file.snap @@ -0,0 +1,109 @@ +--- +source: tests/cli.rs +expression: "String::from_utf8(output.stdout).unwrap()" +--- +error[COMMA_PARENTHESIS_WHITESPACE]: Don’t put a space before the full stop. + --> [path] + | +6 | ...ges and is free to use, more on that on_ [](https://languagetool.org/)_. __There is a public API (with a free tie... + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Use of whitespace before comma and before/after parentheses + | --------------------------------- help: . + | +error[WHITESPACE_RULE]: Possible typo: you repeated a whitespace + --> [path] + | +11 | ...rs very easily via Rust code! _Crates.io docs.rs codecov- - [](#about) - [](#cli-referenc... + | ^^^^^^^^^ Whitespace repetition (bad formatting) + | --------- help: + | +error[COMMA_PARENTHESIS_WHITESPACE]: Don’t put a space before the full stop. + --> [path] + | +13 | ...on. Installation guidelines can be found [](https://www.docker.com/get-started/). On Linux platforms, you might need to c... + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Use of whitespace before comma and before/after parentheses + | ----------------------------------------- help: . + | +error[COMMA_PARENTHESIS_WHITESPACE]: Don’t put a space before the full stop. + --> [path] + | +14 | ...vent the _sudo privilege issue_ by doing [](https://docs.docker.com/engine/install/linux-postinstall/). ## API Reference + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Use of whitespace before comma and before/after parentheses + | --------------------------------------------------------------- help: . + | +error[COMMA_PARENTHESIS_WHITESPACE]: Don’t put a space before the full stop. 
+ --> [path] + | +5 | ... or crate, then we recommend reading the [](https://docs.rs/languagetool-rust). To use LanguageTool-Rust in your Rust p... + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Use of whitespace before comma and before/after parentheses + | --------------------------------------- help: . + | +error[MORFOLOGIK_RULE_EN_US]: Possible spelling mistake found. + --> [path] + | +5 | ...piling LTRS. #### Default Features - - **cli**: Adds command-line related methods for ... + | ^^^^^^^ Possible spelling mistake + | ------- help: CLI, Clip, CGI, CPI, CSI, ... (1428 not shown) + | +error[MORFOLOGIK_RULE_EN_US]: Possible spelling mistake found. + --> [path] + | +9 | ...ate**, **color**, **multithreaded**. - **native-tls**: Enables TLS functionality provided by ... + | ^^^^^^^^^^^^^^ Possible spelling mistake + | -------------- help: natives, natively, nativists + | +error[MORFOLOGIK_RULE_EN_US]: Possible spelling mistake found. + --> [path] + | +3 | ...annotate results from check request. - **cli-complete**: Adds commands to generate completion f... + | ^^^^^^^^^^^^^^^^ Possible spelling mistake + | ---------------- help: incomplete + | +error[MORFOLOGIK_RULE_EN_US]: Possible spelling mistake found. + --> [path] + | +6 | ...shells. This feature also activates the **cli** feature. Enter ltrs completions --help ... + | ^^^^^^^ Possible spelling mistake + | ------- help: CLI, clip, CGI, CPI, CSI, ... (1461 not shown) + | +error[MORFOLOGIK_RULE_EN_US]: Possible spelling mistake found. + --> [path] + | +10 | ...es color outputting in the terminal. If **cli** feature is also enabled, the --color= [path] + | +13 | ...d**: Enables multithreaded requests. - **native-tls-vendored**: Enables the vendored feature of native... + | ^^^^^^^^^^^^^^^^^^^^^^^ Possible spelling mistake + | ----------------------- help: native-TLS-vendored + | +error[MORFOLOGIK_RULE_EN_US]: Possible spelling mistake found. + --> [path] + | +14 | ...u are planning to use HTTPS servers. 
- **undoc**: Adds more fields to JSON responses tha... + | ^^^^^^^^^ Possible spelling mistake + | --------- help: undo, undock, undos + | +error[THE_CC]: It appears that a noun is missing after “the”. + --> [path] + | +16 | ... JSON responses that are not present in the [](https://languagetool.org/http-api/sw... + | ^^^ the and + | --- + | +error[CONSECUTIVE_SPACES]: It seems like there are too many consecutive spaces here. + --> [path] + | +16 | ...ON responses that are not present in the [](https://languagetool.org/http-api/swagger-ui/#!/default/) but might be present in some cases. All ... + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Two consecutive spaces + | -------------------------------------------------------------- help: + | +error[MORFOLOGIK_RULE_EN_US]: Possible spelling mistake found. + --> [path] + | +20 | ...cases. All added fields are stored in a hashmap as JSON values. ## Related Projects ... + | ^^^^^^^ Possible spelling mistake + | ------- help: hash map + | diff --git a/tests/snapshots/cli__autodetect_typst_file.snap b/tests/snapshots/cli__autodetect_typst_file.snap new file mode 100644 index 0000000..f63887b --- /dev/null +++ b/tests/snapshots/cli__autodetect_typst_file.snap @@ -0,0 +1,74 @@ +--- +source: tests/cli.rs +expression: "String::from_utf8(output.stdout).unwrap()" +--- +error[MORFOLOGIK_RULE_EN_US]: Possible spelling mistake. ‘behaviour’ is British English. + --> [path] + | +7 | ...ow they contribute to the formation and behaviour of these natural structures. + The cli... + | ^^^^^^^^^ Possible spelling mistake + | --------- help: behavior + | +error[MORFOLOGIK_RULE_EN_US]: Possible spelling mistake found. + --> [path] + | +9 | ...behaviour of these natural structures. + The climate - Temperatre - Precipitation + The topography + The... 
+ | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Possible spelling mistake + | ----------------------------- help: Temperature, Temperate + | +error[COMMA_PARENTHESIS_WHITESPACE]: Don’t put a space before the full stop. + --> [path] + | +8 | ...he glacier melting models established in @glacier-melt. bibliography("works.bib") The flow ra... + | ^^^^^^^^^^^^^^^ Use of whitespace before comma and before/after parentheses + | --------------- help: . + | +error[CONSECUTIVE_SPACES]: It seems like there are too many consecutive spaces here. + --> [path] + | +1 | = Code blocks Adding `rbx` to `rcx` gives the desired result. What... + | ^^^^^^^ Two consecutive spaces + | ------- help: + | +error[CONSECUTIVE_SPACES]: It seems like there are too many consecutive spaces here. + --> [path] + | +1 | = Code blocks Adding `rbx` to `rcx` gives the desired result. What is ```ru... + | ^^^^^^^ Two consecutive spaces + | ------- help: + | +error[TO_NON_BASE]: The verb after “to” should be in the base form as part of the to-infinitive. A verb can take many forms, but the base form is always used in the to-infinitive. + --> [path] + | +2 | = Code blocks Adding `rbx` to `rcx` gives the desired result. What is ```rust fn... + | ^^^^^ 'to' + non-base form + | ----- help: give + | +error[CONSECUTIVE_SPACES]: It seems like there are too many consecutive spaces here. + --> [path] + | +4 | ...`rcx` gives the desired result. What is ```rust fn main()``` in Rust would be ```c int main()``` in C... + | ^^^^^^^^^^^^^^^^^^^^^^ Two consecutive spaces + | ---------------------- help: + | +error[CONSECUTIVE_SPACES]: It seems like there are too many consecutive spaces here. + --> [path] + | +5 | ...is ```rust fn main()``` in Rust would be ```c int main()``` in C. ```rust fn main() { println!(... + | ^^^^^^^^^^^^^^^^^^^^ Two consecutive spaces + | -------------------- help: + | +error[CONSECUTIVE_SPACES]: It seems like there are too many consecutive spaces here. 
+ --> [path] + | +8 | ...rintln!("Hello World!"); } ``` This has ``` `backticks` ``` in it (but the spaces are trimmed). And ... + | ^^^^^^^^^^^^^^^^^^^^^ Two consecutive spaces + | --------------------- help: + | +error[COMMA_PARENTHESIS_WHITESPACE]: Don’t put a space before the full stop. + --> [path] + | +18 | ... defined through the recurrence relation $F_n = F_(n-1) + F_(n-2)$. It can also be expressed in _closed for... + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^ Use of whitespace before comma and before/after parentheses + | --------------------------- help: . + |