Skip to content

Commit 8964f6e

Browse files
maxcnunesmaxclaus
authored andcommitted
Add lint for broken doc links
Fix false positives on broken link detection Refactor variable names Fix doc comment about broken link lint Refactor, remove not used variable Improve broken link to catch more cases and span point to whole link Include reason why a link is considered broken Drop some checker because rustdoc already warn about them Refactor to use a single enum instead of multiple bool variables Fix lint warnings Rename function to collect broken links Warn directly instead of collecting all entries first Iterate directly rather than collecting Temporary change to confirm with code reviewer the next steps Handle broken links as part of the fake_broken_link_callback handler Simplify broken link detection without state machine usage Fix typos Add url check to reduce false positives Drop reason enum as there is only one reason Fix duplicated diagnostics Fix linter
1 parent 0ca62d1 commit 8964f6e

File tree

6 files changed

+229
-7
lines changed

6 files changed

+229
-7
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5571,6 +5571,7 @@ Released 2018-09-13
55715571
[`disallowed_type`]: https://rust-lang.github.io/rust-clippy/master/index.html#disallowed_type
55725572
[`disallowed_types`]: https://rust-lang.github.io/rust-clippy/master/index.html#disallowed_types
55735573
[`diverging_sub_expression`]: https://rust-lang.github.io/rust-clippy/master/index.html#diverging_sub_expression
5574+
[`doc_broken_link`]: https://rust-lang.github.io/rust-clippy/master/index.html#doc_broken_link
55745575
[`doc_comment_double_space_linebreaks`]: https://rust-lang.github.io/rust-clippy/master/index.html#doc_comment_double_space_linebreaks
55755576
[`doc_include_without_cfg`]: https://rust-lang.github.io/rust-clippy/master/index.html#doc_include_without_cfg
55765577
[`doc_lazy_continuation`]: https://rust-lang.github.io/rust-clippy/master/index.html#doc_lazy_continuation

clippy_lints/src/declared_lints.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -139,6 +139,7 @@ pub static LINTS: &[&crate::LintInfo] = &[
139139
crate::disallowed_names::DISALLOWED_NAMES_INFO,
140140
crate::disallowed_script_idents::DISALLOWED_SCRIPT_IDENTS_INFO,
141141
crate::disallowed_types::DISALLOWED_TYPES_INFO,
142+
crate::doc::DOC_BROKEN_LINK_INFO,
142143
crate::doc::DOC_COMMENT_DOUBLE_SPACE_LINEBREAKS_INFO,
143144
crate::doc::DOC_INCLUDE_WITHOUT_CFG_INFO,
144145
crate::doc::DOC_LAZY_CONTINUATION_INFO,

clippy_lints/src/doc/broken_link.rs

Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,83 @@
1+
use clippy_utils::diagnostics::span_lint;
2+
use pulldown_cmark::BrokenLink as PullDownBrokenLink;
3+
use rustc_lint::LateContext;
4+
use rustc_resolve::rustdoc::{DocFragment, source_span_for_markdown_range};
5+
use rustc_span::{BytePos, Pos, Span};
6+
7+
use super::DOC_BROKEN_LINK;
8+
9+
/// Scan and report broken link on documents.
10+
/// It ignores false positives detected by `pulldown_cmark`, and only
11+
/// warns users when the broken link is consider a URL.
12+
// NOTE: We don't check these other cases because
13+
// rustdoc itself will check and warn about it:
14+
// - When a link url is broken across multiple lines in the URL path part
15+
// - When a link tag is missing the close parenthesis character at the end.
16+
// - When a link has whitespace within the url link.
17+
pub fn check(cx: &LateContext<'_>, bl: &PullDownBrokenLink<'_>, doc: &str, fragments: &[DocFragment]) {
18+
warn_if_broken_link(cx, bl, doc, fragments);
19+
}
20+
21+
fn warn_if_broken_link(cx: &LateContext<'_>, bl: &PullDownBrokenLink<'_>, doc: &str, fragments: &[DocFragment]) {
22+
if let Some(span) = source_span_for_markdown_range(cx.tcx, doc, &bl.span, fragments) {
23+
let mut len = 0;
24+
25+
// grab raw link data
26+
let (_, raw_link) = doc.split_at(bl.span.start);
27+
28+
// strip off link text part
29+
let raw_link = match raw_link.split_once(']') {
30+
None => return,
31+
Some((prefix, suffix)) => {
32+
len += prefix.len() + 1;
33+
suffix
34+
},
35+
};
36+
37+
let raw_link = match raw_link.split_once('(') {
38+
None => return,
39+
Some((prefix, suffix)) => {
40+
if !prefix.is_empty() {
41+
// there is text between ']' and '(' chars, so it is not a valid link
42+
return;
43+
}
44+
len += prefix.len() + 1;
45+
suffix
46+
},
47+
};
48+
49+
if raw_link.starts_with("(http") {
50+
// reduce chances of false positive reports
51+
// by limiting this checking only to http/https links.
52+
return;
53+
}
54+
55+
for c in raw_link.chars() {
56+
if c == ')' {
57+
// it is a valid link
58+
return;
59+
}
60+
61+
if c == '\n' {
62+
report_broken_link(cx, span, len);
63+
break;
64+
}
65+
66+
len += 1;
67+
}
68+
}
69+
}
70+
71+
fn report_broken_link(cx: &LateContext<'_>, frag_span: Span, offset: usize) {
72+
let start = frag_span.lo();
73+
let end = start + BytePos::from_usize(offset);
74+
75+
let span = Span::new(start, end, frag_span.ctxt(), frag_span.parent());
76+
77+
span_lint(
78+
cx,
79+
DOC_BROKEN_LINK,
80+
span,
81+
"possible broken doc link: broken across multiple lines",
82+
);
83+
}

clippy_lints/src/doc/mod.rs

Lines changed: 43 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ use rustc_span::edition::Edition;
2525
use std::ops::Range;
2626
use url::Url;
2727

28+
mod broken_link;
2829
mod doc_comment_double_space_linebreaks;
2930
mod include_in_doc_without_cfg;
3031
mod lazy_continuation;
@@ -292,6 +293,34 @@ declare_clippy_lint! {
292293
"possible typo for an intra-doc link"
293294
}
294295

296+
declare_clippy_lint! {
297+
/// ### What it does
298+
/// Checks the doc comments have unbroken links, mostly caused
299+
/// by bad formatted links such as broken across multiple lines.
300+
///
301+
/// ### Why is this bad?
302+
/// Because documentation generated by rustdoc will be broken
303+
/// since expected links won't be links and just text.
304+
///
305+
/// ### Examples
306+
/// This link is broken:
307+
/// ```no_run
308+
/// /// [example of a bad link](https://
309+
/// /// github.com/rust-lang/rust-clippy/)
310+
/// pub fn do_something() {}
311+
/// ```
312+
///
313+
/// It shouldn't be broken across multiple lines to work:
314+
/// ```no_run
315+
/// /// [example of a good link](https://github.com/rust-lang/rust-clippy/)
316+
/// pub fn do_something() {}
317+
/// ```
318+
#[clippy::version = "1.84.0"]
319+
pub DOC_BROKEN_LINK,
320+
pedantic,
321+
"broken document link"
322+
}
323+
295324
declare_clippy_lint! {
296325
/// ### What it does
297326
/// Checks for the doc comments of publicly visible
@@ -625,6 +654,7 @@ impl Documentation {
625654
impl_lint_pass!(Documentation => [
626655
DOC_LINK_CODE,
627656
DOC_LINK_WITH_QUOTES,
657+
DOC_BROKEN_LINK,
628658
DOC_MARKDOWN,
629659
DOC_NESTED_REFDEFS,
630660
MISSING_SAFETY_DOC,
@@ -751,9 +781,9 @@ struct DocHeaders {
751781
/// back in the various late lint pass methods if they need the final doc headers, like "Safety" or
752782
/// "Panics" sections.
753783
fn check_attrs(cx: &LateContext<'_>, valid_idents: &FxHashSet<String>, attrs: &[Attribute]) -> Option<DocHeaders> {
754-
/// We don't want the parser to choke on intra doc links. Since we don't
755-
/// actually care about rendering them, just pretend that all broken links
756-
/// point to a fake address.
784+
// We don't want the parser to choke on intra doc links. Since we don't
785+
// actually care about rendering them, just pretend that all broken links
786+
// point to a fake address.
757787
#[expect(clippy::unnecessary_wraps)] // we're following a type signature
758788
fn fake_broken_link_callback<'a>(_: BrokenLink<'_>) -> Option<(CowStr<'a>, CowStr<'a>)> {
759789
Some(("fake".into(), "fake".into()))
@@ -793,14 +823,12 @@ fn check_attrs(cx: &LateContext<'_>, valid_idents: &FxHashSet<String>, attrs: &[
793823
return Some(DocHeaders::default());
794824
}
795825

796-
let mut cb = fake_broken_link_callback;
797-
798826
check_for_code_clusters(
799827
cx,
800828
pulldown_cmark::Parser::new_with_broken_link_callback(
801829
&doc,
802830
main_body_opts() - Options::ENABLE_SMART_PUNCTUATION,
803-
Some(&mut cb),
831+
Some(&mut fake_broken_link_callback),
804832
)
805833
.into_offset_iter(),
806834
&doc,
@@ -810,9 +838,17 @@ fn check_attrs(cx: &LateContext<'_>, valid_idents: &FxHashSet<String>, attrs: &[
810838
},
811839
);
812840

841+
// NOTE: check_doc uses it own cb function,
842+
// to avoid causing duplicated diagnostics for the broken link checker.
843+
let mut full_fake_broken_link_callback = |bl: BrokenLink<'_>| -> Option<(CowStr<'_>, CowStr<'_>)> {
844+
broken_link::check(cx, &bl, &doc, &fragments);
845+
Some(("fake".into(), "fake".into()))
846+
};
847+
813848
// disable smart punctuation to pick up ['link'] more easily
814849
let opts = main_body_opts() - Options::ENABLE_SMART_PUNCTUATION;
815-
let parser = pulldown_cmark::Parser::new_with_broken_link_callback(&doc, opts, Some(&mut cb));
850+
let parser =
851+
pulldown_cmark::Parser::new_with_broken_link_callback(&doc, opts, Some(&mut full_fake_broken_link_callback));
816852

817853
Some(check_doc(
818854
cx,

tests/ui/doc_broken_link.rs

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
#![warn(clippy::doc_broken_link)]
2+
3+
fn main() {}
4+
5+
pub struct FakeType {}
6+
7+
/// This might be considered a link false positive
8+
/// and should be ignored by this lint rule:
9+
/// Example of referencing some code with brackets [FakeType].
10+
pub fn doc_ignore_link_false_positive_1() {}
11+
12+
/// This might be considered a link false positive
13+
/// and should be ignored by this lint rule:
14+
/// [`FakeType`]. Continue text after brackets,
15+
/// then (something in
16+
/// parenthesis).
17+
pub fn doc_ignore_link_false_positive_2() {}
18+
19+
/// Test valid link, whole link single line.
20+
/// [doc valid link](https://test.fake/doc_valid_link)
21+
pub fn doc_valid_link() {}
22+
23+
/// Test valid link, whole link single line but it has special chars such as brackets and
24+
/// parenthesis. [doc invalid link url invalid char](https://test.fake/doc_valid_link_url_invalid_char?foo[bar]=1&bar(foo)=2)
25+
pub fn doc_valid_link_url_invalid_char() {}
26+
27+
/// Test valid link, text tag broken across multiple lines.
28+
/// [doc valid link broken
29+
/// text](https://test.fake/doc_valid_link_broken_text)
30+
pub fn doc_valid_link_broken_text() {}
31+
32+
/// Test valid link, url tag broken across multiple lines, but
33+
/// the whole url part in a single line.
34+
/// [doc valid link broken url tag two lines first](https://test.fake/doc_valid_link_broken_url_tag_two_lines_first
35+
/// )
36+
pub fn doc_valid_link_broken_url_tag_two_lines_first() {}
37+
38+
/// Test valid link, url tag broken across multiple lines, but
39+
/// the whole url part in a single line.
40+
/// [doc valid link broken url tag two lines second](
41+
/// https://test.fake/doc_valid_link_broken_url_tag_two_lines_second)
42+
pub fn doc_valid_link_broken_url_tag_two_lines_second() {}
43+
44+
/// Test valid link, url tag broken across multiple lines, but
45+
/// the whole url part in a single line, but the closing pharentesis
46+
/// in a third line.
47+
/// [doc valid link broken url tag three lines](
48+
/// https://test.fake/doc_valid_link_broken_url_tag_three_lines
49+
/// )
50+
pub fn doc_valid_link_broken_url_tag_three_lines() {}
51+
52+
/// Test invalid link, url part broken across multiple lines.
53+
/// [doc invalid link broken url scheme part](https://
54+
/// test.fake/doc_invalid_link_broken_url_scheme_part)
55+
//~^^ ERROR: possible broken doc link: broken across multiple lines
56+
pub fn doc_invalid_link_broken_url_scheme_part() {}
57+
58+
/// Test invalid link, url part broken across multiple lines.
59+
/// [doc invalid link broken url host part](https://test
60+
/// .fake/doc_invalid_link_broken_url_host_part)
61+
//~^^ ERROR: possible broken doc link: broken across multiple lines
62+
pub fn doc_invalid_link_broken_url_host_part() {}
63+
64+
/// Test invalid link, for multiple urls in the same block of comment.
65+
/// There is a [fist link - invalid](https://test
66+
/// .fake) then it continues
67+
//~^^ ERROR: possible broken doc link: broken across multiple lines
68+
/// with a [second link - valid](https://test.fake/doc_valid_link) and another [third link - invalid](https://test
69+
/// .fake). It ends with another
70+
//~^^ ERROR: possible broken doc link: broken across multiple lines
71+
/// line of comment.
72+
pub fn doc_multiple_invalid_link_broken_url() {}

tests/ui/doc_broken_link.stderr

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
error: possible broken doc link: broken across multiple lines
2+
--> tests/ui/doc_broken_link.rs:53:5
3+
|
4+
LL | /// [doc invalid link broken url scheme part](https://
5+
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
6+
|
7+
= note: `-D clippy::doc-broken-link` implied by `-D warnings`
8+
= help: to override `-D warnings` add `#[allow(clippy::doc_broken_link)]`
9+
10+
error: possible broken doc link: broken across multiple lines
11+
--> tests/ui/doc_broken_link.rs:59:5
12+
|
13+
LL | /// [doc invalid link broken url host part](https://test
14+
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
15+
16+
error: possible broken doc link: broken across multiple lines
17+
--> tests/ui/doc_broken_link.rs:65:16
18+
|
19+
LL | /// There is a [fist link - invalid](https://test
20+
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
21+
22+
error: possible broken doc link: broken across multiple lines
23+
--> tests/ui/doc_broken_link.rs:68:80
24+
|
25+
LL | /// with a [second link - valid](https://test.fake/doc_valid_link) and another [third link - invalid](https://test
26+
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
27+
28+
error: aborting due to 4 previous errors
29+

0 commit comments

Comments
 (0)