Skip to content

Commit defd1b3

Browse files
committed
rustbuild: Add a link checker for documentation
Add a script, run as part of the build's checks, that verifies that `href` links in the generated documents are correct. We're always getting a steady stream of "fix a broken link" PRs and issue reports, and we should probably just nip them all in the bud.
1 parent f7b7535 commit defd1b3

File tree

7 files changed

+280
-2
lines changed

7 files changed

+280
-2
lines changed

src/bootstrap/bootstrap.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,8 @@ def download_rust_nightly(self):
7373

7474
if self.rustc().startswith(self.bin_root()) and \
7575
(not os.path.exists(self.rustc()) or self.rustc_out_of_date()):
76-
shutil.rmtree(self.bin_root())
76+
if os.path.exists(self.bin_root()):
77+
shutil.rmtree(self.bin_root())
7778
filename = "rust-std-nightly-" + self.build + ".tar.gz"
7879
url = "https://static.rust-lang.org/dist/" + self.snap_rustc_date()
7980
tarball = os.path.join(rustc_cache, filename)

src/bootstrap/build/check.rs

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
// Copyright 2016 The Rust Project Developers. See the COPYRIGHT
2+
// file at the top-level directory of this distribution and at
3+
// http://rust-lang.org/COPYRIGHT.
4+
//
5+
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6+
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7+
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8+
// option. This file may not be copied, modified, or distributed
9+
// except according to those terms.
10+
11+
use std::process::Command;
12+
13+
use build::{Build, Compiler};
14+
15+
pub fn linkcheck(build: &Build, stage: u32, host: &str) {
16+
println!("Linkcheck stage{} ({})", stage, host);
17+
let compiler = Compiler::new(stage, host);
18+
let linkchecker = build.tool(&compiler, "linkchecker");
19+
build.run(Command::new(&linkchecker)
20+
.arg(build.out.join(host).join("doc")));
21+
}

src/bootstrap/build/mod.rs

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ macro_rules! t {
3030

3131
mod cc;
3232
mod channel;
33+
mod check;
3334
mod clean;
3435
mod compile;
3536
mod config;
@@ -171,6 +172,9 @@ impl Build {
171172
Rustc { stage } => {
172173
compile::assemble_rustc(self, stage, target.target);
173174
}
175+
ToolLinkchecker { stage } => {
176+
compile::tool(self, stage, target.target, "linkchecker");
177+
}
174178
ToolRustbook { stage } => {
175179
compile::tool(self, stage, target.target, "rustbook");
176180
}
@@ -195,6 +199,10 @@ impl Build {
195199
doc::rustc(self, stage, target.target, &doc_out);
196200
}
197201

202+
CheckLinkcheck { stage } => {
203+
check::linkcheck(self, stage, target.target);
204+
}
205+
198206
Doc { .. } | // pseudo-steps
199207
Check { .. } => {}
200208
}

src/bootstrap/build/step.rs

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@ macro_rules! targets {
4646
}),
4747

4848
// Various tools that we can build as part of the build.
49+
(tool_linkchecker, ToolLinkchecker { stage: u32 }),
4950
(tool_rustbook, ToolRustbook { stage: u32 }),
5051

5152
// Steps for long-running native builds. Ideally these wouldn't
@@ -71,6 +72,7 @@ macro_rules! targets {
7172
// Steps for running tests. The 'check' target is just a pseudo
7273
// target to depend on a bunch of others.
7374
(check, Check { stage: u32, compiler: Compiler<'a> }),
75+
(check_linkcheck, CheckLinkcheck { stage: u32 }),
7476
}
7577
}
7678
}
@@ -200,6 +202,8 @@ fn add_steps<'a>(build: &'a Build,
200202
}
201203

202204
targets!(add_step);
205+
206+
panic!("unknown step: {}", step);
203207
}
204208
}
205209

@@ -273,7 +277,15 @@ impl<'a> Step<'a> {
273277
self.doc_std(stage)]
274278
}
275279
Source::Check { stage, compiler: _ } => {
276-
vec![]
280+
vec![self.check_linkcheck(stage)]
281+
}
282+
Source::CheckLinkcheck { stage } => {
283+
vec![self.tool_linkchecker(stage), self.doc(stage)]
284+
}
285+
286+
Source::ToolLinkchecker { stage } => {
287+
vec![self.libstd(stage, self.compiler(stage))]
288+
}
277289
Source::ToolRustbook { stage } => {
278290
vec![self.librustc(stage, self.compiler(stage))]
279291
}

src/tools/linkchecker/Cargo.lock

Lines changed: 64 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

src/tools/linkchecker/Cargo.toml

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
[package]
2+
name = "linkchecker"
3+
version = "0.1.0"
4+
authors = ["Alex Crichton <alex@alexcrichton.com>"]
5+
6+
[dependencies]
7+
url = "0.5"
8+
9+
[[bin]]
10+
name = "linkchecker"
11+
path = "main.rs"

src/tools/linkchecker/main.rs

Lines changed: 161 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,161 @@
1+
// Copyright 2016 The Rust Project Developers. See the COPYRIGHT
2+
// file at the top-level directory of this distribution and at
3+
// http://rust-lang.org/COPYRIGHT.
4+
//
5+
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6+
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7+
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8+
// option. This file may not be copied, modified, or distributed
9+
// except according to those terms.
10+
11+
//! Script to check the validity of `href` links in our HTML documentation.
12+
//!
13+
//! In the past we've been quite error prone to writing in broken links as most
14+
//! of them are manually rather than automatically added. As files move over
15+
//! time or APIs change, old links become stale or broken. The purpose of this
16+
//! script is to check all relative links in our documentation to make sure they
17+
//! actually point to a valid place.
18+
//!
19+
//! Currently this doesn't actually do any HTML parsing or anything fancy like
20+
//! that, it just has a simple "regex" to search for `href` attributes. These values
21+
//! are then translated to file URLs if possible and then the destination is
22+
//! asserted to exist.
23+
//!
24+
//! A few whitelisted exceptions are allowed as there are known bugs in rustdoc,
25+
//! but this should catch the majority of "broken link" cases.
26+
27+
extern crate url;
28+
29+
use std::env;
30+
use std::fs::File;
31+
use std::io::prelude::*;
32+
use std::path::Path;
33+
34+
use url::{Url, UrlParser};
35+
36+
/// Unwraps a `Result`, yielding the `Ok` value.
///
/// On `Err` it panics with a message made of the stringified expression
/// followed by the error's `Display` output.
macro_rules! t {
    ($e:expr) => {
        match $e {
            Ok(value) => value,
            Err(err) => panic!("{} failed with {}", stringify!($e), err),
        }
    };
}
42+
43+
/// Entry point: checks every link found below a documentation directory.
///
/// The first command-line argument names the documentation directory; it is
/// joined onto the current working directory, converted into a file URL, and
/// handed to `walk`.
///
/// # Panics
///
/// Panics if the argument is missing, if the current directory cannot be
/// determined, if the resulting path cannot be expressed as a file URL, or
/// (after the whole tree has been walked) if any broken link was reported.
fn main() {
    let docs = env::args()
        .nth(1)
        .expect("usage: linkchecker <path-to-docs-directory>");
    let docs = env::current_dir()
        .expect("failed to determine the current directory")
        .join(docs);
    let mut url = Url::from_file_path(&docs)
        .expect("docs directory cannot be converted to a file URL");

    // `walk` flips this to `true` as soon as any broken link is found.
    let mut errors = false;
    walk(&docs, &docs, &mut url, &mut errors);
    if errors {
        panic!("found some broken links");
    }
}
53+
54+
fn walk(root: &Path, dir: &Path, url: &mut Url, errors: &mut bool) {
55+
for entry in t!(dir.read_dir()).map(|e| t!(e)) {
56+
let path = entry.path();
57+
let kind = t!(entry.file_type());
58+
url.path_mut().unwrap().push(entry.file_name().into_string().unwrap());
59+
if kind.is_dir() {
60+
walk(root, &path, url, errors);
61+
} else {
62+
check(root, &path, url, errors);
63+
}
64+
url.path_mut().unwrap().pop();
65+
}
66+
}
67+
68+
fn check(root: &Path, file: &Path, base: &Url, errors: &mut bool) {
69+
// ignore js files as they are not prone to errors as the rest of the
70+
// documentation is and they otherwise bring up false positives.
71+
if file.extension().and_then(|s| s.to_str()) == Some("js") {
72+
return
73+
}
74+
75+
let pretty_file = file.strip_prefix(root).unwrap_or(file);
76+
77+
// Unfortunately we're not 100% full of valid links today to we need a few
78+
// whitelists to get this past `make check` today.
79+
if let Some(path) = pretty_file.to_str() {
80+
// FIXME(#32129)
81+
if path == "std/string/struct.String.html" {
82+
return
83+
}
84+
// FIXME(#32130)
85+
if path.contains("btree_set/struct.BTreeSet.html") ||
86+
path == "collections/struct.BTreeSet.html" {
87+
return
88+
}
89+
// FIXME(#31948)
90+
if path.contains("ParseFloatError") {
91+
return
92+
}
93+
94+
// currently
95+
if path == "std/sys/ext/index.html" {
96+
return
97+
}
98+
99+
// weird reexports, but this module is on its way out, so chalk it up to
100+
// "rustdoc weirdness" and move on from there
101+
if path.contains("scoped_tls") {
102+
return
103+
}
104+
}
105+
106+
let mut parser = UrlParser::new();
107+
parser.base_url(base);
108+
let mut contents = String::new();
109+
if t!(File::open(file)).read_to_string(&mut contents).is_err() {
110+
return
111+
}
112+
113+
for (i, mut line) in contents.lines().enumerate() {
114+
// Search for anything that's the regex 'href[ ]*=[ ]*".*?"'
115+
while let Some(j) = line.find(" href") {
116+
let rest = &line[j + 5..];
117+
line = rest;
118+
let pos_equals = match rest.find("=") {
119+
Some(i) => i,
120+
None => continue,
121+
};
122+
if rest[..pos_equals].trim_left_matches(" ") != "" {
123+
continue
124+
}
125+
let rest = &rest[pos_equals + 1..];
126+
let pos_quote = match rest.find("\"").or_else(|| rest.find("'")) {
127+
Some(i) => i,
128+
None => continue,
129+
};
130+
if rest[..pos_quote].trim_left_matches(" ") != "" {
131+
continue
132+
}
133+
let rest = &rest[pos_quote + 1..];
134+
let url = match rest.find("\"").or_else(|| rest.find("'")) {
135+
Some(i) => &rest[..i],
136+
None => continue,
137+
};
138+
139+
// Once we've plucked out the URL, parse it using our base url and
140+
// then try to extract a file path. If either if these fail then we
141+
// just keep going.
142+
let parsed_url = match parser.parse(url) {
143+
Ok(url) => url,
144+
Err(..) => continue,
145+
};
146+
let path = match parsed_url.to_file_path() {
147+
Ok(path) => path,
148+
Err(..) => continue,
149+
};
150+
151+
// Alright, if we've found a file name then this file had better
152+
// exist! If it doesn't then we register and print an error.
153+
if !path.exists() {
154+
*errors = true;
155+
print!("{}:{}: broken link - ", pretty_file.display(), i + 1);
156+
let pretty_path = path.strip_prefix(root).unwrap_or(&path);
157+
println!("{}", pretty_path.display());
158+
}
159+
}
160+
}
161+
}

0 commit comments

Comments
 (0)