// Copyright 2016 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.

//! Script to check the validity of `href` links in our HTML documentation.
//!
//! In the past we've been quite prone to writing broken links, as most of them
//! are added manually rather than automatically. As files move over time or
//! APIs change, old links become stale or broken. The purpose of this script
//! is to check all relative links in our documentation and make sure they
//! actually point to a valid place.
//!
//! Currently this doesn't do any real HTML parsing or anything fancy like
//! that; it just has a simple "regex" to search for `href` attributes. These
//! values are then resolved to file URLs if possible, and the destination is
//! asserted to exist.
//!
//! A few whitelisted exceptions are allowed, as there are known bugs in
//! rustdoc, but this should catch the majority of "broken link" cases.
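//!
//! For example, a hypothetical link like `<a href="../vec/struct.Vec.html">`
//! found in `std/string/index.html` would be resolved against that file's
//! `file://` URL, converted back to a filesystem path, and checked with
//! `Path::exists`.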

extern crate url;

use std::env;
use std::fs::File;
use std::io::prelude::*;
use std::path::Path;

use url::{Url, UrlParser};

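// Unwrap a `Result`, panicking with the stringified expression and the error
// on failure.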
macro_rules! t {
    ($e:expr) => (match $e {
        Ok(e) => e,
        Err(e) => panic!("{} failed with {}", stringify!($e), e),
    })
}

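// The documentation directory to scan is taken as the first command line
// argument, e.g. (hypothetically) `linkchecker doc/`.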
fn main() {
    let docs = env::args().nth(1).unwrap();
    let docs = env::current_dir().unwrap().join(docs);
    let mut url = Url::from_file_path(&docs).unwrap();
    let mut errors = false;
    walk(&docs, &docs, &mut url, &mut errors);
    if errors {
        panic!("found some broken links");
    }
}

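// Recursively walk `dir`, keeping the path segments of `url` in sync with the
// entry currently being visited, and run `check` on every file found.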
fn walk(root: &Path, dir: &Path, url: &mut Url, errors: &mut bool) {
    for entry in t!(dir.read_dir()).map(|e| t!(e)) {
        let path = entry.path();
        let kind = t!(entry.file_type());
        url.path_mut().unwrap().push(entry.file_name().into_string().unwrap());
        if kind.is_dir() {
            walk(root, &path, url, errors);
        } else {
            check(root, &path, url, errors);
        }
        url.path_mut().unwrap().pop();
    }
}

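// Scan a single HTML file for `href` attributes, resolve each value relative
// to `base` (the file's own URL), and report an error if the target file does
// not exist.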
fn check(root: &Path, file: &Path, base: &Url, errors: &mut bool) {
    // Ignore JS files as they are not as prone to errors as the rest of the
    // documentation and would otherwise bring up false positives.
    if file.extension().and_then(|s| s.to_str()) == Some("js") {
        return
    }

    let pretty_file = file.strip_prefix(root).unwrap_or(file);

    // Unfortunately we're not 100% full of valid links today, so we need a few
    // whitelists to get this past `make check`.
    if let Some(path) = pretty_file.to_str() {
        // FIXME(#32129)
        if path == "std/string/struct.String.html" {
            return
        }
        // FIXME(#32130)
        if path.contains("btree_set/struct.BTreeSet.html") ||
           path == "collections/struct.BTreeSet.html" {
            return
        }
        // FIXME(#31948)
        if path.contains("ParseFloatError") {
            return
        }

        // currently
        if path == "std/sys/ext/index.html" {
            return
        }

        // weird reexports, but this module is on its way out, so chalk it up
        // to "rustdoc weirdness" and move on from there
        if path.contains("scoped_tls") {
            return
        }
    }

    let mut parser = UrlParser::new();
    parser.base_url(base);
    let mut contents = String::new();
    if t!(File::open(file)).read_to_string(&mut contents).is_err() {
        return
    }

    for (i, mut line) in contents.lines().enumerate() {
        // Search for anything matching the regex 'href[ ]*=[ ]*".*?"'
        while let Some(j) = line.find(" href") {
            let rest = &line[j + 5..];
            line = rest;
            let pos_equals = match rest.find("=") {
                Some(i) => i,
                None => continue,
            };
            if rest[..pos_equals].trim_left_matches(" ") != "" {
                continue
            }
            let rest = &rest[pos_equals + 1..];
            let pos_quote = match rest.find("\"").or_else(|| rest.find("'")) {
                Some(i) => i,
                None => continue,
            };
            if rest[..pos_quote].trim_left_matches(" ") != "" {
                continue
            }
            let rest = &rest[pos_quote + 1..];
            let url = match rest.find("\"").or_else(|| rest.find("'")) {
                Some(i) => &rest[..i],
                None => continue,
            };

            // Once we've plucked out the URL, parse it using our base URL and
            // then try to extract a file path. If either of these fails then
            // we just keep going.
            let parsed_url = match parser.parse(url) {
                Ok(url) => url,
                Err(..) => continue,
            };
            let path = match parsed_url.to_file_path() {
                Ok(path) => path,
                Err(..) => continue,
            };

            // Alright, if we've found a file name then this file had better
            // exist! If it doesn't then we register and print an error.
            if !path.exists() {
                *errors = true;
                print!("{}:{}: broken link - ", pretty_file.display(), i + 1);
                let pretty_path = path.strip_prefix(root).unwrap_or(&path);
                println!("{}", pretty_path.display());
            }
        }
    }
}