diff --git a/src/bootstrap/src/core/build_steps/check.rs b/src/bootstrap/src/core/build_steps/check.rs index 3278b55305c8b..efdb6c993b5ac 100644 --- a/src/bootstrap/src/core/build_steps/check.rs +++ b/src/bootstrap/src/core/build_steps/check.rs @@ -628,3 +628,9 @@ tool_check_step!(CoverageDump { mode: Mode::ToolBootstrap, default: false }); + +tool_check_step!(Linkchecker { + path: "src/tools/linkchecker", + mode: Mode::ToolBootstrap, + default: false +}); diff --git a/src/bootstrap/src/core/builder/mod.rs b/src/bootstrap/src/core/builder/mod.rs index 1b75d00b30e4c..504a6890906f2 100644 --- a/src/bootstrap/src/core/builder/mod.rs +++ b/src/bootstrap/src/core/builder/mod.rs @@ -1030,6 +1030,7 @@ impl<'a> Builder<'a> { check::Compiletest, check::FeaturesStatusDump, check::CoverageDump, + check::Linkchecker, // This has special staging logic, it may run on stage 1 while others run on stage 0. // It takes quite some time to build stage 1, so put this at the end. // diff --git a/src/tools/linkchecker/Cargo.toml b/src/tools/linkchecker/Cargo.toml index 7123d43eb564c..fb5bff3fe63ff 100644 --- a/src/tools/linkchecker/Cargo.toml +++ b/src/tools/linkchecker/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "linkchecker" version = "0.1.0" -edition = "2021" +edition = "2024" [[bin]] name = "linkchecker" diff --git a/src/tools/linkchecker/main.rs b/src/tools/linkchecker/main.rs index 84cba3f8c4473..7c2cee2e32e63 100644 --- a/src/tools/linkchecker/main.rs +++ b/src/tools/linkchecker/main.rs @@ -17,12 +17,14 @@ //! should catch the majority of "broken link" cases. use std::cell::{Cell, RefCell}; +use std::collections::hash_map::Entry; use std::collections::{HashMap, HashSet}; +use std::fs; use std::io::ErrorKind; +use std::iter::once; use std::path::{Component, Path, PathBuf}; use std::rc::Rc; use std::time::Instant; -use std::{env, fs}; use html5ever::tendril::ByteTendril; use html5ever::tokenizer::{ @@ -110,10 +112,25 @@ macro_rules! 
t { }; } +struct Cli { + docs: PathBuf, + link_targets_dirs: Vec<PathBuf>, +} + fn main() { - let docs = env::args_os().nth(1).expect("doc path should be first argument"); - let docs = env::current_dir().unwrap().join(docs); - let mut checker = Checker { root: docs.clone(), cache: HashMap::new() }; + let cli = match parse_cli() { + Ok(cli) => cli, + Err(err) => { + eprintln!("error: {err}"); + usage_and_exit(1); + } + }; + + let mut checker = Checker { + root: cli.docs.clone(), + link_targets_dirs: cli.link_targets_dirs, + cache: HashMap::new(), + }; let mut report = Report { errors: 0, start: Instant::now(), @@ -125,7 +142,7 @@ fn main() { intra_doc_exceptions: 0, has_broken_urls: false, }; - checker.walk(&docs, &mut report); + checker.walk(&cli.docs, &mut report); report.report(); if report.errors != 0 { println!("found some broken links"); @@ -133,8 +150,50 @@ fn main() { } } +fn parse_cli() -> Result<Cli, String> { + fn to_canonical_path(arg: &str) -> Result<PathBuf, String> { + PathBuf::from(arg).canonicalize().map_err(|e| format!("could not canonicalize {arg}: {e}")) + } + + let mut verbatim = false; + let mut docs = None; + let mut link_targets_dirs = Vec::new(); + + let mut args = std::env::args().skip(1); + while let Some(arg) = args.next() { + if !verbatim && arg == "--" { + verbatim = true; + } else if !verbatim && (arg == "-h" || arg == "--help") { + usage_and_exit(0) + } else if !verbatim && arg == "--link-targets-dir" { + link_targets_dirs.push(to_canonical_path( + &args.next().ok_or("missing value for --link-targets-dir")?, + )?); + } else if !verbatim && let Some(value) = arg.strip_prefix("--link-targets-dir=") { + link_targets_dirs.push(to_canonical_path(value)?); + } else if !verbatim && arg.starts_with('-') { + return Err(format!("unknown flag: {arg}")); + } else if docs.is_none() { + docs = Some(arg); + } else { + return Err("too many positional arguments".into()); + } + } + + Ok(Cli { + docs: to_canonical_path(&docs.ok_or("missing first positional argument")?)?, + link_targets_dirs,
+ }) +} + +fn usage_and_exit(code: i32) -> ! { + eprintln!("usage: linkchecker PATH [--link-targets-dir=PATH ...]"); + std::process::exit(code) +} + struct Checker { root: PathBuf, + link_targets_dirs: Vec<PathBuf>, cache: Cache, } @@ -427,15 +486,23 @@ impl Checker { let pretty_path = file.strip_prefix(&self.root).unwrap_or(file).to_str().unwrap().to_string(); - let entry = - self.cache.entry(pretty_path.clone()).or_insert_with(|| match fs::metadata(file) { + for base in once(&self.root).chain(self.link_targets_dirs.iter()) { + let entry = self.cache.entry(pretty_path.clone()); + if let Entry::Occupied(e) = &entry + && !matches!(e.get(), FileEntry::Missing) + { + break; + } + + let file = base.join(&pretty_path); + entry.insert_entry(match fs::metadata(&file) { Ok(metadata) if metadata.is_dir() => FileEntry::Dir, Ok(_) => { if file.extension().and_then(|s| s.to_str()) != Some("html") { FileEntry::OtherFile } else { report.html_files += 1; - load_html_file(file, report) + load_html_file(&file, report) } } Err(e) if e.kind() == ErrorKind::NotFound => FileEntry::Missing, @@ -451,6 +518,9 @@ impl Checker { panic!("unexpected read error for {}: {}", file.display(), e); } }); + } + + let entry = self.cache.get(&pretty_path).unwrap(); (pretty_path, entry) } }