Skip to content

Commit

Permalink
make tidy fast without compromising case alternation
Browse files Browse the repository at this point in the history
  • Loading branch information
donno2048 committed Jul 7, 2024
1 parent 0ca92de commit 7b9e5cc
Show file tree
Hide file tree
Showing 2 changed files with 34 additions and 33 deletions.
46 changes: 27 additions & 19 deletions src/tools/tidy/src/style.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,9 @@
// ignore-tidy-dbg

use crate::walk::{filter_dirs, walk};
use regex::RegexSet;
use regex::RegexSetBuilder;
use rustc_hash::FxHashMap;
use std::{ffi::OsStr, path::Path};
use std::{ffi::OsStr, path::Path, sync::LazyLock};

#[cfg(test)]
mod tests;
Expand Down Expand Up @@ -110,16 +110,29 @@ const ROOT_PROBLEMATIC_CONSTS: &[u32] = &[
173390526, 721077,
];

// Letter/digit pairs that `PROBLEMATIC_CONSTS_STRINGS` collects into an `FxHashMap` and
// hands to `generate_problematic_strings` as its `letter_digit` argument.
// NOTE(review): presumably each hex letter is paired with a look-alike decimal digit
// (A~4, B~8, E~3) so digit-substituted spellings are generated too — confirm against
// `generate_problems`.
const LETTER_DIGIT: &[(char, char)] = &[('A', '4'), ('B', '8'), ('E', '3')];

// Returns all permutations of problematic consts as strings, over 2000 elements.
//
// `consts` and `letter_digit` are forwarded unchanged to `generate_problems`; every value
// it yields is rendered as text (decimal via `to_string`, hexadecimal via `format!`) and
// the renderings are flattened into one `Vec<String>`.
fn generate_problematic_strings(
consts: &[u32],
letter_digit: &FxHashMap<char, char>,
) -> Vec<String> {
generate_problems(consts, letter_digit)
// NOTE(review): this listing is a diff view without +/- markers. The two `flat_map`
// lines below look like the old (lower- and upper-case hex) and the new (upper-case hex
// only) version of the same step rather than two chained steps — confirm against the
// real file before relying on either.
.flat_map(|v| vec![v.to_string(), format!("{:x}", v), format!("{:X}", v)])
.flat_map(|v| vec![v.to_string(), format!("{:X}", v)])
.collect()
}

static PROBLEMATIC_CONSTS_STRINGS: LazyLock<Vec<String>> = LazyLock::new(|| {
generate_problematic_strings(
ROOT_PROBLEMATIC_CONSTS,
&FxHashMap::from_iter(LETTER_DIGIT.iter().copied()),
)
});

/// Returns `true` if `trimmed`, compared case-insensitively, contains any entry of
/// `PROBLEMATIC_CONSTS_STRINGS`.
///
/// The needles are expected to be decimal or upper-case hex text, so the haystack is
/// upper-cased before searching; this also catches lower- and mixed-case hex spellings.
fn contains_problematic_const(trimmed: &str) -> bool {
    // Upper-case once up front: doing it inside the closure would allocate a fresh
    // `String` for every needle in the (2000+ element) list.
    let haystack = trimmed.to_uppercase();
    PROBLEMATIC_CONSTS_STRINGS.iter().any(|needle| haystack.contains(needle.as_str()))
}

const INTERNAL_COMPILER_DOCS_LINE: &str = "#### This error code is internal to the compiler and will not be emitted with normal Rust code.";

/// Parser states for `line_is_url`.
Expand Down Expand Up @@ -316,14 +329,13 @@ pub fn check(path: &Path, bad: &mut bool) {
// We only check CSS files in rustdoc.
path.extension().map_or(false, |e| e == "css") && !is_in(path, "src", "librustdoc")
}
let problematic_consts_strings = generate_problematic_strings(
ROOT_PROBLEMATIC_CONSTS,
&[('A', '4'), ('B', '8'), ('E', '3')].iter().cloned().collect(),
);

// This creates a RegexSet as regex contains performance optimizations to be able to deal with these over
// 2000 needles efficiently. This runs over the entire source code, so performance matters.
let problematic_regex = RegexSet::new(problematic_consts_strings.as_slice()).unwrap();

let problematic_regex = RegexSetBuilder::new(PROBLEMATIC_CONSTS_STRINGS.as_slice())
.case_insensitive(true)
.build()
.unwrap();
walk(path, skip, &mut |entry, contents| {
let file = entry.path();
let filename = file.file_name().unwrap().to_string_lossy();
Expand Down Expand Up @@ -389,10 +401,10 @@ pub fn check(path: &Path, bad: &mut bool) {
let mut lines = 0;
let mut last_safety_comment = false;
let mut comment_block: Option<(usize, usize)> = None;
let is_test = file.components().any(|c| c.as_os_str() == "tests");
let is_test = file.components().any(|c| c.as_os_str() == "tests") || file.file_stem().unwrap() == "tests";
let problematic = filename != "style.rs" && !is_test && problematic_regex.is_match(contents);
// scanning the whole file for multiple needles at once is more efficient than
// executing lines times needles separate searches.
let any_problematic_line = problematic_regex.is_match(contents);
for (i, line) in contents.split('\n').enumerate() {
if line.is_empty() {
if i == 0 {
Expand Down Expand Up @@ -462,13 +474,9 @@ pub fn check(path: &Path, bad: &mut bool) {
if trimmed.contains("//") && trimmed.contains(" XXX") {
err("Instead of XXX use FIXME")
}
if any_problematic_line {
for s in problematic_consts_strings.iter() {
if trimmed.contains(s) {
err("Don't use magic numbers that spell things (consider 0x12345678)");
}
}
}
}
if problematic && contains_problematic_const(trimmed) {
err("Don't use magic numbers that spell things (consider 0x12345678)");
}
// for now we just check libcore
if trimmed.contains("unsafe {")
Expand Down Expand Up @@ -613,4 +621,4 @@ pub fn check(path: &Path, bad: &mut bool) {
let _unused = skip_line_length;
let _unused = skip_file_length;
})
}
}
21 changes: 7 additions & 14 deletions src/tools/tidy/src/style/tests.rs
Original file line number Diff line number Diff line change
@@ -1,17 +1,10 @@
use super::*;

#[test]
// Builds a `RegexSet` from `generate_problematic_strings` over `ROOT_PROBLEMATIC_CONSTS`,
// using the usual A/B/E substitutions plus an extra '0' -> 'F' pair, then checks three
// inputs that must match and one that must not.
fn test_generate_problematic_strings() {
let problematic_regex = RegexSet::new(
generate_problematic_strings(
ROOT_PROBLEMATIC_CONSTS,
&[('A', '4'), ('B', '8'), ('E', '3'), ('0', 'F')].iter().cloned().collect(), // use "futile" F intentionally
)
.as_slice(),
)
.unwrap();
assert!(problematic_regex.is_match("786357")); // check with no "decimal" hex digits - converted to integer
assert!(problematic_regex.is_match("589701")); // check with "decimal" replacements - converted to integer
assert!(problematic_regex.is_match("8FF85")); // check for hex display
assert!(!problematic_regex.is_match("1193046")); // check for non-matching value
}
// Exercises `contains_problematic_const` with decimal, hex and mixed-case hex spellings
// that must be flagged, plus one value that must not be.
fn test_contains_problematic_const() {
assert!(contains_problematic_const("721077")); // check with no "decimal" hex digits - converted to integer
assert!(contains_problematic_const("524421")); // check with "decimal" replacements - converted to integer
// 285 * 281 == 80085, built arithmetically rather than written as a literal.
assert!(contains_problematic_const(&(285 * 281).to_string())); // check for hex display
// 2816 == 0xb00, so this formats to "b00B5": lower-case prefix, upper-case suffix.
assert!(contains_problematic_const(&format!("{:x}B5", 2816))); // check for case-alternating hex display
assert!(!contains_problematic_const("1193046")); // check for non-matching value
}

0 comments on commit 7b9e5cc

Please sign in to comment.