Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Make tidy fast without compromising case alternation #127457

Merged
merged 1 commit into from
Jul 24, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
47 changes: 29 additions & 18 deletions src/tools/tidy/src/style.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,9 @@
// ignore-tidy-dbg

use crate::walk::{filter_dirs, walk};
use regex::RegexSet;
use regex::RegexSetBuilder;
use rustc_hash::FxHashMap;
use std::{ffi::OsStr, path::Path};
use std::{ffi::OsStr, path::Path, sync::LazyLock};

#[cfg(test)]
mod tests;
Expand Down Expand Up @@ -110,16 +110,26 @@ const ROOT_PROBLEMATIC_CONSTS: &[u32] = &[
173390526, 721077,
];

const LETTER_DIGIT: &[(char, char)] = &[('A', '4'), ('B', '8'), ('E', '3')];

// Returns all permutations of problematic consts, over 2000 elements.
fn generate_problematic_strings(
consts: &[u32],
letter_digit: &FxHashMap<char, char>,
) -> Vec<String> {
generate_problems(consts, letter_digit)
.flat_map(|v| vec![v.to_string(), format!("{:x}", v), format!("{:X}", v)])
.flat_map(|v| vec![v.to_string(), format!("{:X}", v)])
.collect()
}

static PROBLEMATIC_CONSTS_STRINGS: LazyLock<Vec<String>> = LazyLock::new(|| {
generate_problematic_strings(ROOT_PROBLEMATIC_CONSTS, &LETTER_DIGIT.iter().cloned().collect())
});

fn contains_problematic_const(trimmed: &str) -> bool {
PROBLEMATIC_CONSTS_STRINGS.iter().any(|s| trimmed.to_uppercase().contains(s))
}

const INTERNAL_COMPILER_DOCS_LINE: &str = "#### This error code is internal to the compiler and will not be emitted with normal Rust code.";

/// Parser states for `line_is_url`.
Expand Down Expand Up @@ -316,14 +326,14 @@ pub fn check(path: &Path, bad: &mut bool) {
// We only check CSS files in rustdoc.
path.extension().map_or(false, |e| e == "css") && !is_in(path, "src", "librustdoc")
}
let problematic_consts_strings = generate_problematic_strings(
ROOT_PROBLEMATIC_CONSTS,
&[('A', '4'), ('B', '8'), ('E', '3')].iter().cloned().collect(),
);

// This creates a RegexSet as regex contains performance optimizations to be able to deal with these over
// 2000 needles efficiently. This runs over the entire source code, so performance matters.
let problematic_regex = RegexSet::new(problematic_consts_strings.as_slice()).unwrap();

let problematic_regex = RegexSetBuilder::new(PROBLEMATIC_CONSTS_STRINGS.as_slice())
.case_insensitive(true)
.build()
.unwrap();
let style_file = Path::new(file!());
walk(path, skip, &mut |entry, contents| {
let file = entry.path();
let filename = file.file_name().unwrap().to_string_lossy();
Expand Down Expand Up @@ -389,10 +399,15 @@ pub fn check(path: &Path, bad: &mut bool) {
let mut lines = 0;
let mut last_safety_comment = false;
let mut comment_block: Option<(usize, usize)> = None;
let is_test = file.components().any(|c| c.as_os_str() == "tests");
let is_test = file.components().any(|c| c.as_os_str() == "tests")
|| file.file_stem().unwrap() == "tests";
let is_style = file.ends_with(style_file) || style_file.ends_with(file);
let is_style_test =
is_test && file.parent().unwrap().ends_with(style_file.with_extension(""));
// scanning the whole file for multiple needles at once is more efficient than
// executing lines times needles separate searches.
let any_problematic_line = problematic_regex.is_match(contents);
let any_problematic_line =
!is_style && !is_style_test && problematic_regex.is_match(contents);
for (i, line) in contents.split('\n').enumerate() {
if line.is_empty() {
if i == 0 {
Expand Down Expand Up @@ -451,7 +466,7 @@ pub fn check(path: &Path, bad: &mut bool) {
if line.contains('\r') {
suppressible_tidy_err!(err, skip_cr, "CR character");
}
if filename != "style.rs" {
if !is_style {
// Allow using TODO in diagnostic suggestions by marking the
// relevant line with `// ignore-tidy-todo`.
if trimmed.contains("TODO") && !trimmed.contains("ignore-tidy-todo") {
Expand All @@ -462,12 +477,8 @@ pub fn check(path: &Path, bad: &mut bool) {
if trimmed.contains("//") && trimmed.contains(" XXX") {
err("Instead of XXX use FIXME")
}
if any_problematic_line {
for s in problematic_consts_strings.iter() {
if trimmed.contains(s) {
err("Don't use magic numbers that spell things (consider 0x12345678)");
}
}
if any_problematic_line && contains_problematic_const(trimmed) {
err("Don't use magic numbers that spell things (consider 0x12345678)");
}
}
// for now we just check libcore
Expand Down
19 changes: 6 additions & 13 deletions src/tools/tidy/src/style/tests.rs
Original file line number Diff line number Diff line change
@@ -1,17 +1,10 @@
use super::*;

#[test]
fn test_generate_problematic_strings() {
let problematic_regex = RegexSet::new(
generate_problematic_strings(
ROOT_PROBLEMATIC_CONSTS,
&[('A', '4'), ('B', '8'), ('E', '3'), ('0', 'F')].iter().cloned().collect(), // use "futile" F intentionally
)
.as_slice(),
)
.unwrap();
assert!(problematic_regex.is_match("786357")); // check with no "decimal" hex digits - converted to integer
assert!(problematic_regex.is_match("589701")); // check with "decimal" replacements - converted to integer
assert!(problematic_regex.is_match("8FF85")); // check for hex display
assert!(!problematic_regex.is_match("1193046")); // check for non-matching value
fn test_contains_problematic_const() {
assert!(contains_problematic_const("721077")); // check with no "decimal" hex digits - converted to integer
assert!(contains_problematic_const("524421")); // check with "decimal" replacements - converted to integer
assert!(contains_problematic_const(&(285 * 281).to_string())); // check for hex display
assert!(contains_problematic_const(&format!("{:x}B5", 2816))); // check for case-alternating hex display
assert!(!contains_problematic_const("1193046")); // check for non-matching value
}
Loading