diff --git a/casr/Cargo.toml b/casr/Cargo.toml index e8241fbc..bf5898ee 100644 --- a/casr/Cargo.toml +++ b/casr/Cargo.toml @@ -11,6 +11,7 @@ license = "Apache-2.0" exclude = ["/tests"] [dependencies] +shell-words = "1.1" anyhow = "1.0" clap = { version = "4.2", features = ["wrap_help", "cargo"] } chrono = "0.4" @@ -23,6 +24,7 @@ gdb-command = "0.7" nix = "0.26" rayon = "1.7" num_cpus = "1.15" +is_executable = "1.0" linux-personality = "1.0" colored = "2.0" serde = { version = "1.0", features = ["derive"] } diff --git a/casr/src/bin/casr-afl.rs b/casr/src/bin/casr-afl.rs index 829a5a4e..44514962 100644 --- a/casr/src/bin/casr-afl.rs +++ b/casr/src/bin/casr-afl.rs @@ -1,94 +1,16 @@ +use casr::triage::{fuzzing_crash_triage_pipeline, CrashInfo}; use casr::util; -use anyhow::{bail, Context, Result}; +use anyhow::Result; use clap::{ error::{ContextKind, ContextValue, ErrorKind}, Arg, ArgAction, }; -use log::{debug, error, info, warn}; -use rayon::iter::{IntoParallelRefIterator, ParallelIterator}; -use walkdir::WalkDir; +use log::error; use std::collections::HashMap; use std::fs; use std::path::{Path, PathBuf}; -use std::process::Command; -use std::sync::RwLock; - -#[derive(Debug, Clone, Default)] -/// Information about crash to reproduce it. -struct AflCrashInfo { - /// Path to crash input. - pub path: PathBuf, - /// Target command line args. - pub target_args: Vec, - /// Input file argument index starting from argv\[1\], None for stdin. - pub at_index: Option, - /// ASAN. - pub is_asan: bool, -} - -impl<'a> AflCrashInfo { - /// Generate Casr report for crash. 
- /// - /// # Arguments - /// - /// * `output_dir` - save report to specified directory or use the same directory as crash - /// - /// * `timeout` - target program timeout (in seconds) - pub fn run_casr>>( - &self, - output_dir: T, - timeout: u64, - ) -> anyhow::Result<()> { - let mut args: Vec = vec!["-o".to_string()]; - let report_path = if let Some(out) = output_dir.into() { - out.join(self.path.file_name().unwrap()) - } else { - self.path.clone() - }; - if self.is_asan { - args.push(format!("{}.casrep", report_path.display())); - } else { - args.push(format!("{}.gdb.casrep", report_path.display())); - } - - if self.at_index.is_none() { - args.push("--stdin".to_string()); - args.push(self.path.to_str().unwrap().to_string()); - } - if timeout != 0 { - args.append(&mut vec!["-t".to_string(), timeout.to_string()]); - } - args.push("--".to_string()); - args.extend_from_slice(&self.target_args); - if let Some(at_index) = self.at_index { - let input = args[at_index + 4].replace("@@", self.path.to_str().unwrap()); - args[at_index + 4] = input; - } - - let tool = if self.is_asan { "casr-san" } else { "casr-gdb" }; - let mut casr_cmd = Command::new(tool); - casr_cmd.args(&args); - debug!("{:?}", casr_cmd); - - // Get output - let casr_output = casr_cmd - .output() - .with_context(|| format!("Couldn't launch {casr_cmd:?}"))?; - - if !casr_output.status.success() { - let err = String::from_utf8_lossy(&casr_output.stderr); - if err.contains("Program terminated (no crash)") { - warn!("{}: no crash on input {}", tool, self.path.display()); - } else { - error!("{} for input: {}", err.trim(), self.path.display()); - } - } - - Ok(()) - } -} fn main() -> Result<()> { let matches = clap::Command::new("casr-afl") @@ -115,9 +37,10 @@ fn main() -> Result<()> { .short('t') .long("timeout") .action(ArgAction::Set) + .default_value("0") .value_name("SECONDS") - .help("Timeout (in seconds) for target execution [default: disabled]") - .value_parser(clap::value_parser!(u64).range(1..)) + 
.help("Timeout (in seconds) for target execution, 0 value means that timeout is disabled") + .value_parser(clap::value_parser!(u64).range(0..)) ) .arg( Arg::new("input") @@ -171,25 +94,11 @@ fn main() -> Result<()> { // Init log. util::initialize_logging(&matches); - // Get output dir - let output_dir = matches.get_one::("output").unwrap(); - if !output_dir.exists() { - fs::create_dir_all(output_dir).with_context(|| { - format!("Couldn't create output directory {}", output_dir.display()) - })?; - } else if output_dir.read_dir()?.next().is_some() { - bail!("Output directory is not empty."); - } - - // Get optional gdb fuzz target args. - let gdb_argv: Vec = if let Some(argvs) = matches.get_many::("ARGS") { - argvs.cloned().collect() - } else { - Vec::new() - }; + let casr_san = util::get_path("casr-san")?; + let casr_gdb = util::get_path("casr-gdb")?; // Get all crashes. - let mut crashes: HashMap = HashMap::new(); + let mut crashes: HashMap = HashMap::new(); for node_dir in fs::read_dir(matches.get_one::("input").unwrap())? { let path = node_dir?.path(); if !path.is_dir() { @@ -197,7 +106,10 @@ fn main() -> Result<()> { } // Get crashes from one node. 
- let mut crash_info = AflCrashInfo::default(); + let mut crash_info = casr::triage::CrashInfo { + casr_tool: casr_gdb.clone(), + ..Default::default() + }; let cmdline_path = path.join("cmdline"); if let Ok(cmdline) = fs::read_to_string(&cmdline_path) { crash_info.target_args = cmdline.split_whitespace().map(|s| s.to_string()).collect(); @@ -205,26 +117,20 @@ fn main() -> Result<()> { .target_args .iter() .skip(1) - .position(|s| s.contains("@@")); + .position(|s| s.contains("@@")) + .map(|x| x + 1); if let Some(target) = crash_info.target_args.first() { - if let Ok(buffer) = fs::read(Path::new(target)) { - if let Ok(elf) = goblin::elf::Elf::parse(&buffer) { - for sym in elf.syms.iter() { - if let Some(name) = elf.strtab.get_at(sym.st_name) { - if name.contains("__asan") { - crash_info.is_asan = true; - break; - } - } + match util::symbols_list(Path::new(target)) { + Ok(list) => { + if list.contains("__asan") { + crash_info.casr_tool = casr_san.clone() } - } else { - error!("Fuzz target: {} must be an ELF executable.", target); + } + Err(e) => { + error!("{e}"); continue; } - } else { - error!("Couldn't read fuzz target binary: {}.", target); - continue; } } else { error!("{} is empty.", cmdline); @@ -251,219 +157,12 @@ fn main() -> Result<()> { } } - // Get timeout - let timeout = if let Some(timeout) = matches.get_one::("timeout") { - *timeout + let gdb_args = if let Some(argv) = matches.get_many::("ARGS") { + argv.cloned().collect() } else { - 0 - }; - - // Get number of threads - let jobs = if let Some(jobs) = matches.get_one::("jobs") { - *jobs as usize - } else { - std::cmp::max(1, num_cpus::get() / 2) + Vec::new() }; - let num_of_threads = jobs.min(crashes.len()).max(1) + 1; - let custom_pool = rayon::ThreadPoolBuilder::new() - .num_threads(num_of_threads) - .build() - .unwrap(); - - // Generate CASR reports. 
- info!("Generating CASR reports..."); - info!("Using {} threads", num_of_threads - 1); - let counter = RwLock::new(0_usize); - let total = crashes.len(); - custom_pool - .join( - || { - crashes.par_iter().try_for_each(|(_, crash)| { - if let Err(e) = crash.run_casr(output_dir.as_path(), timeout) { - // Disable util::log_progress - *counter.write().unwrap() = total; - bail!(e); - }; - *counter.write().unwrap() += 1; - Ok::<(), anyhow::Error>(()) - }) - }, - || util::log_progress(&counter, total), - ) - .0?; - - // Deduplicate reports. - if output_dir.read_dir()?.count() < 2 { - info!("There are less than 2 CASR reports, nothing to deduplicate."); - return summarize_results(output_dir, &crashes, &gdb_argv, num_of_threads, timeout); - } - info!("Deduplicating CASR reports..."); - let casr_cluster_d = Command::new("casr-cluster") - .arg("-d") - .arg(output_dir.clone().into_os_string()) - .output() - .with_context(|| "Couldn't launch casr-cluster".to_string())?; - - if casr_cluster_d.status.success() { - info!( - "{}", - String::from_utf8_lossy(&casr_cluster_d.stdout) - .lines() - .collect::>() - .join(". ") - ); - } else { - bail!("{}", String::from_utf8_lossy(&casr_cluster_d.stderr)); - } - - if !matches.get_flag("no-cluster") { - if output_dir - .read_dir()? 
- .flatten() - .map(|e| e.path()) - .filter(|e| e.extension().is_some() && e.extension().unwrap() == "casrep") - .count() - < 2 - { - info!("There are less than 2 CASR reports, nothing to cluster."); - return summarize_results(output_dir, &crashes, &gdb_argv, num_of_threads, timeout); - } - info!("Clustering CASR reports..."); - let casr_cluster_c = Command::new("casr-cluster") - .arg("-c") - .arg(output_dir.clone().into_os_string()) - .output() - .with_context(|| "Couldn't launch casr-cluster".to_string())?; - - if casr_cluster_c.status.success() { - info!( - "{}", - String::from_utf8_lossy(&casr_cluster_c.stdout).trim_end() - ); - } else { - error!("{}", String::from_utf8_lossy(&casr_cluster_c.stderr)); - } - - // Remove reports from deduplication phase. They are in clusters now. - for casrep in fs::read_dir(output_dir)?.flatten().map(|e| e.path()) { - if let Some(ext) = casrep.extension() { - if ext == "casrep" { - let _ = fs::remove_file(casrep); - } - } - } - } - - summarize_results(output_dir, &crashes, &gdb_argv, num_of_threads, timeout) -} - -/// Copy crashes next to reports and print summary. -/// Run casr-gdb on uninstrumented binary if specified in ARGS. -/// -/// # Arguments -/// -/// * `dir` - directory with casr reports -/// -/// * `crashes` - crashes info -/// -/// * `gdb_args` - run casr-gdb on uninstrumented binary if specified -/// -/// * `jobs` - number of threads for casr-gdb reports generation -/// -/// * `timeout` - target program timeout -fn summarize_results( - dir: &Path, - crashes: &HashMap, - gdb_args: &Vec, - jobs: usize, - timeout: u64, -) -> Result<()> { - // Copy crashes next to reports - copy_crashes(dir, crashes)?; - - if !gdb_args.is_empty() { - // Run casr-gdb on uninstrumented binary. 
- let crashes: Vec<_> = WalkDir::new(dir) - .into_iter() - .flatten() - .map(|e| e.into_path()) - .filter(|e| e.is_file()) - .filter(|e| e.extension().is_none() || e.extension().unwrap() != "casrep") - .filter(|e| !Path::new(format!("{}.gdb.casrep", e.display()).as_str()).exists()) - .collect(); - let num_of_threads = jobs.min(crashes.len() + 1); - if num_of_threads > 1 { - info!("casr-gdb: adding crash reports..."); - info!("Using {} threads", num_of_threads - 1); - let counter = RwLock::new(0_usize); - let total = crashes.len(); - let custom_pool = rayon::ThreadPoolBuilder::new() - .num_threads(num_of_threads) - .build() - .unwrap(); - let at_index = gdb_args.iter().skip(1).position(|s| s.contains("@@")); - custom_pool - .join( - || { - crashes.par_iter().try_for_each(|crash| { - if let Err(e) = (AflCrashInfo { - path: crash.to_path_buf(), - target_args: gdb_args.clone(), - at_index, - is_asan: false, - }) - .run_casr(None, timeout) - { - // Disable util::log_progress - *counter.write().unwrap() = total; - bail!(e); - }; - *counter.write().unwrap() += 1; - Ok::<(), anyhow::Error>(()) - }) - }, - || util::log_progress(&counter, total), - ) - .0?; - } - } - - // Print summary - let status = Command::new("casr-cli") - .arg(dir) - .stderr(std::process::Stdio::inherit()) - .stdout(std::process::Stdio::inherit()) - .status() - .with_context(|| "Couldn't launch casr-cli".to_string())?; - - if !status.success() { - error!("casr-cli exited with status {status}"); - } - - Ok(()) -} - -/// Copy recursively crash inputs next to casr reports -/// -/// # Arguments -/// -/// `dir` - directory with casr reports -/// `crashes` - crashes info -fn copy_crashes(dir: &Path, crashes: &HashMap) -> Result<()> { - for e in fs::read_dir(dir)?.flatten().map(|x| x.path()) { - if e.is_dir() && e.file_name().unwrap().to_str().unwrap().starts_with("cl") { - copy_crashes(&e, crashes)?; - } else if e.is_file() && e.extension().is_some() && e.extension().unwrap() == "casrep" { - let mut e = 
e.with_extension(""); - if e.extension().is_some() && e.extension().unwrap() == "gdb" { - e = e.with_extension(""); - } - let fname = e.file_name().unwrap().to_str().unwrap(); - if let Some(crash) = crashes.get(fname) { - let _ = fs::copy(&crash.path, e); - } - } - } - Ok(()) + // Generate reports + fuzzing_crash_triage_pipeline(&matches, &crashes, &gdb_args) } diff --git a/casr/src/bin/casr-cli.rs b/casr/src/bin/casr-cli.rs index 0fda29bc..b921dda8 100644 --- a/casr/src/bin/casr-cli.rs +++ b/casr/src/bin/casr-cli.rs @@ -945,11 +945,6 @@ fn print_summary(dir: &Path, unique_crash_line: bool) { } else { println!("{} ->{}", "SUMMARY".magenta(), classes); } - - if !corrupted_reports.is_empty() { - println!("{} reports were found:", "Corrupted".red()); - corrupted_reports.iter().for_each(|x| println!("{x}")); - } } /// Function processes report and returns summary diff --git a/casr/src/bin/casr-gdb.rs b/casr/src/bin/casr-gdb.rs index 1acafe50..5befc064 100644 --- a/casr/src/bin/casr-gdb.rs +++ b/casr/src/bin/casr-gdb.rs @@ -65,9 +65,10 @@ fn main() -> Result<()> { .short('t') .long("timeout") .action(ArgAction::Set) + .default_value("0") .value_name("SECONDS") - .help("Timeout (in seconds) for target execution [default: disabled]") - .value_parser(clap::value_parser!(u64).range(1..)) + .help("Timeout (in seconds) for target execution, 0 value means that timeout is disabled") + .value_parser(clap::value_parser!(u64).range(0..)) ) .arg( Arg::new("ignore") @@ -94,11 +95,7 @@ fn main() -> Result<()> { }; // Get timeout - let timeout = if let Some(timeout) = matches.get_one::("timeout") { - *timeout - } else { - 0 - }; + let timeout = *matches.get_one::("timeout").unwrap(); init_ignored_frames!("cpp", "rust"); if let Some(path) = matches.get_one::("ignore") { diff --git a/casr/src/bin/casr-java.rs b/casr/src/bin/casr-java.rs index 1e69ebc1..0fe1c08b 100644 --- a/casr/src/bin/casr-java.rs +++ b/casr/src/bin/casr-java.rs @@ -53,9 +53,10 @@ fn main() -> Result<()> { .short('t') 
.long("timeout") .action(ArgAction::Set) + .default_value("0") .value_name("SECONDS") - .help("Timeout (in seconds) for target execution [default: disabled]") - .value_parser(clap::value_parser!(u64).range(1..)) + .help("Timeout (in seconds) for target execution, 0 value means that timeout is disabled") + .value_parser(clap::value_parser!(u64).range(0..)) ) .arg( Arg::new("ignore") @@ -65,17 +66,6 @@ fn main() -> Result<()> { .value_name("FILE") .help("File with regular expressions for functions and file paths that should be ignored"), ) - .arg( - Arg::new("sub-tool") - .long("sub-tool") - .default_value("casr-san") - .action(ArgAction::Set) - .value_parser(clap::value_parser!(PathBuf)) - .value_name("PATH") - .help( - "Path to sub tool for crash analysis that will be called when main tool fails to detect a crash", - ), - ) .arg( Arg::new("ARGS") .action(ArgAction::Set) @@ -100,11 +90,7 @@ fn main() -> Result<()> { let stdin_file = util::stdin_from_matches(&matches)?; // Get timeout - let timeout = if let Some(timeout) = matches.get_one::("timeout") { - *timeout - } else { - 0 - }; + let timeout = *matches.get_one::("timeout").unwrap(); // Run program. 
let mut java_cmd = Command::new(argv[0]); @@ -160,8 +146,8 @@ fn main() -> Result<()> { report.execution_class = exception; } } else { - // Call sub tool - return util::call_sub_tool(&matches, &argv, "casr-java"); + // Call casr-san + return util::call_casr_san(&matches, &argv, "casr-java"); } if let Ok(crash_line) = JavaStacktrace::parse_stacktrace(&report.stacktrace)?.crash_line() { diff --git a/casr/src/bin/casr-libfuzzer.rs b/casr/src/bin/casr-libfuzzer.rs index 61d3670c..465a4657 100644 --- a/casr/src/bin/casr-libfuzzer.rs +++ b/casr/src/bin/casr-libfuzzer.rs @@ -1,17 +1,15 @@ +use casr::triage::{fuzzing_crash_triage_pipeline, CrashInfo}; use casr::util; -use anyhow::{bail, Context, Result}; +use anyhow::{bail, Result}; use clap::{ error::{ContextKind, ContextValue, ErrorKind}, Arg, ArgAction, }; -use log::{debug, error, info, warn}; -use rayon::iter::{IntoParallelRefIterator, ParallelIterator}; +use std::collections::HashMap; use std::fs; use std::path::{Path, PathBuf}; -use std::process::{Command, Stdio}; -use std::sync::RwLock; fn main() -> Result<()> { let matches = clap::Command::new("casr-libfuzzer") @@ -38,9 +36,10 @@ fn main() -> Result<()> { .short('t') .long("timeout") .action(ArgAction::Set) + .default_value("0") .value_name("SECONDS") - .help("Timeout (in seconds) for target execution [default: disabled]") - .value_parser(clap::value_parser!(u64).range(1..)) + .help("Timeout (in seconds) for target execution, 0 means that timeout is disabled") + .value_parser(clap::value_parser!(u64).range(0..)) ) .arg( Arg::new("input") @@ -81,6 +80,12 @@ fn main() -> Result<()> { .long("no-cluster") .help("Do not cluster CASR reports") ) + .arg( + Arg::new("casr-gdb-args") + .long("casr-gdb-args") + .action(ArgAction::Set) + .help("Add \"--casr-gdb-args \'./gdb_fuzz_target \'\" to generate additional crash reports with casr-gdb (e.g., test whether program crashes without sanitizers)"), + ) .arg( Arg::new("ARGS") .action(ArgAction::Set) @@ -96,239 +101,63 @@ fn 
main() -> Result<()> { // Get input dir let input_dir = matches.get_one::("input").unwrap().as_path(); - // Get output dir - let output_dir = matches.get_one::("output").unwrap(); - if !output_dir.exists() { - fs::create_dir_all(output_dir).with_context(|| { - format!("Couldn't create output directory {}", output_dir.display()) - })?; - } else if output_dir.read_dir()?.next().is_some() { - bail!("Output directory is not empty."); - } - // Get fuzz target args. - let argv: Vec<&str> = if let Some(argvs) = matches.get_many::("ARGS") { + let mut argv: Vec<&str> = if let Some(argvs) = matches.get_many::("ARGS") { argvs.map(|v| v.as_str()).collect() } else { bail!("Invalid fuzz target arguments"); }; + let at_index = if let Some(idx) = argv.iter().skip(1).position(|s| s.contains("@@")) { + idx + 1 + } else { + argv.push("@@"); + argv.len() - 1 + }; - let mut atheris_asan_lib = String::new(); - if argv[0].ends_with(".py") { - // Get Atheris asan_with_fuzzer library path. - let mut cmd = Command::new("python3"); - cmd.arg("-c") - .arg("import atheris; print(atheris.path(), end='')") - .stdout(Stdio::piped()) - .stderr(Stdio::piped()); - let output = cmd - .output() - .with_context(|| format!("Couldn't launch {cmd:?}"))?; - let out = String::from_utf8_lossy(&output.stdout); - let err = String::from_utf8_lossy(&output.stderr); - if !err.is_empty() { - bail!("Failed to get Atheris path: {}", err); + let mut envs = HashMap::new(); + + let tool = if argv[0].ends_with(".py") { + envs.insert("LD_PRELOAD".to_string(), util::get_atheris_lib()?); + "casr-python" + } else if argv[0].ends_with("jazzer") || argv[0].ends_with("java") { + "casr-java" + } else { + let sym_list = util::symbols_list(Path::new(argv[0]))?; + if sym_list.contains("__asan") || sym_list.contains("runtime.go") { + "casr-san" + } else { + "casr-gdb" } - atheris_asan_lib = format!("{out}/asan_with_fuzzer.so"); - } + }; + let tool = util::get_path(tool)?; // Get all crashes. 
- let crashes: Vec<_> = fs::read_dir(input_dir)? + let crashes: HashMap = fs::read_dir(input_dir)? .flatten() .map(|p| p.path()) .filter(|p| p.is_file()) - .map(|p| { + .map(|p| (p.file_name().unwrap().to_str().unwrap().to_string(), p)) + .filter(|(fname, _)| fname.starts_with("crash-") || fname.starts_with("leak-")) + .map(|(fname, p)| { ( - p.clone(), - p.file_name().unwrap().to_str().unwrap().to_string(), + fname, + CrashInfo { + path: p, + target_args: argv.iter().map(|x| x.to_string()).collect(), + envs: envs.clone(), + at_index: Some(at_index), + casr_tool: tool.clone(), + }, ) }) - .filter(|(_, fname)| fname.starts_with("crash-") || fname.starts_with("leak-")) .collect(); - // Get timeout - let timeout = if let Some(timeout) = matches.get_one::("timeout") { - *timeout - } else { - 0 - }; - - // Get number of threads - let jobs = if let Some(jobs) = matches.get_one::("jobs") { - *jobs as usize - } else { - std::cmp::max(1, num_cpus::get() / 2) - }; - let num_of_threads = jobs.min(crashes.len()).max(1) + 1; - let custom_pool = rayon::ThreadPoolBuilder::new() - .num_threads(num_of_threads) - .build() - .unwrap(); - - // Generate CASR reports. - info!("Generating CASR reports..."); - info!("Using {} threads", num_of_threads - 1); - let counter = RwLock::new(0_usize); - let total = crashes.len(); - let tool = if !atheris_asan_lib.is_empty() { - "casr-python" - } else if argv[0].ends_with("jazzer") || argv[0].ends_with("java") { - "casr-java" + let gdb_args = if let Some(argv) = matches.get_one::("casr-gdb-args") { + shell_words::split(argv)? 
} else { - "casr-san" + Vec::new() }; - custom_pool - .join( - || { - if let Err(e) = crashes.par_iter().try_for_each(|(crash, fname)| { - let mut casr_cmd = Command::new(tool); - if timeout != 0 { - casr_cmd.args(["-t".to_string(), timeout.to_string()]); - } - casr_cmd.args([ - "-o", - format!("{}.casrep", output_dir.join(fname).display()).as_str(), - "--", - ]); - if !atheris_asan_lib.is_empty() { - casr_cmd.arg("python3"); - casr_cmd.env("LD_PRELOAD", &atheris_asan_lib); - } - casr_cmd.args(argv.clone()); - casr_cmd.arg(crash); - debug!("{:?}", casr_cmd); - - // Get output - let casr_output = casr_cmd - .output() - .with_context(|| format!("Couldn't launch {casr_cmd:?}"))?; - - if !casr_output.status.success() { - let err = String::from_utf8_lossy(&casr_output.stderr); - if err.contains("Program terminated (no crash)") { - warn!("{}: no crash on input {}", tool, crash.display()); - } else { - error!("{} for input: {}", err.trim(), crash.display()); - } - } - *counter.write().unwrap() += 1; - Ok::<(), anyhow::Error>(()) - }) { - // Disable util::log_progress - *counter.write().unwrap() = total; - bail!(e); - }; - Ok(()) - }, - || util::log_progress(&counter, total), - ) - .0?; - - // Deduplicate reports. - if output_dir.read_dir()?.count() < 2 { - info!("There are less than 2 CASR reports, nothing to deduplicate."); - return summarize_results(input_dir, output_dir); - } - info!("Deduplicating CASR reports..."); - let casr_cluster_d = Command::new("casr-cluster") - .arg("-d") - .arg(output_dir.clone().into_os_string()) - .output() - .with_context(|| "Couldn't launch casr-cluster".to_string())?; - - if casr_cluster_d.status.success() { - info!( - "{}", - String::from_utf8_lossy(&casr_cluster_d.stdout) - .lines() - .collect::>() - .join(". ") - ); - } else { - bail!("{}", String::from_utf8_lossy(&casr_cluster_d.stderr)); - } - - if !matches.get_flag("no-cluster") { - if output_dir - .read_dir()? 
- .flatten() - .map(|e| e.path()) - .filter(|e| e.extension().is_some() && e.extension().unwrap() == "casrep") - .count() - < 2 - { - info!("There are less than 2 CASR reports, nothing to cluster."); - return summarize_results(input_dir, output_dir); - } - info!("Clustering CASR reports..."); - let casr_cluster_c = Command::new("casr-cluster") - .arg("-c") - .arg(output_dir.clone().into_os_string()) - .output() - .with_context(|| "Couldn't launch casr-cluster".to_string())?; - - if casr_cluster_c.status.success() { - info!( - "{}", - String::from_utf8_lossy(&casr_cluster_c.stdout).trim_end() - ); - } else { - error!("{}", String::from_utf8_lossy(&casr_cluster_c.stderr)); - } - - // Remove reports from deduplication phase. They are in clusters now. - for casrep in fs::read_dir(output_dir)?.flatten().map(|e| e.path()) { - if let Some(ext) = casrep.extension() { - if ext == "casrep" { - let _ = fs::remove_file(casrep); - } - } - } - } - - summarize_results(input_dir, output_dir) -} - -/// Copy crashes next to reports and print summary. 
-/// -/// # Arguments -/// -/// `input` - directory containing crashes found by libFuzzer -/// `output` - output directory with triaged reports -fn summarize_results(input: &Path, output: &Path) -> Result<()> { - // Copy crashes next to reports - copy_crashes(input, output)?; - - // Print summary - let status = Command::new("casr-cli") - .arg(output) - .stderr(std::process::Stdio::inherit()) - .stdout(std::process::Stdio::inherit()) - .status() - .with_context(|| "Couldn't launch casr-cli".to_string())?; - - if !status.success() { - error!("casr-cli exited with status {status}"); - } - - Ok(()) -} - -/// Copy recursively crash inputs next to casr reports -/// -/// # Arguments -/// -/// `input` - directory containing crashes found by libFuzzer -/// `output` - output directory with triaged reports -fn copy_crashes(input: &Path, output: &Path) -> Result<()> { - for e in fs::read_dir(output)?.flatten().map(|x| x.path()) { - if e.is_dir() && e.file_name().unwrap().to_str().unwrap().starts_with("cl") { - copy_crashes(input, &e)?; - } else if e.is_file() && e.extension().is_some() && e.extension().unwrap() == "casrep" { - let e = e.with_extension(""); - let _ = fs::copy(input.join(e.file_name().unwrap()), e); - } - } - Ok(()) + // Generate reports + fuzzing_crash_triage_pipeline(&matches, &crashes, &gdb_args) } diff --git a/casr/src/bin/casr-python.rs b/casr/src/bin/casr-python.rs index a4102533..a69ecbf5 100644 --- a/casr/src/bin/casr-python.rs +++ b/casr/src/bin/casr-python.rs @@ -52,9 +52,10 @@ fn main() -> Result<()> { .short('t') .long("timeout") .action(ArgAction::Set) + .default_value("0") .value_name("SECONDS") - .help("Timeout (in seconds) for target execution [default: disabled]") - .value_parser(clap::value_parser!(u64).range(1..)) + .help("Timeout (in seconds) for target execution, 0 value means that timeout is disabled") + .value_parser(clap::value_parser!(u64).range(0..)) ) .arg( Arg::new("ignore") @@ -64,17 +65,6 @@ fn main() -> Result<()> { 
.value_name("FILE") .help("File with regular expressions for functions and file paths that should be ignored"), ) - .arg( - Arg::new("sub-tool") - .long("sub-tool") - .default_value("casr-san") - .action(ArgAction::Set) - .value_parser(clap::value_parser!(PathBuf)) - .value_name("PATH") - .help( - "Path to sub tool for crash analysis that will be called when main tool fails to detect a crash", - ), - ) .arg( Arg::new("ARGS") .action(ArgAction::Set) @@ -99,11 +89,7 @@ fn main() -> Result<()> { let stdin_file = util::stdin_from_matches(&matches)?; // Get timeout - let timeout = if let Some(timeout) = matches.get_one::("timeout") { - *timeout - } else { - 0 - }; + let timeout = *matches.get_one::("timeout").unwrap(); // Run program. let mut python_cmd = Command::new(argv[0]); @@ -163,8 +149,8 @@ fn main() -> Result<()> { } } } else { - // Call sub tool - return util::call_sub_tool(&matches, &argv, "casr-python"); + // Call casr-san + return util::call_casr_san(&matches, &argv, "casr-python"); } } else if let Some(report_start) = python_stderr_list .iter() @@ -185,8 +171,8 @@ fn main() -> Result<()> { report.execution_class = exception; } } else { - // Call sub tool - return util::call_sub_tool(&matches, &argv, "casr-python"); + // Call casr-san + return util::call_casr_san(&matches, &argv, "casr-python"); } if let Ok(crash_line) = PythonStacktrace::parse_stacktrace(&report.stacktrace)?.crash_line() { diff --git a/casr/src/bin/casr-san.rs b/casr/src/bin/casr-san.rs index c4a591d6..7316124d 100644 --- a/casr/src/bin/casr-san.rs +++ b/casr/src/bin/casr-san.rs @@ -65,9 +65,10 @@ fn main() -> Result<()> { .short('t') .long("timeout") .action(ArgAction::Set) + .default_value("0") .value_name("SECONDS") - .help("Timeout (in seconds) for target execution [default: disabled]") - .value_parser(clap::value_parser!(u64).range(1..)) + .help("Timeout (in seconds) for target execution, 0 value means that timeout is disabled") + .value_parser(clap::value_parser!(u64).range(0..)) ) 
.arg( Arg::new("ignore") @@ -102,11 +103,7 @@ fn main() -> Result<()> { let stdin_file = util::stdin_from_matches(&matches)?; // Get timeout - let timeout = if let Some(timeout) = matches.get_one::("timeout") { - *timeout - } else { - 0 - }; + let timeout = *matches.get_one::("timeout").unwrap(); // Set rss limit. if let Ok(asan_options_str) = env::var("ASAN_OPTIONS") { diff --git a/casr/src/bin/casr-ubsan.rs b/casr/src/bin/casr-ubsan.rs index ea07c1a8..80823055 100644 --- a/casr/src/bin/casr-ubsan.rs +++ b/casr/src/bin/casr-ubsan.rs @@ -235,9 +235,10 @@ fn main() -> Result<()> { .short('t') .long("timeout") .action(ArgAction::Set) + .default_value("0") .value_name("SECONDS") - .help("Timeout (in seconds) for target execution [default: disabled]") - .value_parser(clap::value_parser!(u64).range(1..)) + .help("Timeout (in seconds) for target execution, 0 value means that timeout is disabled") + .value_parser(clap::value_parser!(u64).range(0..)) ) .arg( Arg::new("input") @@ -307,11 +308,7 @@ fn main() -> Result<()> { }; // Get timeout - let timeout = if let Some(timeout) = matches.get_one::("timeout") { - *timeout - } else { - 0 - }; + let timeout = *matches.get_one::("timeout").unwrap(); // Get input path list let mut inputs: Vec = vec![]; diff --git a/casr/src/lib.rs b/casr/src/lib.rs index 32bcaa32..4906dc2b 100644 --- a/casr/src/lib.rs +++ b/casr/src/lib.rs @@ -7,4 +7,5 @@ //! Enable `dojo` feature to build `casr-dojo` that can upload new and unique //! CASR reports to [DefectDojo](https://github.com/DefectDojo/django-DefectDojo). +pub mod triage; pub mod util; diff --git a/casr/src/triage.rs b/casr/src/triage.rs new file mode 100644 index 00000000..9f95f778 --- /dev/null +++ b/casr/src/triage.rs @@ -0,0 +1,438 @@ +//! Post-fuzzing crash analysis module: create, deduplicate, cluster CASR reports +//! and print overall summary. 
+use crate::util::{get_path, initialize_dirs, log_progress}; + +use std::collections::HashMap; +use std::fs; +use std::os::fd::AsFd; +use std::path::{Path, PathBuf}; +use std::process::Command; +use std::sync::RwLock; + +use anyhow::{bail, Context, Result}; +use log::{debug, error, info, warn}; +use rayon::iter::{IntoParallelRefIterator, ParallelIterator}; +use walkdir::WalkDir; + +#[derive(Debug, Clone, Default)] +/// Information about crash to reproduce it. +pub struct CrashInfo { + /// Path to crash input. + pub path: PathBuf, + /// Target command line args. + pub target_args: Vec, + /// Target environment variables. + pub envs: HashMap, + /// Index of the input file argument (`@@`) in `target_args`, counting from argv\[0\]; None for stdin. + pub at_index: Option, + /// Casr tool that should be run on this crash. + pub casr_tool: PathBuf, +} + +impl<'a> CrashInfo { + /// Generate Casr report for crash. + /// + /// # Arguments + /// + /// * `output_dir` - save report to specified directory or use the same directory as crash + /// + /// * `timeout` - target program timeout (in seconds) + pub fn run_casr>>(&self, output_dir: T, timeout: u64) -> Result<()> { + let tool = &self.casr_tool; + let tool_name = tool.file_name().unwrap().to_str().unwrap(); + let mut args: Vec = vec!["-o".to_string()]; + let (report_path, output_dir) = if let Some(out) = output_dir.into() { + (out.join(self.path.file_name().unwrap()), out) + } else { + (self.path.clone(), self.path.parent().unwrap()) + }; + if tool_name.eq("casr-gdb") { + args.push(format!("{}.gdb.casrep", report_path.display())); + } else { + args.push(format!("{}.casrep", report_path.display())); + } + if self.at_index.is_none() { + args.push("--stdin".to_string()); + args.push(self.path.to_str().unwrap().to_string()); + } + if timeout != 0 { + args.push("-t".to_string()); + args.push(timeout.to_string()); + } + args.push("--".to_string()); + if tool_name.eq("casr-python") { + args.push("python3".to_string()); + } + let offset = args.len(); + 
args.extend_from_slice(&self.target_args); + if let Some(at_index) = self.at_index { + let input = args[at_index + offset].replace("@@", self.path.to_str().unwrap()); + args[at_index + offset] = input; + } + + let mut casr_cmd = Command::new(tool); + casr_cmd.args(&args); + casr_cmd.envs(&self.envs); + + // Add envs + if self.target_args.iter().any(|x| x.eq("-detect_leaks=0")) { + let asan_options = std::env::var("ASAN_OPTIONS").unwrap_or(String::new()); + casr_cmd.env( + "ASAN_OPTIONS", + if asan_options.is_empty() { + "detect_leaks=0".to_string() + } else { + format!("{asan_options},detect_leaks=0",) + }, + ); + } + + debug!("{:?}", casr_cmd); + + // Get output + let casr_output = casr_cmd + .output() + .with_context(|| format!("Couldn't launch {casr_cmd:?}"))?; + + if !casr_output.status.success() { + let err = String::from_utf8_lossy(&casr_output.stderr); + if err.contains("Timeout") { + let mut timeout_name = self + .path + .file_name() + .unwrap() + .to_os_string() + .into_string() + .unwrap(); + if let Some(idx) = timeout_name.find('-') { + timeout_name.replace_range(..idx, "timeout"); + } + let timeout_path = output_dir.join("timeout").join(timeout_name); + if fs::copy(&self.path, timeout_path).is_err() { + error!("Error occurred while copying the file: {:?}", self.path); + } + } else if err.contains("Out of memory") { + let mut oom_name = self + .path + .file_name() + .unwrap() + .to_os_string() + .into_string() + .unwrap(); + if let Some(idx) = oom_name.find('-') { + oom_name.replace_range(..idx, "oom"); + } + let oom_path = output_dir.join("oom").join(oom_name); + if fs::copy(&self.path, oom_path).is_err() { + error!("Error occurred while copying the file: {:?}", self.path); + } + } else if err.contains("Program terminated (no crash)") { + warn!("{}: No crash on input {}", tool_name, self.path.display()); + } else { + error!("{} for input: {}", err.trim(), self.path.display()); + } + } + + Ok(()) + } +} + +/// Perform crash analysis pipeline: Create, 
deduplicate and cluster CASR reports. +/// +/// # Arguments +/// +/// * `matches` - casr-afl/casr-libfuzzer arguments +/// +/// * `crashes` - map of crashes, specified as a HashMap, where +/// key is crash input file name and value is CrashInfo structure +/// +/// * `gdb_args` - casr-gdb target arguments. If they are empty, casr-gdb won't be launched. +pub fn fuzzing_crash_triage_pipeline( + matches: &clap::ArgMatches, + crashes: &HashMap, + gdb_args: &Vec, +) -> Result<()> { + // Get casr-cluster path + let casr_cluster = get_path("casr-cluster")?; + + if crashes.is_empty() { + bail!("No crashes found"); + } + + let output_dir = initialize_dirs(matches)?; + + // Get timeout + let timeout = *matches.get_one::("timeout").unwrap(); + + // Get number of threads + let jobs = if let Some(jobs) = matches.get_one::("jobs") { + *jobs as usize + } else { + std::cmp::max(1, num_cpus::get() / 2) + }; + let num_of_threads = jobs.min(crashes.len()).max(1) + 1; + let custom_pool = rayon::ThreadPoolBuilder::new() + .num_threads(num_of_threads) + .build() + .unwrap(); + + info!("Analyzing {} files...", crashes.len()); + if timeout != 0 { + info!("Timeout for target execution is {timeout} seconds"); + } + // Generate CASR reports. + info!("Generating CASR reports..."); + info!("Using {} threads", num_of_threads - 1); + let counter = RwLock::new(0_usize); + let total = crashes.len(); + custom_pool + .join( + || { + crashes.par_iter().try_for_each(|(_, crash)| { + if let Err(e) = crash.run_casr(output_dir.as_path(), timeout) { + // Disable util::log_progress + *counter.write().unwrap() = total; + bail!(e); + }; + *counter.write().unwrap() += 1; + Ok::<(), anyhow::Error>(()) + }) + }, + || log_progress(&counter, total), + ) + .0?; + + // Deduplicate reports. 
+ if output_dir.read_dir()?.count() < 2 { + info!("There are less than 2 CASR reports, nothing to deduplicate."); + return summarize_results(matches, crashes, gdb_args); + } + info!("Deduplicating CASR reports..."); + let casr_cluster_d = Command::new(&casr_cluster) + .arg("-d") + .arg(output_dir.clone().into_os_string()) + .output() + .with_context(|| format!("Couldn't launch {casr_cluster:?}"))?; + + if casr_cluster_d.status.success() { + info!( + "{}", + String::from_utf8_lossy(&casr_cluster_d.stdout) + .lines() + .collect::>() + .join(". ") + ); + } else { + bail!("{}", String::from_utf8_lossy(&casr_cluster_d.stderr)); + } + + if !matches.get_flag("no-cluster") { + if output_dir + .read_dir()? + .flatten() + .map(|e| e.path()) + .filter(|e| e.extension().is_some() && e.extension().unwrap() == "casrep") + .count() + < 2 + { + info!("There are less than 2 CASR reports, nothing to cluster."); + return summarize_results(matches, crashes, gdb_args); + } + info!("Clustering CASR reports..."); + let casr_cluster_c = Command::new(&casr_cluster) + .arg("-c") + .arg(output_dir.clone().into_os_string()) + .output() + .with_context(|| format!("Couldn't launch {casr_cluster:?}"))?; + + if casr_cluster_c.status.success() { + info!( + "{}", + String::from_utf8_lossy(&casr_cluster_c.stdout).trim_end() + ); + } else { + error!("{}", String::from_utf8_lossy(&casr_cluster_c.stderr)); + } + + // Remove reports from deduplication phase. They are in clusters now. + for casrep in fs::read_dir(output_dir)?.flatten().map(|e| e.path()) { + if let Some(ext) = casrep.extension() { + if ext == "casrep" { + let _ = fs::remove_file(casrep); + } + } + } + } + + summarize_results(matches, crashes, gdb_args) +} + +/// Copy crashes next to reports and print summary. +/// Run casr-gdb on uninstrumented binary if specified in ARGS. +/// Print analysis statistic. 
+/// +/// # Arguments +/// +/// * `matches` - tool arguments +/// +/// * `crashes` - set of crashes, specified as a CrashInfo structure +/// +/// * `gdb_args` - casr-gdb target arguments. If they are empty, casr-gdb won't be launched. +fn summarize_results( + matches: &clap::ArgMatches, + crashes: &HashMap, + gdb_args: &Vec, +) -> Result<()> { + // Get output dir + let dir = matches.get_one::("output").unwrap(); + // Copy crashes next to reports + copy_crashes(dir, crashes)?; + + // Get timeout + let timeout = *matches.get_one::("timeout").unwrap(); + + // Get number of threads + let jobs = if let Some(jobs) = matches.get_one::("jobs") { + *jobs as usize + } else { + std::cmp::max(1, num_cpus::get() / 2) + }; + + if !gdb_args.is_empty() { + let casr_gdb = get_path("casr-gdb")?; + // Run casr-gdb on uninstrumented binary. + let crashes: Vec<_> = WalkDir::new(dir) + .into_iter() + .filter_entry(|e| { + let name = e.file_name().to_str().unwrap(); + !name.eq("oom") && !name.eq("timeout") + }) + .flatten() + .map(|e| e.into_path()) + .filter(|e| e.is_file()) + .filter(|e| e.extension().is_none() || e.extension().unwrap() != "casrep") + .filter(|e| !Path::new(format!("{}.gdb.casrep", e.display()).as_str()).exists()) + .collect(); + if !crashes.is_empty() { + let num_of_threads = jobs.min(crashes.len()) + 1; + info!("casr-gdb: adding crash reports..."); + info!("Using {} threads", num_of_threads - 1); + let counter = RwLock::new(0_usize); + let total = crashes.len(); + let custom_pool = rayon::ThreadPoolBuilder::new() + .num_threads(num_of_threads) + .build() + .unwrap(); + let at_index = gdb_args + .iter() + .skip(1) + .position(|s| s.contains("@@")) + .map(|x| x + 1); + custom_pool + .join( + || { + crashes.par_iter().try_for_each(|crash| { + if let Err(e) = (CrashInfo { + path: crash.to_path_buf(), + target_args: gdb_args.clone(), + envs: HashMap::new(), + at_index, + casr_tool: casr_gdb.clone(), + }) + .run_casr(None, timeout) + { + // Disable util::log_progress + 
*counter.write().unwrap() = total; + bail!(e); + }; + *counter.write().unwrap() += 1; + Ok::<(), anyhow::Error>(()) + }) + }, + || log_progress(&counter, total), + ) + .0?; + } + } + + let casr_cli = get_path("casr-cli")?; + // Print summary + let status = Command::new(casr_cli) + .arg(dir) + .stderr(std::process::Stdio::inherit()) + .stdout(std::io::stderr().as_fd().try_clone_to_owned()?) + .status() + .with_context(|| "Couldn't launch casr-cli".to_string())?; + + if !status.success() { + error!("casr-cli exited with status {status}"); + } + + // Report ooms. + let oom_dir = dir.join("oom"); + let oom_cnt = fs::read_dir(&oom_dir).unwrap().count(); + if oom_cnt != 0 { + info!( + "{} out of memory seeds are saved to {:?}", + oom_cnt, &oom_dir + ); + } else { + fs::remove_dir_all(&oom_dir)?; + } + + // Report timeouts. + let timeout_dir = dir.join("timeout"); + let timeout_cnt = fs::read_dir(&timeout_dir).unwrap().count(); + if timeout_cnt != 0 { + info!( + "{} timeout seeds are saved to {:?}", + timeout_cnt, &timeout_dir + ); + } else { + fs::remove_dir_all(&timeout_dir)?; + } + + // Check bad reports. 
+ if let Ok(err_dir) = fs::read_dir(dir.join("clerr")) { + warn!( + "{} corrupted reports are saved to {:?}", + err_dir + .filter_map(|x| x.ok()) + .map(|x| x.path().display().to_string()) + .filter(|x| x.ends_with(".casrep")) + .filter(|x| !x.ends_with(".gdb.casrep") + || !PathBuf::from(x.strip_suffix("gdb.casrep").unwrap().to_string() + "casrep") + .exists()) + .count(), + &dir.join("clerr") + ); + } + + Ok(()) +} + +/// Copy recursively crash inputs next to casr reports +/// +/// # Arguments +/// +/// `dir` - directory with casr reports +/// +/// `crashes` - crashes info +fn copy_crashes(dir: &Path, crashes: &HashMap) -> Result<()> { + for e in fs::read_dir(dir)?.flatten().map(|x| x.path()) { + if e.is_dir() && e.file_name().unwrap().to_str().unwrap().starts_with("cl") { + copy_crashes(&e, crashes)?; + } else if e.is_file() && e.extension().is_some() && e.extension().unwrap() == "casrep" { + let mut e = e.with_extension(""); + if e.extension().is_some() && e.extension().unwrap() == "gdb" { + e = e.with_extension(""); + } + let fname = e.file_name().unwrap().to_str().unwrap(); + if let Some(crash) = crashes.get(fname) { + let _ = fs::copy(&crash.path, e); + } + } + } + + Ok(()) +} diff --git a/casr/src/util.rs b/casr/src/util.rs index 405d2251..fbb3f591 100644 --- a/casr/src/util.rs +++ b/casr/src/util.rs @@ -10,19 +10,19 @@ use anyhow::{bail, Context, Result}; use clap::ArgMatches; use log::{info, warn}; use simplelog::*; -use std::fs::OpenOptions; +use std::collections::HashSet; +use std::fs::{self, OpenOptions}; use std::io::Write; use std::io::{BufRead, BufReader}; -use std::os::unix::fs::PermissionsExt; use std::path::{Path, PathBuf}; use std::process::{Command, Output, Stdio}; use std::sync::RwLock; use std::time::Duration; +use is_executable::IsExecutable; use wait_timeout::ChildExt; -use which::which; -/// Call sub tool with the provided options +/// Call casr-san with the provided options /// /// # Arguments /// @@ -31,20 +31,9 @@ use which::which; /// * 
`name` - main tool name, that called sub tool /// /// * `argv` - executable file options -pub fn call_sub_tool(matches: &ArgMatches, argv: &[&str], name: &str) -> Result<()> { - let tool = matches.get_one::("sub-tool").unwrap(); - if which(tool).is_err() { - if !tool.exists() { - bail!("Sub tool {tool:?} doesn't exist"); - } - if !tool.is_file() { - bail!("Sub tool {tool:?} isn't a file"); - } - if tool.metadata()?.permissions().mode() & 0o111 == 0 { - bail!("Sub tool {tool:?} isn't executable"); - } - } - let mut cmd = Command::new(tool); +pub fn call_casr_san(matches: &ArgMatches, argv: &[&str], name: &str) -> Result<()> { + let tool = get_path("casr-san")?; + let mut cmd = Command::new(&tool); if let Some(report_path) = matches.get_one::("output") { cmd.args(["--output", report_path.to_str().unwrap()]); } else { @@ -293,3 +282,125 @@ pub fn get_output(command: &mut Command, timeout: u64, error_on_timeout: bool) - .with_context(|| format!("Couldn't launch {command:?}")) } } + +/// Get Atheris asan_with_fuzzer library path. 
+pub fn get_atheris_lib() -> Result { + let mut cmd = Command::new("python3"); + cmd.arg("-c") + .arg("import atheris; print(atheris.path(), end='')") + .stdout(Stdio::piped()) + .stderr(Stdio::piped()); + let output = cmd + .output() + .with_context(|| format!("Couldn't launch {cmd:?}"))?; + let out = String::from_utf8_lossy(&output.stdout); + let err = String::from_utf8_lossy(&output.stderr); + if !err.is_empty() { + bail!("Failed to get Atheris path: {}", err); + } + Ok(format!("{out}/asan_with_fuzzer.so")) +} + +/// Create output, timeout and oom directories +/// +/// # Arguments +/// +/// * `matches` - tool arguments +/// +/// # Return value +/// +/// Path to output directory +pub fn initialize_dirs(matches: &clap::ArgMatches) -> Result<&PathBuf> { + // Get output dir + let output_dir = matches.get_one::("output").unwrap(); + if !output_dir.exists() { + fs::create_dir_all(output_dir).with_context(|| { + format!("Couldn't create output directory {}", output_dir.display()) + })?; + } else if output_dir.read_dir()?.next().is_some() { + bail!("Output directory is not empty."); + } + // Get oom dir + let oom_dir = output_dir.join("oom"); + if fs::create_dir_all(&oom_dir).is_err() { + bail!("Failed to create dir {}", &oom_dir.to_str().unwrap()); + } + // Get timeout dir + let timeout_dir = output_dir.join("timeout"); + if fs::create_dir_all(&timeout_dir).is_err() { + bail!("Failed to create dir {}", &timeout_dir.to_str().unwrap()); + } + + Ok(output_dir) +} + +/// Method checks whether binary file contains predefined symbols. +/// +/// # Arguments +/// +/// * `path` - path to binary to check. 
+/// +/// # Return value +/// +/// Set of important symbols +pub fn symbols_list(path: &Path) -> Result> { + let mut found_symbols = HashSet::new(); + if let Ok(buffer) = fs::read(path) { + if let Ok(elf) = goblin::elf::Elf::parse(&buffer) { + let symbols = [ + "__asan", + "__ubsan", + "__tsan", + "__msan", + "__llvm_profile", + "runtime.go", + ]; + for sym in elf.syms.iter() { + if let Some(name) = elf.strtab.get_at(sym.st_name) { + for symbol in symbols.iter() { + if name.contains(symbol) { + found_symbols.insert(*symbol); + break; + } + } + } + } + } else { + bail!("Fuzz target: {} must be an ELF executable.", path.display()); + } + } else { + bail!("Couldn't read fuzz target binary: {}.", path.display()); + } + + Ok(found_symbols) +} + +/// Function searches for path to the tool +/// +/// # Arguments +/// +/// * 'tool' - tool name +/// +/// # Return value +/// +/// Path to the tool +pub fn get_path(tool: &str) -> Result { + let mut path_to_tool = std::env::current_exe()?; + let current_tool = path_to_tool + .file_name() + .unwrap() + .to_str() + .unwrap() + .to_string(); + path_to_tool.pop(); + path_to_tool.push(tool); + if path_to_tool.is_executable() { + Ok(path_to_tool) + } else if let Ok(path_to_tool) = which::which(tool) { + Ok(path_to_tool) + } else { + bail!( + "{path_to_tool:?}: No {tool} next to {current_tool}. And there is no {tool} in PATH." 
+ ); + } +} diff --git a/casr/tests/tests.rs b/casr/tests/tests.rs index 5e28043b..0adfe083 100644 --- a/casr/tests/tests.rs +++ b/casr/tests/tests.rs @@ -3660,23 +3660,21 @@ fn test_casr_libfuzzer() { String::from_utf8_lossy(&output.stdout), String::from_utf8_lossy(&output.stderr) ); - let out = String::from_utf8_lossy(&output.stdout); let err = String::from_utf8_lossy(&output.stderr); - assert!(!out.is_empty()); assert!(!err.is_empty()); - assert!(err.contains("casr-san: no crash on input")); - assert!(err.contains("Error: Out of memory for input")); - assert!(out.contains("EXPLOITABLE")); - assert!(out.contains("NOT_EXPLOITABLE")); - assert!(out.contains("PROBABLY_EXPLOITABLE")); - assert!(out.contains("heap-buffer-overflow(read)")); - assert!(out.contains("heap-buffer-overflow(write)")); - assert!(out.contains("DestAvNearNull")); - assert!(out.contains("xml::serialization")); - assert!(out.contains("AbortSignal")); - assert!(out.contains("compound_document.hpp:83")); + assert!(err.contains("casr-san: No crash on input")); + assert!(err.contains("1 out of memory seeds are saved to")); + assert!(err.contains("EXPLOITABLE")); + assert!(err.contains("NOT_EXPLOITABLE")); + assert!(err.contains("PROBABLY_EXPLOITABLE")); + assert!(err.contains("heap-buffer-overflow(read)")); + assert!(err.contains("heap-buffer-overflow(write)")); + assert!(err.contains("DestAvNearNull")); + assert!(err.contains("xml::serialization")); + assert!(err.contains("AbortSignal")); + assert!(err.contains("compound_document.hpp:83")); let re = Regex::new(r"Number of reports after deduplication: (?P\d+)").unwrap(); let unique_cnt = re @@ -3772,18 +3770,16 @@ fn test_casr_libfuzzer_atheris() { String::from_utf8_lossy(&output.stdout), String::from_utf8_lossy(&output.stderr) ); - let out = String::from_utf8_lossy(&output.stdout); let err = String::from_utf8_lossy(&output.stderr); - assert!(!out.is_empty()); assert!(!err.is_empty()); - assert!(out.contains("NOT_EXPLOITABLE")); - 
assert!(!out.contains("PROBABLY_EXPLOITABLE")); - assert!(out.contains("KeyError")); - assert!(out.contains("TypeError")); - assert!(out.contains("resolver.py")); - assert!(out.contains("constructor.py")); + assert!(err.contains("NOT_EXPLOITABLE")); + assert!(!err.contains("PROBABLY_EXPLOITABLE")); + assert!(err.contains("KeyError")); + assert!(err.contains("TypeError")); + assert!(err.contains("resolver.py")); + assert!(err.contains("constructor.py")); let re = Regex::new(r"Number of reports after deduplication: (?P\d+)").unwrap(); let unique_cnt = re diff --git a/docs/usage.md b/docs/usage.md index 9d12f4a8..ce8ab8b7 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -25,15 +25,16 @@ Create CASR reports (.casrep) from gdb execution [ARGS]... Add "-- ./binary " to run executable Options: - -o, --output Path to save report. Path can be a directory, then report name is - generated - --stdout Print CASR report to stdout - --stdin Stdin file for program - -t, --timeout Timeout (in seconds) for target execution [default: disabled] - --ignore File with regular expressions for functions and file paths that - should be ignored - -h, --help Print help - -V, --version Print version + -o, --output Path to save report. Path can be a directory, then report name + is generated + --stdout Print CASR report to stdout + --stdin Stdin file for program + -t, --timeout Timeout (in seconds) for target execution, 0 value means that + timeout is disabled [default: 0] + --ignore File with regular expressions for functions and file paths that + should be ignored + -h, --help Print help + -V, --version Print version Example: @@ -49,15 +50,16 @@ Create CASR reports (.casrep) from AddressSanitizer reports [ARGS]... Add "-- ./binary " to run executable Options: - -o, --output Path to save report. 
Path can be a directory, then report name is - generated - --stdout Print CASR report to stdout - --stdin Stdin file for program - -t, --timeout Timeout (in seconds) for target execution [default: disabled] - --ignore File with regular expressions for functions and file paths that - should be ignored - -h, --help Print help - -V, --version Print version + -o, --output Path to save report. Path can be a directory, then report name + is generated + --stdout Print CASR report to stdout + --stdin Stdin file for program + -t, --timeout Timeout (in seconds) for target execution, 0 value means that + timeout is disabled [default: 0] + --ignore File with regular expressions for functions and file paths that + should be ignored + -h, --help Print help + -V, --version Print version Compile binary with ASAN: @@ -84,8 +86,8 @@ Triage errors found by UndefinedBehaviorSanitizer and create CASR reports (.casr debug] -j, --jobs Number of parallel jobs for generating CASR reports [default: half of cpu cores] - -t, --timeout Timeout (in seconds) for target execution [default: - disabled] + -t, --timeout Timeout (in seconds) for target execution, 0 value means + that timeout is disabled [default: 0] -i, --input ... Target input directory list -o, --output Output directory with triaged reports -h, --help Print help @@ -116,17 +118,16 @@ Create CASR reports (.casrep) from python reports [ARGS]... Add "-- " to run Options: - -o, --output Path to save report. Path can be a directory, then report name is - generated - --stdout Print CASR report to stdout - --stdin Stdin file for program - -t, --timeout Timeout (in seconds) for target execution [default: disabled] - --ignore File with regular expressions for functions and file paths that - should be ignored - --sub-tool Path to sub tool for crash analysis that will be called when main - tool fails to detect a crash [default: casr-san] - -h, --help Print help - -V, --version Print version + -o, --output Path to save report. 
Path can be a directory, then report name + is generated + --stdout Print CASR report to stdout + --stdin Stdin file for program + -t, --timeout Timeout (in seconds) for target execution, 0 value means that + timeout is disabled [default: 0] + --ignore File with regular expressions for functions and file paths that + should be ignored + -h, --help Print help + -V, --version Print version Example: @@ -142,17 +143,16 @@ Create CASR reports (.casrep) from java reports [ARGS]... Add "-- " to run Options: - -o, --output Path to save report. Path can be a directory, then report name is - generated - --stdout Print CASR report to stdout - --stdin Stdin file for program - -t, --timeout Timeout (in seconds) for target execution [default: disabled] - --ignore File with regular expressions for functions and file paths that - should be ignored - --sub-tool Path to sub tool for crash analysis that will be called when main - tool fails to detect a crash [default: casr-san] - -h, --help Print help - -V, --version Print version + -o, --output Path to save report. 
Path can be a directory, then report name + is generated + --stdout Print CASR report to stdout + --stdin Stdin file for program + -t, --timeout Timeout (in seconds) for target execution, 0 value means that + timeout is disabled [default: 0] + --ignore File with regular expressions for functions and file paths that + should be ignored + -h, --help Print help + -V, --version Print version Run casr-java: @@ -355,8 +355,8 @@ Triage crashes found by AFL++ debug] -j, --jobs Number of parallel jobs for generating CASR reports [default: half of cpu cores] - -t, --timeout Timeout (in seconds) for target execution [default: - disabled] + -t, --timeout Timeout (in seconds) for target execution, 0 value means + that timeout is disabled [default: 0] -i, --input AFL++ work directory -o, --output Output directory with triaged reports --no-cluster Do not cluster CASR reports @@ -454,18 +454,27 @@ Triage crashes found by libFuzzer based fuzzer (C/C++/go-fuzz/Atheris/Jazzer) [ARGS]... Add "-- ./fuzz_target " Options: - -l, --log-level Logging level [default: info] [possible values: info, - debug] - -j, --jobs Number of parallel jobs for generating CASR reports - [default: half of cpu cores] - -t, --timeout Timeout (in seconds) for target execution [default: - disabled] - -i, --input Directory containing crashes found by libFuzzer [default: - .] - -o, --output Output directory with triaged reports - --no-cluster Do not cluster CASR reports - -h, --help Print help - -V, --version Print version + -l, --log-level + Logging level [default: info] [possible values: info, debug] + -j, --jobs + Number of parallel jobs for generating CASR reports [default: half of cpu cores] + -t, --timeout + Timeout (in seconds) for target execution, 0 means that timeout is disabled + [default: 0] + -i, --input + Directory containing crashes found by libFuzzer [default: .] 
+ -o, --output + Output directory with triaged reports + --no-cluster + Do not cluster CASR reports + --casr-gdb-args + Add "--casr-gdb-args './gdb_fuzz_target '" to generate additional + crash reports with casr-gdb (e.g., test whether program crashes without + sanitizers) + -h, --help + Print help + -V, --version + Print version `casr-libfuzzer` provides integration with [libFuzzer](https://www.llvm.org/docs/LibFuzzer.html) based fuzzers @@ -477,6 +486,11 @@ libFuzzer example: $ casr-libfuzzer -i casr/tests/casr_tests/casrep/libfuzzer_crashes_xlnt -o casr/tests/tmp_tests_casr/casr_libfuzzer_out -- casr/tests/casr_tests/bin/load_fuzzer +You may also run `casr-libfuzzer` with additional report generation for non-instrumented +binary with `casr-gdb`: + + $ casr-libfuzzer -i casr/tests/casr_tests/casrep/libfuzzer_crashes_xlnt -o casr/tests/tmp_tests_casr/casr_libfuzzer_out --casr-gdb-args 'casr/tests/casr_tests/bin/load_sydr @@' -- casr/tests/casr_tests/bin/load_fuzzer + Atheris example: $ unzip casr/tests/casr_tests/python/ruamel.zip