diff --git a/crates/bins/src/bin/datadog-static-analyzer.rs b/crates/bins/src/bin/datadog-static-analyzer.rs index 3f595cde..80ad443e 100644 --- a/crates/bins/src/bin/datadog-static-analyzer.rs +++ b/crates/bins/src/bin/datadog-static-analyzer.rs @@ -3,13 +3,7 @@ use getopts::Options; use indicatif::ProgressBar; use itertools::Itertools; use rayon::prelude::*; -use std::collections::HashMap; -use std::io::prelude::*; use std::path::PathBuf; -use std::process::exit; -use std::sync::Arc; -use std::time::{Duration, Instant, SystemTime}; -use std::{env, fs}; use cli::config_file::get_config; use cli::constants::{ @@ -25,7 +19,7 @@ use cli::datadog_utils::{ }; use cli::file_utils::{ are_subdirectories_safe, filter_files_by_diff_aware_info, filter_files_by_size, - filter_files_for_language, get_files, read_files_from_gitignore, + filter_files_for_language, get_files, get_language_for_file, read_files_from_gitignore, }; use cli::model::cli_configuration::CliConfiguration; use cli::model::datadog_api::DiffAwareData; @@ -54,6 +48,13 @@ use secrets::model::secret_result::{SecretResult, SecretValidationStatus}; use secrets::scanner::{build_sds_scanner, find_secrets}; use secrets::secret_files::should_ignore_file_for_secret; use std::cell::Cell; +use std::collections::HashMap; +use std::io::prelude::*; +use std::path::Path; +use std::process::exit; +use std::sync::Arc; +use std::time::{Duration, Instant, SystemTime}; +use std::{env, fs}; fn print_usage(program: &str, opts: Options) { let brief = format!("Usage: {} FILE [options]", program); @@ -751,6 +752,7 @@ fn main() -> Result<()> { // Secrets detection let mut secrets_results: Vec = vec![]; if secrets_enabled { + let path_metadata; let secrets_start = Instant::now(); let secrets_files: Vec = files_to_analyze @@ -768,36 +770,70 @@ fn main() -> Result<()> { let nb_secrets_rules: usize = secrets_rules.len(); let nb_secrets_files = secrets_files.len(); - secrets_results = secrets_files - .par_iter() - .flat_map(|path| { - let relative_path = path - .strip_prefix(directory_path) - .unwrap() - .to_str() - .expect("path contains non-Unicode characters"); - let res = if let Ok(file_content) = read_file(path) { - let file_content = Arc::from(file_content); - find_secrets( - &sds_scanner, - &secrets_rules, - relative_path, - &file_content, - &analysis_options, - ) - } else { - // this is generally because the file is binary. - if use_debug { - eprintln!("error when getting content of path {}", &path.display()); + (secrets_results, path_metadata) = secrets_files + .into_par_iter() + .fold( + || (Vec::new(), HashMap::new()), + |(_, mut path_metadata), path| { + let relative_path = path + .strip_prefix(directory_path) + .expect("cannot strip prefix from path") + .to_str() + .expect("path contains non-Unicode characters"); + let res = if let Ok(file_content) = read_file(&path) { + let file_content = Arc::from(file_content); + let secrets = find_secrets( + &sds_scanner, + &secrets_rules, + relative_path, + &file_content, + &analysis_options, + ); + + if !secrets.is_empty() { + let cloned_path_str = relative_path.to_string(); + let language_opt = get_language_for_file(&path); + + if let Some(language) = language_opt { + let metadata = if is_test_file( + language, + file_content.as_ref(), + Path::new(&cloned_path_str), + None, + ) { + Some(ArtifactClassification { is_test_file: true }) + } else { + None + }; + path_metadata.insert(cloned_path_str.clone(), metadata); + } + } + + secrets + } else { + // this is generally because the file is binary. + if use_debug { + eprintln!("error when getting content of path {}", path.display()); + } + vec![] + }; + if let Some(pb) = &progress_bar { + pb.inc(1); } - vec![] - }; - if let Some(pb) = &progress_bar { - pb.inc(1); - } - res - }) - .collect(); + (res, path_metadata) + }, + ) + .reduce( + || (Vec::new(), HashMap::new()), + |mut base, other| { + let (other_results, other_classifications) = other; + base.0.extend(other_results); + for (k, v) in other_classifications { + base.1.insert(k, v); + } + base + }, + ); let nb_secrets_found: u32 = secrets_results.iter().map(|x| x.matches.len() as u32).sum(); let nb_secrets_validated: u32 = secrets_results @@ -811,6 +847,15 @@ fn main() -> Result<()> { }) .sum(); + // adding metadata from secrets + for (k, v) in path_metadata { + if let Some(artifact_classification) = v { + all_path_metadata + .entry(k) + .or_insert(artifact_classification); + } + } + if let Some(pb) = &progress_bar { pb.finish(); } diff --git a/crates/cli/src/file_utils.rs b/crates/cli/src/file_utils.rs index 4d9d29b8..df98ccb4 100644 --- a/crates/cli/src/file_utils.rs +++ b/crates/cli/src/file_utils.rs @@ -78,6 +78,47 @@ fn get_prefix_for_language(language: &Language) -> Option> { None } +/// Find the language for a given file. +pub fn get_language_for_file(path: &Path) -> Option { + // match for extensions (myfile.c, myfile.php, etc). + for (language, extensions) in FILE_EXTENSIONS_PER_LANGUAGE_LIST { + let extensions_string = extensions + .to_vec() + .iter() + .map(|x| x.to_string()) + .collect::>(); + if match_extension(path, extensions_string.as_slice()) { + return Some(*language); + } + } + + // match for exact match (e.g. BUILD.bazel, Dockerfile, etc) + for (language, filenames) in FILE_EXACT_MATCH_PER_LANGUAGE_LIST { + let filename_strings = filenames + .to_vec() + .iter() + .map(|x| x.to_string()) + .collect::>(); + if match_exact_filename(path, filename_strings.as_slice()) { + return Some(*language); + } + } + + // match for prefix (e.g. Dockerfile.something) + for (language, prefixes) in FILE_PREFIX_PER_LANGUAGE_LIST { + let prefix_string = prefixes + .to_vec() + .iter() + .map(|x| x.to_string()) + .collect::>(); + if match_prefix_filename(path, prefix_string.as_slice()) { + return Some(*language); + } + } + + None +} + // Read the .gitignore file in a directory and return the lines that are not commented // or empty. // We ignore pattern that start with # (comments) or contains ! (cause repositories @@ -862,4 +903,28 @@ mod tests { .len() ); } + + #[test] + fn test_get_language_for_file() { + // extension + assert_eq!( + get_language_for_file(&PathBuf::from("path/to/foo.java")), + Some(Language::Java) + ); + + // exact filename + assert_eq!( + get_language_for_file(&PathBuf::from("BUILD.bazel")), + Some(Language::Starlark) + ); + + // prefix + assert_eq!( + get_language_for_file(&PathBuf::from("Dockerfile.foobar")), + Some(Language::Dockerfile) + ); + + // none + assert_eq!(get_language_for_file(&PathBuf::from("wefwefwef")), None); + } } diff --git a/misc/integration-test-secrets.sh b/misc/integration-test-secrets.sh index a2c87cbd..11f01cf5 100755 --- a/misc/integration-test-secrets.sh +++ b/misc/integration-test-secrets.sh @@ -22,8 +22,10 @@ RES=`jq '.runs[0].results | length ' "${REPO_DIR}/results1.json"` echo "Found $RES errors on first run" -if [ "$RES" -ne "3" ]; then - echo "incorrect number of errors found" +EXPECTING=5 + +if [ "$RES" -ne "$EXPECTING" ]; then + echo "incorrect number of errors found, found $RES, expected $EXPECTING" exit 1 fi