From 19653b80de1cd043effafb8145ebdbe58d2e5b18 Mon Sep 17 00:00:00 2001 From: Julien Delange Date: Mon, 6 May 2024 15:47:15 -0400 Subject: [PATCH 1/6] add debug information about the branch being read --- DEVELOPMENT.md | 1 + .../bins/src/bin/datadog-static-analyzer.rs | 51 +++++++++++-------- 2 files changed, 30 insertions(+), 22 deletions(-) diff --git a/DEVELOPMENT.md b/DEVELOPMENT.md index af21c807..415b9f25 100644 --- a/DEVELOPMENT.md +++ b/DEVELOPMENT.md @@ -8,6 +8,7 @@ in `kernel` for the analysis to work. The `build` process will take care of this cargo build ``` +**NOTE**: you may need extra dependencies to install. Read [these instructions](crates/vectorscan-sys/README.md). ## Analyze a directory diff --git a/crates/bins/src/bin/datadog-static-analyzer.rs b/crates/bins/src/bin/datadog-static-analyzer.rs index c98f0ca2..d088db2c 100644 --- a/crates/bins/src/bin/datadog-static-analyzer.rs +++ b/crates/bins/src/bin/datadog-static-analyzer.rs @@ -486,31 +486,38 @@ fn main() -> Result<()> { // check if we do a diff-aware scan let diff_aware_parameters: Option = if diff_aware_requested { match configuration.generate_diff_aware_request_data() { - Ok(params) => match get_diff_aware_information(&params) { - Ok(d) => { - if configuration.use_debug { - println!( - "diff aware enabled, base sha: {}, files to scan {}", - d.base_sha, - d.files.join(",") - ); - } else { - println!( - "diff-aware enabled, based sha {}, scanning only {}/{} files", - d.base_sha, - d.files.len(), - files_in_repository.len() - ) - } - Some(d) + Ok(params) => { + + if configuration.use_debug { + println!("Diff-aware request with repository url {}, sha {}, branch {}, config hash {}", params.repository_url, params.sha, params.branch, params.config_hash); } - Err(e) => { - eprintln!("diff aware not enabled (error when receiving diff-aware data from Datadog with config hash {}, sha {}), proceeding with full scan.", &params.config_hash, &params.sha); - if configuration.use_debug { - eprintln!("error when
trying to enabled diff-aware scanning: {:?}", e); + + match get_diff_aware_information(&params) { + Ok(d) => { + if configuration.use_debug { + println!( + "diff aware enabled, base sha: {}, files to scan {}", + d.base_sha, + d.files.join(",") + ); + } else { + println!( + "diff-aware enabled, based sha {}, scanning only {}/{} files", + d.base_sha, + d.files.len(), + files_in_repository.len() + ) + } + Some(d) } + Err(e) => { + eprintln!("diff aware not enabled (error when receiving diff-aware data from Datadog with config hash {}, sha {}), proceeding with full scan.", &params.config_hash, &params.sha); + if configuration.use_debug { + eprintln!("error when trying to enabled diff-aware scanning: {:?}", e); + } - None + None + } } }, Err(e) => { From 668e098b8cde482ce8929d4974ff729a659d17b7 Mon Sep 17 00:00:00 2001 From: Julien Delange Date: Mon, 6 May 2024 16:18:34 -0400 Subject: [PATCH 2/6] update how the default branch is found --- crates/cli/src/model/cli_configuration.rs | 24 ++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/crates/cli/src/model/cli_configuration.rs b/crates/cli/src/model/cli_configuration.rs index 6a9b2136..48d81c52 100644 --- a/crates/cli/src/model/cli_configuration.rs +++ b/crates/cli/src/model/cli_configuration.rs @@ -1,7 +1,8 @@ +use std::any::Any; use crate::model::datadog_api::DiffAwareRequestArguments; use anyhow::anyhow; -use git2::Repository; +use git2::{Repository}; use kernel::config_file::ArgumentProvider; use kernel::model::common::OutputFormat; use kernel::model::config_file::PathConfig; @@ -91,16 +92,21 @@ impl CliConfiguration { // let's get the latest commit let head = repository.head()?; - let oid = head.target(); + let head_name = head.shorthand(); + match (oid, head_name) { + (Some(o), Some(h)) => { + if h == "HEAD" { + return Err(anyhow!("branch is HEAD, cannot generate diff-aware scanning")) + } - match (oid, head.name()) { - (Some(o), Some(h)) => Ok(DiffAwareRequestArguments { - repository_url,
config_hash, - sha: o.to_string(), - branch: h.to_string(), - }), + Ok(DiffAwareRequestArguments { + repository_url, + config_hash, + sha: o.to_string(), + branch:h.to_string(), + }) + }, _ => { if self.use_debug { println!( From 4ee4f8ec6445309c8664b30ddd81daec10e0bce2 Mon Sep 17 00:00:00 2001 From: Julien Delange Date: Mon, 6 May 2024 16:22:20 -0400 Subject: [PATCH 3/6] fix linter issues --- crates/cli/src/model/cli_configuration.rs | 1 - crates/static-analysis-kernel/src/analysis/tree_sitter.rs | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/crates/cli/src/model/cli_configuration.rs b/crates/cli/src/model/cli_configuration.rs index 48d81c52..a20ef073 100644 --- a/crates/cli/src/model/cli_configuration.rs +++ b/crates/cli/src/model/cli_configuration.rs @@ -1,4 +1,3 @@ -use std::any::Any; use crate::model::datadog_api::DiffAwareRequestArguments; use anyhow::anyhow; diff --git a/crates/static-analysis-kernel/src/analysis/tree_sitter.rs b/crates/static-analysis-kernel/src/analysis/tree_sitter.rs index 4c44616c..d1d89a75 100644 --- a/crates/static-analysis-kernel/src/analysis/tree_sitter.rs +++ b/crates/static-analysis-kernel/src/analysis/tree_sitter.rs @@ -89,7 +89,7 @@ pub fn get_query_nodes( captures.insert(capture_name.to_string(), node.clone()); captures_list.entry(capture_name.to_string()).or_default(); captures_list - .get_mut(&capture_name.to_string()) + .get_mut(&capture_name) .unwrap() .push(node.clone()); } From 6a597f1922e19afdc11cc23eaf2ed794b6f864a4 Mon Sep 17 00:00:00 2001 From: Julien Delange Date: Mon, 6 May 2024 16:23:38 -0400 Subject: [PATCH 4/6] update --- crates/static-analysis-kernel/src/analysis/tree_sitter.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/static-analysis-kernel/src/analysis/tree_sitter.rs b/crates/static-analysis-kernel/src/analysis/tree_sitter.rs index d1d89a75..4c44616c 100644 --- a/crates/static-analysis-kernel/src/analysis/tree_sitter.rs +++ 
b/crates/static-analysis-kernel/src/analysis/tree_sitter.rs @@ -89,7 +89,7 @@ pub fn get_query_nodes( captures.insert(capture_name.to_string(), node.clone()); captures_list.entry(capture_name.to_string()).or_default(); captures_list - .get_mut(&capture_name) + .get_mut(&capture_name.to_string()) .unwrap() .push(node.clone()); } From bd9a6cfa6395110e6cb0241504f7af6af86778ac Mon Sep 17 00:00:00 2001 From: Julien Delange Date: Mon, 6 May 2024 16:28:48 -0400 Subject: [PATCH 5/6] update --- .../bins/src/bin/datadog-static-analyzer.rs | 44 +++++++++---------- crates/cli/src/model/cli_configuration.rs | 20 +++++---- .../src/analysis/tree_sitter.rs | 3 +- 3 files changed, 36 insertions(+), 31 deletions(-) diff --git a/crates/bins/src/bin/datadog-static-analyzer.rs b/crates/bins/src/bin/datadog-static-analyzer.rs index d088db2c..30aaa91d 100644 --- a/crates/bins/src/bin/datadog-static-analyzer.rs +++ b/crates/bins/src/bin/datadog-static-analyzer.rs @@ -1,4 +1,18 @@ +use std::{env, fs}; +use std::collections::HashMap; +use std::io::prelude::*; +use std::process::exit; +use std::time::{Instant, SystemTime}; + +use anyhow::{Context, Result}; +use getopts::Options; +use indicatif::ProgressBar; +use itertools::Itertools; +use rayon::prelude::*; + use cli::config_file::read_config_file; +use cli::constants::DEFAULT_MAX_FILE_SIZE_KB; +use cli::csv; use cli::datadog_utils::{ get_all_default_rulesets, get_diff_aware_information, get_rules_from_rulesets, }; @@ -6,38 +20,25 @@ use cli::file_utils::{ are_subdirectories_safe, filter_files_by_diff_aware_info, filter_files_by_size, filter_files_for_language, get_files, read_files_from_gitignore, }; +use cli::model::cli_configuration::CliConfiguration; +use cli::model::datadog_api::DiffAwareData; use cli::rule_utils::{ count_violations_by_severities, get_languages_for_rules, get_rulesets_from_file, }; -use itertools::Itertools; -use kernel::analysis::analyze::analyze; -use kernel::constants::{CARGO_VERSION, VERSION}; -use 
kernel::model::analysis::{AnalysisOptions, ERROR_RULE_TIMEOUT}; -use kernel::model::common::{Language, OutputFormat}; -use kernel::model::rule::{Rule, RuleInternal, RuleResult, RuleSeverity}; - -use anyhow::{Context, Result}; -use cli::constants::DEFAULT_MAX_FILE_SIZE_KB; -use cli::csv; -use cli::model::cli_configuration::CliConfiguration; -use cli::model::datadog_api::DiffAwareData; use cli::sarif::sarif_utils::{ generate_sarif_report, SarifReportMetadata, SarifRule, SarifRuleResult, }; use cli::secrets::{SecretResult, SecretRule}; use cli::violations_table; -use getopts::Options; -use indicatif::ProgressBar; +use kernel::analysis::analyze::analyze; use kernel::config_file::ArgumentProvider; +use kernel::constants::{CARGO_VERSION, VERSION}; +use kernel::model::analysis::{AnalysisOptions, ERROR_RULE_TIMEOUT}; +use kernel::model::common::{Language, OutputFormat}; use kernel::model::config_file::{ConfigFile, PathConfig}; +use kernel::model::rule::{Rule, RuleInternal, RuleResult, RuleSeverity}; use kernel::path_restrictions::PathRestrictions; use kernel::rule_overrides::RuleOverrides; -use rayon::prelude::*; -use std::collections::HashMap; -use std::io::prelude::*; -use std::process::exit; -use std::time::{Instant, SystemTime}; -use std::{env, fs}; fn print_usage(program: &str, opts: Options) { let brief = format!("Usage: {} FILE [options]", program); @@ -487,7 +488,6 @@ fn main() -> Result<()> { let diff_aware_parameters: Option = if diff_aware_requested { match configuration.generate_diff_aware_request_data() { Ok(params) => { - if configuration.use_debug { println!("Diff-aware request with repository url {}, sha {}, branch {}, config hash {}", params.repository_url, params.sha, params.branch, params.config_hash); } @@ -519,7 +519,7 @@ fn main() -> Result<()> { None } } - }, + } Err(e) => { eprintln!("diff aware not enabled (unable to generate diff-aware request data), proceeding with full scan."); eprintln!("Make sure the user running the scan owns the repository 
(use git config --global --add safe.directory if needed)"); diff --git a/crates/cli/src/model/cli_configuration.rs b/crates/cli/src/model/cli_configuration.rs index a20ef073..78707b5a 100644 --- a/crates/cli/src/model/cli_configuration.rs +++ b/crates/cli/src/model/cli_configuration.rs @@ -1,13 +1,14 @@ -use crate::model::datadog_api::DiffAwareRequestArguments; - use anyhow::anyhow; -use git2::{Repository}; +use git2::Repository; +use sha2::{Digest, Sha256}; + use kernel::config_file::ArgumentProvider; use kernel::model::common::OutputFormat; use kernel::model::config_file::PathConfig; use kernel::model::rule::Rule; use kernel::path_restrictions::PathRestrictions; -use sha2::{Digest, Sha256}; + +use crate::model::datadog_api::DiffAwareRequestArguments; /// represents the CLI configuration #[derive(Clone)] @@ -96,16 +97,18 @@ impl CliConfiguration { match (oid, head_name) { (Some(o), Some(h)) => { if h == "HEAD" { - return Err(anyhow!("branch is HEAD, cannot generate diff-aware scanning")) + return Err(anyhow!( + "branch is HEAD, cannot generate diff-aware scanning" + )); } Ok(DiffAwareRequestArguments { repository_url, config_hash, sha: o.to_string(), - branch:h.to_string(), + branch: h.to_string(), }) - }, + } _ => { if self.use_debug { println!( @@ -121,11 +124,12 @@ impl CliConfiguration { #[cfg(test)] mod tests { - use super::*; use kernel::model::common::Language; use kernel::model::common::OutputFormat::Sarif; use kernel::model::rule::{RuleCategory, RuleSeverity, RuleType}; + use super::*; + #[test] fn test_generate_diff_aware_hash() { let cli_configuration = CliConfiguration { diff --git a/crates/static-analysis-kernel/src/analysis/tree_sitter.rs b/crates/static-analysis-kernel/src/analysis/tree_sitter.rs index 4c44616c..ca7b3f02 100644 --- a/crates/static-analysis-kernel/src/analysis/tree_sitter.rs +++ b/crates/static-analysis-kernel/src/analysis/tree_sitter.rs @@ -88,8 +88,9 @@ pub fn get_query_nodes( if let (Some(capture_name), Some(node)) = 
(capture_name_opt, node_opt) { captures.insert(capture_name.to_string(), node.clone()); captures_list.entry(capture_name.to_string()).or_default(); + captures_list - .get_mut(&capture_name.to_string()) + .get_mut(&capture_name.to_string().clone()) .unwrap() .push(node.clone()); } From 0f393ce9d0eb19fe2ce9f61e1e164c436bf52f75 Mon Sep 17 00:00:00 2001 From: Julien Delange Date: Mon, 6 May 2024 16:37:22 -0400 Subject: [PATCH 6/6] update --- .../bins/src/bin/datadog-static-analyzer.rs | 41 +++++++++---------- 1 file changed, 20 insertions(+), 21 deletions(-) diff --git a/crates/bins/src/bin/datadog-static-analyzer.rs b/crates/bins/src/bin/datadog-static-analyzer.rs index 30aaa91d..dd4f74ef 100644 --- a/crates/bins/src/bin/datadog-static-analyzer.rs +++ b/crates/bins/src/bin/datadog-static-analyzer.rs @@ -1,18 +1,4 @@ -use std::{env, fs}; -use std::collections::HashMap; -use std::io::prelude::*; -use std::process::exit; -use std::time::{Instant, SystemTime}; - -use anyhow::{Context, Result}; -use getopts::Options; -use indicatif::ProgressBar; -use itertools::Itertools; -use rayon::prelude::*; - use cli::config_file::read_config_file; -use cli::constants::DEFAULT_MAX_FILE_SIZE_KB; -use cli::csv; use cli::datadog_utils::{ get_all_default_rulesets, get_diff_aware_information, get_rules_from_rulesets, }; @@ -20,25 +6,38 @@ use cli::file_utils::{ are_subdirectories_safe, filter_files_by_diff_aware_info, filter_files_by_size, filter_files_for_language, get_files, read_files_from_gitignore, }; -use cli::model::cli_configuration::CliConfiguration; -use cli::model::datadog_api::DiffAwareData; use cli::rule_utils::{ count_violations_by_severities, get_languages_for_rules, get_rulesets_from_file, }; +use itertools::Itertools; +use kernel::analysis::analyze::analyze; +use kernel::constants::{CARGO_VERSION, VERSION}; +use kernel::model::analysis::{AnalysisOptions, ERROR_RULE_TIMEOUT}; +use kernel::model::common::{Language, OutputFormat}; +use kernel::model::rule::{Rule, 
RuleInternal, RuleResult, RuleSeverity}; + +use anyhow::{Context, Result}; +use cli::constants::DEFAULT_MAX_FILE_SIZE_KB; +use cli::csv; +use cli::model::cli_configuration::CliConfiguration; +use cli::model::datadog_api::DiffAwareData; use cli::sarif::sarif_utils::{ generate_sarif_report, SarifReportMetadata, SarifRule, SarifRuleResult, }; use cli::secrets::{SecretResult, SecretRule}; use cli::violations_table; -use kernel::analysis::analyze::analyze; +use getopts::Options; +use indicatif::ProgressBar; use kernel::config_file::ArgumentProvider; -use kernel::constants::{CARGO_VERSION, VERSION}; -use kernel::model::analysis::{AnalysisOptions, ERROR_RULE_TIMEOUT}; -use kernel::model::common::{Language, OutputFormat}; use kernel::model::config_file::{ConfigFile, PathConfig}; -use kernel::model::rule::{Rule, RuleInternal, RuleResult, RuleSeverity}; use kernel::path_restrictions::PathRestrictions; use kernel::rule_overrides::RuleOverrides; +use rayon::prelude::*; +use std::collections::HashMap; +use std::io::prelude::*; +use std::process::exit; +use std::time::{Instant, SystemTime}; +use std::{env, fs}; fn print_usage(program: &str, opts: Options) { let brief = format!("Usage: {} FILE [options]", program);