Skip to content

[STAL-2007] debug static analyzer #348

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
May 6, 2024
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions DEVELOPMENT.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ in `kernel` for the analysis to work. The `build` process will take care of this
cargo build
```

**NOTE**: you may need to install extra dependencies. Read [these instructions](crates/vectorscan-sys/README.md).

## Analyze a directory

Expand Down
93 changes: 50 additions & 43 deletions crates/bins/src/bin/datadog-static-analyzer.rs
Original file line number Diff line number Diff line change
@@ -1,43 +1,44 @@
use std::{env, fs};
use std::collections::HashMap;
use std::io::prelude::*;
use std::process::exit;
use std::time::{Instant, SystemTime};

use anyhow::{Context, Result};
use getopts::Options;
use indicatif::ProgressBar;
use itertools::Itertools;
use rayon::prelude::*;

use cli::config_file::read_config_file;
use cli::constants::DEFAULT_MAX_FILE_SIZE_KB;
use cli::csv;
use cli::datadog_utils::{
get_all_default_rulesets, get_diff_aware_information, get_rules_from_rulesets,
};
use cli::file_utils::{
are_subdirectories_safe, filter_files_by_diff_aware_info, filter_files_by_size,
filter_files_for_language, get_files, read_files_from_gitignore,
};
use cli::model::cli_configuration::CliConfiguration;
use cli::model::datadog_api::DiffAwareData;
use cli::rule_utils::{
count_violations_by_severities, get_languages_for_rules, get_rulesets_from_file,
};
use itertools::Itertools;
use kernel::analysis::analyze::analyze;
use kernel::constants::{CARGO_VERSION, VERSION};
use kernel::model::analysis::{AnalysisOptions, ERROR_RULE_TIMEOUT};
use kernel::model::common::{Language, OutputFormat};
use kernel::model::rule::{Rule, RuleInternal, RuleResult, RuleSeverity};

use anyhow::{Context, Result};
use cli::constants::DEFAULT_MAX_FILE_SIZE_KB;
use cli::csv;
use cli::model::cli_configuration::CliConfiguration;
use cli::model::datadog_api::DiffAwareData;
use cli::sarif::sarif_utils::{
generate_sarif_report, SarifReportMetadata, SarifRule, SarifRuleResult,
};
use cli::secrets::{SecretResult, SecretRule};
use cli::violations_table;
use getopts::Options;
use indicatif::ProgressBar;
use kernel::analysis::analyze::analyze;
use kernel::config_file::ArgumentProvider;
use kernel::constants::{CARGO_VERSION, VERSION};
use kernel::model::analysis::{AnalysisOptions, ERROR_RULE_TIMEOUT};
use kernel::model::common::{Language, OutputFormat};
use kernel::model::config_file::{ConfigFile, PathConfig};
use kernel::model::rule::{Rule, RuleInternal, RuleResult, RuleSeverity};
use kernel::path_restrictions::PathRestrictions;
use kernel::rule_overrides::RuleOverrides;
use rayon::prelude::*;
use std::collections::HashMap;
use std::io::prelude::*;
use std::process::exit;
use std::time::{Instant, SystemTime};
use std::{env, fs};

fn print_usage(program: &str, opts: Options) {
let brief = format!("Usage: {} FILE [options]", program);
Expand Down Expand Up @@ -486,33 +487,39 @@ fn main() -> Result<()> {
// check if we do a diff-aware scan
let diff_aware_parameters: Option<DiffAwareData> = if diff_aware_requested {
match configuration.generate_diff_aware_request_data() {
Ok(params) => match get_diff_aware_information(&params) {
Ok(d) => {
if configuration.use_debug {
println!(
"diff aware enabled, base sha: {}, files to scan {}",
d.base_sha,
d.files.join(",")
);
} else {
println!(
"diff-aware enabled, based sha {}, scanning only {}/{} files",
d.base_sha,
d.files.len(),
files_in_repository.len()
)
}
Some(d)
Ok(params) => {
if configuration.use_debug {
println!("Diff-aware request with repository url {}, sha {}, branch {}, config hash {}", params.repository_url, params.sha, params.branch, params.config_hash);
}
Err(e) => {
eprintln!("diff aware not enabled (error when receiving diff-aware data from Datadog with config hash {}, sha {}), proceeding with full scan.", &params.config_hash, &params.sha);
if configuration.use_debug {
eprintln!("error when trying to enabled diff-aware scanning: {:?}", e);

match get_diff_aware_information(&params) {
Ok(d) => {
if configuration.use_debug {
println!(
"diff aware enabled, base sha: {}, files to scan {}",
d.base_sha,
d.files.join(",")
);
} else {
println!(
"diff-aware enabled, based sha {}, scanning only {}/{} files",
d.base_sha,
d.files.len(),
files_in_repository.len()
)
}
Some(d)
}
Err(e) => {
eprintln!("diff aware not enabled (error when receiving diff-aware data from Datadog with config hash {}, sha {}), proceeding with full scan.", &params.config_hash, &params.sha);
if configuration.use_debug {
eprintln!("error when trying to enabled diff-aware scanning: {:?}", e);
}

None
None
}
}
},
}
Err(e) => {
eprintln!("diff aware not enabled (unable to generate diff-aware request data), proceeding with full scan.");
eprintln!("Make sure the user running the scan owns the repository (use git config --global --add safe.directory <repo-path> if needed)");
Expand Down
33 changes: 21 additions & 12 deletions crates/cli/src/model/cli_configuration.rs
Original file line number Diff line number Diff line change
@@ -1,13 +1,14 @@
use crate::model::datadog_api::DiffAwareRequestArguments;

use anyhow::anyhow;
use git2::Repository;
use sha2::{Digest, Sha256};

use kernel::config_file::ArgumentProvider;
use kernel::model::common::OutputFormat;
use kernel::model::config_file::PathConfig;
use kernel::model::rule::Rule;
use kernel::path_restrictions::PathRestrictions;
use sha2::{Digest, Sha256};

use crate::model::datadog_api::DiffAwareRequestArguments;

/// represents the CLI configuration
#[derive(Clone)]
Expand Down Expand Up @@ -91,16 +92,23 @@ impl CliConfiguration {

// let's get the latest commit
let head = repository.head()?;

let oid = head.target();
let head_name = head.shorthand();
match (oid, head_name) {
(Some(o), Some(h)) => {
if h == "HEAD" {
return Err(anyhow!(
"branch is HEAD, cannot generate diff-aware scanning"
));
}

match (oid, head.name()) {
(Some(o), Some(h)) => Ok(DiffAwareRequestArguments {
repository_url,
config_hash,
sha: o.to_string(),
branch: h.to_string(),
}),
Ok(DiffAwareRequestArguments {
repository_url,
config_hash,
sha: o.to_string(),
branch: h.to_string(),
})
}
_ => {
if self.use_debug {
println!(
Expand All @@ -116,11 +124,12 @@ impl CliConfiguration {

#[cfg(test)]
mod tests {
use super::*;
use kernel::model::common::Language;
use kernel::model::common::OutputFormat::Sarif;
use kernel::model::rule::{RuleCategory, RuleSeverity, RuleType};

use super::*;

#[test]
fn test_generate_diff_aware_hash() {
let cli_configuration = CliConfiguration {
Expand Down
3 changes: 2 additions & 1 deletion crates/static-analysis-kernel/src/analysis/tree_sitter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -88,8 +88,9 @@ pub fn get_query_nodes(
if let (Some(capture_name), Some(node)) = (capture_name_opt, node_opt) {
captures.insert(capture_name.to_string(), node.clone());
captures_list.entry(capture_name.to_string()).or_default();

captures_list
.get_mut(&capture_name.to_string())
.get_mut(&capture_name.to_string().clone())
.unwrap()
.push(node.clone());
}
Expand Down