DataDog
diff --git a/guarddog/analyzer/analyzer.py b/guarddog/analyzer/analyzer.py
Lines changed: 62 additions & 27 deletions
diff --git a/guarddog/analyzer/metadata/extension/__init__.py b/guarddog/analyzer/metadata/extension/__init__.py
Lines changed: 1 addition & 1 deletion
diff --git a/guarddog/analyzer/metadata/extension/empty_information.py b/guarddog/analyzer/metadata/extension/empty_information.py
Lines changed: 21 additions & 12 deletions
@@ -67,7 +67,13 @@ def __init__(self, ecosystem=ECOSYSTEM.PYPI) -> None:
             ".semgrep_logs",
         ]
 
-    def analyze(self, path, info=None, rules=None, name: Optional[str] = None, version: Optional[str] = None) -> dict:
+    def analyze(
+            self,
+            path,
+            info=None,
+            rules=None,
+            name: Optional[str] = None,
+            version: Optional[str] = None) -> dict:
         """
         Analyzes a package in the given path
 
@@ -87,23 +93,33 @@ def analyze(self, path, info=None, rules=None, name: Optional[str] = None, versi
         sourcecode_results = None
 
         # populate results, errors, and number of issues
-        metadata_results = self.analyze_metadata(path, info, rules, name, version)
+        metadata_results = self.analyze_metadata(
+            path, info, rules, name, version)
         sourcecode_results = self.analyze_sourcecode(path, rules)
 
         # Concatenate dictionaries together
         issues = metadata_results["issues"] + sourcecode_results["issues"]
         results = metadata_results["results"] | sourcecode_results["results"]
         errors = metadata_results["errors"] | sourcecode_results["errors"]
 
-        output = {"issues": issues, "errors": errors, "results": results, "path": path}
+        output = {
+            "issues": issues,
+            "errors": errors,
+            "results": results,
+            "path": path}
         # Including extension info - pending discussion
         # if info is not None:
         #     output["package_info"] = info
 
         return output
 
-    def analyze_metadata(self, path: str, info, rules=None, name: Optional[str] = None,
-                         version: Optional[str] = None) -> dict:
+    def analyze_metadata(
+            self,
+            path: str,
+            info,
+            rules=None,
+            name: Optional[str] = None,
+            version: Optional[str] = None) -> dict:
         """
         Analyzes the metadata of a given package
 
@@ -132,7 +148,8 @@ def analyze_metadata(self, path: str, info, rules=None, name: Optional[str] = No
         for rule in all_rules:
             try:
                 log.debug(f"Running rule {rule} against package '{name}'")
-                rule_matches, message = self.metadata_detectors[rule].detect(info, path, name, version)
+                rule_matches, message = self.metadata_detectors[rule].detect(
+                    info, path, name, version)
                 results[rule] = None
                 if rule_matches:
                     issues += 1
@@ -162,7 +179,11 @@ def analyze_sourcecode(self, path, rules=None) -> dict:
         results = semgrepscan_results["results"] | yarascan_results["results"]
         errors = semgrepscan_results["errors"] | yarascan_results["errors"]
 
-        return {"issues": issues, "errors": errors, "results": results, "path": path}
+        return {
+            "issues": issues,
+            "errors": errors,
+            "results": results,
+            "path": path}
 
     def analyze_yara(self, path: str, rules: Optional[set] = None) -> dict:
         """
@@ -212,30 +233,32 @@ def analyze_yara(self, path: str, rules: Optional[set] = None) -> dict:
                         continue
 
                     scan_file_target_abspath = os.path.join(root, f)
-                    scan_file_target_relpath = os.path.relpath(scan_file_target_abspath, path)
+                    scan_file_target_relpath = os.path.relpath(
+                        scan_file_target_abspath, path)
 
                     matches = scan_rules.match(scan_file_target_abspath)
                     for m in matches:
                         rule_name = m.rule
-                        
                         if rule_name in verbose_rules:
                             # For verbose rules, we only show that the rule was triggered in the matching file
-                            # We're logging appearances once instead of issue-counting
+                            # We're logging appearances once instead of
+                            # issue-counting
                             file_already_reported = any(
                                 finding["location"].startswith(scan_file_target_relpath + ":")
                                 for finding in rule_results[rule_name]
                             )
-                            
                             if not file_already_reported:
                                 finding = {
                                     "location": f"{scan_file_target_relpath}:1",
-                                    "code": f"Rule triggered in file (matches hidden for brevity)",
-                                    'message': m.meta.get("description", f"{rule_name} rule matched")
-                                }
+                                    "code": f'{"Rule triggered in file (matches hidden for brevity)"}',
+                                    'message': m.meta.get(
+                                        "description",
+                                        f"{rule_name} rule matched")}
                                 issues += 1
                                 rule_results[rule_name].append(finding)
                         else:
-                            # For non-verbose rules, show detailed matches as before
+                            # For non-verbose rules, show detailed matches as
+                            # before
                             for s in m.strings:
                                 for i in s.instances:
                                     finding = {
@@ -259,7 +282,10 @@ def analyze_yara(self, path: str, rules: Optional[set] = None) -> dict:
         except Exception as e:
             errors["rules-all"] = f"failed to run rule: {str(e)}"
 
-        return {"results": results | rule_results, "errors": errors, "issues": issues}
+        return {
+            "results": results | rule_results,
+            "errors": errors,
+            "issues": issues}
 
     def analyze_semgrep(self, path, rules=None) -> dict:
         """
@@ -284,10 +310,8 @@ def analyze_semgrep(self, path, rules=None) -> dict:
         errors = {}
         issues = 0
 
-        rules_path = list(map(
-            lambda rule_name: os.path.join(SOURCECODE_RULES_PATH, f"{rule_name}.yml"),
-            all_rules
-        ))
+        rules_path = list(map(lambda rule_name: os.path.join(
+            SOURCECODE_RULES_PATH, f"{rule_name}.yml"), all_rules))
 
         if len(rules_path) == 0:
             log.debug("No semgrep code rules to run")
@@ -296,7 +320,8 @@ def analyze_semgrep(self, path, rules=None) -> dict:
         try:
             log.debug(f"Running semgrep code rules against {path}")
             response = self._invoke_semgrep(target=path, rules=rules_path)
-            rule_results = self._format_semgrep_response(response, targetpath=targetpath)
+            rule_results = self._format_semgrep_response(
+                response, targetpath=targetpath)
             issues += sum(len(res) for res in rule_results.values())
 
             results = results | rule_results
@@ -308,7 +333,9 @@ def analyze_semgrep(self, path, rules=None) -> dict:
     def _invoke_semgrep(self, target: str, rules: Iterable[str]):
         try:
             SEMGREP_MAX_TARGET_BYTES = int(
-                os.getenv("GUARDDOG_SEMGREP_MAX_TARGET_BYTES", MAX_BYTES_DEFAULT))
+                os.getenv(
+                    "GUARDDOG_SEMGREP_MAX_TARGET_BYTES",
+                    MAX_BYTES_DEFAULT))
             SEMGREP_TIMEOUT = int(
                 os.getenv("GUARDDOG_SEMGREP_TIMEOUT", SEMGREP_TIMEOUT_DEFAULT))
             cmd = ["semgrep"]
@@ -325,7 +352,11 @@ def _invoke_semgrep(self, target: str, rules: Iterable[str]):
             cmd.append(f"--max-target-bytes={SEMGREP_MAX_TARGET_BYTES}")
             cmd.append(target)
             log.debug(f"Invoking semgrep with command line: {' '.join(cmd)}")
-            result = subprocess.run(cmd, capture_output=True, check=True, encoding="utf-8")
+            result = subprocess.run(
+                cmd,
+                capture_output=True,
+                check=True,
+                encoding="utf-8")
             return json.loads(str(result.stdout))
         except FileNotFoundError:
             raise Exception("unable to find semgrep binary")
@@ -379,9 +410,9 @@ def _format_semgrep_response(self, response, rule=None, targetpath=None):
             file_path = os.path.abspath(result["path"])
             code = self.trim_code_snippet(
                 self.get_snippet(
-                    file_path=file_path, start_line=start_line, end_line=end_line
-                )
-            )
+                    file_path=file_path,
+                    start_line=start_line,
+                    end_line=end_line))
             if targetpath:
                 file_path = os.path.relpath(file_path, targetpath)
 
@@ -400,7 +431,11 @@ def _format_semgrep_response(self, response, rule=None, targetpath=None):
 
         return results
 
-    def get_snippet(self, file_path: str, start_line: int, end_line: int) -> str:
+    def get_snippet(
+            self,
+            file_path: str,
+            start_line: int,
+            end_line: int) -> str:
         """
         Returns the code snippet between start_line and stop_line in a file
 
 
@@ -15,4 +15,4 @@
 
 for detectorClass in classes:
     detectorInstance = detectorClass()  # type: ignore
-    EXTENSION_METADATA_RULES[detectorInstance.get_name()] = detectorInstance 
+    EXTENSION_METADATA_RULES[detectorInstance.get_name()] = detectorInstance
@@ -13,26 +13,35 @@
 class ExtensionEmptyInfoDetector(EmptyInfoDetector):
     """Detects extensions with empty description information"""
 
-    def detect(self, package_info, path: Optional[str] = None, name: Optional[str] = None,
-               version: Optional[str] = None) -> tuple[bool, str]:
-        
-        log.debug(f"Running extension empty description heuristic on extension {name} version {version}")
-        
+    def detect(self,
+               package_info,
+               path: Optional[str] = None,
+               name: Optional[str] = None,
+               version: Optional[str] = None) -> tuple[bool,
+                                                       str]:
+
+        log.debug(
+            f"Running extension empty description heuristic on extension {name} version {version}")
+
         if not package_info or not isinstance(package_info, dict):
             return True, "Extension has no package information"
-        
+
         manifest = package_info.get("manifest", {})
         marketplace = package_info.get("marketplace", {})
         source = package_info.get("source", "unknown")
-        
+
         return self._detect_with_metadata(manifest, marketplace, source)
-    
-    def _detect_with_metadata(self, manifest: dict, marketplace: dict, source: str) -> tuple[bool, str]:
+
+    def _detect_with_metadata(self,
+                              manifest: dict,
+                              marketplace: dict,
+                              source: str) -> tuple[bool,
+                                                    str]:
         """Detect empty information with pre-extracted metadata"""
-        
+
         manifest_description = manifest.get("description", "").strip()
         manifest_display_name = manifest.get("displayName", "").strip()
-        
+
         if not manifest_description and not manifest_display_name:
             return True, self.MESSAGE_TEMPLATE % "Extension Marketplace (manifest)"
-        return False, ""
+        return False, ""