Skip to content

Commit 2e7999d

Browse files
authored
Merge pull request #527 from juliendoutre/julien.doutre/github-actions-support
Add new Github Action ecosystem
2 parents 8109e69 + 2343238 commit 2e7999d

File tree

8 files changed

+130
-20
lines changed

8 files changed

+130
-20
lines changed

README.md

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -155,6 +155,21 @@ Metadata heuristics:
155155
| typosquatting | Identify packages whose names closely resemble that of a highly popular package |
156156

157157

158+
### GitHub Action
159+
160+
Source code heuristics:
161+
162+
| **Heuristic** | **Description** |
163+
|:-------------:|:---------------:|
164+
| npm-serialize-environment | Identify when a package serializes 'process.env' to exfiltrate environment variables |
165+
| npm-obfuscation | Identify when a package uses a common obfuscation method often used by malware |
166+
| npm-silent-process-execution | Identify when a package silently executes an executable |
167+
| shady-links | Identify when a package contains a URL to a domain with a suspicious extension |
168+
| npm-exec-base64 | Identify when a package dynamically executes code through 'eval' |
169+
| npm-install-script | Identify when a package has a pre or post-install script automatically running commands |
170+
| npm-steganography | Identify when a package retrieves hidden data from an image and executes it |
171+
| npm-dll-hijacking | Identify when a malicious package manipulates a trusted application into loading a malicious DLL |
172+
| npm-exfiltrate-sensitive-data | Identify when a package reads and exfiltrates sensitive data from the local system |
158173
<!-- END_RULE_LIST -->
159174

160175
## Custom Rules

guarddog/analyzer/metadata/__init__.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
from guarddog.analyzer.metadata.npm import NPM_METADATA_RULES
33
from guarddog.analyzer.metadata.pypi import PYPI_METADATA_RULES
44
from guarddog.analyzer.metadata.go import GO_METADATA_RULES
5+
from guarddog.analyzer.metadata.github_action import GITHUB_ACTION_METADATA_RULES
56
from guarddog.ecosystems import ECOSYSTEM
67

78

@@ -13,3 +14,5 @@ def get_metadata_detectors(ecosystem: ECOSYSTEM) -> dict[str, Detector]:
1314
return NPM_METADATA_RULES
1415
case ECOSYSTEM.GO:
1516
return GO_METADATA_RULES
17+
case ECOSYSTEM.GITHUB_ACTION:
18+
return GITHUB_ACTION_METADATA_RULES
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
from typing import Type
2+
3+
from guarddog.analyzer.metadata import Detector
4+
5+
GITHUB_ACTION_METADATA_RULES = {}
6+
7+
classes: list[Type[Detector]] = []
8+
9+
for detectorClass in classes:
10+
detectorInstance = detectorClass() # type: ignore
11+
GITHUB_ACTION_METADATA_RULES[detectorInstance.get_name()] = detectorInstance

guarddog/analyzer/sourcecode/__init__.py

Lines changed: 22 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -71,34 +71,36 @@ def get_sourcecode_rules(
7171
data = yaml.load(fd, Loader=SafeLoader)
7272
for rule in data["rules"]:
7373
for lang in rule["languages"]:
74-
ecosystem = None
74+
ecosystems = set()
7575
match lang:
7676
case "python":
77-
ecosystem = ECOSYSTEM.PYPI
77+
ecosystems.add(ECOSYSTEM.PYPI)
7878
case "javascript" | "typescript" | "json":
79-
ecosystem = ECOSYSTEM.NPM
79+
ecosystems.add(ECOSYSTEM.NPM)
80+
ecosystems.add(ECOSYSTEM.GITHUB_ACTION)
8081
case "go":
81-
ecosystem = ECOSYSTEM.GO
82+
ecosystems.add(ECOSYSTEM.GO)
8283
case _:
8384
continue
8485

85-
# avoids duplicates when multiple languages are supported by a rule
86-
if not next(
87-
filter(
88-
lambda r: r.id == rule["id"],
89-
get_sourcecode_rules(ecosystem, SempgrepRule),
90-
),
91-
None,
92-
):
93-
SOURCECODE_RULES.append(
94-
SempgrepRule(
95-
id=rule["id"],
96-
ecosystem=ecosystem,
97-
description=rule.get("metadata", {}).get("description", ""),
98-
file=file_name,
99-
rule_content=rule,
86+
for ecosystem in ecosystems:
87+
# avoids duplicates when multiple languages are supported by a rule
88+
if not next(
89+
filter(
90+
lambda r: r.id == rule["id"],
91+
get_sourcecode_rules(ecosystem, SempgrepRule),
92+
),
93+
None,
94+
):
95+
SOURCECODE_RULES.append(
96+
SempgrepRule(
97+
id=rule["id"],
98+
ecosystem=ecosystem,
99+
description=rule.get("metadata", {}).get("description", ""),
100+
file=file_name,
101+
rule_content=rule,
102+
)
100103
)
101-
)
102104

103105
yara_rule_file_names = list(
104106
filter(lambda x: x.endswith("yar"), os.listdir(current_dir))

guarddog/ecosystems.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ class ECOSYSTEM(Enum):
55
PYPI = "pypi"
66
NPM = "npm"
77
GO = "go"
8+
GITHUB_ACTION = "github-action"
89

910

1011
def get_friendly_name(ecosystem: ECOSYSTEM) -> str:
@@ -15,5 +16,7 @@ def get_friendly_name(ecosystem: ECOSYSTEM) -> str:
1516
return "npm"
1617
case ECOSYSTEM.GO:
1718
return "go"
19+
case ECOSYSTEM.GITHUB_ACTION:
20+
return "GitHub Action"
1821
case _:
1922
return ecosystem.value

guarddog/scanners/__init__.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
from .pypi_project_scanner import PypiRequirementsScanner
77
from .go_package_scanner import GoModuleScanner
88
from .go_project_scanner import GoDependenciesScanner
9+
from .github_action_scanner import GithubActionScanner
910
from .scanner import PackageScanner, ProjectScanner
1011
from ..ecosystems import ECOSYSTEM
1112

@@ -29,6 +30,8 @@ def get_package_scanner(ecosystem: ECOSYSTEM) -> Optional[PackageScanner]:
2930
return NPMPackageScanner()
3031
case ECOSYSTEM.GO:
3132
return GoModuleScanner()
33+
case ECOSYSTEM.GITHUB_ACTION:
34+
return GithubActionScanner()
3235
return None
3336

3437

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
import logging
2+
import os
3+
import pathlib
4+
import typing
5+
from urllib.parse import urlparse
6+
7+
from guarddog.analyzer.analyzer import Analyzer
8+
from guarddog.ecosystems import ECOSYSTEM
9+
from guarddog.scanners.scanner import PackageScanner
10+
11+
log = logging.getLogger("guarddog")
12+
13+
14+
class GithubActionScanner(PackageScanner):
    """Package scanner for the GitHub Action ecosystem.

    An action is identified either by its ``owner/repo`` name or by a
    ``github.com`` repository URL; its source is fetched as a zip archive.
    """

    def __init__(self) -> None:
        super().__init__(Analyzer(ECOSYSTEM.GITHUB_ACTION))

    def download_and_get_package_info(self, directory: str, package_name: str, version=None) -> typing.Tuple[dict, str]:
        """Download the action's source archive and extract it under *directory*.

        Args:
            directory: Directory in which the archive is stored and extracted.
            package_name: ``owner/repo`` name or GitHub repository URL.
            version: Optional tag name; when falsy, the repository's default
                zipball is downloaded instead.

        Returns:
            A tuple ``(package_info, extracted_path)``. ``package_info`` is
            always an empty dict: no metadata is collected for actions.

        Raises:
            ValueError: If *package_name* is not a valid GitHub repository
                reference.
        """
        repo = self._get_repo(package_name)
        tarball_url = self._get_git_tarball_url(repo, version)

        log.debug(f"Downloading GitHub Action source from {tarball_url}")

        # The API zipball URL has no file extension; the archive is a zip.
        file_extension = pathlib.Path(tarball_url).suffix
        if file_extension == "":
            file_extension = ".zip"

        # The local name is derived from the raw package_name (slashes replaced
        # by dashes), so a URL input yields a URL-shaped directory name.
        zippath = os.path.join(directory, package_name.replace("/", "-") + file_extension)
        unzippedpath = zippath.removesuffix(file_extension)
        # download_compressed is not defined in this class; presumably it is
        # inherited from PackageScanner and downloads then extracts the archive.
        self.download_compressed(tarball_url, zippath, unzippedpath)

        return {}, unzippedpath

    def _get_repo(self, url: str) -> str:
        """Normalize a repository reference to its ``owner/repo`` form.

        Accepts a bare ``owner/repo`` name or a URL whose host is
        ``github.com``, with an optional ``.git`` suffix and optional
        leading/trailing slashes.

        Raises:
            ValueError: If the URL points to another host, or if the path is
                not exactly a non-empty owner and a non-empty repository name.
        """
        parsed_url = urlparse(url)

        # A bare "owner/repo" has no hostname and is accepted as is.
        if parsed_url.hostname and parsed_url.hostname != "github.com":
            raise ValueError("Invalid GitHub repo URL: " + url)

        # Strip slashes BEFORE removing the suffix so that a trailing slash
        # ("owner/repo.git/") does not hide the ".git" extension.
        path = parsed_url.path.strip("/").removesuffix(".git")

        parts = path.split("/")
        if len(parts) != 2 or not all(parts):
            raise ValueError("Invalid GitHub repo name: " + path)

        return path

    def _get_git_tarball_url(self, repo: str, version=None) -> str:
        """Build the zip archive URL for *repo*.

        Without a version, the GitHub API zipball endpoint is used; otherwise
        *version* is treated as a tag name.
        """
        if not version:
            return f"https://api.github.com/repos/{repo}/zipball"
        return f"https://github.com/{repo}/archive/refs/tags/{version}.zip"
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
import os.path
2+
import tempfile
3+
4+
import pytest
5+
6+
from guarddog.scanners import GithubActionScanner
7+
8+
9+
def test_download_and_get_github_action_by_url():
10+
scanner = GithubActionScanner()
11+
with tempfile.TemporaryDirectory() as tmpdirname:
12+
data, path = scanner.download_and_get_package_info(tmpdirname, "https://github.com/actions/checkout.git", "v4.2.2")
13+
assert not data
14+
assert os.path.exists(os.path.join(tmpdirname, "https:--github.com-actions-checkout.git", "checkout-4.2.2", "package.json"))
15+
16+
17+
def test_download_and_get_github_action_by_name():
18+
scanner = GithubActionScanner()
19+
with tempfile.TemporaryDirectory() as tmpdirname:
20+
data, path = scanner.download_and_get_package_info(tmpdirname, "actions/checkout", "v4.2.2")
21+
assert not data
22+
assert os.path.exists(os.path.join(tmpdirname, "actions-checkout", "checkout-4.2.2", "package.json"))

0 commit comments

Comments
 (0)