
Commit 279a999

sacca97 authored and copernico committed
various refactoring, brought back files extension filtering, unified version/tag interval
1 parent 47a36c6 commit 279a999

File tree

1 file changed (+49, -45 lines changed)


prospector/client/cli/prospector_client.py

Lines changed: 49 additions & 45 deletions
@@ -40,7 +40,7 @@ def prospector( # noqa: C901
     repository_url: str,
     publication_date: str = "",
     vuln_descr: str = "",
-    tag_interval: str = "",
+    # tag_interval: str = "",
     version_interval: str = "",
     modified_files: Set[str] = set(),
     advisory_keywords: Set[str] = set(),
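A side note on the commented-out tag_interval above: both interval parameters default to an empty string, which is why the guard in the interval-handling hunk further down changes from an identity check to a truthiness check. A minimal standalone illustration:

version_interval = ""  # the default value in the signature above

print(version_interval is not None)                          # True: the old guard accepted a missing interval
print(bool(version_interval and len(version_interval) > 0))  # False: the new guard falls through to the error branch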
@@ -56,6 +56,8 @@ def prospector( # noqa: C901
     rules: List[str] = ["ALL"],
 ) -> Tuple[List[Commit], AdvisoryRecord]:
 
+    logger.debug(f"time-limit before: {TIME_LIMIT_BEFORE}")
+    logger.debug(f"time-limit after: {TIME_LIMIT_AFTER}")
     logger.debug("begin main commit and CVE processing")
 
     # construct an advisory record
@@ -67,33 +69,33 @@ def prospector( # noqa: C901
         fetch_references,
         use_nvd,
         publication_date,
-        advisory_keywords,
-        modified_files,
+        set(advisory_keywords),
+        set(modified_files),
     )
 
     # obtain a repository object
     repository = Git(repository_url, git_cache)
 
-    with ConsoleWriter("Git repository cloning") as _:
-        logger.info(f"Downloading repository {repository.url} in {repository.path}")
+    with ConsoleWriter("Git repository cloning") as console:
+        logger.debug(f"Downloading repository {repository.url} in {repository.path}")
         repository.clone()
 
         tags = repository.get_tags()
 
         logger.debug(f"Found tags: {tags}")
         logger.info(f"Done retrieving {repository.url}")
 
-        if tag_interval is not None:
-            prev_tag, next_tag = tag_interval.split(":")
-        elif version_interval is not None:
+        # if tag_interval and len(tag_interval) > 0:
+        #     prev_tag, next_tag = tag_interval.split(":")
+        if version_interval and len(version_interval) > 0:
             prev_tag, next_tag = get_possible_tags(tags, version_interval)
+            ConsoleWriter.print(f"Found tags: {prev_tag} - {next_tag}")
+            ConsoleWriter.print_(MessageStatus.OK)
         else:
-            logger.error("No version/tag interval provided")
+            logger.info("No version/tag interval provided")
+            console.print("No interval provided", status=MessageStatus.ERROR)
             sys.exit(1)
 
-        ConsoleWriter.print(f"Found tags: {prev_tag} - {next_tag}")
-        ConsoleWriter.print_(MessageStatus.OK)
-
     # retrieve of commit candidates
     candidates = get_candidates(
         advisory_record,
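With the tag interval commented out, the version interval is the single entry point and get_possible_tags maps it onto concrete repository tags. Its actual matching logic is not part of this diff; the sketch below is only an assumed, simplified illustration of the idea (substring matching of each version string against the tag names):

# Illustrative sketch only: the real logic lives in Prospector's get_possible_tags.
from typing import List, Tuple


def get_possible_tags_sketch(tags: List[str], version_interval: str) -> Tuple[str, str]:
    # Assumption: the interval is written as "prev_version:next_version", e.g. "2.3.0:2.3.1".
    prev_version, next_version = version_interval.split(":")
    # Naive heuristic: take the first tag whose name contains each version string.
    prev_tag = next((tag for tag in tags if prev_version in tag), "")
    next_tag = next((tag for tag in tags if next_version in tag), "")
    return prev_tag, next_tag


# Example: get_possible_tags_sketch(["rel/2.3.0", "rel/2.3.1"], "2.3.0:2.3.1") returns ("rel/2.3.0", "rel/2.3.1")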
@@ -105,6 +107,8 @@ def prospector( # noqa: C901
         limit_candidates,
     )
 
+    candidates = filter(candidates)
+
     with ExecutionTimer(
         core_statistics.sub_collection("commit preprocessing")
     ) as timer:
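The filter(candidates) call now runs on the raw candidates returned by get_candidates, i.e. on a Dict[str, RawCommit] (see the reworked filter signature further down), so unpromising commits are dropped before the expensive preprocessing step. The real criteria live in filter_commits and are not shown in this commit; the hypothetical pre-filter below only illustrates the shape of such a function, and changed_files is an assumed attribute:

# Hypothetical sketch; Prospector's real logic lives in filter_commits.
from typing import Dict, Tuple

MAX_CHANGED_FILES = 100  # assumed threshold, not taken from this commit


def prefilter_candidates(candidates: Dict[str, "RawCommit"]) -> Tuple[Dict[str, "RawCommit"], int]:
    # Keep only candidates touching a manageable number of files; report how many were rejected.
    kept = {
        commit_id: raw
        for commit_id, raw in candidates.items()
        if len(raw.changed_files) <= MAX_CHANGED_FILES
    }
    return kept, len(candidates) - len(kept)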
@@ -132,50 +136,41 @@ def prospector( # noqa: C901
 
             if len(missing) > 0:
 
-                pbar = tqdm(missing, desc="Preprocessing commits", unit="commit")
+                pbar = tqdm(
+                    missing,
+                    desc="Preprocessing commits",
+                    unit="commit",
+                )
                 with Counter(
                     timer.collection.sub_collection("commit preprocessing")
                 ) as counter:
                     counter.initialize("preprocessed commits", unit="commit")
                     for raw_commit in pbar:
                         counter.increment("preprocessed commits")
-
-                        raw_commit.set_tags(next_tag)
                         preprocessed_commits.append(make_from_raw_commit(raw_commit))
             else:
                 writer.print("\nAll commits found in the backend")
 
             pretty_log(logger, advisory_record)
-            logger.debug(
-                f"preprocessed {len(preprocessed_commits)} commits are only composed of test files"
-            )
+
             payload = [c.to_dict() for c in preprocessed_commits]
 
     if len(payload) > 0 and use_backend != "never":
         save_preprocessed_commits(backend_address, payload)
     else:
         logger.warning("Preprocessed commits are not being sent to backend")
 
-    # filter commits
-    preprocessed_commits = filter(preprocessed_commits)
-
-    # apply rules and rank candidates
     ranked_candidates = evaluate_commits(preprocessed_commits, advisory_record, rules)
-    # TODO: if a twin has higher relevance than the one displayed, the relevance should be intherited
-    twin_branches_map = {
-        commit.commit_id: commit.get_tag() for commit in ranked_candidates
-    }
+
     ConsoleWriter.print("Commit ranking and aggregation...")
-    ranked_candidates = tag_and_aggregate_commits(
-        ranked_candidates, twin_branches_map, next_tag
-    )
+    ranked_candidates = tag_and_aggregate_commits(ranked_candidates, next_tag)
     ConsoleWriter.print_(MessageStatus.OK)
 
     return ranked_candidates, advisory_record
 
 
-def filter(commits: List[Commit]) -> List[Commit]:
-    with ConsoleWriter("Candidate filtering\n") as console:
+def filter(commits: Dict[str, RawCommit]) -> Dict[str, RawCommit]:
+    with ConsoleWriter("\nCandidate filtering\n") as console:
         commits, rejected = filter_commits(commits)
         if rejected > 0:
             console.print(f"Dropped {rejected} candidates")
@@ -190,20 +185,27 @@ def evaluate_commits(commits: List[Commit], advisory: AdvisoryRecord, rules: Lis
     return ranked_commits
 
 
-def tag_and_aggregate_commits(
-    commits: List[Commit], mapping_dict: Dict[str, str], next_tag: str
-) -> List[Commit]:
+def tag_and_aggregate_commits(commits: List[Commit], next_tag: str) -> List[Commit]:
     if next_tag is None:
         return commits
+
+    twin_tags_map = {commit.commit_id: commit.get_tag() for commit in commits}
     tagged_commits = list()
-    # if a twin has higher relevance than the one shown, then the relevance should be inherited
     for commit in commits:
-        if commit.has_tag() and next_tag == commit.get_tag():
+        if commit.has_tag(next_tag):
+            commit.tags = [next_tag]
             for twin in commit.twins:
-                twin[0] = mapping_dict[twin[1]]
-
+                twin[0] = twin_tags_map.get(twin[1], "no-tag")
             tagged_commits.append(commit)
 
+    # for commit in commits:
+    #     if commit.has_tag() and next_tag == commit.get_tag():
+    #         for twin in commit.twins:
+    #             twin[0] = mapping_dict[twin[1]]
+
+    #         tagged_commits.append(commit)
+    # See the order of the tag list in the commits listed as twin to get the correct tag
+
     return tagged_commits
 
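The rewritten tag_and_aggregate_commits above precomputes a commit-id-to-tag map once and then rewrites each twin entry, a [tag, commit_id] pair, with the tag of the twin's own commit, falling back to "no-tag" for unknown twins. A tiny standalone illustration with plain literals standing in for Commit objects:

# Standalone illustration of the new twin-tag lookup, not Prospector code.
twin_tags_map = {"abc123": "v2.3.1", "def456": "v2.3.2"}

# Each twin entry is a [tag, commit_id] pair whose tag slot gets rewritten.
twins = [["", "abc123"], ["", "zzz999"]]
for twin in twins:
    twin[0] = twin_tags_map.get(twin[1], "no-tag")

print(twins)  # [['v2.3.1', 'abc123'], ['no-tag', 'zzz999']]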

@@ -237,8 +239,11 @@ def retrieve_preprocessed_commits(
         ]
 
         logger.error(f"Missing {len(missing)} commits")
-
-    return missing, [Commit.parse_obj(rc) for rc in retrieved_commits]
+    commits = [Commit.parse_obj(rc) for rc in retrieved_commits]
+    # Sets the tags
+    # for commit in commits:
+    #     commit.tags = candidates[commit.commit_id].tags
+    return (missing, commits)
 
 
 def save_preprocessed_commits(backend_address, payload):
@@ -287,14 +292,13 @@ def get_candidates(
     if advisory_record.published_timestamp:
         since = advisory_record.published_timestamp - time_limit_before
         until = advisory_record.published_timestamp + time_limit_after
-        # Here i need to strip the github tags of useless stuff
-        # This is now a list of raw commits
-        # TODO: get_commits replaced for now
+
     candidates = repository.create_commits(
         since=since,
         until=until,
-        ancestors_of=next_tag,
-        exclude_ancestors_of=prev_tag,
+        next_tag=next_tag,
+        prev_tag=prev_tag,
+        filter_extension=advisory_record.files_extension,
     )
 
     core_statistics.record("candidates", len(candidates), unit="commits")
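This last hunk is where the commit message's "brought back files extension filtering" lands: the advisory's files_extension attribute is passed to create_commits as filter_extension. How the repository layer applies it is not shown in this diff; one plausible reading, sketched with a hypothetical helper, is to keep only candidate commits that touch at least one file with a relevant extension:

# Hypothetical helper; the actual filtering happens inside Git.create_commits.
from typing import List


def touches_relevant_extension(changed_files: List[str], extensions: List[str]) -> bool:
    # True if any changed file ends with one of the advisory's extensions (e.g. ["java", "xml"]).
    return any(path.endswith(f".{ext}") for path in changed_files for ext in extensions)


# Example: touches_relevant_extension(["pom.xml", "README.md"], ["java", "xml"]) returns True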
