Skip to content

Commit d2d1494

Browse files
sacca97 authored and copernico committed
commented out unused code, other minor changes
1 parent ae1e8cd commit d2d1494

File tree

4 files changed

+85
-96
lines changed

4 files changed

+85
-96
lines changed

prospector/client/cli/prospector_client.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -131,15 +131,16 @@ def prospector( # noqa: C901
131131
preprocessed_commits: List[Commit] = list()
132132

133133
if len(missing) > 0:
134+
134135
pbar = tqdm(missing, desc="Preprocessing commits", unit="commit")
135136
with Counter(
136137
timer.collection.sub_collection("commit preprocessing")
137138
) as counter:
138139
counter.initialize("preprocessed commits", unit="commit")
139140
for raw_commit in pbar:
140141
counter.increment("preprocessed commits")
142+
141143
raw_commit.set_tags(next_tag)
142-
# TODO: here we need to check twins with the commit not already in the backend and update everything
143144
preprocessed_commits.append(make_from_raw_commit(raw_commit))
144145
else:
145146
writer.print("\nAll commits found in the backend")
@@ -160,7 +161,7 @@ def prospector( # noqa: C901
160161

161162
# apply rules and rank candidates
162163
ranked_candidates = evaluate_commits(preprocessed_commits, advisory_record, rules)
163-
164+
# TODO: if a twin has higher relevance than the one displayed, the relevance should be inherited
164165
twin_branches_map = {
165166
commit.commit_id: commit.get_tag() for commit in ranked_candidates
166167
}

prospector/client/cli/templates/filtering_scripts.html

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
relevanceRangeOutput = document.getElementById("relevanceRangeOutput");
1616
relevanceRangeOutput.innerHTML = relevance;
1717
for (let card of commit_cards) {
18-
if (parseInt(card.dataset.relevances) > parseInt(relevance)) {
18+
if (parseInt(card.dataset.relevances) >= parseInt(relevance)) {
1919
card.classList.replace('d-none', 'd-flex')
2020
} else {
2121
card.classList.replace('d-flex', 'd-none')
@@ -27,7 +27,7 @@
2727
showFromRelevance(this.value);
2828

2929
}
30-
max = parseInt(document.getElementsByClassName('commit')[0].dataset.relevances) - 1;
30+
max = parseInt(document.getElementsByClassName('commit')[0].dataset.relevances);
3131
relevanceRange.max = max;
3232
relevanceRange.value = max;
3333
showFromRelevance(max);

prospector/git/git.py

Lines changed: 78 additions & 91 deletions
Original file line numberDiff line numberDiff line change
@@ -19,12 +19,7 @@
1919
from git.raw_commit import RawCommit
2020
from log.logger import logger
2121
from stats.execution import execution_statistics, measure_execution_time
22-
from util.lsh import (
23-
build_lsh_index,
24-
compute_minhash,
25-
encode_minhash,
26-
get_encoded_minhash,
27-
)
22+
from util.lsh import get_encoded_minhash
2823

2924
# GIT_CACHE = os.getenv("GIT_CACHE")
3025
GITHUB_TOKEN = os.getenv("GITHUB_TOKEN")
@@ -118,7 +113,6 @@ def __init__(
118113
self.shallow_clone = shallow
119114
self.exec = Exec(workdir=self.path)
120115
self.storage = None
121-
# self.lsh_index = build_lsh_index()
122116

123117
def execute(self, cmd: str, silent: bool = False):
124118
return self.exec.run(cmd, silent=silent, cache=True)
@@ -202,7 +196,7 @@ def clone(self, shallow=None, skip_existing=False):
202196
else:
203197
logger.debug(f"Found repo {self.url} in {self.path}.\nFetching....")
204198

205-
self.execute("git fetch --progress --all --tags")
199+
self.execute("git fetch --progress --all --tags --force")
206200
return
207201

208202
if os.path.exists(self.path):
@@ -239,10 +233,6 @@ def clone(self, shallow=None, skip_existing=False):
239233
shutil.rmtree(self.path)
240234
raise e
241235

242-
def get_tags():
243-
cmd = "git log --tags --format=%H - %D"
244-
pass
245-
246236
@measure_execution_time(execution_statistics.sub_collection("core"))
247237
def create_commits(
248238
self,
@@ -259,33 +249,22 @@ def create_commits(
259249
if ancestors_of is None or find_twins:
260250
cmd += " --all"
261251

262-
# by filtering the dates of the tags we can reduce the commit range safely (in theory)
263252
if ancestors_of:
264253
if not find_twins:
265254
cmd += f" {ancestors_of}"
266255
until = self.extract_tag_timestamp(ancestors_of)
256+
cmd += f" --until={until}"
267257
# TODO: if find twins is true, we don't need the ancestors, only the timestamps
268258
if exclude_ancestors_of:
269259
if not find_twins:
270260
cmd += f" ^{exclude_ancestors_of}"
271261
since = self.extract_tag_timestamp(exclude_ancestors_of)
272-
273-
if since:
274262
cmd += f" --since={since}"
275263

276-
if until:
277-
cmd += f" --until={until}"
278-
279-
# for ext in FILTERING_EXTENSIONS:
280-
# cmd += f" *.{ext}"
281-
282264
try:
283265
logger.debug(cmd)
284266
out = self.execute(cmd)
285267
# if --all is used, we are traversing all branches and therefore we can check for twins
286-
287-
# TODO: problem -> twins can be merge commits, same commits for different branches, not only security related fixes
288-
289268
return self.parse_git_output(out, find_twins, ancestors_of)
290269

291270
except Exception:
@@ -302,8 +281,9 @@ def parse_git_output(
302281
if line == GIT_SEPARATOR:
303282
if sector == 3:
304283
sector = 1
305-
if 0 < len(commit.changed_files) < 100:
284+
if 0 < len(commit.changed_files) < 100 and len(commit.msg) < 5000:
306285
commit.msg = commit.msg.strip()
286+
307287
# TODO: should work here
308288
# commit.set_tags(next_tag)
309289
if find_twins:
@@ -326,7 +306,14 @@ def parse_git_output(
326306
elif sector == 2:
327307
commit.msg += line + " "
328308
elif sector == 3 and not any(
329-
x in line for x in ("test", ".md", "/docs/")
309+
x in line
310+
for x in (
311+
"test",
312+
".md",
313+
"docs/",
314+
".meta",
315+
".utf8",
316+
) # TODO: build a list for these. If there are no . then is not relevant
330317
):
331318
commit.add_changed_file(line)
332319

@@ -360,71 +347,71 @@ def get_issues(self, since=None) -> Dict[str, str]:
360347
break
361348
r = requests.get(query_url, params=params, headers=headers)
362349

363-
# @measure_execution_time(execution_statistics.sub_collection("core"))
364-
def get_commits(
365-
self,
366-
ancestors_of=None,
367-
exclude_ancestors_of=None,
368-
since=None,
369-
until=None,
370-
find_in_code="",
371-
find_in_msg="",
372-
):
373-
cmd = "git log --format=%H"
374-
375-
if ancestors_of is None:
376-
cmd += " --all"
377-
378-
# by filtering the dates of the tags we can reduce the commit range safely (in theory)
379-
if ancestors_of:
380-
cmd += f" {ancestors_of}"
381-
until = self.extract_tag_timestamp(ancestors_of)
382-
383-
if exclude_ancestors_of:
384-
cmd += f" ^{exclude_ancestors_of}"
385-
since = self.extract_tag_timestamp(exclude_ancestors_of)
386-
387-
if since:
388-
cmd += f" --since={since}"
389-
390-
if until:
391-
cmd += f" --until={until}"
392-
393-
for ext in FILTERING_EXTENSIONS:
394-
cmd += f" *.{ext}"
395-
396-
# What is this??
397-
if find_in_code:
398-
cmd += f" -S{find_in_code}"
399-
400-
if find_in_msg:
401-
cmd += f" --grep={find_in_msg}"
402-
403-
try:
404-
logger.debug(cmd)
405-
out = self.execute(cmd)
406-
407-
except Exception:
408-
logger.error("Git command failed, cannot get commits", exc_info=True)
409-
out = []
410-
411-
return out
412-
413-
def get_commits_between_two_commit(self, commit_from: str, commit_to: str):
414-
"""
415-
Return the commits between the start commit and the end commit if there is a path between them, or an empty list
416-
"""
417-
try:
418-
cmd = f"git rev-list --ancestry-path {commit_from}..{commit_to}"
419-
420-
path = self.execute(cmd) # ???
421-
if len(path) > 0:
422-
path.pop(0)
423-
path.reverse()
424-
return path
425-
except:
426-
logger.error("Failed to obtain commits, details below:", exc_info=True)
427-
return []
350+
# # @measure_execution_time(execution_statistics.sub_collection("core"))
351+
# def get_commits(
352+
# self,
353+
# ancestors_of=None,
354+
# exclude_ancestors_of=None,
355+
# since=None,
356+
# until=None,
357+
# find_in_code="",
358+
# find_in_msg="",
359+
# ):
360+
# cmd = "git log --format=%H"
361+
362+
# if ancestors_of is None:
363+
# cmd += " --all"
364+
365+
# # by filtering the dates of the tags we can reduce the commit range safely (in theory)
366+
# if ancestors_of:
367+
# cmd += f" {ancestors_of}"
368+
# until = self.extract_tag_timestamp(ancestors_of)
369+
370+
# if exclude_ancestors_of:
371+
# cmd += f" ^{exclude_ancestors_of}"
372+
# since = self.extract_tag_timestamp(exclude_ancestors_of)
373+
374+
# if since:
375+
# cmd += f" --since={since}"
376+
377+
# if until:
378+
# cmd += f" --until={until}"
379+
380+
# for ext in FILTERING_EXTENSIONS:
381+
# cmd += f" *.{ext}"
382+
383+
# # What is this??
384+
# if find_in_code:
385+
# cmd += f" -S{find_in_code}"
386+
387+
# if find_in_msg:
388+
# cmd += f" --grep={find_in_msg}"
389+
390+
# try:
391+
# logger.debug(cmd)
392+
# out = self.execute(cmd)
393+
394+
# except Exception:
395+
# logger.error("Git command failed, cannot get commits", exc_info=True)
396+
# out = []
397+
398+
# return out
399+
400+
# def get_commits_between_two_commit(self, commit_from: str, commit_to: str):
401+
# """
402+
# Return the commits between the start commit and the end commit if there is a path between them, or an empty list
403+
# """
404+
# try:
405+
# cmd = f"git rev-list --ancestry-path {commit_from}..{commit_to}"
406+
407+
# path = self.execute(cmd) # ???
408+
# if len(path) > 0:
409+
# path.pop(0)
410+
# path.reverse()
411+
# return path
412+
# except:
413+
# logger.error("Failed to obtain commits, details below:", exc_info=True)
414+
# return []
428415

429416
@measure_execution_time(execution_statistics.sub_collection("core"))
430417
def get_commit(self, id):

prospector/git/raw_commit.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -300,6 +300,7 @@ def _get_timing_data(self):
300300
return (tag, tag_timestamp, commit_timestamp, time_delta)
301301

302302
def get_tags(self):
303+
return self.tag
303304
cmd = f"git tag --contains {self.id}"
304305
# cmd = f"git log --format=oneline" # --date=unix --decorate=short"
305306
tags = self.execute(cmd)
@@ -316,7 +317,7 @@ def find_tag(self, relevant_tag: str = ""):
316317
tag = self.execute(cmd)
317318
if tag[0] != "undefined":
318319
return re.sub(r"[~^]\w*", "", tag[0])
319-
return ""
320+
return "no-tag"
320321

321322
def get_next_tag(self):
322323
data = self.get_timing_data()

0 commit comments

Comments
 (0)