@@ -40,7 +40,7 @@ def prospector( # noqa: C901
40
40
repository_url : str ,
41
41
publication_date : str = "" ,
42
42
vuln_descr : str = "" ,
43
- tag_interval : str = "" ,
43
+ # tag_interval: str = "",
44
44
version_interval : str = "" ,
45
45
modified_files : Set [str ] = set (),
46
46
advisory_keywords : Set [str ] = set (),
@@ -56,6 +56,8 @@ def prospector( # noqa: C901
56
56
rules : List [str ] = ["ALL" ],
57
57
) -> Tuple [List [Commit ], AdvisoryRecord ]:
58
58
59
+ logger .debug (f"time-limit before: { TIME_LIMIT_BEFORE } " )
60
+ logger .debug (f"time-limit after: { TIME_LIMIT_AFTER } " )
59
61
logger .debug ("begin main commit and CVE processing" )
60
62
61
63
# construct an advisory record
@@ -67,33 +69,33 @@ def prospector( # noqa: C901
67
69
fetch_references ,
68
70
use_nvd ,
69
71
publication_date ,
70
- advisory_keywords ,
71
- modified_files ,
72
+ set ( advisory_keywords ) ,
73
+ set ( modified_files ) ,
72
74
)
73
75
74
76
# obtain a repository object
75
77
repository = Git (repository_url , git_cache )
76
78
77
- with ConsoleWriter ("Git repository cloning" ) as _ :
78
- logger .info (f"Downloading repository { repository .url } in { repository .path } " )
79
+ with ConsoleWriter ("Git repository cloning" ) as console :
80
+ logger .debug (f"Downloading repository { repository .url } in { repository .path } " )
79
81
repository .clone ()
80
82
81
83
tags = repository .get_tags ()
82
84
83
85
logger .debug (f"Found tags: { tags } " )
84
86
logger .info (f"Done retrieving { repository .url } " )
85
87
86
- if tag_interval is not None :
87
- prev_tag , next_tag = tag_interval .split (":" )
88
- elif version_interval is not None :
88
+ # if tag_interval and len(tag_interval) > 0 :
89
+ # prev_tag, next_tag = tag_interval.split(":")
90
+ if version_interval and len ( version_interval ) > 0 :
89
91
prev_tag , next_tag = get_possible_tags (tags , version_interval )
92
+ ConsoleWriter .print (f"Found tags: { prev_tag } - { next_tag } " )
93
+ ConsoleWriter .print_ (MessageStatus .OK )
90
94
else :
91
- logger .error ("No version/tag interval provided" )
95
+ logger .info ("No version/tag interval provided" )
96
+ console .print ("No interval provided" , status = MessageStatus .ERROR )
92
97
sys .exit (1 )
93
98
94
- ConsoleWriter .print (f"Found tags: { prev_tag } - { next_tag } " )
95
- ConsoleWriter .print_ (MessageStatus .OK )
96
-
97
99
# retrieve of commit candidates
98
100
candidates = get_candidates (
99
101
advisory_record ,
@@ -105,6 +107,8 @@ def prospector( # noqa: C901
105
107
limit_candidates ,
106
108
)
107
109
110
+ candidates = filter (candidates )
111
+
108
112
with ExecutionTimer (
109
113
core_statistics .sub_collection ("commit preprocessing" )
110
114
) as timer :
@@ -132,50 +136,41 @@ def prospector( # noqa: C901
132
136
133
137
if len (missing ) > 0 :
134
138
135
- pbar = tqdm (missing , desc = "Preprocessing commits" , unit = "commit" )
139
+ pbar = tqdm (
140
+ missing ,
141
+ desc = "Preprocessing commits" ,
142
+ unit = "commit" ,
143
+ )
136
144
with Counter (
137
145
timer .collection .sub_collection ("commit preprocessing" )
138
146
) as counter :
139
147
counter .initialize ("preprocessed commits" , unit = "commit" )
140
148
for raw_commit in pbar :
141
149
counter .increment ("preprocessed commits" )
142
-
143
- raw_commit .set_tags (next_tag )
144
150
preprocessed_commits .append (make_from_raw_commit (raw_commit ))
145
151
else :
146
152
writer .print ("\n All commits found in the backend" )
147
153
148
154
pretty_log (logger , advisory_record )
149
- logger .debug (
150
- f"preprocessed { len (preprocessed_commits )} commits are only composed of test files"
151
- )
155
+
152
156
payload = [c .to_dict () for c in preprocessed_commits ]
153
157
154
158
if len (payload ) > 0 and use_backend != "never" :
155
159
save_preprocessed_commits (backend_address , payload )
156
160
else :
157
161
logger .warning ("Preprocessed commits are not being sent to backend" )
158
162
159
- # filter commits
160
- preprocessed_commits = filter (preprocessed_commits )
161
-
162
- # apply rules and rank candidates
163
163
ranked_candidates = evaluate_commits (preprocessed_commits , advisory_record , rules )
164
- # TODO: if a twin has higher relevance than the one displayed, the relevance should be intherited
165
- twin_branches_map = {
166
- commit .commit_id : commit .get_tag () for commit in ranked_candidates
167
- }
164
+
168
165
ConsoleWriter .print ("Commit ranking and aggregation..." )
169
- ranked_candidates = tag_and_aggregate_commits (
170
- ranked_candidates , twin_branches_map , next_tag
171
- )
166
+ ranked_candidates = tag_and_aggregate_commits (ranked_candidates , next_tag )
172
167
ConsoleWriter .print_ (MessageStatus .OK )
173
168
174
169
return ranked_candidates , advisory_record
175
170
176
171
177
- def filter (commits : List [ Commit ]) -> List [ Commit ]:
178
- with ConsoleWriter ("Candidate filtering\n " ) as console :
172
+ def filter (commits : Dict [ str , RawCommit ]) -> Dict [ str , RawCommit ]:
173
+ with ConsoleWriter ("\n Candidate filtering\n " ) as console :
179
174
commits , rejected = filter_commits (commits )
180
175
if rejected > 0 :
181
176
console .print (f"Dropped { rejected } candidates" )
@@ -190,20 +185,27 @@ def evaluate_commits(commits: List[Commit], advisory: AdvisoryRecord, rules: Lis
190
185
return ranked_commits
191
186
192
187
193
def tag_and_aggregate_commits(commits: List[Commit], next_tag: str) -> List[Commit]:
    """Keep only the commits that carry ``next_tag`` and relabel their twins.

    Args:
        commits: Candidate commits, in the order they should be reported.
        next_tag: Tag to filter on. When ``None`` no filtering is done.

    Returns:
        ``commits`` itself (same object, untouched) when ``next_tag`` is
        ``None``. Otherwise a new list holding, in their original order, the
        commits for which ``commit.has_tag(next_tag)`` is truthy.

    Side effects:
        Every kept commit has its ``tags`` attribute replaced by
        ``[next_tag]``, and each entry of its ``twins`` gets position 0
        overwritten with the ``get_tag()`` value of the commit whose id is
        stored at position 1, or ``"no-tag"`` when that id is not among
        ``commits``.
    """
    if next_tag is None:
        return commits

    # Built up front, i.e. before the loop below starts rewriting `commit.tags`.
    twin_tags_map = {commit.commit_id: commit.get_tag() for commit in commits}

    # NOTE(review): the append is assumed to belong to the `has_tag` branch
    # (only matching commits are returned) - confirm against the repository.
    tagged_commits = []
    for commit in commits:
        if not commit.has_tag(next_tag):
            continue
        commit.tags = [next_tag]
        for twin in commit.twins:
            twin[0] = twin_tags_map.get(twin[1], "no-tag")
        tagged_commits.append(commit)

    # TODO: See the order of the tag list in the commits listed as twin to get
    # the correct tag.
    return tagged_commits
208
210
209
211
@@ -237,8 +239,11 @@ def retrieve_preprocessed_commits(
237
239
]
238
240
239
241
logger .error (f"Missing { len (missing )} commits" )
240
-
241
- return missing , [Commit .parse_obj (rc ) for rc in retrieved_commits ]
242
+ commits = [Commit .parse_obj (rc ) for rc in retrieved_commits ]
243
+ # Sets the tags
244
+ # for commit in commits:
245
+ # commit.tags = candidates[commit.commit_id].tags
246
+ return (missing , commits )
242
247
243
248
244
249
def save_preprocessed_commits (backend_address , payload ):
@@ -287,14 +292,13 @@ def get_candidates(
287
292
if advisory_record .published_timestamp :
288
293
since = advisory_record .published_timestamp - time_limit_before
289
294
until = advisory_record .published_timestamp + time_limit_after
290
- # Here i need to strip the github tags of useless stuff
291
- # This is now a list of raw commits
292
- # TODO: get_commits replaced for now
295
+
293
296
candidates = repository .create_commits (
294
297
since = since ,
295
298
until = until ,
296
- ancestors_of = next_tag ,
297
- exclude_ancestors_of = prev_tag ,
299
+ next_tag = next_tag ,
300
+ prev_tag = prev_tag ,
301
+ filter_extension = advisory_record .files_extension ,
298
302
)
299
303
300
304
core_statistics .record ("candidates" , len (candidates ), unit = "commits" )
0 commit comments