Skip to content

Commit 33f5278

Browse files
author
Mohammed Diaa
authored
Merge pull request #26 from trickest/delay-flag
Delay and adjust delay flags
2 parents 28ea930 + 2b27a2e commit 33f5278

File tree

2 files changed

+184
-90
lines changed

2 files changed

+184
-90
lines changed

README.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,10 @@ docker pull trickest/find-gh-poc
2121
GraphQL search query
2222
-query-file string
2323
File to read GraphQL search query from
24+
-adjust-delay
25+
Automatically adjust time delay between requests
26+
-delay int
27+
Time delay after every GraphQL request [ms]
2428
-silent
2529
Don't print JSON output to stdout
2630
-token-string string

main.go

Lines changed: 180 additions & 90 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ import (
1212
"os/signal"
1313
"regexp"
1414
"strings"
15+
"sync"
1516
"syscall"
1617
"time"
1718

@@ -25,14 +26,11 @@ const (
2526
CVERegex = "(?i)cve[-–_][0-9]{4}[-–_][0-9]{4,}"
2627
)
2728

28-
var ReadmeQuery struct {
29-
Repository struct {
30-
Object struct {
31-
Blob struct {
32-
Text string
33-
} `graphql:"... on Blob"`
34-
} `graphql:"object(expression: \"HEAD:README.md\")"`
35-
} `graphql:"repository(owner: $owner, name: $name)"`
29+
type RateLimit struct {
30+
Limit int
31+
Remaining int
32+
Cost int
33+
ResetAt time.Time
3634
}
3735

3836
type Repository struct {
@@ -56,27 +54,24 @@ type RepositoryResult struct {
5654
Readme *string `json:"readme,omitempty"`
5755
}
5856

59-
var CVEQuery struct {
60-
Search struct {
61-
RepositoryCount int
62-
PageInfo struct {
63-
EndCursor githubv4.String
64-
StartCursor githubv4.String
65-
}
66-
Edges []struct {
67-
Node struct {
68-
Repo Repository `graphql:"... on Repository"`
69-
}
70-
}
71-
} `graphql:"search(query: $query, type: REPOSITORY, first: 100)"`
57+
var ReadmeQuery struct {
58+
RateLimit RateLimit `graphql:"rateLimit"`
59+
Repository struct {
60+
Object struct {
61+
Blob struct {
62+
Text string
63+
} `graphql:"... on Blob"`
64+
} `graphql:"object(expression: \"HEAD:README.md\")"`
65+
} `graphql:"repository(owner: $owner, name: $name)"`
7266
}
7367

74-
var CVEPaginationQuery struct {
75-
Search struct {
68+
var CVEQuery struct {
69+
RateLimit RateLimit `graphql:"rateLimit"`
70+
Search struct {
7671
RepositoryCount int
7772
PageInfo struct {
7873
EndCursor githubv4.String
79-
StartCursor githubv4.String
74+
HasNextPage bool
8075
}
8176
Edges []struct {
8277
Node struct {
@@ -94,6 +89,12 @@ var (
9489
githubCreateDate = time.Date(2008, 2, 8, 0, 0, 0, 0, time.UTC)
9590
bar = &progressbar.ProgressBar{}
9691
barInitialized = false
92+
requestDelay int
93+
adjustDelay bool
94+
rateLimit *RateLimit
95+
delayMutex = &sync.Mutex{}
96+
outputFile string
97+
silent bool
9798
)
9899

99100
func getReadme(repoUrl string) string {
@@ -105,11 +106,22 @@ func getReadme(repoUrl string) string {
105106
"name": githubv4.String(strings.Trim(urlSplit[len(urlSplit)-1], " ")),
106107
}
107108

109+
errHandle:
110+
start := time.Now()
108111
err := githubV4Client.Query(context.Background(), &ReadmeQuery, variables)
112+
duration := time.Since(start).Milliseconds() - int64(time.Millisecond)
109113
if err != nil {
110-
fmt.Println(err)
111-
return ""
114+
delayMutex.Lock()
115+
rateLimit = &ReadmeQuery.RateLimit
116+
handleGraphQLAPIError(err)
117+
delayMutex.Unlock()
118+
goto errHandle
112119
}
120+
delayMutex.Lock()
121+
rateLimit = &ReadmeQuery.RateLimit
122+
time.Sleep(time.Duration(int64(requestDelay*rateLimit.Cost)*int64(time.Millisecond) - duration))
123+
delayMutex.Unlock()
124+
113125
return ReadmeQuery.Repository.Object.Blob.Text
114126
} else {
115127
return ""
@@ -122,13 +134,24 @@ func getRepos(query string, startingDate time.Time, endingDate time.Time) {
122134
startingDate.Format(time.RFC3339) + ".." + endingDate.Format(time.RFC3339)
123135
variables := map[string]interface{}{
124136
"query": githubv4.String(query),
137+
"after": (*githubv4.String)(nil),
125138
}
126139

140+
errHandle:
141+
start := time.Now()
127142
err := githubV4Client.Query(context.Background(), &CVEQuery, variables)
143+
duration := time.Since(start).Milliseconds() - int64(time.Millisecond)
128144
if err != nil {
129-
fmt.Println(err)
130-
return
145+
delayMutex.Lock()
146+
rateLimit = &CVEQuery.RateLimit
147+
handleGraphQLAPIError(err)
148+
delayMutex.Unlock()
149+
goto errHandle
131150
}
151+
delayMutex.Lock()
152+
rateLimit = &CVEQuery.RateLimit
153+
time.Sleep(time.Duration(int64(requestDelay*rateLimit.Cost)*int64(time.Millisecond) - duration))
154+
delayMutex.Unlock()
132155

133156
maxRepos := CVEQuery.Search.RepositoryCount
134157
if !barInitialized {
@@ -140,6 +163,32 @@ func getRepos(query string, startingDate time.Time, endingDate time.Time) {
140163
progressbar.OptionOnCompletion(func() { fmt.Println() }),
141164
)
142165
barInitialized = true
166+
if adjustDelay {
167+
go func() {
168+
for {
169+
delayMutex.Lock()
170+
remainingRepos := bar.GetMax() - len(reposResults)
171+
remainingRequests := remainingRepos + remainingRepos/100 + 1
172+
if remainingRequests < rateLimit.Remaining {
173+
requestDelay = 0
174+
delayMutex.Unlock()
175+
break
176+
} else {
177+
if rateLimit.Remaining == 0 {
178+
handleGraphQLAPIError(nil)
179+
delayMutex.Unlock()
180+
continue
181+
}
182+
untilNextReset := rateLimit.ResetAt.Sub(time.Now()).Milliseconds()
183+
if untilNextReset < 0 {
184+
untilNextReset = time.Hour.Milliseconds()
185+
}
186+
requestDelay = int(untilNextReset)/rateLimit.Remaining + 1
187+
}
188+
delayMutex.Unlock()
189+
}
190+
}()
191+
}
143192
}
144193
if maxRepos >= 1000 {
145194
dateDif := endingDate.Sub(startingDate) / 2
@@ -170,20 +219,28 @@ func getRepos(query string, startingDate time.Time, endingDate time.Time) {
170219

171220
variables = map[string]interface{}{
172221
"query": githubv4.String(query),
173-
"after": CVEQuery.Search.PageInfo.EndCursor,
222+
"after": githubv4.NewString(CVEQuery.Search.PageInfo.EndCursor),
174223
}
175224
for reposCnt < maxRepos {
176-
time.Sleep(time.Second)
177-
178-
err = githubV4Client.Query(context.Background(), &CVEPaginationQuery, variables)
225+
start = time.Now()
226+
err = githubV4Client.Query(context.Background(), &CVEQuery, variables)
227+
duration = time.Since(start).Milliseconds() - int64(time.Millisecond)
179228
if err != nil {
180-
fmt.Println(err)
229+
delayMutex.Lock()
230+
rateLimit = &CVEQuery.RateLimit
231+
handleGraphQLAPIError(err)
232+
delayMutex.Unlock()
233+
continue
181234
}
235+
delayMutex.Lock()
236+
rateLimit = &CVEQuery.RateLimit
237+
time.Sleep(time.Duration(int64(requestDelay*rateLimit.Cost)*int64(time.Millisecond) - duration))
238+
delayMutex.Unlock()
182239

183-
if len(CVEPaginationQuery.Search.Edges) == 0 {
240+
if len(CVEQuery.Search.Edges) == 0 {
184241
break
185242
}
186-
for _, nodeStruct := range CVEPaginationQuery.Search.Edges {
243+
for _, nodeStruct := range CVEQuery.Search.Edges {
187244
if nodeStruct.Node.Repo.IsEmpty {
188245
continue
189246
}
@@ -203,7 +260,87 @@ func getRepos(query string, startingDate time.Time, endingDate time.Time) {
203260
_ = bar.Add(1)
204261
}
205262

206-
variables["after"] = CVEPaginationQuery.Search.PageInfo.EndCursor
263+
variables["after"] = githubv4.NewString(CVEQuery.Search.PageInfo.EndCursor)
264+
}
265+
}
266+
267+
func handleGraphQLAPIError(err error) {
268+
if err == nil || strings.Contains(err.Error(), "limit exceeded") {
269+
untilNextReset := rateLimit.ResetAt.Sub(time.Now())
270+
if untilNextReset < time.Minute {
271+
rateLimit.ResetAt = time.Now().Add(untilNextReset).Add(time.Hour)
272+
time.Sleep(untilNextReset + 3*time.Second)
273+
return
274+
} else {
275+
processResults()
276+
writeOutput(outputFile, silent)
277+
fmt.Println("\n" + err.Error())
278+
fmt.Println("Next reset at " + rateLimit.ResetAt.Format(time.RFC1123))
279+
os.Exit(0)
280+
}
281+
}
282+
processResults()
283+
writeOutput(outputFile, silent)
284+
fmt.Println("\n" + err.Error())
285+
os.Exit(0)
286+
}
287+
288+
func writeOutput(fileName string, silent bool) {
289+
if len(reposResults) == 0 {
290+
return
291+
}
292+
output, err := os.Create(fileName)
293+
if err != nil {
294+
fmt.Println(err)
295+
fmt.Println("Couldn't create output file")
296+
}
297+
defer output.Close()
298+
299+
for id, repoURLs := range reposPerCVE {
300+
for _, r := range repoURLs {
301+
_, _ = io.WriteString(output, id+" - "+r+"\n")
302+
}
303+
}
304+
305+
if !silent {
306+
data, _ := json.MarshalIndent(reposResults, "", " ")
307+
fmt.Println(string(data))
308+
}
309+
}
310+
311+
func processResults() {
312+
re := regexp.MustCompile(CVERegex)
313+
314+
for i, repo := range reposResults {
315+
ids := make(map[string]bool, 0)
316+
317+
matches := re.FindAllStringSubmatch(repo.Url, -1)
318+
matches = append(matches, re.FindAllStringSubmatch(repo.Description, -1)...)
319+
matches = append(matches, re.FindAllStringSubmatch(*repo.Readme, -1)...)
320+
for _, topic := range repo.Topics {
321+
matches = append(matches, re.FindAllStringSubmatch(topic, -1)...)
322+
}
323+
324+
for _, m := range matches {
325+
if m != nil && len(m) > 0 {
326+
if m[0] != "" {
327+
m[0] = strings.ToUpper(m[0])
328+
m[0] = strings.ReplaceAll(m[0], "_", "-")
329+
ids[strings.ReplaceAll(m[0], "–", "-")] = true
330+
}
331+
}
332+
}
333+
334+
if len(ids) > 0 {
335+
reposResults[i].CVEIDs = make([]string, 0)
336+
for id := range ids {
337+
reposResults[i].CVEIDs = append(reposResults[i].CVEIDs, id)
338+
reposPerCVE[id] = append(reposPerCVE[id], repo.Url)
339+
}
340+
}
341+
342+
reposResults[i].Readme = nil
343+
reposResults[i].Topics = nil
207344
}
208345
}
209346

@@ -212,8 +349,10 @@ func main() {
212349
tokenFile := flag.String("token-file", "", "File to read Github token from")
213350
query := flag.String("query-string", "", "GraphQL search query")
214351
queryFile := flag.String("query-file", "", "File to read GraphQL search query from")
215-
outputFile := flag.String("o", "", "Output file name")
216-
silent := flag.Bool("silent", false, "Don't print JSON output to stdout")
352+
flag.StringVar(&outputFile, "o", "", "Output file name")
353+
flag.BoolVar(&silent, "silent", false, "Don't print JSON output to stdout")
354+
flag.IntVar(&requestDelay, "delay", 0, "Time delay after every GraphQL request [ms]")
355+
flag.BoolVar(&adjustDelay, "adjust-delay", false, "Automatically adjust time delay between requests")
217356
flag.Parse()
218357

219358
go func() {
@@ -225,7 +364,7 @@ func main() {
225364
os.Exit(0)
226365
}()
227366

228-
if (*token == "" && *tokenFile == "") || *outputFile == "" {
367+
if (*token == "" && *tokenFile == "") || outputFile == "" {
229368
fmt.Println("Token and output file must be specified!")
230369
os.Exit(1)
231370
}
@@ -283,56 +422,7 @@ func main() {
283422
searchQuery += " in:readme in:description in:name"
284423
getRepos(searchQuery, githubCreateDate, time.Now().UTC())
285424

286-
if len(reposResults) > 0 {
287-
re := regexp.MustCompile(CVERegex)
288-
289-
for i, repo := range reposResults {
290-
ids := make(map[string]bool, 0)
291-
292-
matches := re.FindAllStringSubmatch(repo.Url, -1)
293-
matches = append(matches, re.FindAllStringSubmatch(repo.Description, -1)...)
294-
matches = append(matches, re.FindAllStringSubmatch(*repo.Readme, -1)...)
295-
for _, topic := range repo.Topics {
296-
matches = append(matches, re.FindAllStringSubmatch(topic, -1)...)
297-
}
298-
299-
for _, m := range matches {
300-
if m != nil && len(m) > 0 {
301-
if m[0] != "" {
302-
m[0] = strings.ToUpper(m[0])
303-
m[0] = strings.ReplaceAll(m[0], "_", "-")
304-
ids[strings.ReplaceAll(m[0], "–", "-")] = true
305-
}
306-
}
307-
}
308-
309-
if len(ids) > 0 {
310-
reposResults[i].CVEIDs = make([]string, 0)
311-
for id := range ids {
312-
reposResults[i].CVEIDs = append(reposResults[i].CVEIDs, id)
313-
reposPerCVE[id] = append(reposPerCVE[id], repo.Url)
314-
}
315-
}
425+
processResults()
426+
writeOutput(outputFile, silent)
316427

317-
reposResults[i].Readme = nil
318-
reposResults[i].Topics = nil
319-
}
320-
321-
output, err := os.Create(*outputFile)
322-
if err != nil {
323-
fmt.Println("Couldn't create output file")
324-
}
325-
defer output.Close()
326-
327-
for id, repoURLs := range reposPerCVE {
328-
for _, r := range repoURLs {
329-
_, _ = io.WriteString(output, id+" - "+r+"\n")
330-
}
331-
}
332-
333-
if !*silent {
334-
data, _ := json.MarshalIndent(reposResults, "", " ")
335-
fmt.Println(string(data))
336-
}
337-
}
338428
}

0 commit comments

Comments
 (0)