@@ -5,13 +5,11 @@ import (
5
5
"fmt"
6
6
"math/rand/v2"
7
7
"net/http"
8
- "net/url"
9
8
"os"
10
9
"regexp"
11
10
"sort"
12
11
"strings"
13
12
"sync"
14
- "sync/atomic"
15
13
"time"
16
14
17
15
"github.com/gobwas/glob"
@@ -23,10 +21,8 @@ import (
23
21
24
22
"github.com/trufflesecurity/trufflehog/v3/pkg/cache"
25
23
"github.com/trufflesecurity/trufflehog/v3/pkg/cache/simple"
26
- "github.com/trufflesecurity/trufflehog/v3/pkg/common"
27
24
"github.com/trufflesecurity/trufflehog/v3/pkg/context"
28
25
"github.com/trufflesecurity/trufflehog/v3/pkg/giturl"
29
- "github.com/trufflesecurity/trufflehog/v3/pkg/handlers"
30
26
"github.com/trufflesecurity/trufflehog/v3/pkg/pb/source_metadatapb"
31
27
"github.com/trufflesecurity/trufflehog/v3/pkg/pb/sourcespb"
32
28
"github.com/trufflesecurity/trufflehog/v3/pkg/sanitizer"
@@ -323,37 +319,6 @@ func (s *Source) visibilityOf(ctx context.Context, repoURL string) source_metada
323
319
return repoInfo .visibility
324
320
}
325
321
326
- // Chunks emits chunks of bytes over a channel.
327
- func (s * Source ) Chunks (ctx context.Context , chunksChan chan * sources.Chunk , targets ... sources.ChunkingTarget ) error {
328
- chunksReporter := sources.ChanReporter {Ch : chunksChan }
329
- // If targets are provided, we're only scanning the data in those targets.
330
- // Otherwise, we're scanning all data.
331
- // This allows us to only scan the commit where a vulnerability was found.
332
- if len (targets ) > 0 {
333
- errs := s .scanTargets (ctx , targets , chunksReporter )
334
- return errors .Join (errs ... )
335
- }
336
-
337
- // Reset consumption and rate limit metrics on each run.
338
- githubNumRateLimitEncountered .WithLabelValues (s .name ).Set (0 )
339
- githubSecondsSpentRateLimited .WithLabelValues (s .name ).Set (0 )
340
- githubReposScanned .WithLabelValues (s .name ).Set (0 )
341
-
342
- // We don't care about handling enumerated values as they happen during
343
- // the normal Chunks flow because we enumerate and scan in two steps.
344
- noopReporter := sources.VisitorReporter {
345
- VisitUnit : func (context.Context , sources.SourceUnit ) error {
346
- return nil
347
- },
348
- }
349
- err := s .Enumerate (ctx , noopReporter )
350
- if err != nil {
351
- return fmt .Errorf ("error enumerating: %w" , err )
352
- }
353
-
354
- return s .scan (ctx , chunksReporter )
355
- }
356
-
357
322
// Enumerate enumerates the GitHub source based on authentication method and
358
323
// user configuration. It populates s.filteredRepoCache, s.repoInfoCache,
359
324
// s.memberCache, s.totalRepoSize, s.orgsCache, and s.repos. Additionally,
@@ -624,47 +589,6 @@ func createGitHubClient(httpClient *http.Client, apiEndpoint string) (*github.Cl
624
589
return github .NewClient (httpClient ).WithEnterpriseURLs (apiEndpoint , apiEndpoint )
625
590
}
626
591
627
- func (s * Source ) scan (ctx context.Context , reporter sources.ChunkReporter ) error {
628
- var scannedCount uint64 = 1
629
-
630
- ctx .Logger ().V (2 ).Info ("Found repos to scan" , "count" , len (s .repos ))
631
-
632
- // If there is resume information available, limit this scan to only the repos that still need scanning.
633
- reposToScan , progressIndexOffset := sources .FilterReposToResume (s .repos , s .GetProgress ().EncodedResumeInfo )
634
- s .repos = reposToScan
635
-
636
- for i , repoURL := range s .repos {
637
- s .jobPool .Go (func () error {
638
- if common .IsDone (ctx ) {
639
- return nil
640
- }
641
- ctx := context .WithValue (ctx , "repo" , repoURL )
642
-
643
- // TODO: set progress complete is being called concurrently with i
644
- s .setProgressCompleteWithRepo (i , progressIndexOffset , repoURL )
645
- // Ensure the repo is removed from the resume info after being scanned.
646
- defer func (s * Source , repoURL string ) {
647
- s .resumeInfoMutex .Lock ()
648
- defer s .resumeInfoMutex .Unlock ()
649
- s .resumeInfoSlice = sources .RemoveRepoFromResumeInfo (s .resumeInfoSlice , repoURL )
650
- }(s , repoURL )
651
-
652
- if err := s .scanRepo (ctx , repoURL , reporter ); err != nil {
653
- ctx .Logger ().Error (err , "error scanning repo" )
654
- return nil
655
- }
656
-
657
- atomic .AddUint64 (& scannedCount , 1 )
658
- return nil
659
- })
660
- }
661
-
662
- _ = s .jobPool .Wait ()
663
- s .SetProgressComplete (len (s .repos ), len (s .repos ), "Completed GitHub scan" , "" )
664
-
665
- return nil
666
- }
667
-
668
592
// scanRepo attempts to scan the provided URL and any associated wiki and
669
593
// comments if configured. An error is returned if we could not find necessary
670
594
// repository metadata or clone the repo, otherwise all errors are reported to
@@ -1500,70 +1424,6 @@ func (s *Source) chunkPullRequestComments(ctx context.Context, repoInfo repoInfo
1500
1424
return nil
1501
1425
}
1502
1426
1503
- func (s * Source ) scanTargets (ctx context.Context , targets []sources.ChunkingTarget , reporter sources.ChunkReporter ) []error {
1504
- var errs []error
1505
- for _ , tgt := range targets {
1506
- if err := s .scanTarget (ctx , tgt , reporter ); err != nil {
1507
- ctx .Logger ().Error (err , "error scanning target" )
1508
- errs = append (errs , & sources.TargetedScanError {Err : err , SecretID : tgt .SecretID })
1509
- }
1510
- }
1511
-
1512
- return errs
1513
- }
1514
-
1515
- func (s * Source ) scanTarget (ctx context.Context , target sources.ChunkingTarget , reporter sources.ChunkReporter ) error {
1516
- metaType , ok := target .QueryCriteria .GetData ().(* source_metadatapb.MetaData_Github )
1517
- if ! ok {
1518
- return fmt .Errorf ("unable to cast metadata type for targeted scan" )
1519
- }
1520
- meta := metaType .Github
1521
-
1522
- u , err := url .Parse (meta .GetLink ())
1523
- if err != nil {
1524
- return fmt .Errorf ("unable to parse GitHub URL: %w" , err )
1525
- }
1526
-
1527
- // The owner is the second segment and the repo is the third segment of the path.
1528
- // Ex: https://github.com/owner/repo/.....
1529
- segments := strings .Split (u .Path , "/" )
1530
- if len (segments ) < 3 {
1531
- return fmt .Errorf ("invalid GitHub URL" )
1532
- }
1533
-
1534
- readCloser , resp , err := s .connector .APIClient ().Repositories .DownloadContents (
1535
- ctx ,
1536
- segments [1 ],
1537
- segments [2 ],
1538
- meta .GetFile (),
1539
- & github.RepositoryContentGetOptions {Ref : meta .GetCommit ()})
1540
- // As of this writing, if the returned readCloser is not nil, it's just the Body of the returned github.Response, so
1541
- // there's no need to independently close it.
1542
- if resp != nil && resp .Body != nil {
1543
- defer resp .Body .Close ()
1544
- }
1545
- if err != nil {
1546
- return fmt .Errorf ("could not download file for scan: %w" , err )
1547
- }
1548
- if resp .StatusCode != http .StatusOK {
1549
- return fmt .Errorf ("unexpected HTTP response status when trying to download file for scan: %v" , resp .Status )
1550
- }
1551
-
1552
- chunkSkel := sources.Chunk {
1553
- SourceType : s .Type (),
1554
- SourceName : s .name ,
1555
- SourceID : s .SourceID (),
1556
- JobID : s .JobID (),
1557
- SecretID : target .SecretID ,
1558
- SourceMetadata : & source_metadatapb.MetaData {
1559
- Data : & source_metadatapb.MetaData_Github {Github : meta },
1560
- },
1561
- Verify : s .verify ,
1562
- }
1563
- fileCtx := context .WithValues (ctx , "path" , meta .GetFile ())
1564
- return handlers .HandleFile (fileCtx , readCloser , & chunkSkel , reporter )
1565
- }
1566
-
1567
1427
func (s * Source ) ChunkUnit (ctx context.Context , unit sources.SourceUnit , reporter sources.ChunkReporter ) error {
1568
1428
repoURL , _ := unit .SourceUnitID ()
1569
1429
ctx = context .WithValue (ctx , "repo" , repoURL )
0 commit comments