Draft rate limiter with example usage #4121

Closed · wants to merge 18 commits (changes shown below are from 2 commits)
122 changes: 122 additions & 0 deletions pkg/common/rate_limiter.go
@@ -0,0 +1,122 @@
package common
Collaborator:

Do we have to be this generic? Can we give this its own package? (serious question, I'm not very good at Go)

Contributor Author:

I think giving it its own package is probably a good idea--in particular we can then add useful RateLimits.


import (
"errors"
"fmt"
"net/http"
"sync"
"time"

"github.com/trufflesecurity/trufflehog/v3/pkg/context"
)

// RateLimit represents a rate limiting implementation. A rate limiter
// comprises 0 or more limits. Policies should be goroutine safe.
Collaborator:

As a dumb guy, I benefit from extremely clear vocabulary. This comment has four different domain nouns:

  • "rate limiting implementation" [aka RateLimit]
  • "rate limiter"
  • "limit"
  • "policy"

But I can't tell how they relate to each other :( Is a RateLimit a limit? Is a limit a policy? If so, why are multiple terms being used?

Contributor Author:

Oh no this is perfect (also I got rid of "policy" and that was an imperfect editing step). Let me tighten it up a bit 👍🏻

//
// Importantly, limits can assume they're only ever used on a single API, and
// thus can be used in more than one rate limiter. For example, if an API has 2
// endpoints, one accepts 5r/s and another accepts 1r/s, but both have a limit
// of total 500r/month, the policy implementing the 500r/month limit should be
// able to be used in both of the 2 rate limiters for the 5r/s and 1r/s limits.
Contributor:

Nice

Collaborator:

I'm also too dumb for this comment. What is the difference between an endpoint "accepting" 5r/s and having a "limit" of 500r/mo? And why does a limit being used for a single API mean that it can be used in more than one rate limiter? (Why would a limit used for multiple APIs not be usable in more than one rate limiter?)

Contributor Author:

Haha ok let me 🔨 on it a little

type RateLimit interface {
// Execute and Update execute and update a policy, respectively. These
Collaborator:

What do "execute" and "update" mean in the domain? I'm inferring that "execute" means "perform an API operation in a way that respects all configured rate limits, accounting for any known relevant state" and "update" means "update relevant rate limit state," but I had to think about it, and I don't know if I'm correct.

Collaborator:

It might clarify things to write a separate doc comment for each. (Maybe not, though.)

Contributor Author:

Good idea yeah; gave this a shot 👍🏻

// should:
// - Be goroutine safe
// - Check if ctx has been canceled
// - Not modify req/res
// If they return an error, it's combined with errors from the
// execution/updating of the other limits. Other limits will still be
// executed/updated.
//
// If waiting/sleeping is required, Execute should do it. Keep in mind,
// however, that each policy's Execute method is called serially, so Execute
// should *NEVER* sleep for a duration--it should only sleep until a time.
// This also means that if an API only returns durations, Update must
// immediately convert them into times, and it's recommended to pad these
// somewhat.
Execute(ctx context.Context, req *http.Request, now time.Time) error
Update(ctx context.Context, res *http.Response) error
}
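
To make the shared-limit idea from the doc comment concrete, here is a minimal sketch (not part of this PR) of a monthly-cap limit that could be handed to two different rate limiters alongside their per-endpoint limits. The MonthlyCapLimit name and numbers are hypothetical, and the standard library context is used for brevity in place of the trufflehog context wrapper.

```go
package common

import (
	"context"
	"net/http"
	"sync"
	"time"
)

// MonthlyCapLimit is a hypothetical RateLimit that enforces a shared monthly
// request cap. Because it only tracks its own counter, the same instance can
// be passed to several RateLimiters (e.g. one per endpoint). The accounting
// is deliberately loose under heavy concurrency; it's only a sketch.
type MonthlyCapLimit struct {
	mu        sync.Mutex
	capacity  int
	remaining int
	resetAt   time.Time
}

func NewMonthlyCapLimit(capacity int, resetAt time.Time) *MonthlyCapLimit {
	return &MonthlyCapLimit{capacity: capacity, remaining: capacity, resetAt: resetAt}
}

// Execute lets the request through while budget remains; otherwise it sleeps
// until the known reset *time* (never for a bare duration), or returns early
// if the context is canceled.
func (m *MonthlyCapLimit) Execute(ctx context.Context, req *http.Request, now time.Time) error {
	m.mu.Lock()
	if m.remaining > 0 {
		m.mu.Unlock()
		return nil
	}
	resetAt := m.resetAt
	m.mu.Unlock()

	select {
	case <-ctx.Done():
		return ctx.Err()
	case <-time.After(resetAt.Sub(now)):
	}

	m.mu.Lock()
	m.remaining = m.capacity
	m.resetAt = resetAt.AddDate(0, 1, 0)
	m.mu.Unlock()
	return nil
}

// Update spends one unit of the monthly budget after a response is observed.
func (m *MonthlyCapLimit) Update(ctx context.Context, res *http.Response) error {
	m.mu.Lock()
	defer m.mu.Unlock()
	m.remaining--
	return nil
}
```

Two limiters could then share one instance, e.g. `NewRateLimiter(fiveRPSLimit, monthly)` for the fast endpoint and `NewRateLimiter(oneRPSLimit, monthly)` for the slow one, where `fiveRPSLimit` and `oneRPSLimit` are placeholder per-endpoint limits.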

// RateLimiter provides a facility for rate limiting HTTP requests. To use it:
// - Create a RateLimiter with its limits
// - Call .Do instead of what you would normally call to make a request
// - Process the response (returned from .Do) as normal
type RateLimiter struct {
Contributor:

I'm going to suggest this be ClientSideRateLimiter just to be very clear what this is intended to do, but it's not a hill to die on. I struggled without context to get out of the server-side mindset when reviewing this, and I imagine future devs might make the same mistake when they reach for a server side rate limiting library and see "rate limiting".

Contributor:

Some side conversation happened on this topic in slack. I leave it up to your excellent judgement.

Contributor Author:

(cc: @rosecodym) I started to %s/RateLimit/SourceIntegrationRateLimit/g, but then I thought: what if, instead of common, we put this in sources?

limits []RateLimit
}

// NewRateLimiter returns a new rate limiter with the given limits.
func NewRateLimiter(limits ...RateLimit) *RateLimiter {
return &RateLimiter{limits: limits}
}

// Do makes an HTTP request subject to the rate limiter's limits.
func (rl *RateLimiter) Do(
Contributor:

Is there a way to make sure that we don't repeatedly rate limit in a cycle forever? Or is that something that we have to force onto clients of this library?

Contributor Author:

Yeah, the idea is that the call to ctx.Err checks whether the context has been canceled (i.e. it hit a timeout or deadline) before we execute a RateLimit.

I also ran down how Go's HTTP client handles contexts, and basically you build the context into the request if you want it. This isn't ideal; I didn't do a survey, but I'd be (very) surprised if our Sources did this consistently. I'm pretty sure there's a default timeout somewhere, but I don't know what it is (almost certainly very long). Maybe tossing in another ctx.Err check before the makeRequest() call mitigates that? Feels prudent to add.

Or is that something that we have to force onto clients of this library?

One thing we could do is have RateLimiter enforce its own timeouts, but I think that's a little too risky or unexpected. Re: your other comment, hopefully metrics/alerts cover our needs here (e.g. we can see a limit/source/customer is acting up and do something about it)
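
For reference, a minimal sketch of what building the context into the request looks like with the standard library (the helper name, URL parameter, and 30-second timeout are arbitrary illustrations, not part of this PR):

```go
import (
	"context"
	"net/http"
	"time"
)

// makeRequestWithDeadline attaches a deadline to the request itself, so both
// the rate limiter's ctx.Err checks and Go's HTTP client will observe it.
func makeRequestWithDeadline(parent context.Context, url string) (*http.Request, context.CancelFunc, error) {
	ctx, cancel := context.WithTimeout(parent, 30*time.Second)
	req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil)
	if err != nil {
		cancel()
		return nil, nil, err
	}
	return req, cancel, nil
}
```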

ctx context.Context,
req *http.Request,
makeRequest func() (*http.Response, error),
) (*http.Response, error) {
if len(rl.limits) == 0 {
return makeRequest()
}

now := time.Now()

for i, lim := range rl.limits {
if err := ctx.Err(); err != nil {
return nil, err
}

// [NOTE] It's maybe better to do this asynchronously, in case an errant
// limit sleeps for a duration instead of until a specific time, but
// I haven't thought through that.
if err := lim.Execute(ctx, req, now); err != nil {
return nil, fmt.Errorf(
Contributor:

I'm not sure how I'd mechanize this, but I think I'd really want some non-fatal observability. What's getting rate limited? How often? How much time are we spending sleeping? Plugging this into whatever it is we use for metrics and then setting some sanity check alerting would be really nice.

I'm worried that things will start to get rate limited, there won't be anything visible, and things will just slow down or completely block without us knowing.

Contributor Author:

Oh definitely 💯. One of the things I'm hoping this gives us is unified metrics, etc. on rate limiting. I think we wouldn't merge something like this without that; at least I wouldn't want to.


Something I was thinking about is how useful it is for limits to have names. Like, I had that thought and I was waiting for validation that it was correct, and I think this is it. The options I can think of for this are:

  • instead of accepting (effectively) an array of limits in NewRateLimiter, use a map[string]RateLimit parameter instead
  • require RateLimit to have a .Name method

If it's possible to have the same RateLimit in multiple RateLimiters, I don't really know what's useful here. It'd be annoying if two RateLimits had the same name (.Name would allow this unless we explicitly check for it), but maybe it's also annoying for the same RateLimit to have different names in different RateLimiters.

OK thinking through it, I'm leaning towards map[string]RateLimit:

  • it's easy to trace what name is assigned to a RateLimit in calls to NewRateLimiter
  • naming uniqueness is strongly signaled and subsequently enforced
  • names may be better, i.e. you're more likely to get "max 500r/month" as a name than "TokenBucketRateLimiter"

Yeah actually that last point pushes me over the edge, OK
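
A rough sketch of the map[string]RateLimit option discussed above (the NamedRateLimiter name and fields are hypothetical, not part of this PR):

```go
// NamedRateLimiter keys each limit by a caller-assigned name, which can serve
// as a metrics label and replace the index in error messages. Map iteration
// order is not deterministic, so errors should identify limits by name rather
// than by position.
type NamedRateLimiter struct {
	limits map[string]RateLimit
}

func NewNamedRateLimiter(limits map[string]RateLimit) *NamedRateLimiter {
	return &NamedRateLimiter{limits: limits}
}

// Example construction (the limit values are placeholders):
//
//	rl := NewNamedRateLimiter(map[string]RateLimit{
//		"max 500r/month": monthlyCap,
//		"5r/s burst 1":   perSecondLimit,
//	})
```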

"error executing rate limit policy %d: %w",
i,
err,
)
}
}

res, err := makeRequest()
if err != nil {
return nil, fmt.Errorf("error making HTTP request: %w", err)
}

// [NOTE] errgroup.Group oddly isn't what we want here. It presumes you want
// to stop all other processing if a single task fails (we don't), and
// that functionality is the only reason to use it instead of a
// WaitGroup.
wg := &sync.WaitGroup{}
updateErrorLock := &sync.Mutex{}
var updateError error

for i, lim := range rl.limits {
wg.Add(1)
go func(i int, lim RateLimit) {
defer wg.Done()

if err := lim.Update(ctx, res); err != nil {
err = fmt.Errorf("error updating rate limit policy %d: %w", i, err)

updateErrorLock.Lock()
if updateError == nil {
updateError = err
} else {
updateError = errors.Join(updateError, err)
}
updateErrorLock.Unlock()
}
}(i, lim)
}

wg.Wait()

if updateError != nil {
return nil, fmt.Errorf("error updating rate limits: %w", updateError)
Collaborator:

What kinds of errors might we see here?

Contributor Author:

Interesting question... hmmm, off the top of my head:

  • unexpected HTTP status
  • looked for a header/headers but didn't get one/them
  • found a header(s) but value was bad
    • this could be anything from "'90 seconds' is not an integer" to "the API asked me to wait six years... I'm not doing that"
  • updating the state (e.g. if we're persisting it somewhere) failed
  • context timed out
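
For illustration, a sketch of an Update implementation that could return several of these errors while reading rate limit headers from a response (the HeaderBackedLimit type and header names are placeholders, not part of this PR):

```go
import (
	"context"
	"errors"
	"fmt"
	"net/http"
	"strconv"
	"sync"
	"time"
)

// HeaderBackedLimit is a hypothetical limit whose state comes entirely from
// response headers.
type HeaderBackedLimit struct {
	mu        sync.Mutex
	remaining int
	resetAt   time.Time
}

// Update parses X-RateLimit-Remaining / X-RateLimit-Reset and converts the
// reset epoch into an absolute, slightly padded time.
func (l *HeaderBackedLimit) Update(ctx context.Context, res *http.Response) error {
	if err := ctx.Err(); err != nil {
		return err // context timed out or was canceled
	}

	remaining := res.Header.Get("X-RateLimit-Remaining")
	reset := res.Header.Get("X-RateLimit-Reset")
	if remaining == "" || reset == "" {
		return errors.New("response is missing rate limit headers")
	}

	n, err := strconv.Atoi(remaining)
	if err != nil {
		return fmt.Errorf("X-RateLimit-Remaining %q is not an integer: %w", remaining, err)
	}
	epoch, err := strconv.ParseInt(reset, 10, 64)
	if err != nil {
		return fmt.Errorf("X-RateLimit-Reset %q is not an integer: %w", reset, err)
	}

	l.mu.Lock()
	defer l.mu.Unlock()
	l.remaining = n
	l.resetAt = time.Unix(epoch, 0).Add(2 * time.Second) // pad the reset time a little
	return nil
}
```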

}

return res, nil
}
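
For completeness, a minimal usage sketch of this API, following the three steps in the RateLimiter doc comment (the fetchItems name, URL, and limit parameters are placeholders; the standard library context is shown for readability, and in practice the limiter would be built once rather than per call):

```go
import (
	"context"
	"io"
	"net/http"
)

// fetchItems is a hypothetical caller: build the limiter with its limits,
// call .Do instead of client.Do, then process the response as usual.
func fetchItems(ctx context.Context, client *http.Client, perSecondLimit, monthlyCap RateLimit) ([]byte, error) {
	rl := NewRateLimiter(perSecondLimit, monthlyCap)

	req, err := http.NewRequestWithContext(ctx, http.MethodGet, "https://api.example.com/items", nil)
	if err != nil {
		return nil, err
	}

	// Do replaces the usual client.Do call: each limit decides whether (and
	// until when) to wait before makeRequest runs, then observes the response.
	res, err := rl.Do(ctx, req, func() (*http.Response, error) {
		return client.Do(req)
	})
	if err != nil {
		return nil, err
	}
	defer res.Body.Close()

	return io.ReadAll(res.Body)
}
```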
99 changes: 75 additions & 24 deletions pkg/sources/postman/postman_client.go
@@ -7,6 +7,7 @@ import (
"net/http"
"time"

"github.com/trufflesecurity/trufflehog/v3/pkg/common"
"github.com/trufflesecurity/trufflehog/v3/pkg/context"
"golang.org/x/time/rate"

@@ -183,6 +184,42 @@ type Response struct {
UID string `json:"uid,omitempty"`
}

// TokenBucketRateLimit implements a basic "requests per second with
// bursting" rate limiter.
type TokenBucketRateLimit struct {
limiter *rate.Limiter
}

// NewTokenBucketRateLimit creates a new TokenBucketRateLimit.
// lim: the sustained rate, in requests per second
// burst: the maximum number of requests that may be sent at once (the bucket size)
//
// This is a (very) thin wrapper around Google's rate limiter.
func NewTokenBucketRateLimit(lim rate.Limit, burst int) *TokenBucketRateLimit {
return &TokenBucketRateLimit{
limiter: rate.NewLimiter(rate.Limit(lim), burst),
}
}

func (tp *TokenBucketRateLimit) Execute(
ctx context.Context,
req *http.Request,
now time.Time,
) error {
if err := ctx.Err(); err != nil {
return err
}

return tp.limiter.Wait(ctx)
}

func (tp *TokenBucketRateLimit) Update(
ctx context.Context,
res *http.Response,
) error {
return nil
}

// A Client manages communication with the Postman API.
type Client struct {
// HTTP client used to communicate with the API
@@ -193,10 +230,10 @@ type Client struct {

// Rate limiter needed for Postman API workspace and collection requests. Postman API rate limit
// is 10 calls in 10 seconds for GET /collections, GET /workspaces, and GET /workspaces/{id} endpoints.
WorkspaceAndCollectionRateLimiter *rate.Limiter
WorkspaceAndCollectionRateLimiter *common.RateLimiter

// Rate limiter needed for Postman API. General rate limit is 300 requests per minute.
GeneralRateLimiter *rate.Limiter
GeneralRateLimiter *common.RateLimiter
}

// NewClient returns a new Postman API client.
@@ -208,10 +245,14 @@ func NewClient(postmanToken string) *Client {
}

c := &Client{
HTTPClient: http.DefaultClient,
Headers: bh,
WorkspaceAndCollectionRateLimiter: rate.NewLimiter(rate.Every(time.Second), 1),
GeneralRateLimiter: rate.NewLimiter(rate.Every(time.Second/5), 1),
HTTPClient: http.DefaultClient,
Headers: bh,
WorkspaceAndCollectionRateLimiter: common.NewRateLimiter(
NewTokenBucketRateLimit(rate.Every(time.Second), 1),
),
GeneralRateLimiter: common.NewRateLimiter(
NewTokenBucketRateLimit(rate.Every(time.Second/5), 1),
),
}

return c
@@ -247,13 +288,15 @@ func checkResponseStatus(r *http.Response) error {
}

// getPostmanResponseBodyBytes makes a request to the Postman API and returns the response body as bytes.
func (c *Client) getPostmanResponseBodyBytes(ctx context.Context, url string, headers map[string]string) ([]byte, error) {
func (c *Client) getPostmanResponseBodyBytes(ctx context.Context, url string, headers map[string]string, rl *common.RateLimiter) ([]byte, error) {
req, err := c.NewRequest(url, headers)
if err != nil {
return nil, err
}

resp, err := c.HTTPClient.Do(req)
resp, err := rl.Do(ctx, req, func() (*http.Response, error) {
return c.HTTPClient.Do(req)
})
if err != nil {
return nil, err
}
@@ -280,10 +323,12 @@ func (c *Client) EnumerateWorkspaces(ctx context.Context) ([]Workspace, error) {
Workspaces []Workspace `json:"workspaces"`
}{}

if err := c.WorkspaceAndCollectionRateLimiter.Wait(ctx); err != nil {
return nil, fmt.Errorf("could not wait for rate limiter during workspaces enumeration getting: %w", err)
}
body, err := c.getPostmanResponseBodyBytes(ctx, "https://api.getpostman.com/workspaces", nil)
body, err := c.getPostmanResponseBodyBytes(
ctx,
"https://api.getpostman.com/workspaces",
nil,
c.WorkspaceAndCollectionRateLimiter,
)
if err != nil {
return nil, fmt.Errorf("could not get postman workspace response bytes during enumeration: %w", err)
}
@@ -315,10 +360,12 @@ func (c *Client) GetWorkspace(ctx context.Context, workspaceUUID string) (Worksp
}{}

url := fmt.Sprintf(WORKSPACE_URL, workspaceUUID)
if err := c.WorkspaceAndCollectionRateLimiter.Wait(ctx); err != nil {
return Workspace{}, fmt.Errorf("could not wait for rate limiter during workspace getting: %w", err)
}
body, err := c.getPostmanResponseBodyBytes(ctx, url, nil)
body, err := c.getPostmanResponseBodyBytes(
ctx,
url,
nil,
c.WorkspaceAndCollectionRateLimiter,
)
if err != nil {
return Workspace{}, fmt.Errorf("could not get postman workspace (%s) response bytes: %w", workspaceUUID, err)
}
@@ -336,10 +383,12 @@ func (c *Client) GetEnvironmentVariables(ctx context.Context, environment_uuid s
}{}

url := fmt.Sprintf(ENVIRONMENTS_URL, environment_uuid)
if err := c.GeneralRateLimiter.Wait(ctx); err != nil {
return VariableData{}, fmt.Errorf("could not wait for rate limiter during environment variable getting: %w", err)
}
body, err := c.getPostmanResponseBodyBytes(ctx, url, nil)
body, err := c.getPostmanResponseBodyBytes(
ctx,
url,
nil,
c.GeneralRateLimiter,
)
if err != nil {
return VariableData{}, fmt.Errorf("could not get postman environment (%s) response bytes: %w", environment_uuid, err)
}
@@ -357,10 +406,12 @@ func (c *Client) GetCollection(ctx context.Context, collection_uuid string) (Col
}{}

url := fmt.Sprintf(COLLECTIONS_URL, collection_uuid)
if err := c.WorkspaceAndCollectionRateLimiter.Wait(ctx); err != nil {
return Collection{}, fmt.Errorf("could not wait for rate limiter during collection getting: %w", err)
}
body, err := c.getPostmanResponseBodyBytes(ctx, url, nil)
body, err := c.getPostmanResponseBodyBytes(
ctx,
url,
nil,
c.WorkspaceAndCollectionRateLimiter,
)
if err != nil {
return Collection{}, fmt.Errorf("could not get postman collection (%s) response bytes: %w", collection_uuid, err)
}