-
Notifications
You must be signed in to change notification settings - Fork 1.6k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Custom regex parallel verify #1127
Changes from 2 commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -12,6 +12,7 @@ import ( | |
"github.com/trufflesecurity/trufflehog/v3/pkg/detectors" | ||
"github.com/trufflesecurity/trufflehog/v3/pkg/pb/custom_detectorspb" | ||
"github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb" | ||
"golang.org/x/sync/errgroup" | ||
) | ||
|
||
// The maximum number of matches from one chunk. This const is used when | ||
|
@@ -82,66 +83,98 @@ func (c *customRegexWebhook) FromData(ctx context.Context, verify bool, data []b | |
// ] | ||
matches := permutateMatches(regexMatches) | ||
|
||
g := new(errgroup.Group) | ||
|
||
// Create result object and test for verification. | ||
resultsCh := make(chan detectors.Result, maxTotalMatches) | ||
for _, match := range matches { | ||
match := match | ||
g.Go(func() error { | ||
err := c.createResults(ctx, match, verify, resultsCh) | ||
0x1 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
return err | ||
}) | ||
} | ||
|
||
if err := g.Wait(); err != nil { | ||
return nil, err | ||
} | ||
close(resultsCh) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. suggestion: I think we should move this up right after you created the chan and defer the close. Otherwise if we encounter an err in the goroutines w/in the errgroup we are going to leak with this unclosed channel. so just: There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. aaah! yes that's way cleaner!!! There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Unfortunately we need to I do think that maybe we can ignore the error returned by |
||
|
||
for result := range resultsCh { | ||
results = append(results, result) | ||
} | ||
|
||
return results, nil | ||
} | ||
|
||
func (c *customRegexWebhook) createResults(ctx context.Context, match map[string][]string, verify bool, results chan<- detectors.Result) error { | ||
if common.IsDone(ctx) { | ||
// TODO: Log we're possibly leaving out results. | ||
return ctx.Err() | ||
} | ||
var raw string | ||
for _, values := range match { | ||
// values[0] contains the entire regex match. | ||
raw += values[0] | ||
} | ||
result := detectors.Result{ | ||
DetectorType: detectorspb.DetectorType_CustomRegex, | ||
Raw: []byte(raw), | ||
} | ||
|
||
if !verify { | ||
select { | ||
case <-ctx.Done(): | ||
return ctx.Err() | ||
case results <- result: | ||
return nil | ||
} | ||
} | ||
// Verify via webhook. | ||
jsonBody, err := json.Marshal(map[string]map[string][]string{ | ||
c.GetName(): match, | ||
}) | ||
if err != nil { | ||
// This should never happen, but if it does, return nil to not | ||
// disrupt other verification. | ||
return nil | ||
} | ||
// Try each config until we successfully verify. | ||
for _, verifyConfig := range c.GetVerify() { | ||
if common.IsDone(ctx) { | ||
// TODO: Log we're possibly leaving out results. | ||
return results, nil | ||
} | ||
var raw string | ||
for _, values := range match { | ||
// values[0] contains the entire regex match. | ||
raw += values[0] | ||
} | ||
result := detectors.Result{ | ||
DetectorType: detectorspb.DetectorType_CustomRegex, | ||
Raw: []byte(raw), | ||
} | ||
|
||
if !verify { | ||
results = append(results, result) | ||
continue | ||
return ctx.Err() | ||
} | ||
// Verify via webhook. | ||
jsonBody, err := json.Marshal(map[string]map[string][]string{ | ||
c.GetName(): match, | ||
}) | ||
req, err := http.NewRequestWithContext(ctx, "POST", verifyConfig.GetEndpoint(), bytes.NewReader(jsonBody)) | ||
if err != nil { | ||
continue | ||
} | ||
// Try each config until we successfully verify. | ||
for _, verifyConfig := range c.GetVerify() { | ||
if common.IsDone(ctx) { | ||
// TODO: Log we're possibly leaving out results. | ||
return results, nil | ||
} | ||
req, err := http.NewRequestWithContext(ctx, "POST", verifyConfig.GetEndpoint(), bytes.NewReader(jsonBody)) | ||
if err != nil { | ||
continue | ||
} | ||
for _, header := range verifyConfig.GetHeaders() { | ||
key, value, found := strings.Cut(header, ":") | ||
if !found { | ||
// Should be unreachable due to validation. | ||
continue | ||
} | ||
req.Header.Add(key, strings.TrimLeft(value, "\t\n\v\f\r ")) | ||
} | ||
res, err := httpClient.Do(req) | ||
if err != nil { | ||
for _, header := range verifyConfig.GetHeaders() { | ||
key, value, found := strings.Cut(header, ":") | ||
if !found { | ||
// Should be unreachable due to validation. | ||
continue | ||
} | ||
// TODO: Read response body. | ||
res.Body.Close() | ||
if res.StatusCode == http.StatusOK { | ||
result.Verified = true | ||
break | ||
} | ||
req.Header.Add(key, strings.TrimLeft(value, "\t\n\v\f\r ")) | ||
} | ||
res, err := httpClient.Do(req) | ||
if err != nil { | ||
continue | ||
} | ||
// TODO: Read response body. | ||
res.Body.Close() | ||
if res.StatusCode == http.StatusOK { | ||
result.Verified = true | ||
break | ||
} | ||
results = append(results, result) | ||
} | ||
|
||
return results, nil | ||
select { | ||
case <-ctx.Done(): | ||
return ctx.Err() | ||
case results <- result: | ||
return nil | ||
} | ||
} | ||
|
||
func (c *customRegexWebhook) Keywords() []string { | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
question: Are we okay not setting an upper bound on the number of goroutines here? Is the idea the len of matches will be low enough for this not to cause issues?
If we did plan on bounding the number of goroutines we can add:
g.SetLimit(<some-limit>)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@mcastorina thoughts?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I was thinking unlimited would be okay because it's at most 100 goroutines that will be doing "async I/O" work.