diff --git a/pkg/detectors/aws/aws.go b/pkg/detectors/aws/aws.go
index caaff1d54663..1174e1c4c80b 100644
--- a/pkg/detectors/aws/aws.go
+++ b/pkg/detectors/aws/aws.go
@@ -28,6 +28,10 @@ var (
 	// Key types are from this list https://docs.aws.amazon.com/IAM/latest/UserGuide/reference_identifiers.html#identifiers-unique-ids
 	idPat     = regexp.MustCompile(`\b((?:AKIA|ABIA|ACCA|ASIA)[0-9A-Z]{16})\b`)
 	secretPat = regexp.MustCompile(`\b([A-Za-z0-9+/]{40})\b`)
+	// Hashes, like those for git, do technically match the secret pattern.
+	// But they are extremely unlikely to be generated as an actual AWS secret.
+	// So when we find them, if they're not verified, we should ignore the result.
+	falsePositiveSecretCheck = regexp.MustCompile(`[a-f0-9]{40}`)
 )
 
 // Keywords are used for efficiently pre-filtering chunks.
@@ -148,6 +152,11 @@ func (s Scanner) FromData(ctx context.Context, verify bool, data []byte) (result
 				}
 			}
 
+			// If the result is unverified and matches something like a git hash, don't include it in the results.
+			if !s1.Verified && falsePositiveSecretCheck.MatchString(resSecretMatch) {
+				continue
+			}
+
 			results = append(results, s1)
 			// If we've found a verified match with this ID, we don't need to look for any more. So move on to the next ID.
 			if s1.Verified {
@@ -155,5 +164,34 @@ func (s Scanner) FromData(ctx context.Context, verify bool, data []byte) (result
 			}
 		}
 	}
-	return detectors.CleanResults(results), nil
+	return awsCustomCleanResults(results), nil
+}
+
+// awsCustomCleanResults keeps at most one result per ID (Result.Redacted),
+// preferring a verified result over an unverified one. Results are returned
+// in the order their IDs were first seen, so the output is deterministic.
+func awsCustomCleanResults(results []detectors.Result) []detectors.Result {
+	if len(results) == 0 {
+		return results
+	}
+
+	// Remember where each ID sits in out; ranging over a map to build the
+	// output would return the results in a random order.
+	idIndex := make(map[string]int, len(results))
+	out := make([]detectors.Result, 0, len(results))
+	for _, result := range results {
+		i, seen := idIndex[result.Redacted]
+		if !seen {
+			// First result for this ID: keep it, verified or not.
+			idIndex[result.Redacted] = len(out)
+			out = append(out, result)
+			continue
+		}
+
+		// A verified result always replaces whatever is held for its ID.
+		if result.Verified {
+			out[i] = result
+		}
+	}
+	return out
 }
diff --git a/pkg/detectors/aws/aws_test.go b/pkg/detectors/aws/aws_test.go
index a663b8701b1b..7119408bda30 100644
--- a/pkg/detectors/aws/aws_test.go
+++ b/pkg/detectors/aws/aws_test.go
@@ -2,15 +2,16 @@ package aws
 
 import (
 	"context"
+	"crypto/sha256"
 	"fmt"
 	"testing"
 	"time"
 
-	"github.com/kylelemons/godebug/pretty"
-	"github.com/trufflesecurity/trufflehog/v3/pkg/detectors"
-
 	"github.com/trufflesecurity/trufflehog/v3/pkg/common"
+	"github.com/trufflesecurity/trufflehog/v3/pkg/detectors"
 	"github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb"
+
+	"github.com/kylelemons/godebug/pretty"
 )
 
 func TestAWS_FromChunk(t *testing.T) {
@@ -23,6 +24,10 @@ func TestAWS_FromChunk(t *testing.T) {
 	secret := testSecrets.MustGetField("AWS")
 	id := testSecrets.MustGetField("AWS_ID")
 	inactiveSecret := testSecrets.MustGetField("AWS_INACTIVE")
+	inactiveID := id[:len(id)-3] + "XYZ"
+	// A 40-character lowercase hex string, shaped like a git commit hash, so it matches both the secret pattern and the false-positive check.
+	sum := sha256.Sum256([]byte(inactiveSecret))
+	hash := fmt.Sprintf("%x", sum)[:40]
 
 	type args struct {
 		ctx    context.Context
@@ -81,6 +86,61 @@ func TestAWS_FromChunk(t *testing.T) {
 			want:    nil,
 			wantErr: false,
 		},
+		{
+			name: "found two, one included for every ID found",
+			s:    Scanner{},
+			args: args{
+				ctx:    context.Background(),
+				data:   []byte(fmt.Sprintf("The verified ID is %s with a secret of %s, but the unverified ID is %s and this is the secret %s", id, secret, inactiveID, inactiveSecret)),
+				verify: true,
+			},
+			want: []detectors.Result{
+				{
+					DetectorType: detectorspb.DetectorType_AWS,
+					Verified:     true,
+					Redacted:     id,
+				},
+				{
+					DetectorType: detectorspb.DetectorType_AWS,
+					Verified:     false,
+					Redacted:     inactiveID,
+				},
+			},
+			wantErr: false,
+		},
+		{
+			name: "not found, because unverified secret was a hash",
+			s:    Scanner{},
+			args: args{
+				ctx:    context.Background(),
+				data:   []byte(fmt.Sprintf("You can find a aws secret %s within aws %s but not valid", hash, id)), // The secret would satisfy the regex but be filtered out after not passing validation.
+				verify: true,
+			},
+			want:    nil,
+			wantErr: false,
+		},
+		{
+			name: "found two, returned both because the active secret for one paired with the inactive ID, despite the hash",
+			s:    Scanner{},
+			args: args{
+				ctx:    context.Background(),
+				data:   []byte(fmt.Sprintf("The verified ID is %s with a secret of %s, but the unverified ID is %s and the secret is this hash %s", id, secret, inactiveID, hash)),
+				verify: true,
+			},
+			want: []detectors.Result{
+				{
+					DetectorType: detectorspb.DetectorType_AWS,
+					Verified:     true,
+					Redacted:     id,
+				},
+				{
+					DetectorType: detectorspb.DetectorType_AWS,
+					Verified:     false,
+					Redacted:     inactiveID,
+				},
+			},
+			wantErr: false,
+		},
 	}
 	for _, tt := range tests {
 		t.Run(tt.name, func(t *testing.T) {