Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Make archive handler configurable #1077

Merged
merged 2 commits into from
Feb 7, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions main.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ import (
"github.com/trufflesecurity/trufflehog/v3/pkg/context"
"github.com/trufflesecurity/trufflehog/v3/pkg/decoders"
"github.com/trufflesecurity/trufflehog/v3/pkg/engine"
"github.com/trufflesecurity/trufflehog/v3/pkg/handlers"
"github.com/trufflesecurity/trufflehog/v3/pkg/log"
"github.com/trufflesecurity/trufflehog/v3/pkg/output"
"github.com/trufflesecurity/trufflehog/v3/pkg/sources"
Expand All @@ -46,6 +47,9 @@ var (
printAvgDetectorTime = cli.Flag("print-avg-detector-time", "Print the average time spent on each detector.").Bool()
noUpdate = cli.Flag("no-update", "Don't check for updates.").Bool()
fail = cli.Flag("fail", "Exit with code 183 if results are found.").Bool()
archiveMaxSize = cli.Flag("archive-max-size", "Maximum size of archive to scan.").Bytes()
archiveMaxDepth = cli.Flag("archive-max-depth", "Maximum depth of archive to scan.").Int()
archiveTimeout = cli.Flag("archive-timeout", "Maximum time to spend extracting an archive.").Duration()

gitScan = cli.Command("git", "Find credentials in git repositories.")
gitScanURI = gitScan.Arg("uri", "Git repository URL. https://, file://, or ssh:// schema expected.").Required().String()
Expand Down Expand Up @@ -192,6 +196,16 @@ func run(state overseer.State) {
}
}

if *archiveMaxSize != 0 {
handlers.SetArchiveMaxSize(int(*archiveMaxSize))
}
if *archiveMaxDepth != 0 {
handlers.SetArchiveMaxDepth(*archiveMaxDepth)
}
if *archiveTimeout != 0 {
handlers.SetArchiveMaxTimeout(*archiveTimeout)
}

ctx := context.TODO()
e := engine.Start(ctx,
engine.WithConcurrency(*concurrency),
Expand Down
51 changes: 36 additions & 15 deletions pkg/handlers/archive.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,11 @@ import (
"errors"
"fmt"
"io"
"time"

"github.com/mholt/archiver/v4"
log "github.com/sirupsen/logrus"
"github.com/trufflesecurity/trufflehog/v3/pkg/common"
)

type ctxKey int
Expand All @@ -18,26 +20,42 @@ const (
)

var (
maxDepth = 5
maxDepth = 5
maxSize = 250 * 1024 * 1024 // 20MB
maxTimeout = time.Duration(30) * time.Second
)

// Archive is a handler for extracting and decompressing archives.
// The zero value is usable; New resets it for reuse.
type Archive struct {
	// size is the running count of bytes read from the current archive,
	// compared against the package-level maxSize limit in ReadToMax.
	size int
}

// New resets the handler's running size counter so the instance can be
// reused for another archive. The size limit itself is the package-level
// maxSize, configurable via SetArchiveMaxSize.
func (d *Archive) New() {
	d.size = 0
}

// SetArchiveMaxSize sets the maximum number of bytes the archive handler
// will read from a single archive. Plain assignment to a package-level
// variable — not synchronized; call before scanning starts.
func SetArchiveMaxSize(size int) {
	maxSize = size
}

// SetArchiveMaxDepth sets the maximum nesting depth of archives-within-
// archives that will be recursively extracted. Plain assignment to a
// package-level variable — not synchronized; call before scanning starts.
func SetArchiveMaxDepth(depth int) {
	maxDepth = depth
}

// SetArchiveMaxTimeout sets the maximum wall-clock time the archive
// handler may spend extracting a single archive (enforced via a
// context.WithTimeout in FromFile). Plain assignment to a package-level
// variable — not synchronized; call before scanning starts.
func SetArchiveMaxTimeout(timeout time.Duration) {
	maxTimeout = timeout
}

// FromFile extracts the files from an archive.
func (d *Archive) FromFile(data io.Reader) chan ([]byte) {
ctx := context.Background()
func (d *Archive) FromFile(originalCtx context.Context, data io.Reader) chan ([]byte) {
archiveChan := make(chan ([]byte), 512)
go func() {
ctx, cancel := context.WithTimeout(originalCtx, maxTimeout)
defer cancel()
defer close(archiveChan)
err := d.openArchive(ctx, 0, data, archiveChan)
if err != nil {
Expand Down Expand Up @@ -83,7 +101,7 @@ func (d *Archive) openArchive(ctx context.Context, depth int, reader io.Reader,
if err != nil {
return err
}
fileBytes, err := d.ReadToMax(compReader)
fileBytes, err := d.ReadToMax(ctx, compReader)
if err != nil {
return err
}
Expand All @@ -94,7 +112,7 @@ func (d *Archive) openArchive(ctx context.Context, depth int, reader io.Reader,
}

// IsFiletype returns true if the provided reader is an archive.
func (d *Archive) IsFiletype(reader io.Reader) (io.Reader, bool) {
func (d *Archive) IsFiletype(ctx context.Context, reader io.Reader) (io.Reader, bool) {
format, readerB, err := archiver.Identify("", reader)
if err != nil {
return readerB, false
Expand All @@ -121,7 +139,7 @@ func (d *Archive) extractorHandler(archiveChan chan ([]byte)) func(context.Conte
if err != nil {
return err
}
fileBytes, err := d.ReadToMax(fReader)
fileBytes, err := d.ReadToMax(ctx, fReader)
if err != nil {
return err
}
Expand All @@ -136,7 +154,7 @@ func (d *Archive) extractorHandler(archiveChan chan ([]byte)) func(context.Conte
}

// ReadToMax reads up to the max size.
func (d *Archive) ReadToMax(reader io.Reader) (data []byte, err error) {
func (d *Archive) ReadToMax(ctx context.Context, reader io.Reader) (data []byte, err error) {
// Archiver v4 is in alpha and using an experimental version of
// rardecode. There is a bug somewhere with rar decoder format 29
// that can lead to a panic. An issue is open in rardecode repo
Expand All @@ -153,8 +171,11 @@ func (d *Archive) ReadToMax(reader io.Reader) (data []byte, err error) {
}
}()
fileContent := bytes.Buffer{}
log.Tracef("Remaining buffer capacity: %d", d.maxSize-d.size)
for i := 0; i <= d.maxSize/512; i++ {
log.Tracef("Remaining buffer capacity: %d", maxSize-d.size)
for i := 0; i <= maxSize/512; i++ {
if common.IsDone(ctx) {
return nil, ctx.Err()
}
fileChunk := make([]byte, 512)
bRead, err := reader.Read(fileChunk)
if err != nil && !errors.Is(err, io.EOF) {
Expand All @@ -165,11 +186,11 @@ func (d *Archive) ReadToMax(reader io.Reader) (data []byte, err error) {
fileContent.Write(fileChunk[0:bRead])
}
if bRead < 512 {
break
return fileContent.Bytes(), nil
}
if d.size >= d.maxSize && bRead == 512 {
if d.size >= maxSize && bRead == 512 {
log.Debug("Max archive size reached.")
break
return fileContent.Bytes(), nil
}
}
return fileContent.Bytes(), nil
Expand Down
2 changes: 1 addition & 1 deletion pkg/handlers/archive_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ func TestArchiveHandler(t *testing.T) {
if err != nil {
t.Errorf("error creating reusable reader: %s", err)
}
archiveChan := archive.FromFile(newReader)
archiveChan := archive.FromFile(context.TODO(), newReader)

count := 0
re := regexp.MustCompile(testCase.matchString)
Expand Down
8 changes: 4 additions & 4 deletions pkg/handlers/handlers.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,8 @@ func DefaultHandlers() []Handler {
}

// Handler extracts scannable chunks from a particular file format
// (e.g. archives). Implementations are selected by IsFiletype in
// HandleFile.
type Handler interface {
	// FromFile streams the extracted contents of the reader as []byte
	// chunks on the returned channel, honoring ctx cancellation.
	FromFile(context.Context, io.Reader) chan ([]byte)
	// IsFiletype reports whether the reader's content matches this
	// handler's format. It returns a replacement reader so that any
	// bytes consumed while sniffing are restored for the caller.
	IsFiletype(context.Context, io.Reader) (io.Reader, bool)
	// New resets the handler's internal state before reuse.
	New()
}

Expand All @@ -25,7 +25,7 @@ func HandleFile(ctx context.Context, file io.Reader, chunkSkel *sources.Chunk, c
for _, h := range DefaultHandlers() {
h.New()
var isType bool
if file, isType = h.IsFiletype(file); isType {
if file, isType = h.IsFiletype(ctx, file); isType {
handler = h
break
}
Expand All @@ -35,7 +35,7 @@ func HandleFile(ctx context.Context, file io.Reader, chunkSkel *sources.Chunk, c
}

// Process the file and read all []byte chunks from handlerChan.
handlerChan := handler.FromFile(file)
handlerChan := handler.FromFile(ctx, file)
for {
select {
case data, open := <-handlerChan:
Expand Down