Skip to content

Commit

Permalink
Split files instead of using ReadAll (trufflesecurity#1387)
Browse files — browse the repository at this point in the history
* Split files instead of using ReadAll

* Remove duplicate chunk

* Actually break out of loop
  • Loading branch information
bill-rich committed Jun 12, 2023
1 parent ca19472 commit c2e3e7d
Showing 1 changed file with 28 additions and 16 deletions.
44 changes: 28 additions & 16 deletions pkg/sources/filesystem/filesystem.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package filesystem

import (
"bufio"
"fmt"
"io"
"io/fs"
Expand Down Expand Up @@ -181,23 +182,34 @@ func (s *Source) scanFile(ctx context.Context, path string, chunksChan chan *sou
return err
}
reReader.Stop()
data, err := io.ReadAll(reReader)
if err != nil {
return fmt.Errorf("unable to read file: %w", err)
}
chunksChan <- &sources.Chunk{
SourceType: s.Type(),
SourceName: s.name,
SourceID: s.SourceID(),
Data: data,
SourceMetadata: &source_metadatapb.MetaData{
Data: &source_metadatapb.MetaData_Filesystem{
Filesystem: &source_metadatapb.Filesystem{
File: sanitizer.UTF8(path),

for {
chunkBytes := make([]byte, BufferSize)
reader := bufio.NewReaderSize(reReader, BufferSize)
n, err := reader.Read(chunkBytes)
if err != nil && !errors.Is(err, io.EOF) {
break
}
peekData, _ := reader.Peek(PeekSize)
if n > 0 {
chunksChan <- &sources.Chunk{
SourceType: s.Type(),
SourceName: s.name,
SourceID: s.SourceID(),
Data: append(chunkBytes[:n], peekData...),
SourceMetadata: &source_metadatapb.MetaData{
Data: &source_metadatapb.MetaData_Filesystem{
Filesystem: &source_metadatapb.Filesystem{
File: sanitizer.UTF8(path),
},
},
},
},
},
Verify: s.verify,
Verify: s.verify,
}
}
if errors.Is(err, io.EOF) {
break
}
}
return nil
}

0 comments on commit c2e3e7d

Please sign in to comment.