From 6663caf8e90946d5d72ae9783ba28ddaac3e657c Mon Sep 17 00:00:00 2001 From: Miccah Castorina Date: Fri, 13 Sep 2024 18:53:25 -0700 Subject: [PATCH 1/8] Implement SourceUnitEnumChunker for GitHub This change refactors the internal scan method to introduce a scanRepo method to perform the actual scan. --- pkg/sources/github/github.go | 109 ++++++++++++++++-------------- pkg/sources/github/github_test.go | 4 +- 2 files changed, 62 insertions(+), 51 deletions(-) diff --git a/pkg/sources/github/github.go b/pkg/sources/github/github.go index c98031627f54..e4dc8b4aff54 100644 --- a/pkg/sources/github/github.go +++ b/pkg/sources/github/github.go @@ -118,6 +118,7 @@ func (s *Source) setScanOptions(base, head string) { // Ensure the Source satisfies the interfaces at compile time var _ sources.Source = (*Source)(nil) var _ sources.SourceUnitUnmarshaller = (*Source)(nil) +var _ sources.SourceUnitEnumChunker = (*Source)(nil) var endsWithGithub = regexp.MustCompile(`github\.com/?$`) @@ -338,7 +339,7 @@ func (s *Source) Chunks(ctx context.Context, chunksChan chan *sources.Chunk, tar return nil }, } - err := s.enumerate(ctx, noopReporter) + err := s.Enumerate(ctx, noopReporter) if err != nil { return fmt.Errorf("error enumerating: %w", err) } @@ -346,11 +347,11 @@ func (s *Source) Chunks(ctx context.Context, chunksChan chan *sources.Chunk, tar return s.scan(ctx, chunksReporter) } -// enumerate enumerates the GitHub source based on authentication method and +// Enumerate enumerates the GitHub source based on authentication method and // user configuration. It populates s.filteredRepoCache, s.repoInfoCache, // s.memberCache, s.totalRepoSize, s.orgsCache, and s.repos. Additionally, // repositories and gists are reported to the provided UnitReporter. -func (s *Source) enumerate(ctx context.Context, reporter sources.UnitReporter) error { +func (s *Source) Enumerate(ctx context.Context, reporter sources.UnitReporter) error { seenUnits := make(map[sources.SourceUnit]struct{}) // Wrapper reporter to deduplicate and filter found units. dedupeReporter := sources.VisitorReporter{ @@ -603,7 +604,6 @@ func (s *Source) scan(ctx context.Context, reporter sources.ChunkReporter) error reposToScan, progressIndexOffset := sources.FilterReposToResume(s.repos, s.GetProgress().EncodedResumeInfo) s.repos = reposToScan - scanErrs := sources.NewScanErrors() // Setup scan options if it wasn't provided. if s.scanOptions == nil { s.scanOptions = &git.ScanOptions{} @@ -615,6 +615,7 @@ func (s *Source) scan(ctx context.Context, reporter sources.ChunkReporter) error if common.IsDone(ctx) { return nil } + ctx := context.WithValue(ctx, "repo", repoURL) // TODO: set progress complete is being called concurrently with i s.setProgressCompleteWithRepo(i, progressIndexOffset, repoURL) @@ -625,64 +626,68 @@ func (s *Source) scan(ctx context.Context, reporter sources.ChunkReporter) error s.resumeInfoSlice = sources.RemoveRepoFromResumeInfo(s.resumeInfoSlice, repoURL) }(s, repoURL) - if !strings.HasSuffix(repoURL, ".git") { - scanErrs.Add(fmt.Errorf("repo %s does not end in .git", repoURL)) + if err := s.scanRepo(ctx, repoURL, reporter); err != nil { + ctx.Logger().Error(err, "error scanning repo") return nil } - // Scan the repository - repoInfo, ok := s.repoInfoCache.get(repoURL) - if !ok { - // This should never happen. - err := fmt.Errorf("no repoInfo for URL: %s", repoURL) - ctx.Logger().Error(err, "failed to scan repository") - return nil - } - repoCtx := context.WithValues(ctx, "repo", repoURL) - duration, err := s.cloneAndScanRepo(repoCtx, repoURL, repoInfo, reporter) - if err != nil { - scanErrs.Add(err) - return nil - } + atomic.AddUint64(&scannedCount, 1) + return nil + }) + } - // Scan the wiki, if enabled, and the repo has one. - if s.conn.IncludeWikis && repoInfo.hasWiki && s.wikiIsReachable(ctx, repoURL) { - wikiURL := strings.TrimSuffix(repoURL, ".git") + ".wiki.git" - wikiCtx := context.WithValue(ctx, "repo", wikiURL) - - _, err := s.cloneAndScanRepo(wikiCtx, wikiURL, repoInfo, reporter) - if err != nil { - // Ignore "Repository not found" errors. - // It's common for GitHub's API to say a repo has a wiki when it doesn't. - if !strings.Contains(err.Error(), "not found") { - scanErrs.Add(fmt.Errorf("error scanning wiki: %w", err)) - } + _ = s.jobPool.Wait() + s.SetProgressComplete(len(s.repos), len(s.repos), "Completed GitHub scan", "") - // Don't return, it still might be possible to scan comments. - } - } + return nil +} + +func (s *Source) scanRepo(ctx context.Context, repoURL string, reporter sources.ChunkReporter) error { + if !strings.HasSuffix(repoURL, ".git") { + return fmt.Errorf("repo does not end in .git") + } + // Scan the repository + repoInfo, ok := s.repoInfoCache.get(repoURL) + if !ok { + // This should never happen. + return fmt.Errorf("no repoInfo for URL: %s", repoURL) + } + duration, err := s.cloneAndScanRepo(ctx, repoURL, repoInfo, reporter) + if err != nil { + return err + } + + // Scan the wiki, if enabled, and the repo has one. + if s.conn.IncludeWikis && repoInfo.hasWiki && s.wikiIsReachable(ctx, repoURL) { + wikiURL := strings.TrimSuffix(repoURL, ".git") + ".wiki.git" + wikiCtx := context.WithValue(ctx, "repo", wikiURL) - // Scan comments, if enabled. - if s.includeGistComments || s.includeIssueComments || s.includePRComments { - if err = s.scanComments(repoCtx, repoURL, repoInfo, reporter); err != nil { - scanErrs.Add(fmt.Errorf("error scanning comments in repo %s: %w", repoURL, err)) - return nil + _, err := s.cloneAndScanRepo(wikiCtx, wikiURL, repoInfo, reporter) + if err != nil { + // Ignore "Repository not found" errors. + // It's common for GitHub's API to say a repo has a wiki when it doesn't. + if !strings.Contains(err.Error(), "not found") { + if err := reporter.ChunkErr(ctx, fmt.Errorf("error scanning wiki: %w", err)); err != nil { + return err } } - repoCtx.Logger().V(2).Info(fmt.Sprintf("scanned %d/%d repos", scannedCount, len(s.repos)), "duration_seconds", duration) - githubReposScanned.WithLabelValues(s.name).Inc() - atomic.AddUint64(&scannedCount, 1) - return nil - }) + // Don't return, it still might be possible to scan comments. + } } - _ = s.jobPool.Wait() - if scanErrs.Count() > 0 { - ctx.Logger().Info("failed to scan some repositories", "error_count", scanErrs.Count(), "errors", scanErrs.String()) + // Scan comments, if enabled. + if s.includeGistComments || s.includeIssueComments || s.includePRComments { + if err := s.scanComments(ctx, repoURL, repoInfo, reporter); err != nil { + err := fmt.Errorf("error scanning comments: %w", err) + if err := reporter.ChunkErr(ctx, err); err != nil { + return err + } + } } - s.SetProgressComplete(len(s.repos), len(s.repos), "Completed GitHub scan", "") + ctx.Logger().V(2).Info("finished scanning repo", "duration_seconds", duration) + githubReposScanned.WithLabelValues(s.name).Inc() return nil } @@ -1478,3 +1483,9 @@ func (s *Source) scanTarget(ctx context.Context, target sources.ChunkingTarget, Verify: s.verify} return handlers.HandleFile(ctx, readCloser, &chunkSkel, reporter) } + +func (s *Source) ChunkUnit(ctx context.Context, unit sources.SourceUnit, reporter sources.ChunkReporter) error { + repoURL, _ := unit.SourceUnitID() + ctx = context.WithValue(ctx, "repo", repoURL) + return s.scanRepo(ctx, repoURL, reporter) +} diff --git a/pkg/sources/github/github_test.go b/pkg/sources/github/github_test.go index d659ca0146ad..bf722fa2174a 100644 --- a/pkg/sources/github/github_test.go +++ b/pkg/sources/github/github_test.go @@ -577,7 +577,7 @@ func TestEnumerate(t *testing.T) { } // Act - err := s.enumerate(context.Background(), reporter) + err := s.Enumerate(context.Background(), reporter) slices.Sort(reportedRepos) // Assert @@ -653,7 +653,7 @@ func BenchmarkEnumerate(b *testing.B) { setupMocks(b) b.StartTimer() - _ = s.enumerate(context.Background(), noopReporter()) + _ = s.Enumerate(context.Background(), noopReporter()) } } From 80ffe9c9109c7e277d77ff9d0c1c83e0876c73ed Mon Sep 17 00:00:00 2001 From: Miccah Castorina Date: Fri, 13 Sep 2024 18:56:16 -0700 Subject: [PATCH 2/8] Export unit fields so the values are captured in the report --- pkg/sources/github/github.go | 20 ++++++++++---------- pkg/sources/github/repo.go | 2 +- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/pkg/sources/github/github.go b/pkg/sources/github/github.go index e4dc8b4aff54..2299e58da61d 100644 --- a/pkg/sources/github/github.go +++ b/pkg/sources/github/github.go @@ -84,20 +84,20 @@ var _ sources.SourceUnit = (*RepoUnit)(nil) var _ sources.SourceUnit = (*GistUnit)(nil) type RepoUnit struct { - name string - url string + Name string `json:"name"` + URL string `json:"url"` } -func (r RepoUnit) SourceUnitID() (string, sources.SourceUnitKind) { return r.url, "repo" } -func (r RepoUnit) Display() string { return r.name } +func (r RepoUnit) SourceUnitID() (string, sources.SourceUnitKind) { return r.URL, "repo" } +func (r RepoUnit) Display() string { return r.Name } type GistUnit struct { - name string - url string + Name string `json:"name"` + URL string `json:"url"` } -func (g GistUnit) SourceUnitID() (string, sources.SourceUnitKind) { return g.url, "gist" } -func (g GistUnit) Display() string { return g.name } +func (g GistUnit) SourceUnitID() (string, sources.SourceUnitKind) { return g.URL, "gist" } +func (g GistUnit) Display() string { return g.Name } // -------------------------------------------------------------------------------- @@ -373,7 +373,7 @@ func (s *Source) Enumerate(ctx context.Context, reporter sources.UnitReporter) e // Report any values that were already configured. for _, name := range s.filteredRepoCache.Keys() { url, _ := s.filteredRepoCache.Get(name) - _ = dedupeReporter.UnitOk(ctx, RepoUnit{name: name, url: url}) + _ = dedupeReporter.UnitOk(ctx, RepoUnit{Name: name, URL: url}) } // I'm not wild about switching on the connector type here (as opposed to dispatching to the connector itself) but @@ -820,7 +820,7 @@ func (s *Source) addUserGistsToCache(ctx context.Context, user string, reporter for _, gist := range gists { s.filteredRepoCache.Set(gist.GetID(), gist.GetGitPullURL()) s.cacheGistInfo(gist) - if err := reporter.UnitOk(ctx, GistUnit{name: gist.GetID(), url: gist.GetGitPullURL()}); err != nil { + if err := reporter.UnitOk(ctx, GistUnit{Name: gist.GetID(), URL: gist.GetGitPullURL()}); err != nil { return err } } diff --git a/pkg/sources/github/repo.go b/pkg/sources/github/repo.go index 168f39c77579..1f01291ea31f 100644 --- a/pkg/sources/github/repo.go +++ b/pkg/sources/github/repo.go @@ -217,7 +217,7 @@ func (s *Source) processRepos(ctx context.Context, target string, reporter sourc s.totalRepoSize += r.GetSize() s.filteredRepoCache.Set(repoName, repoURL) s.cacheRepoInfo(r) - if err := reporter.UnitOk(ctx, RepoUnit{name: repoName, url: repoURL}); err != nil { + if err := reporter.UnitOk(ctx, RepoUnit{Name: repoName, URL: repoURL}); err != nil { return err } logger.V(3).Info("repo attributes", "name", repoName, "kb_size", r.GetSize(), "repo_url", repoURL) From bea1df1e5e62cd12f08bc36e574643c51decdc4a Mon Sep 17 00:00:00 2001 From: Miccah Castorina Date: Fri, 13 Sep 2024 19:06:53 -0700 Subject: [PATCH 3/8] Add comment for scanRepo --- pkg/sources/github/github.go | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pkg/sources/github/github.go b/pkg/sources/github/github.go index 2299e58da61d..3d88812da044 100644 --- a/pkg/sources/github/github.go +++ b/pkg/sources/github/github.go @@ -642,6 +642,10 @@ func (s *Source) scan(ctx context.Context, reporter sources.ChunkReporter) error return nil } +// scanRepo attempts to scan the provided URL and any associated wiki and +// comments if configured. An error is returned if we could not find necessary +// repository metadata or clone the repo, otherwise all errors are reported to +// the ChunkReporter. func (s *Source) scanRepo(ctx context.Context, repoURL string, reporter sources.ChunkReporter) error { if !strings.HasSuffix(repoURL, ".git") { return fmt.Errorf("repo does not end in .git") From 2516f7215cb5b679cd957d36eec2497fdafd32a7 Mon Sep 17 00:00:00 2001 From: Miccah Castorina Date: Mon, 16 Sep 2024 12:18:04 -0700 Subject: [PATCH 4/8] Break out ensureRepoInfoCache into a method --- pkg/sources/github/github.go | 107 ++++++++++++++++++++--------------- 1 file changed, 60 insertions(+), 47 deletions(-) diff --git a/pkg/sources/github/github.go b/pkg/sources/github/github.go index 3d88812da044..71271495e9ee 100644 --- a/pkg/sources/github/github.go +++ b/pkg/sources/github/github.go @@ -373,6 +373,10 @@ func (s *Source) Enumerate(ctx context.Context, reporter sources.UnitReporter) e // Report any values that were already configured. for _, name := range s.filteredRepoCache.Keys() { url, _ := s.filteredRepoCache.Get(name) + url, err := s.ensureRepoInfoCache(ctx, url) + if err != nil { + _ = dedupeReporter.UnitErr(ctx, err) + } _ = dedupeReporter.UnitOk(ctx, RepoUnit{Name: name, URL: url}) } @@ -396,55 +400,12 @@ func (s *Source) Enumerate(ctx context.Context, reporter sources.UnitReporter) e } s.repos = make([]string, 0, s.filteredRepoCache.Count()) -RepoLoop: for _, repo := range s.filteredRepoCache.Values() { - repoCtx := context.WithValue(ctx, "repo", repo) - - // Ensure that |s.repoInfoCache| contains an entry for |repo|. - // This compensates for differences in enumeration logic between `--org` and `--repo`. - // See: https://github.com/trufflesecurity/trufflehog/pull/2379#discussion_r1487454788 - if _, ok := s.repoInfoCache.get(repo); !ok { - repoCtx.Logger().V(2).Info("Caching repository info") - - _, urlParts, err := getRepoURLParts(repo) - if err != nil { - repoCtx.Logger().Error(err, "Failed to parse repository URL") - continue - } + ctx := context.WithValue(ctx, "repo", repo) - if isGistUrl(urlParts) { - // Cache gist info. - for { - gistID := extractGistID(urlParts) - gist, _, err := s.connector.APIClient().Gists.Get(repoCtx, gistID) - // Normalize the URL to the Gist's pull URL. - // See https://github.com/trufflesecurity/trufflehog/pull/2625#issuecomment-2025507937 - repo = gist.GetGitPullURL() - if s.handleRateLimit(repoCtx, err) { - continue - } - if err != nil { - repoCtx.Logger().Error(err, "Failed to fetch gist") - continue RepoLoop - } - s.cacheGistInfo(gist) - break - } - } else { - // Cache repository info. - for { - ghRepo, _, err := s.connector.APIClient().Repositories.Get(repoCtx, urlParts[1], urlParts[2]) - if s.handleRateLimit(repoCtx, err) { - continue - } - if err != nil { - repoCtx.Logger().Error(err, "Failed to fetch repository") - continue RepoLoop - } - s.cacheRepoInfo(ghRepo) - break - } - } + repo, err := s.ensureRepoInfoCache(ctx, repo) + if err != nil { + ctx.Logger().Error(err, "error caching repo info") } s.repos = append(s.repos, repo) } @@ -455,6 +416,58 @@ RepoLoop: return nil } +// ensureRepoInfoCache checks that s.repoInfoCache has an entry for the +// provided repository URL. If not, it fetches and stores the metadata for the +// repository. In some cases, the gist URL needs to be normalized, which is +// returned by this function. +func (s *Source) ensureRepoInfoCache(ctx context.Context, repo string) (string, error) { + if _, ok := s.repoInfoCache.get(repo); ok { + return repo, nil + } + // Ensure that |s.repoInfoCache| contains an entry for |repo|. + // This compensates for differences in enumeration logic between `--org` and `--repo`. + // See: https://github.com/trufflesecurity/trufflehog/pull/2379#discussion_r1487454788 + ctx.Logger().V(2).Info("Caching repository info") + + _, urlParts, err := getRepoURLParts(repo) + if err != nil { + return repo, fmt.Errorf("failed to parse repository URL: %w", err) + } + + if isGistUrl(urlParts) { + // Cache gist info. + for { + gistID := extractGistID(urlParts) + gist, _, err := s.connector.APIClient().Gists.Get(ctx, gistID) + // Normalize the URL to the Gist's pull URL. + // See https://github.com/trufflesecurity/trufflehog/pull/2625#issuecomment-2025507937 + repo = gist.GetGitPullURL() + if s.handleRateLimit(ctx, err) { + continue + } + if err != nil { + return repo, fmt.Errorf("failed to fetch gist") + } + s.cacheGistInfo(gist) + break + } + } else { + // Cache repository info. + for { + ghRepo, _, err := s.connector.APIClient().Repositories.Get(ctx, urlParts[1], urlParts[2]) + if s.handleRateLimit(ctx, err) { + continue + } + if err != nil { + return repo, fmt.Errorf("failed to fetch repository") + } + s.cacheRepoInfo(ghRepo) + break + } + } + return repo, nil +} + func (s *Source) enumerateBasicAuth(ctx context.Context, reporter sources.UnitReporter) error { for _, org := range s.orgsCache.Keys() { orgCtx := context.WithValue(ctx, "account", org) From bee72fa82fe242a7a1219b8bf8321bad2a9d96d9 Mon Sep 17 00:00:00 2001 From: Miccah Castorina Date: Fri, 20 Sep 2024 09:43:41 -0700 Subject: [PATCH 5/8] Update comments and check errors --- pkg/sources/github/github.go | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/pkg/sources/github/github.go b/pkg/sources/github/github.go index 71271495e9ee..6f4c1db02dd4 100644 --- a/pkg/sources/github/github.go +++ b/pkg/sources/github/github.go @@ -371,13 +371,19 @@ func (s *Source) Enumerate(ctx context.Context, reporter sources.UnitReporter) e VisitErr: reporter.UnitErr, } // Report any values that were already configured. + // This compensates for differences in enumeration logic between `--org` and `--repo`. + // See: https://github.com/trufflesecurity/trufflehog/pull/2379#discussion_r1487454788 for _, name := range s.filteredRepoCache.Keys() { url, _ := s.filteredRepoCache.Get(name) url, err := s.ensureRepoInfoCache(ctx, url) if err != nil { - _ = dedupeReporter.UnitErr(ctx, err) + if err := dedupeReporter.UnitErr(ctx, err); err != nil { + return err + } + } + if err := dedupeReporter.UnitOk(ctx, RepoUnit{Name: name, URL: url}); err != nil { + return err } - _ = dedupeReporter.UnitOk(ctx, RepoUnit{Name: name, URL: url}) } // I'm not wild about switching on the connector type here (as opposed to dispatching to the connector itself) but @@ -400,6 +406,8 @@ func (s *Source) Enumerate(ctx context.Context, reporter sources.UnitReporter) e } s.repos = make([]string, 0, s.filteredRepoCache.Count()) + // Double make sure that all enumerated repositories in the + // filteredRepoCache have an entry in the repoInfoCache. for _, repo := range s.filteredRepoCache.Values() { ctx := context.WithValue(ctx, "repo", repo) @@ -424,9 +432,6 @@ func (s *Source) ensureRepoInfoCache(ctx context.Context, repo string) (string, if _, ok := s.repoInfoCache.get(repo); ok { return repo, nil } - // Ensure that |s.repoInfoCache| contains an entry for |repo|. - // This compensates for differences in enumeration logic between `--org` and `--repo`. - // See: https://github.com/trufflesecurity/trufflehog/pull/2379#discussion_r1487454788 ctx.Logger().V(2).Info("Caching repository info") _, urlParts, err := getRepoURLParts(repo) From c539c7bd947143ea8670d6b392809f15c4b3e5f7 Mon Sep 17 00:00:00 2001 From: Miccah Castorina Date: Fri, 20 Sep 2024 09:43:56 -0700 Subject: [PATCH 6/8] Ensure that the repoInfoCache contains the repo during ChunkUnit --- pkg/sources/github/github.go | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/pkg/sources/github/github.go b/pkg/sources/github/github.go index 6f4c1db02dd4..518ee7eacbf3 100644 --- a/pkg/sources/github/github.go +++ b/pkg/sources/github/github.go @@ -1509,5 +1509,11 @@ func (s *Source) scanTarget(ctx context.Context, target sources.ChunkingTarget, func (s *Source) ChunkUnit(ctx context.Context, unit sources.SourceUnit, reporter sources.ChunkReporter) error { repoURL, _ := unit.SourceUnitID() ctx = context.WithValue(ctx, "repo", repoURL) + // ChunkUnit is not guaranteed to be called from Enumerate, so we must + // check and fetch the repoInfoCache for this repo. + repoURL, err := s.ensureRepoInfoCache(ctx, repoURL) + if err != nil { + return err + } return s.scanRepo(ctx, repoURL, reporter) } From 63c7e42f723a1dfe845d53611128ac07a86b9ec5 Mon Sep 17 00:00:00 2001 From: Miccah Castorina Date: Fri, 20 Sep 2024 10:34:52 -0700 Subject: [PATCH 7/8] Add integration test for ChunkUnit --- pkg/sources/github/github_integration_test.go | 35 +++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/pkg/sources/github/github_integration_test.go b/pkg/sources/github/github_integration_test.go index 35069aee512e..f028af7971f3 100644 --- a/pkg/sources/github/github_integration_test.go +++ b/pkg/sources/github/github_integration_test.go @@ -843,3 +843,38 @@ func TestSource_Chunks_TargetedScan(t *testing.T) { }) } } + +func TestChunkUnit(t *testing.T) { + ctx := context.Background() + conn, _ := anypb.New(&sourcespb.GitHub{ + Repositories: []string{"https://github.com/trufflesecurity/driftwood.git"}, + Credential: &sourcespb.GitHub_Unauthenticated{}, + }) + s := Source{} + if err := s.Init(ctx, "github integration test source", 0, 0, false, conn, 1); err != nil { + t.Errorf("Init() failed: %v", err) + } + + unit := RepoUnit{Name: "driftwood", URL: "https://github.com/trufflesecurity/driftwood.git"} + reporter := &countChunkReporter{} + if err := s.ChunkUnit(ctx, unit, reporter); err != nil { + t.Errorf("ChunkUnit() failed: %v", err) + } + assert.GreaterOrEqual(t, reporter.chunkCount, 65) + assert.Equal(t, 0, reporter.errCount) +} + +type countChunkReporter struct { + chunkCount int + errCount int +} + +func (m *countChunkReporter) ChunkOk(ctx context.Context, chunk sources.Chunk) error { + m.chunkCount++ + return nil +} + +func (m *countChunkReporter) ChunkErr(ctx context.Context, err error) error { + m.errCount++ + return nil +} From 80b6219f769fa219b9d568d278a388db61555cea Mon Sep 17 00:00:00 2001 From: Miccah Castorina Date: Fri, 20 Sep 2024 10:36:04 -0700 Subject: [PATCH 8/8] Move s.scanOptions initialization to Init() --- pkg/sources/github/github.go | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/pkg/sources/github/github.go b/pkg/sources/github/github.go index 518ee7eacbf3..44335d6e3fc8 100644 --- a/pkg/sources/github/github.go +++ b/pkg/sources/github/github.go @@ -213,6 +213,11 @@ func (s *Source) Init(aCtx context.Context, name string, jobID sources.JobID, so s.jobPool = &errgroup.Group{} s.jobPool.SetLimit(concurrency) + // Setup scan options if it wasn't provided. + if s.scanOptions == nil { + s.scanOptions = &git.ScanOptions{} + } + var conn sourcespb.GitHub err = anypb.UnmarshalTo(connection, &conn, proto.UnmarshalOptions{}) if err != nil { @@ -622,11 +627,6 @@ func (s *Source) scan(ctx context.Context, reporter sources.ChunkReporter) error reposToScan, progressIndexOffset := sources.FilterReposToResume(s.repos, s.GetProgress().EncodedResumeInfo) s.repos = reposToScan - // Setup scan options if it wasn't provided. - if s.scanOptions == nil { - s.scanOptions = &git.ScanOptions{} - } - for i, repoURL := range s.repos { i, repoURL := i, repoURL s.jobPool.Go(func() error {