Introduce InMemoryRepo container

Summary: ## This stack Continues the implementation of the primitives that will validate submodule expansion in large repo's. bonsais. The goal of validation is to make sure that **the submodule metadata file is always kept in sync with the expansion**. How this will be done is more complicated than it seems, but on a high level we will 1 Get the fsnode in the submodule repo from the commit in the metadata file 2. Derive the fsnode for the new commit 3. Get the entry for the expansion and compare with the fsnode from step #1. 4. We'll perform many validations of consistency in this process (e.g. if expansion is changed, metadata file has to be changed as well). ## Challenge of validation To efficiently verify that the working copies match, we want to get fsnodes of both the commit being expanded in the submodule (easy) and the new commit being synced to the large repo. The second one is a problem, because in order to get it, we would need access to the large repo's blobstore and to derive the fsnode using the large repo's derived data. The problem with is that the primitives that rewrite commits (thus expand the submodules) are, on purpose, only given the target repo's id, instead of the actual repo (and its blobstore). This was done a while back (D43002436) to make sure we don't accidentally write the target repo. We want to keep the maintain that safety net, so the problem this diff tries to solve is: **provide READ-ONLY access to some parts of the large repo to the commit sync primitives** (e.g. CommitInMemorySyncer). ## This diff Introduces a new Repo facet container, called `InMemoryRepo`, which can be passed to the validation primitives and will ensure that any writes performed during validation only happen in memory, i.e. are not persisted to the large repo's blobstore or tables. This InMemoryRepo needs to have `RepoDerivedData` and `Changesets`. But to create a `RepoDerivedData`, I need to provide a bunch of other things, most of which I won't need. So instead of implementing a "wrapper" for all of these things (e..g `BonsaiGitMapping`, `BonsaiHgMapping`), I defined a dummy data type that will implement these to satisfy the type checker. This is good because it will be very clear what the validation primitives will be using and if in the future this changes, the tests will crash because they'll try to read from or write to storage that they're not supposed to. In which case the person making the change will implement a wrapper to ensure the operations only write to memory. Reviewed By: markbt Differential Revision: D55701183 fbshipit-source-id: 706b8d4cbef8cde6a9562a52009db90901d4a685
facebook · Apr 17, 2024 · 54b6cee · 54b6cee
1 parent 835a118
commit 54b6cee
Show file tree

Hide file tree

Showing 10 changed files with 554 additions and 3 deletions.
diff --git a/eden/mononoke/blobstore/readonlyblob/src/lib.rs b/eden/mononoke/blobstore/readonlyblob/src/lib.rs
@@ -20,7 +20,7 @@ mod errors;
 pub use crate::errors::ErrorKind;
 
 /// A layer over an existing blobstore that prevents writes.
-#[derive(Debug)]
+#[derive(Debug, Clone)]
 pub struct ReadOnlyBlobstore<T> {
     blobstore: T,
 }

diff --git a/eden/mononoke/commit_rewriting/cross_repo_sync/Cargo.toml b/eden/mononoke/commit_rewriting/cross_repo_sync/Cargo.toml
@@ -14,6 +14,7 @@ path = "test/main.rs"
 [dependencies]
 anyhow = "1.0.75"
 async-recursion = "1.0.2"
+async-trait = "0.1.71"
 blobstore = { version = "0.1.0", path = "../../blobstore" }
 bonsai_git_mapping = { version = "0.1.0", path = "../../bonsai_git_mapping" }
 bonsai_globalrev_mapping = { version = "0.1.0", path = "../../bonsai_globalrev_mapping" }
@@ -25,19 +26,24 @@ bounded_traversal = { version = "0.1.0", path = "../../common/bounded_traversal"
 cacheblob = { version = "0.1.0", path = "../../blobstore/cacheblob" }
 changeset_info = { version = "0.1.0", path = "../../derived_data/changeset_info" }
 changesets = { version = "0.1.0", path = "../../changesets" }
+changesets_impl = { version = "0.1.0", path = "../../changesets/changesets_impl" }
 cloned = { version = "0.1.0", git = "https://github.com/facebookexperimental/rust-shed.git", branch = "main" }
 commit_graph = { version = "0.1.0", path = "../../repo_attributes/commit_graph/commit_graph" }
+commit_graph_types = { version = "0.1.0", path = "../../repo_attributes/commit_graph/commit_graph_types" }
 commit_transformation = { version = "0.1.0", path = "../../megarepo_api/commit_transformation" }
 context = { version = "0.1.0", path = "../../server/context" }
+derivative = "2.2"
 derived_data = { version = "0.1.0", path = "../../derived_data" }
 either = "1.5"
 environment = { version = "0.1.0", path = "../../cmdlib/environment" }
 facet = { version = "0.1.0", git = "https://github.com/facebookexperimental/rust-shed.git", branch = "main" }
 fbinit = { version = "0.1.2", git = "https://github.com/facebookexperimental/rust-shed.git", branch = "main" }
+filenodes = { version = "0.1.0", path = "../../filenodes" }
 filestore = { version = "0.1.0", path = "../../filestore" }
 fsnodes = { version = "0.1.0", path = "../../derived_data/fsnodes" }
 futures = { version = "0.3.30", features = ["async-await", "compat"] }
 git_types = { version = "0.1.0", path = "../../git/git_types" }
+itertools = "0.11.0"
 justknobs = { version = "0.1.0", git = "https://github.com/facebookexperimental/rust-shed.git", branch = "main" }
 live_commit_sync_config = { version = "0.1.0", path = "../live_commit_sync_config" }
 manifest = { version = "0.1.0", path = "../../manifest" }
@@ -52,6 +58,7 @@ pushrebase = { version = "0.1.0", path = "../../pushrebase" }
 pushrebase_hooks = { version = "0.1.0", path = "../../pushrebase/pushrebase_hooks" }
 pushrebase_mutation_mapping = { version = "0.1.0", path = "../../pushrebase_mutation_mapping" }
 rand = { version = "0.8", features = ["small_rng"] }
+readonlyblob = { version = "0.1.0", path = "../../blobstore/readonlyblob" }
 ref-cast = "1.0.18"
 repo_blobstore = { version = "0.1.0", path = "../../blobrepo/repo_blobstore" }
 repo_bookmark_attrs = { version = "0.1.0", path = "../../repo_attributes/repo_bookmark_attrs" }
@@ -62,24 +69,26 @@ scopeguard = "1.0.0"
 scuba_ext = { version = "0.1.0", path = "../../common/scuba_ext" }
 slog = { version = "2.7", features = ["max_level_trace", "nested-values"] }
 sorted_vector_map = { version = "0.1.0", git = "https://github.com/facebookexperimental/rust-shed.git", branch = "main" }
+sql = { version = "0.1.0", git = "https://github.com/facebookexperimental/rust-shed.git", branch = "main" }
+sql_construct = { version = "0.1.0", path = "../../common/sql_construct" }
 static_assertions = "1.1.0"
 synced_commit_mapping = { version = "0.1.0", path = "../synced_commit_mapping" }
 synced_commit_mapping_pushrebase_hook = { version = "0.1.0", path = "../synced_commit_mapping/pushrebase_hook" }
 thiserror = "1.0.49"
 tokio = { version = "1.36.0", features = ["full", "test-util", "tracing"] }
 topo_sort = { version = "0.1.0", path = "../../common/topo_sort" }
+vec1 = { version = "1", features = ["serde"] }
 
 [dev-dependencies]
 ascii = "1.0"
 blobrepo = { version = "0.1.0", path = "../../blobrepo" }
 bulk_derivation = { version = "0.1.0", path = "../../derived_data/bulk_derivation" }
+changesets_creation = { version = "0.1.0", path = "../../changesets/changesets_creation" }
 cross_repo_sync_test_utils = { version = "0.1.0", path = "test_utils" }
 fbinit-tokio = { version = "0.1.2", git = "https://github.com/facebookexperimental/rust-shed.git", branch = "main" }
 fixtures = { version = "0.1.0", path = "../../tests/fixtures" }
 mercurial_derivation = { version = "0.1.0", path = "../../derived_data/mercurial_derivation" }
 mononoke_types-mocks = { version = "0.1.0", path = "../../mononoke_types/mocks" }
-sql = { version = "0.1.0", git = "https://github.com/facebookexperimental/rust-shed.git", branch = "main" }
-sql_construct = { version = "0.1.0", path = "../../common/sql_construct" }
 sql_ext = { version = "0.1.0", path = "../../common/rust/sql_ext" }
 strum = { version = "0.24", features = ["derive"] }
 test_repo_factory = { version = "0.1.0", path = "../../repo_factory/test_repo_factory" }

diff --git a/eden/mononoke/commit_rewriting/cross_repo_sync/TARGETS b/eden/mononoke/commit_rewriting/cross_repo_sync/TARGETS
@@ -25,8 +25,11 @@ rust_library(
     deps = [
         "fbsource//third-party/rust:anyhow",
         "fbsource//third-party/rust:async-recursion",
+        "fbsource//third-party/rust:async-trait",
+        "fbsource//third-party/rust:derivative",
         "fbsource//third-party/rust:either",
         "fbsource//third-party/rust:futures",
+        "fbsource//third-party/rust:itertools",
         "fbsource//third-party/rust:maplit",
         "fbsource//third-party/rust:rand",
         "fbsource//third-party/rust:ref-cast",
@@ -35,20 +38,24 @@ rust_library(
         "fbsource//third-party/rust:static_assertions",
         "fbsource//third-party/rust:thiserror",
         "fbsource//third-party/rust:tokio",
+        "fbsource//third-party/rust:vec1",
         "//common/rust/shed/borrowed:borrowed",
         "//common/rust/shed/cloned:cloned",
         "//common/rust/shed/facet:facet",
         "//common/rust/shed/fbinit:fbinit",
         "//common/rust/shed/justknobs_stub:justknobs",
         "//common/rust/shed/sorted_vector_map:sorted_vector_map",
+        "//common/rust/shed/sql:sql",
         "//eden/mononoke/blobrepo:repo_blobstore",
         "//eden/mononoke/blobstore:blobstore",
         "//eden/mononoke/blobstore:cacheblob",
+        "//eden/mononoke/blobstore:readonlyblob",
         "//eden/mononoke/bonsai_git_mapping:bonsai_git_mapping",
         "//eden/mononoke/bonsai_globalrev_mapping:bonsai_globalrev_mapping",
         "//eden/mononoke/bonsai_hg_mapping:bonsai_hg_mapping",
         "//eden/mononoke/bookmarks:bookmarks",
         "//eden/mononoke/changesets:changesets",
+        "//eden/mononoke/changesets:changesets_impl",
         "//eden/mononoke/cmdlib:environment",
         "//eden/mononoke/commit_rewriting/bookmark_renaming:bookmark_renaming",
         "//eden/mononoke/commit_rewriting/live_commit_sync_config:live_commit_sync_config",
@@ -57,10 +64,12 @@ rust_library(
         "//eden/mononoke/commit_rewriting/synced_commit_mapping:synced_commit_mapping_pushrebase_hook",
         "//eden/mononoke/common/bounded_traversal:bounded_traversal",
         "//eden/mononoke/common/scuba_ext:scuba_ext",
+        "//eden/mononoke/common/sql_construct:sql_construct",
         "//eden/mononoke/common/topo_sort:topo_sort",
         "//eden/mononoke/derived_data:changeset_info",
         "//eden/mononoke/derived_data:derived_data",
         "//eden/mononoke/derived_data:fsnodes",
+        "//eden/mononoke/filenodes:filenodes",
         "//eden/mononoke/filestore:filestore",
         "//eden/mononoke/git/git_types:git_types",
         "//eden/mononoke/manifest:manifest",
@@ -74,6 +83,7 @@ rust_library(
         "//eden/mononoke/pushrebase:pushrebase_hooks",
         "//eden/mononoke/pushrebase_mutation_mapping:pushrebase_mutation_mapping",
         "//eden/mononoke/repo_attributes/commit_graph/commit_graph:commit_graph",
+        "//eden/mononoke/repo_attributes/commit_graph/commit_graph_types:commit_graph_types",
         "//eden/mononoke/repo_attributes/repo_bookmark_attrs:repo_bookmark_attrs",
         "//eden/mononoke/repo_attributes/repo_cross_repo:repo_cross_repo",
         "//eden/mononoke/repo_attributes/repo_derived_data:repo_derived_data",
@@ -146,6 +156,8 @@ rust_unittest(
         "//eden/mononoke/blobstore:cacheblob",
         "//eden/mononoke/bonsai_hg_mapping:bonsai_hg_mapping",
         "//eden/mononoke/bookmarks:bookmarks",
+        "//eden/mononoke/changesets:changesets",
+        "//eden/mononoke/changesets/changesets_creation:changesets_creation",
         "//eden/mononoke/commit_rewriting/live_commit_sync_config:live_commit_sync_config",
         "//eden/mononoke/commit_rewriting/synced_commit_mapping:synced_commit_mapping",
         "//eden/mononoke/common/sql_construct:sql_construct",

diff --git a/eden/mononoke/commit_rewriting/cross_repo_sync/src/git_submodules.rs b/eden/mononoke/commit_rewriting/cross_repo_sync/src/git_submodules.rs
@@ -5,9 +5,12 @@
  * GNU General Public License version 2.
  */
 
+mod dummy_struct;
 mod expand;
+mod in_memory_repo;
 mod utils;
 mod validation;
 
 pub use expand::expand_and_validate_all_git_submodule_file_changes;
 pub use expand::SubmoduleExpansionData;
+pub use in_memory_repo::InMemoryRepo;
diff --git a/eden/mononoke/commit_rewriting/cross_repo_sync/src/git_submodules/dummy_struct.rs b/eden/mononoke/commit_rewriting/cross_repo_sync/src/git_submodules/dummy_struct.rs
@@ -0,0 +1,197 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ *
+ * This software may be used and distributed according to the terms of the
+ * GNU General Public License version 2.
+ */
+
+use std::collections::HashMap;
+
+use ::sql::Transaction;
+use anyhow::Error;
+use anyhow::Result;
+use async_trait::async_trait;
+use bonsai_git_mapping::AddGitMappingErrorKind;
+use bonsai_git_mapping::BonsaiGitMapping;
+use bonsai_git_mapping::BonsaiGitMappingEntry;
+use bonsai_git_mapping::BonsaisOrGitShas;
+use commit_graph_types::edges::ChangesetEdges;
+use commit_graph_types::storage::CommitGraphStorage;
+use commit_graph_types::storage::FetchedChangesetEdges;
+use commit_graph_types::storage::Prefetch;
+use context::CoreContext;
+use filenodes::FilenodeInfo;
+use filenodes::FilenodeRange;
+use filenodes::FilenodeResult;
+use filenodes::Filenodes;
+use filenodes::PreparedFilenode;
+use mercurial_types::HgFileNodeId;
+use mononoke_types::hash::GitSha1;
+use mononoke_types::ChangesetId;
+use mononoke_types::ChangesetIdPrefix;
+use mononoke_types::ChangesetIdsResolvedFromPrefix;
+use mononoke_types::RepoPath;
+use mononoke_types::RepositoryId;
+use vec1::Vec1;
+
+/// Struct created to satisfy the type system when creating a `RepoDerivedData`
+/// for the `InMemoryRepo`.
+pub(crate) struct DummyStruct;
+
+#[async_trait]
+impl Filenodes for DummyStruct {
+    async fn add_filenodes(
+        &self,
+        _ctx: &CoreContext,
+        _info: Vec<PreparedFilenode>,
+    ) -> Result<FilenodeResult<()>> {
+        unimplemented!()
+    }
+
+    async fn add_or_replace_filenodes(
+        &self,
+        _ctx: &CoreContext,
+        _info: Vec<PreparedFilenode>,
+    ) -> Result<FilenodeResult<()>> {
+        unimplemented!()
+    }
+
+    async fn get_filenode(
+        &self,
+        _ctx: &CoreContext,
+        _path: &RepoPath,
+        _filenode: HgFileNodeId,
+    ) -> Result<FilenodeResult<Option<FilenodeInfo>>> {
+        unimplemented!()
+    }
+
+    async fn get_all_filenodes_maybe_stale(
+        &self,
+        _ctx: &CoreContext,
+        _path: &RepoPath,
+        _limit: Option<u64>,
+    ) -> Result<FilenodeResult<FilenodeRange>> {
+        unimplemented!()
+    }
+
+    fn prime_cache(&self, _ctx: &CoreContext, _filenodes: &[PreparedFilenode]) {
+        unimplemented!()
+    }
+}
+
+#[async_trait]
+impl BonsaiGitMapping for DummyStruct {
+    fn repo_id(&self) -> RepositoryId {
+        unimplemented!()
+    }
+
+    async fn add(
+        &self,
+        _ctx: &CoreContext,
+        _entry: BonsaiGitMappingEntry,
+    ) -> Result<(), AddGitMappingErrorKind> {
+        unimplemented!()
+    }
+
+    async fn bulk_add(
+        &self,
+        _ctx: &CoreContext,
+        _entries: &[BonsaiGitMappingEntry],
+    ) -> Result<(), AddGitMappingErrorKind> {
+        unimplemented!()
+    }
+
+    async fn bulk_add_git_mapping_in_transaction(
+        &self,
+        _ctx: &CoreContext,
+        _entries: &[BonsaiGitMappingEntry],
+        _transaction: Transaction,
+    ) -> Result<Transaction, AddGitMappingErrorKind> {
+        unimplemented!()
+    }
+
+    async fn get(
+        &self,
+        _ctx: &CoreContext,
+        _cs: BonsaisOrGitShas,
+    ) -> Result<Vec<BonsaiGitMappingEntry>, Error> {
+        unimplemented!()
+    }
+
+    /// Use caching for the ranges of one element, use slower path otherwise.
+    async fn get_in_range(
+        &self,
+        _ctx: &CoreContext,
+        _low: GitSha1,
+        _high: GitSha1,
+        _limit: usize,
+    ) -> Result<Vec<GitSha1>, Error> {
+        unimplemented!()
+    }
+}
+
+#[async_trait]
+impl CommitGraphStorage for DummyStruct {
+    fn repo_id(&self) -> RepositoryId {
+        unimplemented!()
+    }
+
+    async fn add(&self, _ctx: &CoreContext, _edges: ChangesetEdges) -> Result<bool> {
+        unimplemented!()
+    }
+
+    async fn add_many(
+        &self,
+        _ctx: &CoreContext,
+        _many_edges: Vec1<ChangesetEdges>,
+    ) -> Result<usize> {
+        unimplemented!()
+    }
+
+    async fn fetch_edges(&self, _ctx: &CoreContext, _cs_id: ChangesetId) -> Result<ChangesetEdges> {
+        unimplemented!()
+    }
+
+    async fn maybe_fetch_edges(
+        &self,
+        _ctx: &CoreContext,
+        _cs_id: ChangesetId,
+    ) -> Result<Option<ChangesetEdges>> {
+        unimplemented!()
+    }
+
+    async fn fetch_many_edges(
+        &self,
+        _ctx: &CoreContext,
+        _cs_ids: &[ChangesetId],
+        _prefetch: Prefetch,
+    ) -> Result<HashMap<ChangesetId, FetchedChangesetEdges>> {
+        unimplemented!()
+    }
+
+    async fn maybe_fetch_many_edges(
+        &self,
+        _ctx: &CoreContext,
+        _cs_ids: &[ChangesetId],
+        _prefetch: Prefetch,
+    ) -> Result<HashMap<ChangesetId, FetchedChangesetEdges>> {
+        unimplemented!()
+    }
+
+    async fn find_by_prefix(
+        &self,
+        _ctx: &CoreContext,
+        _cs_prefix: ChangesetIdPrefix,
+        _limit: usize,
+    ) -> Result<ChangesetIdsResolvedFromPrefix> {
+        unimplemented!()
+    }
+
+    async fn fetch_children(
+        &self,
+        _ctx: &CoreContext,
+        _cs_id: ChangesetId,
+    ) -> Result<Vec<ChangesetId>> {
+        unimplemented!()
+    }
+}