Skip to content

Commit

Permalink
Fix non-deterministic behavior in last-use repopulation
Browse files Browse the repository at this point in the history
This fixes an issue where some last-use tests would fail sporadically
because the code that populates missing database entries was using the
current time as it was iterating over the files. If the clock happened
to roll over while it was iterating, then different files would get
different timestamps non-deterministically.

The fix is to snapshot the current time when it starts, and reuse that
time while repopulating. I didn't do this originally just because I was
reluctant to pass yet another argument around. However, it seems like
this is necessary. In #12634, we discussed other options such as having
some kind of process-global "now" snapshot (like in `Config`), but I'm
reluctant to do that because I am uneasy dealing with long-lived
programs, or handling before/after relationships (like different parts
of the code not considering that all timestamps might be equal). It
might be something that we could consider in the future, but I'm not
sure I want to try right now.
  • Loading branch information
ehuss committed Nov 12, 2023
1 parent 9a1b092 commit 7637016
Showing 1 changed file with 14 additions and 7 deletions.
21 changes: 14 additions & 7 deletions src/cargo/core/global_cache_tracker.rs
Original file line number Diff line number Diff line change
Expand Up @@ -567,6 +567,7 @@ impl GlobalCacheTracker {
// TODO: Investigate how slow this might be.
Self::sync_db_with_files(
&tx,
now,
config,
&base,
gc_opts.is_download_cache_size_set(),
Expand Down Expand Up @@ -695,6 +696,7 @@ impl GlobalCacheTracker {
/// caller can delete them.
fn sync_db_with_files(
conn: &Connection,
now: Timestamp,
config: &Config,
base: &BasePaths,
sync_size: bool,
Expand All @@ -703,8 +705,8 @@ impl GlobalCacheTracker {
let _p = crate::util::profile::start("global cache db sync");
debug!(target: "gc", "starting db sync");
// For registry_index and git_db, add anything that is missing in the db.
Self::update_parent_for_missing_from_db(conn, REGISTRY_INDEX_TABLE, &base.index)?;
Self::update_parent_for_missing_from_db(conn, GIT_DB_TABLE, &base.git_db)?;
Self::update_parent_for_missing_from_db(conn, now, REGISTRY_INDEX_TABLE, &base.index)?;
Self::update_parent_for_missing_from_db(conn, now, GIT_DB_TABLE, &base.git_db)?;

// For registry_crate, registry_src, and git_checkout, remove anything
// from the db that isn't on disk.
Expand Down Expand Up @@ -746,9 +748,10 @@ impl GlobalCacheTracker {

// For registry_crate, registry_src, and git_checkout, add anything
// that is missing in the db.
Self::populate_untracked_crate(conn, &base.crate_dir)?;
Self::populate_untracked_crate(conn, now, &base.crate_dir)?;
Self::populate_untracked(
conn,
now,
config,
REGISTRY_INDEX_TABLE,
"registry_id",
Expand All @@ -758,6 +761,7 @@ impl GlobalCacheTracker {
)?;
Self::populate_untracked(
conn,
now,
config,
GIT_DB_TABLE,
"git_id",
Expand Down Expand Up @@ -791,6 +795,7 @@ impl GlobalCacheTracker {
/// For parent tables, add any entries that are on disk but aren't tracked in the db.
fn update_parent_for_missing_from_db(
conn: &Connection,
now: Timestamp,
parent_table_name: &str,
base_path: &Path,
) -> CargoResult<()> {
Expand All @@ -805,7 +810,6 @@ impl GlobalCacheTracker {
VALUES (?1, ?2)
ON CONFLICT DO NOTHING",
))?;
let now = now();
for name in names {
stmt.execute(params![name, now])?;
}
Expand Down Expand Up @@ -882,15 +886,18 @@ impl GlobalCacheTracker {
/// Updates the database to add any `.crate` files that are currently
/// not tracked (such as when they are downloaded by an older version of
/// cargo).
fn populate_untracked_crate(conn: &Connection, base_path: &Path) -> CargoResult<()> {
fn populate_untracked_crate(
conn: &Connection,
now: Timestamp,
base_path: &Path,
) -> CargoResult<()> {
let _p = crate::util::profile::start("populate untracked crate");
trace!(target: "gc", "populating untracked crate files");
let mut insert_stmt = conn.prepare_cached(
"INSERT INTO registry_crate (registry_id, name, size, timestamp)
VALUES (?1, ?2, ?3, ?4)
ON CONFLICT DO NOTHING",
)?;
let now = now();
let index_names = Self::names_from(&base_path)?;
for index_name in index_names {
let Some(id) = Self::id_from_name(conn, REGISTRY_INDEX_TABLE, &index_name)? else {
Expand All @@ -915,6 +922,7 @@ impl GlobalCacheTracker {
/// (such as when they are downloaded by an older version of cargo).
fn populate_untracked(
conn: &Connection,
now: Timestamp,
config: &Config,
id_table_name: &str,
id_column_name: &str,
Expand All @@ -940,7 +948,6 @@ impl GlobalCacheTracker {
ON CONFLICT DO NOTHING",
))?;
let mut progress = Progress::with_style("Scanning", ProgressStyle::Ratio, config);
let now = now();
// Compute the size of any directory not in the database.
for id_name in id_names {
let Some(id) = Self::id_from_name(conn, id_table_name, &id_name)? else {
Expand Down

0 comments on commit 7637016

Please sign in to comment.