From bdae240c90c4d556968c024fc55d4a065a59634b Mon Sep 17 00:00:00 2001 From: Clement Rey Date: Mon, 15 Jan 2024 16:17:25 +0100 Subject: [PATCH] Primary caching 13: stats & memory panel integration for range queries (#4785) Title. https://github.com/rerun-io/rerun/assets/2910679/cf2c2748-a461-49fe-8124-c2a94164c956 --- Part of the primary caching series of PR (index search, joins, deserialization): - #4592 - #4593 - #4659 - #4680 - #4681 - #4698 - #4711 - #4712 - #4721 - #4726 - #4773 - #4784 - #4785 - #4793 - #4800 --- crates/re_data_store/src/store_format.rs | 6 +- crates/re_data_store/src/store_write.rs | 5 +- .../re_log_types/src/time_point/timeline.rs | 3 +- crates/re_query_cache/src/cache.rs | 9 +- crates/re_query_cache/src/cache_stats.rs | 101 ++++++++++---- crates/re_query_cache/src/lib.rs | 4 +- crates/re_viewer/src/app.rs | 3 +- crates/re_viewer/src/ui/memory_panel.rs | 123 ++++++++++++++---- 8 files changed, 194 insertions(+), 60 deletions(-) diff --git a/crates/re_data_store/src/store_format.rs b/crates/re_data_store/src/store_format.rs index 2208fce454ad..33962271bcb3 100644 --- a/crates/re_data_store/src/store_format.rs +++ b/crates/re_data_store/src/store_format.rs @@ -130,7 +130,11 @@ impl std::fmt::Display for IndexedBucket { let time_range = { let time_range = &self.inner.read().time_range; if time_range.min.as_i64() != i64::MAX && time_range.max.as_i64() != i64::MIN { - self.timeline.format_time_range_utc(time_range) + format!( + " - {}: {}", + self.timeline.name(), + self.timeline.format_time_range_utc(time_range) + ) } else { "time range: N/A\n".to_owned() } diff --git a/crates/re_data_store/src/store_write.rs b/crates/re_data_store/src/store_write.rs index a300760243c0..5046982daa7e 100644 --- a/crates/re_data_store/src/store_write.rs +++ b/crates/re_data_store/src/store_write.rs @@ -384,10 +384,7 @@ impl IndexedTable { if 0 < config.indexed_bucket_num_rows { let bucket_time_range = bucket.inner.read().time_range; - re_log::debug_once!( - "Failed to split bucket on timeline {}", - bucket.timeline.format_time_range_utc(&bucket_time_range) - ); + re_log::debug_once!("Failed to split bucket on timeline {}", timeline.name()); if 1 < config.indexed_bucket_num_rows && bucket_time_range.min == bucket_time_range.max diff --git a/crates/re_log_types/src/time_point/timeline.rs b/crates/re_log_types/src/time_point/timeline.rs index 3f880eef1aec..b6832ed97805 100644 --- a/crates/re_log_types/src/time_point/timeline.rs +++ b/crates/re_log_types/src/time_point/timeline.rs @@ -101,8 +101,7 @@ impl Timeline { time_zone_for_timestamps: TimeZone, ) -> String { format!( - " - {}: from {} to {} (all inclusive)", - self.name, + "{}..={}", self.typ.format(time_range.min, time_zone_for_timestamps), self.typ.format(time_range.max, time_zone_for_timestamps), ) diff --git a/crates/re_query_cache/src/cache.rs b/crates/re_query_cache/src/cache.rs index bebe00e5f1ac..14be0c39a40b 100644 --- a/crates/re_query_cache/src/cache.rs +++ b/crates/re_query_cache/src/cache.rs @@ -12,7 +12,7 @@ use seq_macro::seq; use re_data_store::{ LatestAtQuery, RangeQuery, StoreDiff, StoreEvent, StoreSubscriber, StoreSubscriberHandle, }; -use re_log_types::{EntityPath, RowId, StoreId, TimeInt, Timeline}; +use re_log_types::{EntityPath, RowId, StoreId, TimeInt, TimeRange, Timeline}; use re_query::ArchetypeView; use re_types_core::{ components::InstanceKey, Archetype, ArchetypeName, Component, ComponentName, SizeBytes as _, @@ -425,6 +425,13 @@ impl CacheBucket { self.data_times.iter() } + #[inline] + pub fn time_range(&self) -> Option { + let first_time = self.data_times.front().map(|(t, _)| *t)?; + let last_time = self.data_times.back().map(|(t, _)| *t)?; + Some(TimeRange::new(first_time, last_time)) + } + #[inline] pub fn contains_data_time(&self, data_time: TimeInt) -> bool { let first_time = self.data_times.front().map_or(&TimeInt::MAX, |(t, _)| t); diff --git a/crates/re_query_cache/src/cache_stats.rs b/crates/re_query_cache/src/cache_stats.rs index e912e57d8515..ecf774aa6fb9 100644 --- a/crates/re_query_cache/src/cache_stats.rs +++ b/crates/re_query_cache/src/cache_stats.rs @@ -1,24 +1,9 @@ -use std::{collections::BTreeMap, sync::atomic::AtomicBool}; +use std::collections::BTreeMap; -use re_log_types::EntityPath; +use re_log_types::{EntityPath, TimeRange, Timeline}; use re_types_core::ComponentName; -use crate::{Caches, LatestAtCache}; - -// --- - -/// If `true`, enables the much-more-costly-to-compute per-component stats. -static ENABLE_DETAILED_STATS: AtomicBool = AtomicBool::new(false); - -#[inline] -pub fn detailed_stats() -> bool { - ENABLE_DETAILED_STATS.load(std::sync::atomic::Ordering::Relaxed) -} - -#[inline] -pub fn set_detailed_stats(b: bool) { - ENABLE_DETAILED_STATS.store(b, std::sync::atomic::Ordering::Relaxed); -} +use crate::{Caches, LatestAtCache, RangeCache}; // --- @@ -28,6 +13,7 @@ pub fn set_detailed_stats(b: bool) { #[derive(Default, Debug, Clone)] pub struct CachesStats { pub latest_at: BTreeMap, + pub range: BTreeMap>, } impl CachesStats { @@ -35,12 +21,20 @@ impl CachesStats { pub fn total_size_bytes(&self) -> u64 { re_tracing::profile_function!(); - let Self { latest_at } = self; + let Self { latest_at, range } = self; let latest_at_size_bytes: u64 = latest_at.values().map(|stats| stats.total_size_bytes).sum(); - - latest_at_size_bytes + let range_size_bytes: u64 = range + .values() + .flat_map(|all_ranges| { + all_ranges + .iter() + .map(|(_, _, stats)| stats.total_size_bytes) + }) + .sum(); + + latest_at_size_bytes + range_size_bytes } } @@ -50,10 +44,19 @@ pub struct CachedEntityStats { pub total_rows: u64, pub total_size_bytes: u64, - /// Only if [`detailed_stats`] returns `true` (see [`set_detailed_stats`]). + /// Only if `detailed_stats` is `true` (see [`Caches::stats`]). pub per_component: Option>, } +impl CachedEntityStats { + #[inline] + pub fn is_empty(&self) -> bool { + // NOTE: That looks non-sensical, but it can happen if the cache is bugged, which we'd like + // to know. + self.total_rows == 0 && self.total_size_bytes == 0 + } +} + /// Stats for a cached component. #[derive(Default, Debug, Clone)] pub struct CachedComponentStats { @@ -65,7 +68,7 @@ impl Caches { /// Computes the stats for all primary caches. /// /// `per_component` toggles per-component stats. - pub fn stats() -> CachesStats { + pub fn stats(detailed_stats: bool) -> CachesStats { re_tracing::profile_function!(); Self::with(|caches| { @@ -77,7 +80,7 @@ impl Caches { (key.entity_path.clone(), { let mut total_size_bytes = 0u64; let mut total_rows = 0u64; - let mut per_component = detailed_stats().then(BTreeMap::default); + let mut per_component = detailed_stats.then(BTreeMap::default); for latest_at_cache in caches_per_arch.latest_at_per_archetype.read().values() @@ -93,6 +96,8 @@ impl Caches { total_rows = per_data_time.len() as u64 + timeless.is_some() as u64; if let Some(per_component) = per_component.as_mut() { + re_tracing::profile_scope!("detailed"); + for bucket in per_data_time.values() { for (component_name, data) in &bucket.read().components { let stats: &mut CachedComponentStats = @@ -123,7 +128,53 @@ impl Caches { }) .collect(); - CachesStats { latest_at } + let range = caches + .0 + .read() + .iter() + .map(|(key, caches_per_arch)| { + (key.entity_path.clone(), { + caches_per_arch + .range_per_archetype + .read() + .values() + .map(|range_cache| { + let RangeCache { + bucket, + total_size_bytes, + } = &*range_cache.read(); + + let total_rows = bucket.data_times.len() as u64; + + let mut per_component = detailed_stats.then(BTreeMap::default); + if let Some(per_component) = per_component.as_mut() { + re_tracing::profile_scope!("detailed"); + + for (component_name, data) in &bucket.components { + let stats: &mut CachedComponentStats = + per_component.entry(*component_name).or_default(); + stats.total_rows += data.dyn_num_entries() as u64; + stats.total_instances += data.dyn_num_values() as u64; + } + } + + ( + key.timeline, + bucket.time_range().unwrap_or(TimeRange::EMPTY), + CachedEntityStats { + total_size_bytes: *total_size_bytes, + total_rows, + + per_component, + }, + ) + }) + .collect() + }) + }) + .collect(); + + CachesStats { latest_at, range } }) } } diff --git a/crates/re_query_cache/src/lib.rs b/crates/re_query_cache/src/lib.rs index c5f95d489438..fc7f84439f86 100644 --- a/crates/re_query_cache/src/lib.rs +++ b/crates/re_query_cache/src/lib.rs @@ -8,9 +8,7 @@ mod query; mod range; pub use self::cache::{AnyQuery, Caches}; -pub use self::cache_stats::{ - detailed_stats, set_detailed_stats, CachedComponentStats, CachedEntityStats, CachesStats, -}; +pub use self::cache_stats::{CachedComponentStats, CachedEntityStats, CachesStats}; pub use self::flat_vec_deque::{ErasedFlatVecDeque, FlatVecDeque}; pub use self::query::{ query_archetype_pov1, query_archetype_with_history_pov1, MaybeCachedComponentData, diff --git a/crates/re_viewer/src/app.rs b/crates/re_viewer/src/app.rs index ec8122ae2875..40aa0d1cd32a 100644 --- a/crates/re_viewer/src/app.rs +++ b/crates/re_viewer/src/app.rs @@ -1128,7 +1128,8 @@ impl eframe::App for App { }; let store_stats = store_hub.stats(); - let caches_stats = re_query_cache::Caches::stats(); + let caches_stats = + re_query_cache::Caches::stats(self.memory_panel.primary_cache_detailed_stats_enabled()); // do early, before doing too many allocations self.memory_panel diff --git a/crates/re_viewer/src/ui/memory_panel.rs b/crates/re_viewer/src/ui/memory_panel.rs index f3c587c52094..156e875826e4 100644 --- a/crates/re_viewer/src/ui/memory_panel.rs +++ b/crates/re_viewer/src/ui/memory_panel.rs @@ -1,3 +1,5 @@ +use std::sync::atomic::AtomicBool; + use re_data_store::{DataStoreConfig, DataStoreRowStats, DataStoreStats}; use re_format::{format_bytes, format_number}; use re_memory::{util::sec_since_start, MemoryHistory, MemoryLimit, MemoryUse}; @@ -12,6 +14,13 @@ use crate::{env_vars::RERUN_TRACK_ALLOCATIONS, store_hub::StoreHubStats}; pub struct MemoryPanel { history: MemoryHistory, memory_purge_times: Vec, + + /// If `true`, enables the much-more-costly-to-compute per-component stats for the primary + /// cache. + prim_cache_detailed_stats: AtomicBool, + + /// If `true`, will show stats about empty primary caches too, which likely indicates a bug (dangling bucket). + prim_cache_show_empty: AtomicBool, } impl MemoryPanel { @@ -35,10 +44,17 @@ impl MemoryPanel { } /// Note that we purged memory at this time, to show in stats. + #[inline] pub fn note_memory_purge(&mut self) { self.memory_purge_times.push(sec_since_start()); } + #[inline] + pub fn primary_cache_detailed_stats_enabled(&self) -> bool { + self.prim_cache_detailed_stats + .load(std::sync::atomic::Ordering::Relaxed) + } + #[allow(clippy::too_many_arguments)] pub fn ui( &self, @@ -59,7 +75,7 @@ impl MemoryPanel { .min_width(250.0) .default_width(300.0) .show_inside(ui, |ui| { - Self::left_side( + self.left_side( ui, re_ui, limit, @@ -76,6 +92,7 @@ impl MemoryPanel { } fn left_side( + &self, ui: &mut egui::Ui, re_ui: &re_ui::ReUi, limit: &MemoryLimit, @@ -106,7 +123,7 @@ impl MemoryPanel { ui.separator(); ui.collapsing("Primary Cache Resources", |ui| { - Self::caches_stats(ui, re_ui, caches_stats); + self.caches_stats(ui, re_ui, caches_stats); }); ui.separator(); @@ -309,35 +326,95 @@ impl MemoryPanel { }); } - fn caches_stats(ui: &mut egui::Ui, re_ui: &re_ui::ReUi, caches_stats: &CachesStats) { - let mut detailed_stats = re_query_cache::detailed_stats(); + fn caches_stats(&self, ui: &mut egui::Ui, re_ui: &re_ui::ReUi, caches_stats: &CachesStats) { + use std::sync::atomic::Ordering::Relaxed; + + let mut detailed_stats = self.prim_cache_detailed_stats.load(Relaxed); re_ui .checkbox(ui, &mut detailed_stats, "Detailed stats") .on_hover_text("Show detailed statistics when hovering entity paths below.\nThis will slow down the program."); - re_query_cache::set_detailed_stats(detailed_stats); + self.prim_cache_detailed_stats + .store(detailed_stats, Relaxed); - let CachesStats { latest_at } = caches_stats; + let mut show_empty = self.prim_cache_show_empty.load(Relaxed); + re_ui + .checkbox(ui, &mut show_empty, "Show empty caches") + .on_hover_text( + "Show empty caches too.\nDangling buckets are generally the result of a bug.", + ); + self.prim_cache_show_empty.store(show_empty, Relaxed); - // NOTE: This is a debug tool: do _not_ hide empty things. Empty things are a bug. + let CachesStats { latest_at, range } = caches_stats; - ui.separator(); + if show_empty || !latest_at.is_empty() { + ui.separator(); + ui.strong("LatestAt"); + egui::ScrollArea::vertical() + .max_height(200.0) + .id_source("latest_at") + .show(ui, |ui| { + egui::Grid::new("latest_at cache stats grid") + .num_columns(3) + .show(ui, |ui| { + ui.label(egui::RichText::new("Entity").underline()); + ui.label(egui::RichText::new("Rows").underline()) + .on_hover_text( + "How many distinct data timestamps have been cached?", + ); + ui.label(egui::RichText::new("Size").underline()); + ui.end_row(); - ui.strong("LatestAt"); - egui::Grid::new("latest_at cache stats grid") - .num_columns(3) - .show(ui, |ui| { - ui.label(egui::RichText::new("Entity").underline()); - ui.label(egui::RichText::new("Rows").underline()) - .on_hover_text("How many distinct data timestamps have been cached?"); - ui.label(egui::RichText::new("Size").underline()); - ui.end_row(); + for (entity_path, stats) in latest_at { + if !show_empty && stats.is_empty() { + continue; + } - for (entity_path, stats) in latest_at { - let res = ui.label(entity_path.to_string()); - entity_stats_ui(ui, res, stats); - ui.end_row(); - } - }); + let res = ui.label(entity_path.to_string()); + entity_stats_ui(ui, res, stats); + ui.end_row(); + } + }); + }); + } + + if show_empty || !latest_at.is_empty() { + ui.separator(); + ui.strong("Range"); + egui::ScrollArea::vertical() + .max_height(200.0) + .id_source("range") + .show(ui, |ui| { + egui::Grid::new("range cache stats grid") + .num_columns(4) + .show(ui, |ui| { + ui.label(egui::RichText::new("Entity").underline()); + ui.label(egui::RichText::new("Time range").underline()); + ui.label(egui::RichText::new("Rows").underline()) + .on_hover_text( + "How many distinct data timestamps have been cached?", + ); + ui.label(egui::RichText::new("Size").underline()); + ui.end_row(); + + for (entity_path, stats_per_range) in range { + for (timeline, time_range, stats) in stats_per_range { + if !show_empty && stats.is_empty() { + continue; + } + + let res = ui.label(entity_path.to_string()); + ui.label(format!( + "{}({})", + timeline.name(), + timeline.format_time_range_utc(time_range) + )); + entity_stats_ui(ui, res, stats); + ui.end_row(); + } + } + }); + }); + } fn entity_stats_ui( ui: &mut egui::Ui,