From 73f87a610850be2093a52ab32c4fbc58d7d276dd Mon Sep 17 00:00:00 2001 From: Nicholas Yang Date: Tue, 19 Sep 2023 16:11:29 -0400 Subject: [PATCH] fix(turborepo): Run outline fixes (#5966) ### Description Found a lot of bugs while testing #5851, so split it off into a separate PR. ### Testing Instructions Closes TURBO-1334 --------- Co-authored-by: Alexander Lyon Co-authored-by: nicholaslyang --- crates/turborepo-env/src/lib.rs | 1 + crates/turborepo-lib/src/commands/run.rs | 2 +- crates/turborepo-lib/src/config/turbo.rs | 39 ++++++++-------- crates/turborepo-lib/src/engine/builder.rs | 22 +++++++-- crates/turborepo-lib/src/graph/mod.rs | 41 +++++++++++++++++ crates/turborepo-lib/src/hash/mod.rs | 38 ++++++++++------ .../src/package_graph/builder.rs | 44 +++++++++++++----- crates/turborepo-lib/src/package_graph/mod.rs | 42 ++++------------- .../turborepo-lib/src/package_manager/mod.rs | 15 ++++--- crates/turborepo-lib/src/run/global_hash.rs | 41 ++++++++++++----- crates/turborepo-lib/src/run/mod.rs | 13 +++--- crates/turborepo-lib/src/run/task_id.rs | 10 ++++- crates/turborepo-lib/src/task_graph/mod.rs | 29 +++++------- .../turborepo-lib/src/task_graph/visitor.rs | 28 +++++++++--- crates/turborepo-lib/src/task_hash.rs | 45 +++++++++++-------- .../turborepo-paths/src/relative_unix_path.rs | 4 ++ 16 files changed, 265 insertions(+), 149 deletions(-) diff --git a/crates/turborepo-env/src/lib.rs b/crates/turborepo-env/src/lib.rs index bafa53e0d2844..1208839659c85 100644 --- a/crates/turborepo-env/src/lib.rs +++ b/crates/turborepo-env/src/lib.rs @@ -14,6 +14,7 @@ use thiserror::Error; const DEFAULT_ENV_VARS: [&str; 1] = ["VERCEL_ANALYTICS_ID"]; /// Environment mode after we've resolved the `Infer` variant +#[derive(Debug)] pub enum ResolvedEnvMode { Loose, Strict, diff --git a/crates/turborepo-lib/src/commands/run.rs b/crates/turborepo-lib/src/commands/run.rs index eb75c63116097..812f6f5759d5a 100644 --- a/crates/turborepo-lib/src/commands/run.rs +++ b/crates/turborepo-lib/src/commands/run.rs @@ -4,7 +4,7 @@ use tracing::{debug, error}; use crate::{commands::CommandBase, run::Run}; pub async fn run(base: CommandBase) -> Result<()> { - let mut run = Run::new(base); + let mut run = Run::new(&base); debug!("using the experimental rust codepath"); debug!("configured run struct: {:?}", run); diff --git a/crates/turborepo-lib/src/config/turbo.rs b/crates/turborepo-lib/src/config/turbo.rs index 4ab21d7088057..2b18738a72376 100644 --- a/crates/turborepo-lib/src/config/turbo.rs +++ b/crates/turborepo-lib/src/config/turbo.rs @@ -13,7 +13,7 @@ use crate::{ config::Error, package_json::PackageJson, run::task_id::{TaskId, TaskName, ROOT_PKG_NAME}, - task_graph::{BookkeepingTaskDefinition, Pipeline, TaskDefinitionHashable, TaskOutputs}, + task_graph::{BookkeepingTaskDefinition, Pipeline, TaskDefinitionStable, TaskOutputs}, }; #[derive(Serialize, Deserialize, Debug, Default, PartialEq, Clone)] @@ -31,7 +31,7 @@ pub struct TurboJson { pub(crate) global_deps: Vec, pub(crate) global_dot_env: Vec, pub(crate) global_env: Vec, - pub(crate) global_pass_through_env: Vec, + pub(crate) global_pass_through_env: Option>, pub(crate) pipeline: Pipeline, pub(crate) remote_cache_options: Option, pub(crate) space_id: Option, @@ -234,8 +234,7 @@ impl TryFrom for BookkeepingTaskDefinition { pass_through_env.sort(); Ok(pass_through_env) }) - .transpose()? - .unwrap_or_default(); + .transpose()?; let dot_env = raw_task .dot_env @@ -265,7 +264,7 @@ impl TryFrom for BookkeepingTaskDefinition { defined_fields, experimental_fields: Default::default(), experimental: Default::default(), - task_definition: TaskDefinitionHashable { + task_definition: TaskDefinitionStable { outputs, cache, topological_dependencies, @@ -351,8 +350,7 @@ impl TryFrom for TurboJson { global_pass_through_env.sort(); Ok(global_pass_through_env) }) - .transpose()? - .unwrap_or_default(), + .transpose()?, global_deps: { let mut global_deps: Vec<_> = global_file_dependencies.into_iter().collect(); global_deps.sort(); @@ -455,9 +453,9 @@ impl TurboJson { set.insert("Cache".to_string()); set }, - task_definition: TaskDefinitionHashable { + task_definition: TaskDefinitionStable { cache: false, - ..TaskDefinitionHashable::default() + ..TaskDefinitionStable::default() }, ..BookkeepingTaskDefinition::default() }, @@ -567,8 +565,7 @@ mod tests { package_json::PackageJson, run::task_id::TaskName, task_graph::{ - BookkeepingTaskDefinition, TaskDefinitionExperiments, TaskDefinitionHashable, - TaskOutputs, + BookkeepingTaskDefinition, TaskDefinitionExperiments, TaskDefinitionStable, TaskOutputs, }, }; @@ -587,7 +584,7 @@ mod tests { ; "global dot env (unsorted)")] #[test_case(r#"{ "globalPassThroughEnv": ["GITHUB_TOKEN", "AWS_SECRET_KEY"] }"#, TurboJson { - global_pass_through_env: vec!["AWS_SECRET_KEY".to_string(), "GITHUB_TOKEN".to_string()], + global_pass_through_env: Some(vec!["AWS_SECRET_KEY".to_string(), "GITHUB_TOKEN".to_string()]), ..TurboJson::default() } )] @@ -617,9 +614,9 @@ mod tests { "//#build".into(), BookkeepingTaskDefinition { defined_fields: ["Cache".to_string()].into_iter().collect(), - task_definition: TaskDefinitionHashable { + task_definition: TaskDefinitionStable { cache: false, - ..TaskDefinitionHashable::default() + ..TaskDefinitionStable::default() }, ..BookkeepingTaskDefinition::default() } @@ -652,9 +649,9 @@ mod tests { "//#build".into(), BookkeepingTaskDefinition { defined_fields: ["Cache".to_string()].into_iter().collect(), - task_definition: TaskDefinitionHashable { + task_definition: TaskDefinitionStable { cache: true, - ..TaskDefinitionHashable::default() + ..TaskDefinitionStable::default() }, ..BookkeepingTaskDefinition::default() } @@ -663,9 +660,9 @@ mod tests { "//#test".into(), BookkeepingTaskDefinition { defined_fields: ["Cache".to_string()].into_iter().collect(), - task_definition: TaskDefinitionHashable { + task_definition: TaskDefinitionStable { cache: false, - ..TaskDefinitionHashable::default() + ..TaskDefinitionStable::default() }, ..BookkeepingTaskDefinition::default() } @@ -706,7 +703,7 @@ mod tests { defined_fields: ["Persistent".to_string()].into_iter().collect(), experimental_fields: HashSet::new(), experimental: TaskDefinitionExperiments::default(), - task_definition: TaskDefinitionHashable::default() + task_definition: TaskDefinitionStable::default() } )] #[test_case( @@ -746,7 +743,7 @@ mod tests { ].into_iter().collect(), experimental_fields: HashSet::new(), experimental: TaskDefinitionExperiments {}, - task_definition: TaskDefinitionHashable { + task_definition: TaskDefinitionStable { dot_env: vec![RelativeUnixPathBuf::new("package/a/.env").unwrap()], env: vec!["OS".to_string()], outputs: TaskOutputs { @@ -756,7 +753,7 @@ mod tests { cache: false, inputs: vec!["package/a/src/**".to_string()], output_mode: OutputLogsMode::Full, - pass_through_env: vec!["AWS_SECRET_KEY".to_string()], + pass_through_env: Some(vec!["AWS_SECRET_KEY".to_string()]), task_dependencies: vec!["cli#build".into()], topological_dependencies: vec![], persistent: true, diff --git a/crates/turborepo-lib/src/engine/builder.rs b/crates/turborepo-lib/src/engine/builder.rs index a0a20e874b5e9..91a0fe88247cb 100644 --- a/crates/turborepo-lib/src/engine/builder.rs +++ b/crates/turborepo-lib/src/engine/builder.rs @@ -6,6 +6,7 @@ use turbopath::AbsoluteSystemPath; use super::Engine; use crate::{ config::{validate_extends, validate_no_package_task_syntax, TurboJson}, + graph, package_graph::{PackageGraph, WorkspaceName, WorkspaceNode}, run::task_id::{TaskId, TaskName, ROOT_PKG_NAME}, task_graph::{BookkeepingTaskDefinition, TaskDefinition}, @@ -30,6 +31,8 @@ pub enum Error { Config(#[from] crate::config::Error), #[error("Invalid turbo.json:\n{error_lines}")] Validation { error_lines: String }, + #[error(transparent)] + Graph(#[from] graph::Error), } pub struct EngineBuilder<'a> { @@ -128,6 +131,13 @@ impl<'a> EngineBuilder<'a> { } if !missing_tasks.is_empty() { + let mut missing_tasks = missing_tasks + .into_iter() + .map(|task_name| task_name.to_string()) + .collect::>(); + // We sort the tasks mostly to keep it deterministic for our tests + missing_tasks.sort(); + return Err(Error::MissingTasks(missing_tasks.into_iter().join(", "))); } @@ -238,6 +248,8 @@ impl<'a> EngineBuilder<'a> { } } + graph::validate_graph(&engine.task_graph)?; + Ok(engine.seal()) } @@ -868,9 +880,10 @@ mod test { WorkspaceName::Root, turbo_json(json!({ "pipeline": { - "libA#build": { "dependsOn": ["app1#compile", "app1#build"] }, + "libA#build": { "dependsOn": ["app1#compile", "app1#test"] }, "build": { "dependsOn": ["^build"] }, - "compile": { "dependsOn": ["^build"] } + "compile": {}, + "test": {} } })), )] @@ -889,9 +902,10 @@ mod test { .unwrap(); let expected = deps! { + "app1#compile" => ["___ROOT___"], + "app1#test" => ["___ROOT___"], "app1#build" => ["libA#build"], - "app1#compile" => ["libA#build"], - "libA#build" => ["app1#build", "app1#compile"] + "libA#build" => ["app1#compile", "app1#test"] }; assert_eq!(all_dependencies(&engine), expected); } diff --git a/crates/turborepo-lib/src/graph/mod.rs b/crates/turborepo-lib/src/graph/mod.rs index d48100bdd9c80..94165801fc2cd 100644 --- a/crates/turborepo-lib/src/graph/mod.rs +++ b/crates/turborepo-lib/src/graph/mod.rs @@ -1,3 +1,44 @@ mod walker; +use std::fmt::Display; + +use itertools::Itertools; +use petgraph::prelude::*; +use thiserror::Error; + +#[derive(Debug, Error)] +pub enum Error { + #[error("cyclic dependency detected:\n{0}")] + CyclicDependencies(String), + #[error("{0} depends on itself")] + SelfDependency(String), +} + +pub fn validate_graph(graph: &Graph) -> Result<(), Error> { + // This is equivalent to AcyclicGraph.Cycles from Go's dag library + let cycles_lines = petgraph::algo::tarjan_scc(&graph) + .into_iter() + .filter(|cycle| cycle.len() > 1) + .map(|cycle| { + let workspaces = cycle.into_iter().map(|id| graph.node_weight(id).unwrap()); + format!("\t{}", workspaces.format(", ")) + }) + .join("\n"); + + if !cycles_lines.is_empty() { + return Err(Error::CyclicDependencies(cycles_lines)); + } + + for edge in graph.edge_references() { + if edge.source() == edge.target() { + let node = graph + .node_weight(edge.source()) + .expect("edge pointed to missing node"); + return Err(Error::SelfDependency(node.to_string())); + } + } + + Ok(()) +} + pub use walker::{WalkMessage, Walker}; diff --git a/crates/turborepo-lib/src/hash/mod.rs b/crates/turborepo-lib/src/hash/mod.rs index 973545dae475b..8f1215c538583 100644 --- a/crates/turborepo-lib/src/hash/mod.rs +++ b/crates/turborepo-lib/src/hash/mod.rs @@ -41,15 +41,16 @@ mod proto_capnp { } } +#[derive(Debug)] pub struct TaskHashable<'a> { // hashes pub(crate) global_hash: &'a str, pub(crate) task_dependency_hashes: Vec, pub(crate) hash_of_files: &'a str, - pub(crate) external_deps_hash: String, + pub(crate) external_deps_hash: Option, // task - pub(crate) package_dir: turbopath::RelativeUnixPathBuf, + pub(crate) package_dir: Option, pub(crate) task: &'a str, pub(crate) outputs: TaskOutputs, pub(crate) pass_through_args: &'a [String], @@ -62,11 +63,12 @@ pub struct TaskHashable<'a> { pub(crate) dot_env: &'a [turbopath::RelativeUnixPathBuf], } -#[derive(Debug)] +#[derive(Debug, Clone)] pub struct GlobalHashable<'a> { pub global_cache_key: &'static str, pub global_file_hash_map: HashMap, - pub root_external_dependencies_hash: String, + // This is None in single package mode + pub root_external_dependencies_hash: Option, pub env: &'a [String], pub resolved_env_vars: Vec, pub pass_through_env: &'a [String], @@ -198,9 +200,15 @@ impl From> for Builder { let mut builder = message.init_root(); builder.set_global_hash(task_hashable.global_hash); - builder.set_package_dir(&task_hashable.package_dir.to_string()); + if let Some(package_dir) = task_hashable.package_dir { + builder.set_package_dir(&package_dir.to_string()); + } + builder.set_hash_of_files(task_hashable.hash_of_files); - builder.set_external_deps_hash(&task_hashable.external_deps_hash); + if let Some(external_deps_hash) = task_hashable.external_deps_hash { + builder.set_external_deps_hash(&external_deps_hash); + } + builder.set_task(task_hashable.task); builder.set_env_mode(task_hashable.env_mode.into()); @@ -221,11 +229,11 @@ impl From> for Builder { } { - let mut pass_thru_args_builder = builder + let mut pass_through_args_builder = builder .reborrow() .init_pass_thru_args(task_hashable.pass_through_args.len() as u32); for (i, arg) in task_hashable.pass_through_args.iter().enumerate() { - pass_thru_args_builder.set(i as u32, arg); + pass_through_args_builder.set(i as u32, arg); } } @@ -237,11 +245,11 @@ impl From> for Builder { } { - let mut pass_thru_env_builder = builder + let mut pass_through_env_builder = builder .reborrow() .init_pass_thru_env(task_hashable.pass_through_env.len() as u32); for (i, env) in task_hashable.pass_through_env.iter().enumerate() { - pass_thru_env_builder.set(i as u32, env); + pass_through_env_builder.set(i as u32, env); } } @@ -307,7 +315,9 @@ impl<'a> From> for Builder { } } - builder.set_root_external_deps_hash(&hashable.root_external_dependencies_hash); + if let Some(root_external_dependencies_hash) = hashable.root_external_dependencies_hash { + builder.set_root_external_deps_hash(&root_external_dependencies_hash); + } { let mut entries = builder.reborrow().init_env(hashable.env.len() as u32); @@ -384,9 +394,9 @@ mod test { let task_hashable = TaskHashable { global_hash: "global_hash", task_dependency_hashes: vec!["task_dependency_hash".to_string()], - package_dir: turbopath::RelativeUnixPathBuf::new("package_dir").unwrap(), + package_dir: Some(turbopath::RelativeUnixPathBuf::new("package_dir").unwrap()), hash_of_files: "hash_of_files", - external_deps_hash: "external_deps_hash".to_string(), + external_deps_hash: Some("external_deps_hash".to_string()), task: "task", outputs: TaskOutputs { inclusions: vec!["inclusions".to_string()], @@ -413,7 +423,7 @@ mod test { )] .into_iter() .collect(), - root_external_dependencies_hash: "0000000000000000".to_string(), + root_external_dependencies_hash: Some("0000000000000000".to_string()), env: &["env".to_string()], resolved_env_vars: vec![], pass_through_env: &["pass_through_env".to_string()], diff --git a/crates/turborepo-lib/src/package_graph/builder.rs b/crates/turborepo-lib/src/package_graph/builder.rs index 7352a95dfe9d1..f9011192f0fda 100644 --- a/crates/turborepo-lib/src/package_graph/builder.rs +++ b/crates/turborepo-lib/src/package_graph/builder.rs @@ -1,4 +1,5 @@ use std::{ + backtrace::Backtrace, collections::{BTreeMap, HashMap, HashSet}, fmt, }; @@ -13,6 +14,7 @@ use turborepo_lockfiles::Lockfile; use super::{PackageGraph, WorkspaceInfo, WorkspaceName, WorkspaceNode}; use crate::{ + graph, package_graph::{PackageName, PackageVersion}, package_json::PackageJson, package_manager::PackageManager, @@ -25,12 +27,16 @@ pub struct PackageGraphBuilder<'a> { package_manager: Option, package_jsons: Option>, lockfile: Option>, + quiet: bool, } #[derive(Debug, thiserror::Error)] pub enum Error { #[error("could not resolve workspaces: {0}")] - PackageManager(#[from] crate::package_manager::Error), + PackageManager( + #[from] crate::package_manager::Error, + #[backtrace] Backtrace, + ), #[error( "Failed to add workspace \"{name}\" from \"{path}\", it already exists at \ \"{existing_path}\"" @@ -46,10 +52,8 @@ pub enum Error { PackageJson(#[from] crate::package_json::Error), #[error("package.json must have a name field:\n{0}")] PackageJsonMissingName(AbsoluteSystemPathBuf), - #[error("cyclic dependency detected:\n{0}")] - CyclicDependencies(String), - #[error("{0} depends on itself")] - SelfDependency(WorkspaceNode), + #[error(transparent)] + Graph(#[from] graph::Error), #[error(transparent)] Lockfile(#[from] turborepo_lockfiles::Error), } @@ -63,6 +67,7 @@ impl<'a> PackageGraphBuilder<'a> { package_manager: None, package_jsons: None, lockfile: None, + quiet: false, } } @@ -77,6 +82,11 @@ impl<'a> PackageGraphBuilder<'a> { self } + pub fn with_quiet(mut self) -> Self { + self.quiet = true; + self + } + #[allow(dead_code)] pub fn with_package_jsons( mut self, @@ -115,6 +125,7 @@ struct BuildState<'a, S> { node_lookup: HashMap, lockfile: Option>, package_jsons: Option>, + quiet: bool, state: std::marker::PhantomData, } @@ -153,6 +164,7 @@ impl<'a> BuildState<'a, ResolvedPackageManager> { package_manager, package_jsons, lockfile, + quiet, } = builder; let package_manager = package_manager.map_or_else( || PackageManager::get_package_manager(repo_root, Some(&root_package_json)), @@ -163,6 +175,7 @@ impl<'a> BuildState<'a, ResolvedPackageManager> { WorkspaceName::Root, WorkspaceInfo { package_json: root_package_json, + package_json_path: AnchoredSystemPathBuf::from_raw("package.json").unwrap(), ..Default::default() }, ); @@ -174,6 +187,7 @@ impl<'a> BuildState<'a, ResolvedPackageManager> { workspaces, lockfile, package_jsons, + quiet, workspace_graph: Graph::new(), node_lookup: HashMap::new(), state: std::marker::PhantomData, @@ -244,6 +258,7 @@ impl<'a> BuildState<'a, ResolvedPackageManager> { workspace_graph, node_lookup, lockfile, + quiet, .. } = self; Ok(BuildState { @@ -254,6 +269,7 @@ impl<'a> BuildState<'a, ResolvedPackageManager> { workspace_graph, node_lookup, lockfile, + quiet, package_jsons: None, state: std::marker::PhantomData, }) @@ -353,11 +369,13 @@ impl<'a> BuildState<'a, ResolvedWorkspaces> { let lockfile = match self.populate_lockfile() { Ok(lockfile) => Some(lockfile), Err(e) => { - warn!( - "Issues occurred when constructing package graph. Turbo will function, but \ - some features may not be available: {}", - e - ); + if !self.quiet { + warn!( + "Issues occurred when constructing package graph. Turbo will function, \ + but some features may not be available: {}", + e + ); + } None } }; @@ -369,6 +387,7 @@ impl<'a> BuildState<'a, ResolvedWorkspaces> { workspaces, workspace_graph, node_lookup, + quiet, .. } = self; Ok(BuildState { @@ -379,6 +398,7 @@ impl<'a> BuildState<'a, ResolvedWorkspaces> { workspace_graph, node_lookup, lockfile, + quiet, package_jsons: None, state: std::marker::PhantomData, }) @@ -427,7 +447,9 @@ impl<'a> BuildState<'a, ResolvedLockfile> { fn build(mut self) -> PackageGraph { if let Err(e) = self.populate_transitive_dependencies() { - warn!("Unable to calculate transitive closures: {}", e); + if !self.quiet { + warn!("Unable to calculate transitive closures: {}", e); + } } let Self { package_manager, diff --git a/crates/turborepo-lib/src/package_graph/mod.rs b/crates/turborepo-lib/src/package_graph/mod.rs index 8ac4e96ad4d26..11c66f3974c83 100644 --- a/crates/turborepo-lib/src/package_graph/mod.rs +++ b/crates/turborepo-lib/src/package_graph/mod.rs @@ -4,12 +4,11 @@ use std::{ }; use anyhow::Result; -use itertools::Itertools; -use petgraph::visit::{depth_first_search, EdgeRef, Reversed}; +use petgraph::visit::{depth_first_search, Reversed}; use turbopath::{AbsoluteSystemPath, AnchoredSystemPath, AnchoredSystemPathBuf}; use turborepo_lockfiles::Lockfile; -use crate::{package_json::PackageJson, package_manager::PackageManager}; +use crate::{graph, package_json::PackageJson, package_manager::PackageManager}; mod builder; @@ -102,34 +101,8 @@ impl PackageGraph { PackageGraphBuilder::new(repo_root, root_package_json) } - pub fn validate(&self) -> Result<(), builder::Error> { - // This is equivalent to AcyclicGraph.Cycles from Go's dag library - let cycles_lines = petgraph::algo::tarjan_scc(&self.workspace_graph) - .into_iter() - .filter(|cycle| cycle.len() > 1) - .map(|cycle| { - let workspaces = cycle - .into_iter() - .map(|id| self.workspace_graph.node_weight(id).unwrap()); - format!("\t{}", workspaces.format(", ")) - }) - .join("\n"); - - if !cycles_lines.is_empty() { - return Err(builder::Error::CyclicDependencies(cycles_lines)); - } - - for edge in self.workspace_graph.edge_references() { - if edge.source() == edge.target() { - let node = self - .workspace_graph - .node_weight(edge.source()) - .expect("edge pointed to missing node"); - return Err(builder::Error::SelfDependency(node.clone())); - } - } - - Ok(()) + pub fn validate(&self) -> Result<(), Error> { + Ok(graph::validate_graph(&self.workspace_graph)?) } /// Returns the number of workspaces in the repo @@ -681,7 +654,7 @@ mod test { assert_matches!( pkg_graph.validate(), - Err(builder::Error::CyclicDependencies(_)) + Err(builder::Error::Graph(graph::Error::CyclicDependencies(_))) ); } @@ -712,6 +685,9 @@ mod test { .build() .unwrap(); - assert_matches!(pkg_graph.validate(), Err(builder::Error::SelfDependency(_))); + assert_matches!( + pkg_graph.validate(), + Err(builder::Error::Graph(graph::Error::SelfDependency(_))) + ); } } diff --git a/crates/turborepo-lib/src/package_manager/mod.rs b/crates/turborepo-lib/src/package_manager/mod.rs index 0cc9d7a43dbc5..35e3b07801aae 100644 --- a/crates/turborepo-lib/src/package_manager/mod.rs +++ b/crates/turborepo-lib/src/package_manager/mod.rs @@ -294,20 +294,21 @@ impl PackageManager { &self, root_path: &AbsoluteSystemPath, ) -> Result { - let (mut inclusions, mut exclusions) = self.get_configured_workspace_globs(root_path)?; + let (inclusions, mut exclusions) = self.get_configured_workspace_globs(root_path)?; exclusions.extend(self.get_default_exclusions()); // Yarn appends node_modules to every other glob specified if *self == PackageManager::Yarn { inclusions - .iter_mut() + .iter() .for_each(|inclusion| exclusions.push(format!("{inclusion}/node_modules/**"))); } + let globs = WorkspaceGlobs::new(inclusions, exclusions)?; Ok(globs) } - fn get_default_exclusions(&self) -> impl Iterator { + pub fn get_default_exclusions(&self) -> impl Iterator { let ignores = match self { PackageManager::Pnpm | PackageManager::Pnpm6 => { ["**/node_modules/**", "**/bower_components/**"].as_slice() @@ -326,8 +327,11 @@ impl PackageManager { ) -> Result<(Vec, Vec), Error> { let globs = match self { PackageManager::Pnpm | PackageManager::Pnpm6 => { + // Make sure to convert this to a missing workspace error + // so we can catch it in the case of single package mode. let workspace_yaml = - fs::read_to_string(root_path.join_component("pnpm-workspace.yaml"))?; + fs::read_to_string(root_path.join_component("pnpm-workspace.yaml")) + .map_err(|_| Error::Workspace(MissingWorkspaceError::from(self)))?; let pnpm_workspace: PnpmWorkspace = serde_yaml::from_str(&workspace_yaml)?; if pnpm_workspace.packages.is_empty() { return Err(MissingWorkspaceError::from(self).into()); @@ -341,7 +345,8 @@ impl PackageManager { | PackageManager::Bun => { let package_json_text = fs::read_to_string(root_path.join_component("package.json"))?; - let package_json: PackageJsonWorkspaces = serde_json::from_str(&package_json_text)?; + let package_json: PackageJsonWorkspaces = serde_json::from_str(&package_json_text) + .map_err(|_| Error::Workspace(MissingWorkspaceError::from(self)))?; // Make sure to convert this to a missing workspace error if package_json.workspaces.as_ref().is_empty() { return Err(MissingWorkspaceError::from(self).into()); diff --git a/crates/turborepo-lib/src/run/global_hash.rs b/crates/turborepo-lib/src/run/global_hash.rs index d2b119b5ebb96..7dd310908a8a3 100644 --- a/crates/turborepo-lib/src/run/global_hash.rs +++ b/crates/turborepo-lib/src/run/global_hash.rs @@ -13,6 +13,7 @@ use crate::{ cli::EnvMode, hash::{GlobalHashable, TurboHash}, package_graph::WorkspaceInfo, + package_manager, package_manager::PackageManager, }; @@ -29,11 +30,12 @@ enum GlobalHashError {} pub struct GlobalHashableInputs<'a> { global_cache_key: &'static str, global_file_hash_map: HashMap, - root_external_dependencies_hash: String, + // This is `None` in single package mode + root_external_dependencies_hash: Option, env: &'a [String], // Only Option to allow #[derive(Default)] resolved_env_vars: Option, - pass_through_env: &'a [String], + pass_through_env: Option<&'a [String]>, env_mode: EnvMode, framework_inference: bool, dot_env: &'a [RelativeUnixPathBuf], @@ -41,6 +43,7 @@ pub struct GlobalHashableInputs<'a> { #[allow(clippy::too_many_arguments)] pub fn get_global_hash_inputs<'a, L: ?Sized + Lockfile>( + is_monorepo: bool, root_workspace: &WorkspaceInfo, root_path: &AbsoluteSystemPath, package_manager: &PackageManager, @@ -48,7 +51,7 @@ pub fn get_global_hash_inputs<'a, L: ?Sized + Lockfile>( global_file_dependencies: &'a [String], env_at_execution_start: &EnvironmentVariableMap, global_env: &'a [String], - global_pass_through_env: &'a [String], + global_pass_through_env: Option<&'a [String]>, env_mode: EnvMode, framework_inference: bool, dot_env: &'a [RelativeUnixPathBuf], @@ -64,12 +67,23 @@ pub fn get_global_hash_inputs<'a, L: ?Sized + Lockfile>( let mut global_deps = HashSet::new(); if !global_file_dependencies.is_empty() { - let globs = package_manager.get_workspace_globs(root_path)?; + let exclusions = match package_manager.get_workspace_globs(root_path) { + Ok(globs) => globs.raw_exclusions, + // If we hit a missing workspaces error, we could be in single package mode + // so we should just use the default globs + Err(package_manager::Error::Workspace(_)) => { + package_manager.get_default_exclusions().collect() + } + Err(err) => { + debug!("no workspace globs found"); + return Err(err.into()); + } + }; let files = globwalk::globwalk( root_path, global_file_dependencies, - &globs.raw_exclusions, + &exclusions, WalkType::All, )?; @@ -102,11 +116,14 @@ pub fn get_global_hash_inputs<'a, L: ?Sized + Lockfile>( global_file_hash_map.extend(dot_env_object); } - let root_external_dependencies_hash = root_workspace.get_external_deps_hash(); + let root_external_dependencies_hash = + is_monorepo.then(|| root_workspace.get_external_deps_hash()); debug!( - "rust external deps hash: {}", + "external deps hash: {}", root_external_dependencies_hash + .as_deref() + .unwrap_or("no hash (single package)") ); Ok(GlobalHashableInputs { @@ -125,11 +142,15 @@ pub fn get_global_hash_inputs<'a, L: ?Sized + Lockfile>( impl<'a> GlobalHashableInputs<'a> { pub fn calculate_global_hash_from_inputs(mut self) -> String { match self.env_mode { - EnvMode::Infer if !self.pass_through_env.is_empty() => { + // In infer mode, if there is any pass_through config (even if it is an empty array) + // we'll hash the whole object, so we can detect changes to that config + // Further, resolve the envMode to the concrete value. + EnvMode::Infer if self.pass_through_env.is_some() => { self.env_mode = EnvMode::Strict; } EnvMode::Loose => { - self.pass_through_env = &[]; + // Remove the passthroughs from hash consideration if we're explicitly loose. + self.pass_through_env = None; } _ => {} } @@ -147,7 +168,7 @@ impl<'a> GlobalHashableInputs<'a> { .resolved_env_vars .map(|evm| evm.all.to_hashable()) .unwrap_or_default(), - pass_through_env: self.pass_through_env, + pass_through_env: self.pass_through_env.unwrap_or_default(), env_mode: self.env_mode, framework_inference: self.framework_inference, dot_env: self.dot_env, diff --git a/crates/turborepo-lib/src/run/mod.rs b/crates/turborepo-lib/src/run/mod.rs index f5120041662bd..75aa7e8bbba08 100644 --- a/crates/turborepo-lib/src/run/mod.rs +++ b/crates/turborepo-lib/src/run/mod.rs @@ -38,13 +38,13 @@ use crate::{ }; #[derive(Debug)] -pub struct Run { - base: CommandBase, +pub struct Run<'a> { + base: &'a CommandBase, processes: ProcessManager, } -impl Run { - pub fn new(base: CommandBase) -> Self { +impl<'a> Run<'a> { + pub fn new(base: &'a CommandBase) -> Self { let processes = ProcessManager::new(); Self { base, processes } } @@ -204,6 +204,7 @@ impl Run { .expect("must have root workspace"); let global_hash_inputs = get_global_hash_inputs( + !opts.run_opts.single_package, root_workspace, &self.base.repo_root, pkg_dep_graph.package_manager(), @@ -211,7 +212,7 @@ impl Run { &root_turbo_json.global_deps, &env_at_execution_start, &root_turbo_json.global_env, - &root_turbo_json.global_pass_through_env, + root_turbo_json.global_pass_through_env.as_deref(), opts.run_opts.env_mode, opts.run_opts.framework_inference, &root_turbo_json.global_dot_env, @@ -234,7 +235,7 @@ impl Run { let mut global_env_mode = opts.run_opts.env_mode; if matches!(global_env_mode, EnvMode::Infer) - && !root_turbo_json.global_pass_through_env.is_empty() + && root_turbo_json.global_pass_through_env.is_some() { global_env_mode = EnvMode::Strict; } diff --git a/crates/turborepo-lib/src/run/task_id.rs b/crates/turborepo-lib/src/run/task_id.rs index be269b1231b7d..34171a0492871 100644 --- a/crates/turborepo-lib/src/run/task_id.rs +++ b/crates/turborepo-lib/src/run/task_id.rs @@ -8,7 +8,8 @@ pub const TASK_DELIMITER: &str = "#"; pub const ROOT_PKG_NAME: &str = "//"; /// A task identifier as it will appear in the task graph -#[derive(Debug, Clone, Hash, PartialEq, Eq, PartialOrd, Ord)] +#[derive(Debug, Clone, Hash, PartialEq, Eq, PartialOrd, Ord, Serialize)] +#[serde(from = "String")] pub struct TaskId<'a> { package: Cow<'a, str>, task: Cow<'a, str>, @@ -66,6 +67,13 @@ impl<'a> TaskId<'a> { &self.package } + pub fn to_workspace_name(&self) -> WorkspaceName { + match self.package.as_ref() { + ROOT_PKG_NAME => WorkspaceName::Root, + package => WorkspaceName::Other(package.into()), + } + } + pub fn task(&self) -> &str { &self.task } diff --git a/crates/turborepo-lib/src/task_graph/mod.rs b/crates/turborepo-lib/src/task_graph/mod.rs index c2c9852adccf4..0ded9e830047a 100644 --- a/crates/turborepo-lib/src/task_graph/mod.rs +++ b/crates/turborepo-lib/src/task_graph/mod.rs @@ -18,7 +18,7 @@ pub struct BookkeepingTaskDefinition { pub defined_fields: HashSet, pub experimental_fields: HashSet, pub experimental: TaskDefinitionExperiments, - pub task_definition: TaskDefinitionHashable, + pub task_definition: TaskDefinitionStable, } // A list of config fields in a task definition that are considered @@ -35,12 +35,11 @@ pub struct TaskOutputs { pub exclusions: Vec, } -// taskDefinitionHashable exists as a definition for PristinePipeline, which is -// used downstream for calculating the global hash. We want to exclude -// experimental fields here because we don't want experimental fields to be part -// of the global hash. +// These are the stable fields of a TaskDefinition, versus the experimental ones +// TODO: Consolidate this and experiments, because the split is an artifact of +// the Go implementation #[derive(Clone, Debug, Serialize, PartialEq, Eq)] -pub struct TaskDefinitionHashable { +pub struct TaskDefinitionStable { pub(crate) outputs: TaskOutputs, pub(crate) cache: bool, pub(crate) topological_dependencies: Vec>, @@ -49,11 +48,11 @@ pub struct TaskDefinitionHashable { pub(crate) output_mode: OutputLogsMode, pub(crate) persistent: bool, pub(crate) env: Vec, - pub(crate) pass_through_env: Vec, + pub(crate) pass_through_env: Option>, pub(crate) dot_env: Vec, } -impl Default for TaskDefinitionHashable { +impl Default for TaskDefinitionStable { fn default() -> Self { Self { outputs: TaskOutputs::default(), @@ -64,7 +63,7 @@ impl Default for TaskDefinitionHashable { output_mode: OutputLogsMode::default(), persistent: false, env: Vec::new(), - pass_through_env: Vec::new(), + pass_through_env: None, dot_env: Vec::new(), } } @@ -79,7 +78,7 @@ pub struct TaskDefinition { // This field is custom-marshalled from `env` and `depends_on`` pub(crate) env: Vec, - pub(crate) pass_through_env: Vec, + pub(crate) pass_through_env: Option>, pub(crate) dot_env: Vec, @@ -110,13 +109,7 @@ pub struct TaskDefinition { impl BookkeepingTaskDefinition { // Useful for splitting out the metadata vs fields which allows for easier // definition merging - fn split( - self, - ) -> ( - Bookkeeping, - TaskDefinitionHashable, - TaskDefinitionExperiments, - ) { + fn split(self) -> (Bookkeeping, TaskDefinitionStable, TaskDefinitionExperiments) { let Self { defined_fields, experimental_fields, @@ -230,7 +223,7 @@ impl TaskDefinition { // TODO(olszewski) simplify this construction and throw off the shackles of Go let ( meta, - TaskDefinitionHashable { + TaskDefinitionStable { outputs, cache, topological_dependencies, diff --git a/crates/turborepo-lib/src/task_graph/visitor.rs b/crates/turborepo-lib/src/task_graph/visitor.rs index 7ab20c0c1107e..869a1e66ec30c 100644 --- a/crates/turborepo-lib/src/task_graph/visitor.rs +++ b/crates/turborepo-lib/src/task_graph/visitor.rs @@ -22,7 +22,7 @@ use crate::{ RunCache, }, task_hash, - task_hash::{PackageInputsHashes, TaskHasher}, + task_hash::{PackageInputsHashes, TaskHashTracker, TaskHasher}, }; // This holds the whole world @@ -35,6 +35,8 @@ pub struct Visitor<'a> { sink: OutputSink, color_cache: ColorSelector, ui: UI, + // For testing purposes we need to be able to silence the output + silent: bool, } #[derive(Debug, thiserror::Error)] @@ -89,24 +91,29 @@ impl<'a> Visitor<'a> { sink, color_cache, ui, + silent: false, } } + pub fn silent(mut self) -> Self { + self.silent = true; + self + } + pub async fn visit(&self, engine: Arc) -> Result<(), Error> { let concurrency = self.opts.run_opts.concurrency as usize; let (node_sender, mut node_stream) = mpsc::channel(concurrency); - let engine_handle = { let engine = engine.clone(); tokio::spawn(engine.execute(ExecutionOptions::new(false, concurrency), node_sender)) }; - let mut tasks = FuturesUnordered::new(); while let Some(message) = node_stream.recv().await { let crate::engine::Message { info, callback } = message; let is_github_actions = self.opts.run_opts.is_github_actions; let package_name = WorkspaceName::from(info.package()); + let workspace_dir = self.package_graph .workspace_dir(&package_name) @@ -146,7 +153,7 @@ impl<'a> Visitor<'a> { let task_env_mode = match self.global_env_mode { // Task env mode is only independent when global env mode is `infer`. - EnvMode::Infer if !task_definition.pass_through_env.is_empty() => { + EnvMode::Infer if task_definition.pass_through_env.is_some() => { ResolvedEnvMode::Strict } // If we're in infer mode we have just detected non-usage of strict env vars. @@ -184,12 +191,15 @@ impl<'a> Visitor<'a> { let prefix = self.prefix(&info); let pretty_prefix = self.color_cache.prefix_with_color(&task_hash, &prefix); let ui = self.ui; + let silent = self.silent; tasks.push(tokio::spawn(async move { let _task_cache = task_cache; - let mut prefixed_ui = - Self::prefixed_ui(ui, is_github_actions, &output_client, pretty_prefix); - prefixed_ui.output(command); + if !silent { + let mut prefixed_ui = + Self::prefixed_ui(ui, is_github_actions, &output_client, pretty_prefix); + prefixed_ui.output(command); + } callback.send(Ok(())).unwrap(); })); } @@ -262,6 +272,10 @@ impl<'a> Visitor<'a> { } prefixed_ui } + + pub fn into_task_hash_tracker(self) -> TaskHashTracker { + self.task_hasher.into_task_hash_tracker() + } } // A tiny enum that allows us to use the same type for stdout and stderr without diff --git a/crates/turborepo-lib/src/task_hash.rs b/crates/turborepo-lib/src/task_hash.rs index 940be3150a476..f31a6fd5c4714 100644 --- a/crates/turborepo-lib/src/task_hash.rs +++ b/crates/turborepo-lib/src/task_hash.rs @@ -4,6 +4,7 @@ use std::{ }; use rayon::prelude::*; +use serde::Serialize; use thiserror::Error; use tracing::debug; use turbopath::{AbsoluteSystemPath, AnchoredSystemPath, AnchoredSystemPathBuf}; @@ -16,7 +17,7 @@ use crate::{ hash::{FileHashes, TaskHashable, TurboHash}, opts::Opts, package_graph::{WorkspaceInfo, WorkspaceName}, - run::task_id::{TaskId, ROOT_PKG_NAME}, + run::task_id::TaskId, task_graph::TaskDefinition, }; @@ -72,10 +73,6 @@ impl PackageInputsHashes { return None; }; - if task_id.package() == ROOT_PKG_NAME { - return None; - } - let task_definition = match task_definitions .get(task_id) .ok_or_else(|| Error::MissingPipelineEntry(task_id.clone())) @@ -84,8 +81,7 @@ impl PackageInputsHashes { Err(err) => return Some(Err(err)), }; - // TODO: Look into making WorkspaceName take a Cow - let workspace_name = WorkspaceName::Other(task_id.package().to_string()); + let workspace_name = task_id.to_workspace_name(); let pkg = match workspaces .get(&workspace_name) @@ -144,11 +140,14 @@ impl PackageInputsHashes { } } -#[derive(Default)] +#[derive(Default, Debug, Serialize)] pub struct TaskHashTracker { + #[serde(skip)] package_task_env_vars: HashMap, DetailedMap>, package_task_hashes: HashMap, String>, + #[serde(skip)] package_task_framework: HashMap, String>, + #[serde(skip)] package_task_outputs: HashMap, Vec>, } @@ -199,7 +198,7 @@ impl<'a> TaskHasher<'a> { let mut matching_env_var_map = EnvironmentVariableMap::default(); if do_framework_inference { - // Se if we infer a framework + // See if we infer a framework if let Some(framework) = infer_framework(workspace, is_monorepo) { debug!("auto detected framework for {}", task_id.package()); debug!( @@ -244,7 +243,7 @@ impl<'a> TaskHasher<'a> { matching_env_var_map.union(&inference_env_var_map); matching_env_var_map.difference(&user_env_var_set.exclusions); } else { - let all_env_var_map = self + all_env_var_map = self .env_at_execution_start .from_wildcards(&task_definition.env)?; @@ -269,6 +268,7 @@ impl<'a> TaskHasher<'a> { let hashable_env_pairs = env_vars.all.to_hashable(); let outputs = task_definition.hashable_outputs(task_id); let task_dependency_hashes = self.calculate_dependency_hashes(dependency_set)?; + let external_deps_hash = is_monorepo.then(|| workspace.get_external_deps_hash()); debug!( "task hash env vars for {}:{}\n vars: {:?}", @@ -277,23 +277,32 @@ impl<'a> TaskHasher<'a> { hashable_env_pairs ); + let package_dir = workspace.package_path().to_unix(); + let is_root_package = package_dir.is_empty(); + // We wrap in an Option to mimic Go's serialization of nullable values + let optional_package_dir = (!is_root_package).then_some(package_dir); + let task_hashable = TaskHashable { global_hash: self.global_hash, task_dependency_hashes, - package_dir: workspace.package_path().to_unix(), + package_dir: optional_package_dir, hash_of_files, - external_deps_hash: workspace.get_external_deps_hash(), + external_deps_hash, task: task_id.task(), outputs, pass_through_args: self.opts.run_opts.pass_through_args, env: &task_definition.env, resolved_env_vars: hashable_env_pairs, - pass_through_env: &task_definition.pass_through_env, + pass_through_env: task_definition + .pass_through_env + .as_deref() + .unwrap_or_default(), env_mode: task_env_mode, dot_env: &task_definition.dot_env, }; - let task_hash = task_hashable.hash(); + + let task_hash = task_hashable.calculate_task_hash(); let mut task_hash_tracker = self.task_hash_tracker.lock().map_err(|_| Error::Mutex)?; task_hash_tracker @@ -326,10 +335,6 @@ impl<'a> TaskHasher<'a> { continue; }; - if dependency_task_id.package() == ROOT_PKG_NAME { - continue; - } - let task_hash_tracker = self.task_hash_tracker.lock().map_err(|_| Error::Mutex)?; let dependency_hash = task_hash_tracker .package_task_hashes @@ -343,4 +348,8 @@ impl<'a> TaskHasher<'a> { Ok(dependency_hash_list) } + + pub fn into_task_hash_tracker(self) -> TaskHashTracker { + self.task_hash_tracker.into_inner().unwrap() + } } diff --git a/crates/turborepo-paths/src/relative_unix_path.rs b/crates/turborepo-paths/src/relative_unix_path.rs index 05c408b4ca573..6f06eaf95d579 100644 --- a/crates/turborepo-paths/src/relative_unix_path.rs +++ b/crates/turborepo-paths/src/relative_unix_path.rs @@ -48,6 +48,10 @@ impl RelativeUnixPath { AnchoredSystemPathBuf(self.to_system_path_buf()) } + pub fn is_empty(&self) -> bool { + self.0.is_empty() + } + pub fn to_owned(&self) -> RelativeUnixPathBuf { RelativeUnixPathBuf(self.0.to_owned()) }