Skip to content

Commit

Permalink
Start setting up tpch planning benchmarks (#8665)
Browse files Browse the repository at this point in the history
* Start setting up tpch planning benchmarks

* Add remaining tpch queries

* Fix bench function

* Clippy
  • Loading branch information
matthewmturner authored Dec 30, 2023
1 parent 00a679a commit 545275b
Showing 1 changed file with 156 additions and 0 deletions.
156 changes: 156 additions & 0 deletions datafusion/core/benches/sql_planner.rs
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,104 @@ pub fn create_table_provider(column_prefix: &str, num_columns: usize) -> Arc<Mem
MemTable::try_new(schema, vec![]).map(Arc::new).unwrap()
}

pub fn create_tpch_schemas() -> [(String, Schema); 8] {
let lineitem_schema = Schema::new(vec![
Field::new("l_orderkey", DataType::Int64, false),
Field::new("l_partkey", DataType::Int64, false),
Field::new("l_suppkey", DataType::Int64, false),
Field::new("l_linenumber", DataType::Int32, false),
Field::new("l_quantity", DataType::Decimal128(15, 2), false),
Field::new("l_extendedprice", DataType::Decimal128(15, 2), false),
Field::new("l_discount", DataType::Decimal128(15, 2), false),
Field::new("l_tax", DataType::Decimal128(15, 2), false),
Field::new("l_returnflag", DataType::Utf8, false),
Field::new("l_linestatus", DataType::Utf8, false),
Field::new("l_shipdate", DataType::Date32, false),
Field::new("l_commitdate", DataType::Date32, false),
Field::new("l_receiptdate", DataType::Date32, false),
Field::new("l_shipinstruct", DataType::Utf8, false),
Field::new("l_shipmode", DataType::Utf8, false),
Field::new("l_comment", DataType::Utf8, false),
]);

let orders_schema = Schema::new(vec![
Field::new("o_orderkey", DataType::Int64, false),
Field::new("o_custkey", DataType::Int64, false),
Field::new("o_orderstatus", DataType::Utf8, false),
Field::new("o_totalprice", DataType::Decimal128(15, 2), false),
Field::new("o_orderdate", DataType::Date32, false),
Field::new("o_orderpriority", DataType::Utf8, false),
Field::new("o_clerk", DataType::Utf8, false),
Field::new("o_shippriority", DataType::Int32, false),
Field::new("o_comment", DataType::Utf8, false),
]);

let part_schema = Schema::new(vec![
Field::new("p_partkey", DataType::Int64, false),
Field::new("p_name", DataType::Utf8, false),
Field::new("p_mfgr", DataType::Utf8, false),
Field::new("p_brand", DataType::Utf8, false),
Field::new("p_type", DataType::Utf8, false),
Field::new("p_size", DataType::Int32, false),
Field::new("p_container", DataType::Utf8, false),
Field::new("p_retailprice", DataType::Decimal128(15, 2), false),
Field::new("p_comment", DataType::Utf8, false),
]);

let supplier_schema = Schema::new(vec![
Field::new("s_suppkey", DataType::Int64, false),
Field::new("s_name", DataType::Utf8, false),
Field::new("s_address", DataType::Utf8, false),
Field::new("s_nationkey", DataType::Int64, false),
Field::new("s_phone", DataType::Utf8, false),
Field::new("s_acctbal", DataType::Decimal128(15, 2), false),
Field::new("s_comment", DataType::Utf8, false),
]);

let partsupp_schema = Schema::new(vec![
Field::new("ps_partkey", DataType::Int64, false),
Field::new("ps_suppkey", DataType::Int64, false),
Field::new("ps_availqty", DataType::Int32, false),
Field::new("ps_supplycost", DataType::Decimal128(15, 2), false),
Field::new("ps_comment", DataType::Utf8, false),
]);

let customer_schema = Schema::new(vec![
Field::new("c_custkey", DataType::Int64, false),
Field::new("c_name", DataType::Utf8, false),
Field::new("c_address", DataType::Utf8, false),
Field::new("c_nationkey", DataType::Int64, false),
Field::new("c_phone", DataType::Utf8, false),
Field::new("c_acctbal", DataType::Decimal128(15, 2), false),
Field::new("c_mktsegment", DataType::Utf8, false),
Field::new("c_comment", DataType::Utf8, false),
]);

let nation_schema = Schema::new(vec![
Field::new("n_nationkey", DataType::Int64, false),
Field::new("n_name", DataType::Utf8, false),
Field::new("n_regionkey", DataType::Int64, false),
Field::new("n_comment", DataType::Utf8, false),
]);

let region_schema = Schema::new(vec![
Field::new("r_regionkey", DataType::Int64, false),
Field::new("r_name", DataType::Utf8, false),
Field::new("r_comment", DataType::Utf8, false),
]);

[
("lineitem".to_string(), lineitem_schema),
("orders".to_string(), orders_schema),
("part".to_string(), part_schema),
("supplier".to_string(), supplier_schema),
("partsupp".to_string(), partsupp_schema),
("customer".to_string(), customer_schema),
("nation".to_string(), nation_schema),
("region".to_string(), region_schema),
]
}

fn create_context() -> SessionContext {
let ctx = SessionContext::new();
ctx.register_table("t1", create_table_provider("a", 200))
Expand All @@ -68,6 +166,16 @@ fn create_context() -> SessionContext {
.unwrap();
ctx.register_table("t700", create_table_provider("c", 700))
.unwrap();

// Register one empty in-memory table per TPC-H schema so the TPC-H
// queries can be planned against this context.
let tpch_tables = create_tpch_schemas();
for (name, schema) in tpch_tables.iter() {
    let provider = MemTable::try_new(Arc::new(schema.clone()), vec![]).unwrap();
    ctx.register_table(name, Arc::new(provider)).unwrap();
}

ctx
}

Expand Down Expand Up @@ -115,6 +223,54 @@ fn criterion_benchmark(c: &mut Criterion) {
)
})
});

// Load the TPC-H query texts once, outside the timed section. q15 is left
// out, exactly as in the previous hand-written list.
// NOTE(review): presumably q15 is excluded because its file holds more than
// one SQL statement — confirm before re-enabling it.
let tpch_queries: Vec<String> = (1..=22)
    .filter(|query_id| *query_id != 15)
    .map(|query_id| {
        let path = format!("../../benchmarks/queries/q{}.sql", query_id);
        std::fs::read_to_string(&path)
            .unwrap_or_else(|e| panic!("failed to read TPC-H query {}: {}", path, e))
    })
    .collect();

// Criterion expects a single timing-loop call (`b.iter`) per invocation of
// the benchmark closure. The previous code called `b.iter` once per query,
// so each sample planned every query but the reported measurement did not
// describe the whole suite. Planning all queries inside one `b.iter` makes
// the "physical_plan_tpch" number cover the full set.
c.bench_function("physical_plan_tpch", |b| {
    b.iter(|| {
        for sql in &tpch_queries {
            physical_plan(&ctx, sql);
        }
    })
});
}

criterion_group!(benches, criterion_benchmark);
Expand Down

0 comments on commit 545275b

Please sign in to comment.