From 673f456a1b4ec8a660c473bacec49eac0512f8b0 Mon Sep 17 00:00:00 2001 From: Thilina Ratnayaka Date: Tue, 26 Sep 2023 11:31:23 -0500 Subject: [PATCH] More pylint fixes --- tests/nomp-api-300-impl.h | 2 +- tests/nomp_api_300.py | 36 ++++++++++++++++++++++-------------- 2 files changed, 23 insertions(+), 15 deletions(-) diff --git a/tests/nomp-api-300-impl.h b/tests/nomp-api-300-impl.h index 8e1703d6..7e29315c 100644 --- a/tests/nomp-api-300-impl.h +++ b/tests/nomp-api-300-impl.h @@ -9,7 +9,7 @@ static int nomp_api_300_aux(const char *fmt, TEST_TYPE *a, TEST_TYPE *b, nomp_test_check(nomp_update(b, 0, n, sizeof(TEST_TYPE), NOMP_TO)); int id = -1; - const char *clauses[4] = {"transform", "nomp_api_300", "transform", 0}; + const char *clauses[4] = {"transform", "nomp_api_300", "madd_transform", 0}; char *knl = generate_knl(fmt, 2, TOSTRING(TEST_TYPE), TOSTRING(TEST_TYPE)); nomp_test_check(nomp_jit(&id, knl, clauses, 4, "a", sizeof(TEST_TYPE), NOMP_PTR, "b", sizeof(TEST_TYPE), NOMP_PTR, "rows", diff --git a/tests/nomp_api_300.py b/tests/nomp_api_300.py index 7904631a..3a58c03d 100644 --- a/tests/nomp_api_300.py +++ b/tests/nomp_api_300.py @@ -1,27 +1,35 @@ +import math + import loopy as lp LOOPY_LANG_VERSION = (2018, 2) -def transform(knl, context): - def split_and_tag(knl, i, axis): - i_inner, i_outer = f"{i}_inner", f"{i}_outer" - knl = lp.split_iname( - knl, i, 32, inner_iname=i_inner, outer_iname=i_outer - ) - knl = lp.tag_inames(knl, {i_outer: f"g.{axis}", i_inner: f"l.{axis}"}) - return knl - - (i, j) = knl.default_entrypoint.all_inames() - knl = split_and_tag(knl, i, 0) - knl = split_and_tag(knl, j, 1) +def madd_transform(knl, context): + block_size = int( + math.sqrt(min(1024, context["device::max_threads_per_block"])) + ) + knl = lp.split_iname(knl, "i", block_size) + knl = lp.split_iname(knl, "j", block_size) + knl = lp.tag_inames( + knl, + { + "i_outer": "g.0", + "i_inner": "l.0", + "j_outer": "g.1", + "j_inner": "l.1", + }, + ) return knl def mxm_transform(knl, context): - knl = lp.split_iname(knl, "i", 32) - knl = lp.split_iname(knl, "j", 32) + block_size = int( + math.sqrt(min(1024, context["device::max_threads_per_block"])) + ) + knl = lp.split_iname(knl, "i", block_size) + knl = lp.split_iname(knl, "j", block_size) knl = lp.tag_inames( knl,