diff --git a/cuda/base/kernel_launch_reduction.cuh b/cuda/base/kernel_launch_reduction.cuh index d1d6285e839..c70e5564503 100644 --- a/cuda/base/kernel_launch_reduction.cuh +++ b/cuda/base/kernel_launch_reduction.cuh @@ -458,7 +458,7 @@ void run_kernel_row_reduction(std::shared_ptr exec, } else { select_run_generic_kernel_row_reduction( subwarp_sizes(), - [&](int compiled_subwarp_size) { + [cols](int compiled_subwarp_size) { return compiled_subwarp_size >= cols || compiled_subwarp_size == config::warp_size; }, @@ -488,7 +488,7 @@ void run_kernel_col_reduction(std::shared_ptr exec, if (cols <= config::warp_size) { select_generic_col_reduction_small( subwarp_sizes(), - [&](int compiled_subwarp_size) { + [cols](int compiled_subwarp_size) { return compiled_subwarp_size >= cols || compiled_subwarp_size == config::warp_size; }, diff --git a/dpcpp/base/kernel_launch_reduction.dp.hpp b/dpcpp/base/kernel_launch_reduction.dp.hpp index 5ebf06b0f71..47d97676bb5 100644 --- a/dpcpp/base/kernel_launch_reduction.dp.hpp +++ b/dpcpp/base/kernel_launch_reduction.dp.hpp @@ -576,7 +576,7 @@ void run_kernel_row_reduction_stage1(std::shared_ptr exec, } else { select_generic_kernel_row_reduction_2d( subsubgroup_sizes(), - [&](int compiled_ssg_size) { + [cols](int compiled_ssg_size) { return compiled_ssg_size >= cols || compiled_ssg_size == sg_size; }, @@ -612,7 +612,7 @@ void run_kernel_col_reduction_stage1(std::shared_ptr exec, if (cols <= sg_size) { select_generic_col_reduction_small( subsubgroup_sizes(), - [&](int compiled_ssg_size) { + [cols](int compiled_ssg_size) { return compiled_ssg_size >= cols || compiled_ssg_size == sg_size; }, diff --git a/hip/base/kernel_launch_reduction.hip.hpp b/hip/base/kernel_launch_reduction.hip.hpp index 610f89673a9..fa20000d5bb 100644 --- a/hip/base/kernel_launch_reduction.hip.hpp +++ b/hip/base/kernel_launch_reduction.hip.hpp @@ -466,7 +466,7 @@ void run_kernel_row_reduction(std::shared_ptr exec, } else { select_run_generic_kernel_row_reduction( subwarp_sizes(), - [&](int compiled_subwarp_size) { + [cols](int compiled_subwarp_size) { return compiled_subwarp_size >= cols || compiled_subwarp_size == config::warp_size; }, @@ -496,7 +496,7 @@ void run_kernel_col_reduction(std::shared_ptr exec, if (cols <= config::warp_size) { select_generic_col_reduction_small( subwarp_sizes(), - [&](int compiled_subwarp_size) { + [cols](int compiled_subwarp_size) { return compiled_subwarp_size >= cols || compiled_subwarp_size == config::warp_size; },