Skip to content

Commit

Permalink
Address deprecations in oneAPI 2023.0.0
Browse files Browse the repository at this point in the history
  • Loading branch information
masterleinad committed Jan 25, 2023
1 parent b5b0504 commit 05d008d
Show file tree
Hide file tree
Showing 7 changed files with 36 additions and 80 deletions.
17 changes: 0 additions & 17 deletions core/src/SYCL/Kokkos_SYCL.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,6 @@ std::ostream& SYCL::impl_sycl_info(std::ostream& os,
using namespace sycl::info;
return os << "Name: " << device.get_info<device::name>()
<< "\nDriver Version: " << device.get_info<device::driver_version>()
<< "\nIs Host: " << device.is_host()
<< "\nIs CPU: " << device.is_cpu()
<< "\nIs GPU: " << device.is_gpu()
<< "\nIs Accelerator: " << device.is_accelerator()
Expand Down Expand Up @@ -184,7 +183,6 @@ std::ostream& SYCL::impl_sycl_info(std::ostream& os,
<< "\nNative Vector Width Half: "
<< device.get_info<device::native_vector_width_half>()
<< "\nAddress Bits: " << device.get_info<device::address_bits>()
<< "\nImage Support: " << device.get_info<device::image_support>()
<< "\nMax Mem Alloc Size: "
<< device.get_info<device::max_mem_alloc_size>()
<< "\nMax Read Image Args: "
Expand Down Expand Up @@ -217,26 +215,11 @@ std::ostream& SYCL::impl_sycl_info(std::ostream& os,
<< "\nLocal Mem Size: " << device.get_info<device::local_mem_size>()
<< "\nError Correction Support: "
<< device.get_info<device::error_correction_support>()
<< "\nHost Unified Memory: "
<< device.get_info<device::host_unified_memory>()
<< "\nProfiling Timer Resolution: "
<< device.get_info<device::profiling_timer_resolution>()
<< "\nIs Endian Little: "
<< device.get_info<device::is_endian_little>()
<< "\nIs Available: " << device.get_info<device::is_available>()
<< "\nIs Compiler Available: "
<< device.get_info<device::is_compiler_available>()
<< "\nIs Linker Available: "
<< device.get_info<device::is_linker_available>()
<< "\nQueue Profiling: "
<< device.get_info<device::queue_profiling>()
<< "\nVendor: " << device.get_info<device::vendor>()
<< "\nProfile: " << device.get_info<device::profile>()
<< "\nVersion: " << device.get_info<device::version>()
<< "\nPrintf Buffer Size: "
<< device.get_info<device::printf_buffer_size>()
<< "\nPreferred Interop User Sync: "
<< device.get_info<device::preferred_interop_user_sync>()
<< "\nPartition Max Sub Devices: "
<< device.get_info<device::partition_max_sub_devices>()
<< "\nReference Count: "
Expand Down
28 changes: 8 additions & 20 deletions core/src/SYCL/Kokkos_SYCL_Parallel_Reduce.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -293,12 +293,8 @@ class ParallelReduce<FunctorType, Kokkos::RangePolicy<Traits...>, ReducerType,
instance.scratch_flags(sizeof(unsigned int)));

auto reduction_lambda_factory =
[&](sycl::accessor<value_type, 1, sycl::access::mode::read_write,
sycl::access::target::local>
local_mem,
sycl::accessor<unsigned int, 1, sycl::access::mode::read_write,
sycl::access::target::local>
num_teams_done,
[&](sycl::local_accessor<value_type, 1> local_mem,
sycl::local_accessor<unsigned int, 1> num_teams_done,
sycl::device_ptr<value_type> results_ptr) {
const auto begin = policy.begin();

Expand Down Expand Up @@ -410,9 +406,7 @@ class ParallelReduce<FunctorType, Kokkos::RangePolicy<Traits...>, ReducerType,
};

auto parallel_reduce_event = q.submit([&](sycl::handler& cgh) {
sycl::accessor<unsigned int, 1, sycl::access::mode::read_write,
sycl::access::target::local>
num_teams_done(1, cgh);
sycl::local_accessor<unsigned int, 1> num_teams_done(1, cgh);

auto dummy_reduction_lambda =
reduction_lambda_factory({1, cgh}, num_teams_done, nullptr);
Expand Down Expand Up @@ -453,10 +447,8 @@ class ParallelReduce<FunctorType, Kokkos::RangePolicy<Traits...>, ReducerType,
wgroup_size - 1) /
wgroup_size;

sycl::accessor<value_type, 1, sycl::access::mode::read_write,
sycl::access::target::local>
local_mem(sycl::range<1>(wgroup_size) * std::max(value_count, 1u),
cgh);
sycl::local_accessor<value_type, 1> local_mem(
sycl::range<1>(wgroup_size) * std::max(value_count, 1u), cgh);

cgh.depends_on(memcpy_events);

Expand Down Expand Up @@ -665,13 +657,9 @@ class ParallelReduce<FunctorType, Kokkos::MDRangePolicy<Traits...>, ReducerType,
if (size > 1) {
auto n_wgroups = (size + wgroup_size - 1) / wgroup_size;
auto parallel_reduce_event = q.submit([&](sycl::handler& cgh) {
sycl::accessor<value_type, 1, sycl::access::mode::read_write,
sycl::access::target::local>
local_mem(sycl::range<1>(wgroup_size) * std::max(value_count, 1u),
cgh);
sycl::accessor<unsigned int, 1, sycl::access::mode::read_write,
sycl::access::target::local>
num_teams_done(1, cgh);
sycl::local_accessor<value_type, 1> local_mem(
sycl::range<1>(wgroup_size) * std::max(value_count, 1u), cgh);
sycl::local_accessor<unsigned int, 1> num_teams_done(1, cgh);

const BarePolicy bare_policy = m_policy;

Expand Down
9 changes: 4 additions & 5 deletions core/src/SYCL/Kokkos_SYCL_Parallel_Scan.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -136,11 +136,10 @@ class ParallelScanSYCLBase {
q.get_device()
.template get_info<sycl::info::device::sub_group_sizes>()
.front();
sycl::accessor<value_type, 1, sycl::access::mode::read_write,
sycl::access::target::local>
local_mem(sycl::range<1>((wgroup_size + min_subgroup_size - 1) /
min_subgroup_size),
cgh);
sycl::local_accessor<value_type, 1> local_mem(
sycl::range<1>((wgroup_size + min_subgroup_size - 1) /
min_subgroup_size),
cgh);

cgh.parallel_for(
sycl::nd_range<1>(n_wgroups * wgroup_size, wgroup_size),
Expand Down
50 changes: 20 additions & 30 deletions core/src/SYCL/Kokkos_SYCL_Parallel_Team.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -398,12 +398,10 @@ class ParallelFor<FunctorType, Kokkos::TeamPolicy<Properties...>,
auto parallel_for_event = q.submit([&](sycl::handler& cgh) {
// FIXME_SYCL accessors seem to need a size greater than zero at least for
// host queues
sycl::accessor<char, 1, sycl::access::mode::read_write,
sycl::access::target::local>
team_scratch_memory_L0(
sycl::range<1>(
std::max(m_scratch_size[0] + m_shmem_begin, size_t(1))),
cgh);
sycl::local_accessor<char, 1> team_scratch_memory_L0(
sycl::range<1>(
std::max(m_scratch_size[0] + m_shmem_begin, size_t(1))),
cgh);

// Avoid capturing *this since it might not be trivially copyable
const auto shmem_begin = m_shmem_begin;
Expand Down Expand Up @@ -432,8 +430,7 @@ class ParallelFor<FunctorType, Kokkos::TeamPolicy<Properties...>,
auto max_sg_size =
kernel
.get_info<sycl::info::kernel_device_specific::max_sub_group_size>(
q.get_device(),
sycl::range<3>(m_team_size, m_vector_size, 1));
q.get_device());
auto final_vector_size = std::min<int>(m_vector_size, max_sg_size);
// FIXME_SYCL For some reason, explicitly enforcing the kernel bundle to
// be used gives a runtime error.
Expand Down Expand Up @@ -592,12 +589,10 @@ class ParallelReduce<FunctorType, Kokkos::TeamPolicy<Properties...>,
auto parallel_reduce_event = q.submit([&](sycl::handler& cgh) {
// FIXME_SYCL accessors seem to need a size greater than zero at least
// for host queues
sycl::accessor<char, 1, sycl::access::mode::read_write,
sycl::access::target::local>
team_scratch_memory_L0(
sycl::range<1>(
std::max(m_scratch_size[0] + m_shmem_begin, size_t(1))),
cgh);
sycl::local_accessor<char, 1> team_scratch_memory_L0(
sycl::range<1>(
std::max(m_scratch_size[0] + m_shmem_begin, size_t(1))),
cgh);

// Avoid capturing *this since it might not be trivially copyable
const auto shmem_begin = m_shmem_begin;
Expand Down Expand Up @@ -645,22 +640,18 @@ class ParallelReduce<FunctorType, Kokkos::TeamPolicy<Properties...>,

// FIXME_SYCL accessors seem to need a size greater than zero at least
// for host queues
sycl::accessor<char, 1, sycl::access::mode::read_write,
sycl::access::target::local>
team_scratch_memory_L0(
sycl::range<1>(
std::max(m_scratch_size[0] + m_shmem_begin, size_t(1))),
cgh);
sycl::local_accessor<char, 1> team_scratch_memory_L0(
sycl::range<1>(
std::max(m_scratch_size[0] + m_shmem_begin, size_t(1))),
cgh);

// Avoid capturing *this since it might not be trivially copyable
const auto shmem_begin = m_shmem_begin;
const size_t scratch_size[2] = {m_scratch_size[0], m_scratch_size[1]};
sycl::device_ptr<char> const global_scratch_ptr = m_global_scratch_ptr;

auto team_reduction_factory =
[&](sycl::accessor<value_type, 1, sycl::access::mode::read_write,
sycl::access::target::local>
local_mem,
[&](sycl::local_accessor<value_type, 1> local_mem,
sycl::device_ptr<value_type> results_ptr) {
sycl::global_ptr<value_type> device_accessible_result_ptr =
m_result_ptr_device_accessible ? m_result_ptr : nullptr;
Expand Down Expand Up @@ -793,7 +784,7 @@ class ParallelReduce<FunctorType, Kokkos::TeamPolicy<Properties...>,
}();
auto max_sg_size = kernel.get_info<
sycl::info::kernel_device_specific::max_sub_group_size>(
q.get_device(), sycl::range<3>(m_team_size, m_vector_size, 1));
q.get_device());
auto final_vector_size = std::min<int>(m_vector_size, max_sg_size);
// FIXME_SYCL For some reason, explicitly enforcing the kernel bundle to
// be used gives a runtime error.
Expand All @@ -802,12 +793,11 @@ class ParallelReduce<FunctorType, Kokkos::TeamPolicy<Properties...>,

auto wgroup_size = m_team_size * final_vector_size;
std::size_t size = std::size_t(m_league_size) * wgroup_size;
sycl::accessor<value_type, 1, sycl::access::mode::read_write,
sycl::access::target::local>
local_mem(sycl::range<1>(wgroup_size) * std::max(value_count, 1u) +
(sizeof(unsigned int) + sizeof(value_type) - 1) /
sizeof(value_type),
cgh);
sycl::local_accessor<value_type, 1> local_mem(
sycl::range<1>(wgroup_size) * std::max(value_count, 1u) +
(sizeof(unsigned int) + sizeof(value_type) - 1) /
sizeof(value_type),
cgh);

const auto init_size =
std::max<std::size_t>((size + wgroup_size - 1) / wgroup_size, 1);
Expand Down
3 changes: 1 addition & 2 deletions core/unit_test/sycl/TestSYCL_InterOp_Init.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,7 @@ TEST(sycl, raw_sycl_interop) {
Kokkos::Experimental::SYCL default_space;
sycl::context default_context = default_space.sycl_queue().get_context();

sycl::default_selector device_selector;
sycl::queue queue(default_context, device_selector);
sycl::queue queue(default_context, sycl::default_selector_v);
constexpr int n = 100;
int* p = sycl::malloc_device<int>(n, queue);
{
Expand Down
6 changes: 2 additions & 4 deletions core/unit_test/sycl/TestSYCL_InterOp_Init_Context.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,7 @@ TEST(sycl, raw_sycl_interop_context_1) {
Kokkos::Experimental::SYCL default_space;
sycl::context default_context = default_space.sycl_queue().get_context();

sycl::default_selector device_selector;
sycl::queue queue(default_context, device_selector);
sycl::queue queue(default_context, sycl::default_selector_v);
constexpr int n = 100;
int* p = sycl::malloc_device<int>(n, queue);

Expand Down Expand Up @@ -61,8 +60,7 @@ TEST(sycl, raw_sycl_interop_context_2) {
Kokkos::Experimental::SYCL default_space;
sycl::context default_context = default_space.sycl_queue().get_context();

sycl::default_selector device_selector;
sycl::queue queue(default_context, device_selector);
sycl::queue queue(default_context, sycl::default_selector_v);
constexpr int n = 100;

Kokkos::Experimental::SYCL space(queue);
Expand Down
3 changes: 1 addition & 2 deletions core/unit_test/sycl/TestSYCL_InterOp_Streams.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,7 @@ TEST(sycl, raw_sycl_queues) {
Kokkos::Experimental::SYCL default_space;
sycl::context default_context = default_space.sycl_queue().get_context();

sycl::default_selector device_selector;
sycl::queue queue(default_context, device_selector);
sycl::queue queue(default_context, sycl::default_selector_v);
int* p = sycl::malloc_device<int>(100, queue);
using MemorySpace = typename TEST_EXECSPACE::memory_space;

Expand Down

0 comments on commit 05d008d

Please sign in to comment.