diff --git a/common/cuda_hip/distributed/vector_kernels.hpp.inc b/common/cuda_hip/distributed/vector_kernels.hpp.inc
index f21bbb2d706..504f7153062 100644
--- a/common/cuda_hip/distributed/vector_kernels.hpp.inc
+++ b/common/cuda_hip/distributed/vector_kernels.hpp.inc
@@ -58,7 +58,7 @@ void build_local(
     //                                    array
     // the flat_idx_it is used by the scatter_if as an index map for the values
     auto map_to_local_row =
-        [range_bounds, range_starting_indices] GKO_THRUST_LAMBDA(
+        [range_bounds, range_starting_indices] __host__ __device__(
             const thrust::tuple<GlobalIndexType, size_type>& idx_range_id)
         -> LocalIndexType {
         const auto idx = thrust::get<0>(idx_range_id);
@@ -73,7 +73,7 @@ void build_local(
 
     auto stride = local_mtx->get_stride();
     auto map_to_flat_idx =
-        [stride] GKO_THRUST_LAMBDA(
+        [stride] __host__ __device__(
             const thrust::tuple<LocalIndexType, GlobalIndexType>& row_col)
         -> size_type {
         return thrust::get<0>(row_col) * stride + thrust::get<1>(row_col);
@@ -83,10 +83,10 @@ void build_local(
             thrust::make_tuple(local_row_it, input.get_const_col_idxs())),
         map_to_flat_idx);
 
-    auto is_local_row = [part_ids,
-                         local_part] GKO_THRUST_LAMBDA(const size_type rid) {
-        return part_ids[rid] == local_part;
-    };
+    auto is_local_row =
+        [part_ids, local_part] __host__ __device__(const size_type rid) {
+            return part_ids[rid] == local_part;
+        };  // true when part_ids[rid] is the local part
     thrust::scatter_if(thrust::device, input.get_const_values(),
                        input.get_const_values() + input.get_num_elems(),
                        flat_idx_it, range_id.get_data(),
diff --git a/cuda/distributed/vector_kernels.cu b/cuda/distributed/vector_kernels.cu
index 168bc4eabdc..def3fc8ec87 100644
--- a/cuda/distributed/vector_kernels.cu
+++ b/cuda/distributed/vector_kernels.cu
@@ -50,15 +50,9 @@ namespace cuda {
 namespace distributed_vector {
 
 
-#define GKO_THRUST_LAMBDA __device__
-
-
 #include "common/cuda_hip/distributed/vector_kernels.hpp.inc"
 
 
-#undef GKO_THRUST_LAMBDA
-
-
 }  // namespace distributed_vector
 }  // namespace cuda
 }  // namespace kernels
diff --git a/dpcpp/distributed/vector_kernels.dp.cpp b/dpcpp/distributed/vector_kernels.dp.cpp
index 45ffed4c1ed..422d32b5465 100644
--- a/dpcpp/distributed/vector_kernels.dp.cpp
+++ b/dpcpp/distributed/vector_kernels.dp.cpp
@@ -30,11 +30,16 @@ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 ******************************<GINKGO LICENSE>*******************************/
 
-#include "core/distributed/vector_kernels.hpp"
-
 
+// force-top: on
+// oneDPL headers must come first to avoid issues with the libstdc++ TBB impl
 #include <oneapi/dpl/algorithm>
+#include <oneapi/dpl/execution>
 #include <oneapi/dpl/iterator>
+// force-top: off
+
+
+#include "core/distributed/vector_kernels.hpp"
 
 
 #include <ginkgo/core/base/exception_helpers.hpp>
diff --git a/hip/distributed/vector_kernels.hip.cpp b/hip/distributed/vector_kernels.hip.cpp
index bbc7ee1eb1b..6cbfa1224e9 100644
--- a/hip/distributed/vector_kernels.hip.cpp
+++ b/hip/distributed/vector_kernels.hip.cpp
@@ -53,15 +53,9 @@ namespace hip {
 namespace distributed_vector {
 
 
-#define GKO_THRUST_LAMBDA __device__ __host__
-
-
 #include "common/cuda_hip/distributed/vector_kernels.hpp.inc"
 
 
-#undef GKO_THRUST_LAMBDA
-
-
 }  // namespace distributed_vector
 }  // namespace hip
 }  // namespace kernels