From ef4fdfa2e655f3e6bdd340e427caad2cd8250d9c Mon Sep 17 00:00:00 2001 From: DaegyuLee Date: Wed, 26 Jul 2023 14:31:50 +0900 Subject: [PATCH] discriptor added --- CMakeLists.txt | 6 +- .../fast_gicp/cuda/covariance_estimation.cuh | 8 + include/fast_gicp/cuda/fast_vgicp_cuda.cuh | 17 ++ include/fast_gicp/gicp/fast_vgicp_cuda.hpp | 3 +- .../gicp/impl/fast_vgicp_cuda_impl.hpp | 31 ++++ .../cuda/covariance_estimation_gaussian.cu | 156 ++++++++++++++++ .../cuda/covariance_estimation_histogram.cu | 171 ++++++++++++++++++ .../cuda/covariance_estimation_laplacian.cu | 154 ++++++++++++++++ .../cuda/covariance_estimation_polynomial.cu | 167 +++++++++++++++++ 9 files changed, 711 insertions(+), 2 deletions(-) create mode 100644 src/fast_gicp/cuda/covariance_estimation_gaussian.cu create mode 100644 src/fast_gicp/cuda/covariance_estimation_histogram.cu create mode 100644 src/fast_gicp/cuda/covariance_estimation_laplacian.cu create mode 100644 src/fast_gicp/cuda/covariance_estimation_polynomial.cu diff --git a/CMakeLists.txt b/CMakeLists.txt index ba0f536..b3af031 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,7 +1,7 @@ cmake_minimum_required(VERSION 3.10.0) project(fast_gicp) -option(BUILD_VGICP_CUDA "Build GPU-powered VGICP" OFF) +option(BUILD_VGICP_CUDA "Build GPU-powered VGICP" ON) option(BUILD_apps "Build application programs" ON) option(BUILD_test "Build test programs" OFF) option(BUILD_PYTHON_BINDINGS "Build python bindings" OFF) @@ -121,6 +121,10 @@ if(BUILD_VGICP_CUDA) src/fast_gicp/cuda/brute_force_knn.cu src/fast_gicp/cuda/covariance_estimation.cu src/fast_gicp/cuda/covariance_estimation_rbf.cu + src/fast_gicp/cuda/covariance_estimation_polynomial.cu + src/fast_gicp/cuda/covariance_estimation_histogram.cu + src/fast_gicp/cuda/covariance_estimation_laplacian.cu + src/fast_gicp/cuda/covariance_estimation_gaussian.cu src/fast_gicp/cuda/covariance_regularization.cu src/fast_gicp/cuda/gaussian_voxelmap.cu src/fast_gicp/cuda/find_voxel_correspondences.cu diff --git a/include/fast_gicp/cuda/covariance_estimation.cuh b/include/fast_gicp/cuda/covariance_estimation.cuh index 8bb81f6..cc92437 100644 --- a/include/fast_gicp/cuda/covariance_estimation.cuh +++ b/include/fast_gicp/cuda/covariance_estimation.cuh @@ -11,6 +11,14 @@ namespace cuda { void covariance_estimation(const thrust::device_vector& points, int k, const thrust::device_vector& k_neighbors, thrust::device_vector& covariances); void covariance_estimation_rbf(const thrust::device_vector& points, double kernel_width, double max_dist, thrust::device_vector& covariances); + +void covariance_estimation_polynomial(const thrust::device_vector& points, double alpha, double constant, int degree, thrust::device_vector& covariances); + +void covariance_estimation_histogram_intersection(const thrust::device_vector& points, double kernel_width, double max_dist, thrust::device_vector& covariances); + +void covariance_estimation_laplacian(const thrust::device_vector& points, double kernel_width, double max_dist, thrust::device_vector& covariances); + +void covariance_estimation_gaussian(const thrust::device_vector& points, double kernel_width, double max_dist, thrust::device_vector& covariances); } } // namespace fast_gicp diff --git a/include/fast_gicp/cuda/fast_vgicp_cuda.cuh b/include/fast_gicp/cuda/fast_vgicp_cuda.cuh index be1dcb5..128b7b0 100644 --- a/include/fast_gicp/cuda/fast_vgicp_cuda.cuh +++ b/include/fast_gicp/cuda/fast_vgicp_cuda.cuh @@ -39,6 +39,7 @@ public: void set_resolution(double resolution); void set_kernel_params(double kernel_width, double kernel_max_dist); + void set_poly_params(double alpha, double constant, int degree); void set_neighbor_search_method(fast_gicp::NeighborSearchMethod method, double radius); void swap_source_and_target(); @@ -56,6 +57,18 @@ public: void calculate_source_covariances_rbf(RegularizationMethod method); void calculate_target_covariances_rbf(RegularizationMethod method); + void calculate_source_covariances_polynomial(RegularizationMethod method); + void calculate_target_covariances_polynomial(RegularizationMethod method); + + void calculate_source_covariances_histogram_intersection(RegularizationMethod method); + void calculate_target_covariances_histogram_intersection(RegularizationMethod method); + + void calculate_source_covariances_laplacian(RegularizationMethod method); + void calculate_target_covariances_laplacian(RegularizationMethod method); + + void calculate_source_covariances_gaussian(RegularizationMethod method); + void calculate_target_covariances_gaussian(RegularizationMethod method); + void get_source_covariances(std::vector>& covs) const; void get_target_covariances(std::vector>& covs) const; @@ -74,6 +87,10 @@ public: double resolution; double kernel_width; double kernel_max_dist; + double alpha; + double constant; + int degree; + std::unique_ptr offsets; std::unique_ptr source_points; diff --git a/include/fast_gicp/gicp/fast_vgicp_cuda.hpp b/include/fast_gicp/gicp/fast_vgicp_cuda.hpp index 2eb21d7..c9a704d 100644 --- a/include/fast_gicp/gicp/fast_vgicp_cuda.hpp +++ b/include/fast_gicp/gicp/fast_vgicp_cuda.hpp @@ -18,7 +18,7 @@ namespace cuda { class FastVGICPCudaCore; } -enum class NearestNeighborMethod { CPU_PARALLEL_KDTREE, GPU_BRUTEFORCE, GPU_RBF_KERNEL }; +enum class NearestNeighborMethod { CPU_PARALLEL_KDTREE, GPU_BRUTEFORCE, GPU_RBF_KERNEL, GPU_POLY_KERNEL, GPU_HISTOGRAM_KERNEL, GPU_LAPLACIAN_KERNEL, GPU_GAUSSIAN_KERNEL}; /** * @brief Fast Voxelized GICP algorithm boosted with CUDA @@ -56,6 +56,7 @@ class FastVGICPCuda : public LsqRegistration { void setCorrespondenceRandomness(int k); void setResolution(double resolution); void setKernelWidth(double kernel_width, double max_dist = -1.0); + void setPolyParams(double alpha, double constant, int degree); void setRegularizationMethod(RegularizationMethod method); void setNeighborSearchMethod(NeighborSearchMethod method, double radius = -1.0); void setNearestNeighborSearchMethod(NearestNeighborMethod method); diff --git a/include/fast_gicp/gicp/impl/fast_vgicp_cuda_impl.hpp b/include/fast_gicp/gicp/impl/fast_vgicp_cuda_impl.hpp index 2693a88..7d522aa 100644 --- a/include/fast_gicp/gicp/impl/fast_vgicp_cuda_impl.hpp +++ b/include/fast_gicp/gicp/impl/fast_vgicp_cuda_impl.hpp @@ -50,6 +50,11 @@ void FastVGICPCuda::setKernelWidth(double kernel_width vgicp_cuda_->set_kernel_params(kernel_width, max_dist); } +template +void FastVGICPCuda::setPolyParams(double alpha, double constant, int degree) { + vgicp_cuda_->set_poly_params(alpha, constant, degree); +} + template void FastVGICPCuda::setRegularizationMethod(RegularizationMethod method) { regularization_method_ = method; @@ -107,6 +112,19 @@ void FastVGICPCuda::setInputSource(const PointCloudSou case NearestNeighborMethod::GPU_RBF_KERNEL: vgicp_cuda_->calculate_source_covariances_rbf(regularization_method_); break; + case NearestNeighborMethod::GPU_POLY_KERNEL: + vgicp_cuda_->calculate_source_covariances_polynomial(regularization_method_); + break; + case NearestNeighborMethod::GPU_HISTOGRAM_KERNEL: + vgicp_cuda_->calculate_source_covariances_histogram_intersection(regularization_method_); + break; + case NearestNeighborMethod::GPU_LAPLACIAN_KERNEL: + vgicp_cuda_->calculate_source_covariances_laplacian(regularization_method_); + break; + case NearestNeighborMethod::GPU_GAUSSIAN_KERNEL: + vgicp_cuda_->calculate_source_covariances_gaussian(regularization_method_); + break; + } } @@ -136,6 +154,19 @@ void FastVGICPCuda::setInputTarget(const PointCloudTar case NearestNeighborMethod::GPU_RBF_KERNEL: vgicp_cuda_->calculate_target_covariances_rbf(regularization_method_); break; + case NearestNeighborMethod::GPU_POLY_KERNEL: + vgicp_cuda_->calculate_target_covariances_polynomial(regularization_method_); + break; + case NearestNeighborMethod::GPU_HISTOGRAM_KERNEL: + vgicp_cuda_->calculate_target_covariances_histogram_intersection(regularization_method_); + break; + case NearestNeighborMethod::GPU_LAPLACIAN_KERNEL: + vgicp_cuda_->calculate_target_covariances_laplacian(regularization_method_); + break; + case NearestNeighborMethod::GPU_GAUSSIAN_KERNEL: + vgicp_cuda_->calculate_target_covariances_gaussian(regularization_method_); + break; + } vgicp_cuda_->create_target_voxelmap(); } diff --git a/src/fast_gicp/cuda/covariance_estimation_gaussian.cu b/src/fast_gicp/cuda/covariance_estimation_gaussian.cu new file mode 100644 index 0000000..56d9ab6 --- /dev/null +++ b/src/fast_gicp/cuda/covariance_estimation_gaussian.cu @@ -0,0 +1,156 @@ +#include + +#include + +#include +#include + +namespace fast_gicp { +namespace cuda { + +struct NormalDistribution { +public: + EIGEN_MAKE_ALIGNED_OPERATOR_NEW + + __host__ __device__ NormalDistribution() {} + + static __host__ __device__ NormalDistribution zero() { + NormalDistribution dist; + dist.sum_weights = 0.0f; + dist.mean.setZero(); + dist.cov.setZero(); + return dist; + } + + __host__ __device__ NormalDistribution operator+(const NormalDistribution& rhs) const { + NormalDistribution sum; + sum.sum_weights = sum_weights + rhs.sum_weights; + sum.mean = mean + rhs.mean; + sum.cov = cov + rhs.cov; + return sum; + } + + __host__ __device__ NormalDistribution& operator+=(const NormalDistribution& rhs) { + sum_weights += rhs.sum_weights; + mean += rhs.mean; + cov += rhs.cov; + return *this; + } + + __host__ __device__ void accumulate(const float w, const Eigen::Vector3f& x) { + sum_weights += w; + mean += w * x; + cov += w * x * x.transpose(); + } + + __host__ __device__ NormalDistribution& finalize() { + Eigen::Vector3f sum_pt = mean; + mean /= sum_weights; + cov = (cov - mean * sum_pt.transpose()) / sum_weights; + + return *this; + } + + float sum_weights; + Eigen::Vector3f mean; + Eigen::Matrix3f cov; +}; + + +struct covariance_estimation_kernel_gaussian { + static const int BLOCK_SIZE = 512; + + covariance_estimation_kernel_gaussian(thrust::device_ptr exp_factor_ptr, thrust::device_ptr max_dist_ptr, thrust::device_ptr points_ptr) + : exp_factor_ptr(exp_factor_ptr), + max_dist_ptr(max_dist_ptr), + points_ptr(points_ptr) {} + + __host__ __device__ NormalDistribution operator()(const Eigen::Vector3f& x) const { + const float exp_factor = *thrust::raw_pointer_cast(exp_factor_ptr); + const float max_dist = *thrust::raw_pointer_cast(max_dist_ptr); + const float max_dist_sq = max_dist * max_dist; + const Eigen::Vector3f* points = thrust::raw_pointer_cast(points_ptr); + + NormalDistribution dist = NormalDistribution::zero(); + for (int i = 0; i < BLOCK_SIZE; i++) { + float sq_d = (x - points[i]).squaredNorm(); + if (sq_d > max_dist_sq) { + continue; + } + + float r = sqrt(sq_d); + float w = expf(-r * r / (2 * exp_factor * exp_factor)); + dist.accumulate(w, points[i]); + } + + return dist; + } + + thrust::device_ptr exp_factor_ptr; + thrust::device_ptr max_dist_ptr; + thrust::device_ptr points_ptr; +}; + +struct finalization_kernel { + finalization_kernel(const int stride, const thrust::device_vector& accumulated_dists) + : stride(stride), + accumulated_dists_first(accumulated_dists.data()), + accumulated_dists_last(accumulated_dists.data() + accumulated_dists.size()) {} + + __host__ __device__ Eigen::Matrix3f operator()(int index) const { + const NormalDistribution* dists = thrust::raw_pointer_cast(accumulated_dists_first); + const NormalDistribution* dists_last = thrust::raw_pointer_cast(accumulated_dists_last); + const int num_dists = dists_last - dists; + + NormalDistribution sum = dists[index]; + for (int dist_index = index + stride; dist_index < num_dists; dist_index += stride) { + sum += dists[dist_index]; + } + + return sum.finalize().cov; + } + + const int stride; + thrust::device_ptr accumulated_dists_first; + thrust::device_ptr accumulated_dists_last; +}; + +void covariance_estimation_gaussian(const thrust::device_vector& points, double kernel_width, double max_dist, thrust::device_vector& covariances) { + covariances.resize(points.size()); + + thrust::device_vector constants(2); + constants[0] = kernel_width; + constants[1] = max_dist; + thrust::device_ptr exp_factor_ptr = constants.data(); + thrust::device_ptr max_dist_ptr = constants.data() + 1; + + int num_blocks = (points.size() + (covariance_estimation_kernel_gaussian::BLOCK_SIZE - 1)) / covariance_estimation_kernel_gaussian::BLOCK_SIZE; + // padding + thrust::device_vector ext_points(num_blocks * covariance_estimation_kernel_gaussian::BLOCK_SIZE); + thrust::copy(points.begin(), points.end(), ext_points.begin()); + thrust::fill(ext_points.begin() + points.size(), ext_points.end(), Eigen::Vector3f(0.0f, 0.0f, 0.0f)); + + thrust::device_vector accumulated_dists(points.size() * num_blocks); + + thrust::system::cuda::detail::unique_stream stream; + std::vector events(num_blocks); + + // accumulate kerneled point distributions + for (int i = 0; i < num_blocks; i++) { + covariance_estimation_kernel_gaussian kernel(exp_factor_ptr, max_dist_ptr, ext_points.data() + covariance_estimation_kernel_gaussian::BLOCK_SIZE * i); + auto event = thrust::async::transform(points.begin(), points.end(), accumulated_dists.begin() + points.size() * i, kernel); + events[i] = std::move(event); + thrust::system::cuda::detail::create_dependency(stream, events[i]); + } + + // finalize distributions + thrust::transform( + thrust::cuda::par.on(stream.native_handle()), + thrust::counting_iterator(0), + thrust::counting_iterator(points.size()), + covariances.begin(), + finalization_kernel(points.size(), accumulated_dists)); +} + +} // namespace cuda +} // namespace fast_gicp \ No newline at end of file diff --git a/src/fast_gicp/cuda/covariance_estimation_histogram.cu b/src/fast_gicp/cuda/covariance_estimation_histogram.cu new file mode 100644 index 0000000..7ae0928 --- /dev/null +++ b/src/fast_gicp/cuda/covariance_estimation_histogram.cu @@ -0,0 +1,171 @@ +#include + +#include + +#include +#include + +namespace fast_gicp { +namespace cuda { + +struct NormalDistribution { +public: + EIGEN_MAKE_ALIGNED_OPERATOR_NEW + + __host__ __device__ NormalDistribution() {} + + static __host__ __device__ NormalDistribution zero() { + NormalDistribution dist; + dist.sum_weights = 0.0f; + dist.mean.setZero(); + dist.cov.setZero(); + return dist; + } + + __host__ __device__ NormalDistribution operator+(const NormalDistribution& rhs) const { + NormalDistribution sum; + sum.sum_weights = sum_weights + rhs.sum_weights; + sum.mean = mean + rhs.mean; + sum.cov = cov + rhs.cov; + return sum; + } + + __host__ __device__ NormalDistribution& operator+=(const NormalDistribution& rhs) { + sum_weights += rhs.sum_weights; + mean += rhs.mean; + cov += rhs.cov; + return *this; + } + + __host__ __device__ void accumulate(const float w, const Eigen::Vector3f& x) { + sum_weights += w; + mean += w * x; + cov += w * x * x.transpose(); + } + + __host__ __device__ NormalDistribution& finalize() { + Eigen::Vector3f sum_pt = mean; + mean /= sum_weights; + cov = (cov - mean * sum_pt.transpose()) / sum_weights; + + return *this; + } + + float sum_weights; + Eigen::Vector3f mean; + Eigen::Matrix3f cov; +}; + +struct covariance_estimation_kernel { + static const int BLOCK_SIZE = 512; + + covariance_estimation_kernel(thrust::device_ptr exp_factor_ptr, thrust::device_ptr max_dist_ptr, thrust::device_ptr points_ptr) + : exp_factor_ptr(exp_factor_ptr), + max_dist_ptr(max_dist_ptr), + points_ptr(points_ptr) {} + + __host__ __device__ float histogram_intersection_kernel(const Eigen::Vector3f& x, const Eigen::Vector3f& y) const { + // Compute the histogram intersection kernel between x and y + // Here, we'll consider a simple example by using the Euclidean distance between x and y as the similarity measure. + // You can replace this with your specific similarity function. + float distance = (x - y).norm(); + return expf(-distance); // You can adjust the scaling or use a different similarity function here. + } + + __host__ __device__ NormalDistribution operator()(const Eigen::Vector3f& x) const { + const float exp_factor = *thrust::raw_pointer_cast(exp_factor_ptr); + const float max_dist = *thrust::raw_pointer_cast(max_dist_ptr); + const float max_dist_sq = max_dist * max_dist; + const Eigen::Vector3f* points = thrust::raw_pointer_cast(points_ptr); + + NormalDistribution dist = NormalDistribution::zero(); + for (int i = 0; i < BLOCK_SIZE; i++) { + float sq_d = (x - points[i]).squaredNorm(); + if (sq_d > max_dist_sq) { + continue; + } + + float w = histogram_intersection_kernel(x, points[i]); + dist.accumulate(w, points[i]); + } + + return dist; + } + + thrust::device_ptr exp_factor_ptr; + thrust::device_ptr max_dist_ptr; + thrust::device_ptr points_ptr; +}; + + +struct finalization_kernel { + finalization_kernel(const int stride, const thrust::device_vector& accumulated_dists) + : stride(stride), + accumulated_dists_first(accumulated_dists.data()), + accumulated_dists_last(accumulated_dists.data() + accumulated_dists.size()) {} + + __host__ __device__ Eigen::Matrix3f operator()(int index) const { + const NormalDistribution* dists = thrust::raw_pointer_cast(accumulated_dists_first); + const NormalDistribution* dists_last = thrust::raw_pointer_cast(accumulated_dists_last); + const int num_dists = dists_last - dists; + + NormalDistribution sum = dists[index]; + for (int dist_index = index + stride; dist_index < num_dists; dist_index += stride) { + sum += dists[dist_index]; + } + + return sum.finalize().cov; + } + + const int stride; + thrust::device_ptr accumulated_dists_first; + thrust::device_ptr accumulated_dists_last; +}; + + +void covariance_estimation_histogram_intersection( + const thrust::device_vector& points, + double kernel_width, + double max_dist, + thrust::device_vector& covariances) { + covariances.resize(points.size()); + + thrust::device_vector constants(2); + constants[0] = kernel_width; + constants[1] = max_dist; + thrust::device_ptr exp_factors = constants.data(); + thrust::device_ptr max_dists = constants.data() + 1; + + int num_blocks = (points.size() + (covariance_estimation_kernel::BLOCK_SIZE - 1)) / covariance_estimation_kernel::BLOCK_SIZE; + // padding + thrust::device_vector ext_points(num_blocks * covariance_estimation_kernel::BLOCK_SIZE); + thrust::copy(points.begin(), points.end(), ext_points.begin()); + thrust::fill(ext_points.begin() + points.size(), ext_points.end(), Eigen::Vector3f(0.0f, 0.0f, 0.0f)); + + thrust::device_vector accumulated_dists(points.size() * num_blocks); + + thrust::system::cuda::detail::unique_stream stream; + std::vector events(num_blocks); + + // accumulate kerneled point distributions + for (int i = 0; i < num_blocks; i++) { + thrust::device_ptr block_begin = ext_points.data() + covariance_estimation_kernel::BLOCK_SIZE * i; + thrust::device_ptr block_end = block_begin + thrust::min(covariance_estimation_kernel::BLOCK_SIZE, static_cast(points.size()) - covariance_estimation_kernel::BLOCK_SIZE * i); + + covariance_estimation_kernel kernel(exp_factors + i, max_dists + i, block_begin); + auto event = thrust::async::transform(points.begin(), points.end(), accumulated_dists.begin() + points.size() * i, kernel); + events[i] = std::move(event); + thrust::system::cuda::detail::create_dependency(stream, events[i]); + } + + // finalize distributions + thrust::transform( + thrust::cuda::par.on(stream.native_handle()), + thrust::counting_iterator(0), + thrust::counting_iterator(points.size()), + covariances.begin(), + finalization_kernel(points.size(), accumulated_dists)); +} + +} // namespace cuda +} // namespace fast_gicp \ No newline at end of file diff --git a/src/fast_gicp/cuda/covariance_estimation_laplacian.cu b/src/fast_gicp/cuda/covariance_estimation_laplacian.cu new file mode 100644 index 0000000..80149fe --- /dev/null +++ b/src/fast_gicp/cuda/covariance_estimation_laplacian.cu @@ -0,0 +1,154 @@ +#include + +#include + +#include +#include + +namespace fast_gicp { +namespace cuda { + +struct NormalDistribution { +public: + EIGEN_MAKE_ALIGNED_OPERATOR_NEW + + __host__ __device__ NormalDistribution() {} + + static __host__ __device__ NormalDistribution zero() { + NormalDistribution dist; + dist.sum_weights = 0.0f; + dist.mean.setZero(); + dist.cov.setZero(); + return dist; + } + + __host__ __device__ NormalDistribution operator+(const NormalDistribution& rhs) const { + NormalDistribution sum; + sum.sum_weights = sum_weights + rhs.sum_weights; + sum.mean = mean + rhs.mean; + sum.cov = cov + rhs.cov; + return sum; + } + + __host__ __device__ NormalDistribution& operator+=(const NormalDistribution& rhs) { + sum_weights += rhs.sum_weights; + mean += rhs.mean; + cov += rhs.cov; + return *this; + } + + __host__ __device__ void accumulate(const float w, const Eigen::Vector3f& x) { + sum_weights += w; + mean += w * x; + cov += w * x * x.transpose(); + } + + __host__ __device__ NormalDistribution& finalize() { + Eigen::Vector3f sum_pt = mean; + mean /= sum_weights; + cov = (cov - mean * sum_pt.transpose()) / sum_weights; + + return *this; + } + + float sum_weights; + Eigen::Vector3f mean; + Eigen::Matrix3f cov; +}; + +struct covariance_estimation_kernel_laplacian { + static const int BLOCK_SIZE = 512; + + covariance_estimation_kernel_laplacian(thrust::device_ptr exp_factor_ptr, thrust::device_ptr max_dist_ptr, thrust::device_ptr points_ptr) + : exp_factor_ptr(exp_factor_ptr), + max_dist_ptr(max_dist_ptr), + points_ptr(points_ptr) {} + + __host__ __device__ NormalDistribution operator()(const Eigen::Vector3f& x) const { + const float exp_factor = *thrust::raw_pointer_cast(exp_factor_ptr); + const float max_dist = *thrust::raw_pointer_cast(max_dist_ptr); + const float max_dist_sq = max_dist * max_dist; + const Eigen::Vector3f* points = thrust::raw_pointer_cast(points_ptr); + + NormalDistribution dist = NormalDistribution::zero(); + for (int i = 0; i < BLOCK_SIZE; i++) { + float sq_d = (x - points[i]).squaredNorm(); + if (sq_d > max_dist_sq) { + continue; + } + + float r = sqrt(sq_d); + float w = expf(-r / exp_factor); + dist.accumulate(w, points[i]); + } + + return dist; + } + + thrust::device_ptr exp_factor_ptr; + thrust::device_ptr max_dist_ptr; + thrust::device_ptr points_ptr; +}; +struct finalization_kernel { + finalization_kernel(const int stride, const thrust::device_vector& accumulated_dists) + : stride(stride), + accumulated_dists_first(accumulated_dists.data()), + accumulated_dists_last(accumulated_dists.data() + accumulated_dists.size()) {} + + __host__ __device__ Eigen::Matrix3f operator()(int index) const { + const NormalDistribution* dists = thrust::raw_pointer_cast(accumulated_dists_first); + const NormalDistribution* dists_last = thrust::raw_pointer_cast(accumulated_dists_last); + const int num_dists = dists_last - dists; + + NormalDistribution sum = dists[index]; + for (int dist_index = index + stride; dist_index < num_dists; dist_index += stride) { + sum += dists[dist_index]; + } + + return sum.finalize().cov; + } + + const int stride; + thrust::device_ptr accumulated_dists_first; + thrust::device_ptr accumulated_dists_last; +}; + +void covariance_estimation_laplacian(const thrust::device_vector& points, double kernel_width, double max_dist, thrust::device_vector& covariances) { + covariances.resize(points.size()); + + thrust::device_vector constants(2); + constants[0] = kernel_width; + constants[1] = max_dist; + thrust::device_ptr exp_factor_ptr = constants.data(); + thrust::device_ptr max_dist_ptr = constants.data() + 1; + + int num_blocks = (points.size() + (covariance_estimation_kernel_laplacian::BLOCK_SIZE - 1)) / covariance_estimation_kernel_laplacian::BLOCK_SIZE; + // padding + thrust::device_vector ext_points(num_blocks * covariance_estimation_kernel_laplacian::BLOCK_SIZE); + thrust::copy(points.begin(), points.end(), ext_points.begin()); + thrust::fill(ext_points.begin() + points.size(), ext_points.end(), Eigen::Vector3f(0.0f, 0.0f, 0.0f)); + + thrust::device_vector accumulated_dists(points.size() * num_blocks); + + thrust::system::cuda::detail::unique_stream stream; + std::vector events(num_blocks); + + // accumulate kerneled point distributions + for (int i = 0; i < num_blocks; i++) { + covariance_estimation_kernel_laplacian kernel(exp_factor_ptr, max_dist_ptr, ext_points.data() + covariance_estimation_kernel_laplacian::BLOCK_SIZE * i); + auto event = thrust::async::transform(points.begin(), points.end(), accumulated_dists.begin() + points.size() * i, kernel); + events[i] = std::move(event); + thrust::system::cuda::detail::create_dependency(stream, events[i]); + } + + // finalize distributions + thrust::transform( + thrust::cuda::par.on(stream.native_handle()), + thrust::counting_iterator(0), + thrust::counting_iterator(points.size()), + covariances.begin(), + finalization_kernel(points.size(), accumulated_dists)); +} + +} // namespace cuda +} // namespace fast_gicp \ No newline at end of file diff --git a/src/fast_gicp/cuda/covariance_estimation_polynomial.cu b/src/fast_gicp/cuda/covariance_estimation_polynomial.cu new file mode 100644 index 0000000..f3c8c9f --- /dev/null +++ b/src/fast_gicp/cuda/covariance_estimation_polynomial.cu @@ -0,0 +1,167 @@ +#include + +#include + +#include +#include + +namespace fast_gicp { +namespace cuda { + +struct NormalDistribution { +public: + EIGEN_MAKE_ALIGNED_OPERATOR_NEW + + __host__ __device__ NormalDistribution() {} + + static __host__ __device__ NormalDistribution zero() { + NormalDistribution dist; + dist.sum_weights = 0.0f; + dist.mean.setZero(); + dist.cov.setZero(); + return dist; + } + + __host__ __device__ NormalDistribution operator+(const NormalDistribution& rhs) const { + NormalDistribution sum; + sum.sum_weights = sum_weights + rhs.sum_weights; + sum.mean = mean + rhs.mean; + sum.cov = cov + rhs.cov; + return sum; + } + + __host__ __device__ NormalDistribution& operator+=(const NormalDistribution& rhs) { + sum_weights += rhs.sum_weights; + mean += rhs.mean; + cov += rhs.cov; + return *this; + } + + __host__ __device__ void accumulate(const float w, const Eigen::Vector3f& x) { + sum_weights += w; + mean += w * x; + cov += w * x * x.transpose(); + } + + __host__ __device__ NormalDistribution& finalize() { + Eigen::Vector3f sum_pt = mean; + mean /= sum_weights; + cov = (cov - mean * sum_pt.transpose()) / sum_weights; + + return *this; + } + + float sum_weights; + Eigen::Vector3f mean; + Eigen::Matrix3f cov; +}; + +struct covariance_estimation_kernel { + static const int BLOCK_SIZE = 512; + + covariance_estimation_kernel(thrust::device_ptr exp_factor_ptr, thrust::device_ptr max_dist_ptr, thrust::device_ptr points_ptr) + : exp_factor_ptr(exp_factor_ptr), + max_dist_ptr(max_dist_ptr), + points_ptr(points_ptr) {} + + __host__ __device__ NormalDistribution operator()(const Eigen::Vector3f& x) const { + const float exp_factor = *thrust::raw_pointer_cast(exp_factor_ptr); + const float max_dist = *thrust::raw_pointer_cast(max_dist_ptr); + const float max_dist_sq = max_dist * max_dist; + const Eigen::Vector3f* points = thrust::raw_pointer_cast(points_ptr); + + NormalDistribution dist = NormalDistribution::zero(); + for (int i = 0; i < BLOCK_SIZE; i++) { + float sq_d = (x - points[i]).squaredNorm(); + if (sq_d > max_dist_sq) { + continue; + } + + float w = expf(-exp_factor * sq_d); + dist.accumulate(w, points[i]); + } + + return dist; + } + + thrust::device_ptr exp_factor_ptr; + thrust::device_ptr max_dist_ptr; + thrust::device_ptr points_ptr; +}; + +struct finalization_kernel_polynomial { + finalization_kernel_polynomial(const int stride, const thrust::device_vector& accumulated_dists) + : stride(stride), + accumulated_dists_first(accumulated_dists.data()), + accumulated_dists_last(accumulated_dists.data() + accumulated_dists.size()) {} + + __host__ __device__ Eigen::Matrix3f operator()(int index) const { + const double* dists = thrust::raw_pointer_cast(accumulated_dists_first); + const double* dists_last = thrust::raw_pointer_cast(accumulated_dists_last); + const int num_dists = dists_last - dists; + + double sum = dists[index]; + for (int dist_index = index + stride; dist_index < num_dists; dist_index += stride) { + sum += dists[dist_index]; + } + + return sum * Eigen::Matrix3f::Identity(); // Since we used a polynomial kernel, we only accumulate the distances. + } + + const int stride; + thrust::device_ptr accumulated_dists_first; + thrust::device_ptr accumulated_dists_last; +}; + +// 수정된 다항식 커널 함수 +struct PolynomialKernel { + double alpha; // 커널 계수 + double constant; // 상수항 + int degree; // 다항식의 차수 + + __host__ __device__ + PolynomialKernel(double alpha, double constant, int degree) : alpha(alpha), constant(constant), degree(degree) {} + + // 다항식 커널 계산 + __host__ __device__ + double operator()(const Eigen::Vector3f& x, const Eigen::Vector3f& y) const { + return pow((alpha * x.dot(y) + constant), degree); + } +}; + +// covariance_estimation_polynomial 함수 수정 +void covariance_estimation_polynomial(const thrust::device_vector& points, double alpha, double constant, int degree, thrust::device_vector& covariances) { + covariances.resize(points.size()); + + int num_blocks = (points.size() + (covariance_estimation_kernel::BLOCK_SIZE - 1)) / covariance_estimation_kernel::BLOCK_SIZE; + // padding + thrust::device_vector ext_points(num_blocks * covariance_estimation_kernel::BLOCK_SIZE); + thrust::copy(points.begin(), points.end(), ext_points.begin()); + thrust::fill(ext_points.begin() + points.size(), ext_points.end(), Eigen::Vector3f(0.0f, 0.0f, 0.0f)); + + thrust::device_vector accumulated_dists(points.size() * num_blocks); + + thrust::system::cuda::detail::unique_stream stream; + std::vector events(num_blocks); + + // accumulate polynomial kerneled point distributions + for (int i = 0; i < num_blocks; i++) { + thrust::device_ptr block_begin = ext_points.data() + covariance_estimation_kernel::BLOCK_SIZE * i; + thrust::device_ptr block_end = block_begin + thrust::min(covariance_estimation_kernel::BLOCK_SIZE, static_cast(points.size()) - covariance_estimation_kernel::BLOCK_SIZE * i); + + // Apply the polynomial kernel to the data and accumulate the results + thrust::transform(block_begin, block_end, points.begin(), accumulated_dists.begin() + points.size() * i, PolynomialKernel(alpha, constant, degree)); + } + + // finalize distributions + thrust::transform( + thrust::cuda::par.on(stream.native_handle()), + thrust::counting_iterator(0), + thrust::counting_iterator(points.size()), + covariances.begin(), + finalization_kernel_polynomial(points.size(), accumulated_dists)); +} + + +} // namespace cuda +} // namespace fast_gicp \ No newline at end of file