diff --git a/.clang-format b/.clang-format index 335a74a..dd3a3c5 100644 --- a/.clang-format +++ b/.clang-format @@ -42,7 +42,7 @@ BreakBeforeBinaryOperators: None BreakBeforeTernaryOperators: true BreakConstructorInitializers: BeforeColon BreakInheritanceList: BeforeColon -ColumnLimit: 80 +ColumnLimit: 120 CompactNamespaces: false ContinuationIndentWidth: 4 Cpp11BracedListStyle: true @@ -52,7 +52,7 @@ FixNamespaceComments: true IncludeBlocks: Preserve IndentCaseLabels: true IndentPPDirectives: None -IndentWidth: 2 +IndentWidth: 4 KeepEmptyLinesAtTheStartOfBlocks: true MaxEmptyLinesToKeep: 1 NamespaceIndentation: None diff --git a/.gitmodules b/.gitmodules index 2bb4aed..e07fd62 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,6 +1,9 @@ [submodule "externals/mdspan"] path = externals/mdspan url = https://github.com/kokkos/mdspan +[submodule "externals/magic_enum"] + path = externals/magic_enum + url = https://github.com/mhaseeb123/magic_enum [submodule "externals/argparse"] path = externals/argparse url = https://github.com/mhaseeb123/argparse diff --git a/CMakeLists.txt b/CMakeLists.txt index 558c8b6..2c6ee0c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -74,7 +74,7 @@ set(GCC_EXPECTED_VERSION 11.2) if(CMAKE_CXX_COMPILER_VERSION VERSION_LESS GCC_EXPECTED_VERSION) message( FATAL_ERROR - "GCC: GCB requires GCC v${GCC_EXPECTED_VERSION} or higher to build but found v${CMAKE_CXX_COMPILER_VERSION}" + "GCC: nvstdpar requires GCC v${GCC_EXPECTED_VERSION} or higher to build but found v${CMAKE_CXX_COMPILER_VERSION}" ) endif() @@ -84,10 +84,11 @@ endif() set(CXX_STANDARD_REQUIRED ON) # required minimum CXX standard -set(CMAKE_CXX_STANDARD_REQUIRED 20) +set(CMAKE_CXX_STANDARD_REQUIRED 23) if(NOT CXX_STANDARD OR (CXX_STANDARD LESS ${CMAKE_CXX_STANDARD_REQUIRED})) set(CXX_STANDARD ${CMAKE_CXX_STANDARD_REQUIRED}) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++${CXX_STANDARD}") message(STATUS "Setting CXX_STANDARD to ${CMAKE_CXX_STANDARD_REQUIRED}") endif() diff --git 
a/apps/CMakeLists.txt b/apps/CMakeLists.txt index 03c6283..1bf106a 100644 --- a/apps/CMakeLists.txt +++ b/apps/CMakeLists.txt @@ -27,3 +27,9 @@ add_subdirectory(1d_stencil) # ----------------------------------------------------------------------------------------# message(STATUS "Adding choleskey example...") add_subdirectory(choleskey) + +# ----------------------------------------------------------------------------------------# +# Add fft demo +# ----------------------------------------------------------------------------------------# +message(STATUS "Adding fft...") +add_subdirectory(fft) diff --git a/apps/choleskey/choleskey_stdpar_snd.cpp b/apps/choleskey/choleskey_stdpar_snd.cpp index f7e5644..020dbc1 100644 --- a/apps/choleskey/choleskey_stdpar_snd.cpp +++ b/apps/choleskey/choleskey_stdpar_snd.cpp @@ -1,8 +1,8 @@ /* * MIT License * - * Copyright (c) 2023 Chuanqiu He - * Copyright (c) 2023 Weile Wei + * Copyright (c) 2023 Chuanqiu He + * Copyright (c) 2023 Weile Wei * Copyright (c) 2023 The Regents of the University of California, * through Lawrence Berkeley National Laboratory (subject to receipt of any * required approvals from the U.S. Dept. of Energy).All rights reserved. 
diff --git a/apps/comm-study/comm-study-no-senders.cpp b/apps/comm-study/comm-study-no-senders.cpp index 1377745..1550094 100644 --- a/apps/comm-study/comm-study-no-senders.cpp +++ b/apps/comm-study/comm-study-no-senders.cpp @@ -74,7 +74,7 @@ auto work(P& A, P& B, P& Y, int N) { // get sum(Y) - one last memcpy (not USM) D2H sum += - std::reduce(std::execution::par_unseq, &Y[0], &Y[N], 0.0, std::plus()); + std::transform_reduce(std::execution::par_unseq, &Y[0], &Y[N], 0.0, std::plus(), [](T &val){return val * val;}); return sum / N; } diff --git a/apps/fft/CMakeLists.txt b/apps/fft/CMakeLists.txt new file mode 100644 index 0000000..e957d46 --- /dev/null +++ b/apps/fft/CMakeLists.txt @@ -0,0 +1,40 @@ +project(fft LANGUAGES CXX) + +file(GLOB CPP_SOURCES "*.cpp") + +foreach(source_file ${CPP_SOURCES}) + if(NOT STDPAR STREQUAL "gpu") + if("${source_file}" MATCHES ".*gpu.*scheduler.*" OR "${source_file}" + MATCHES ".*cuda.*") + message(STATUS "Skipping ${source_file} as stdpar=${STDPAR}") + continue() + endif() + endif() + + # get the file name without an extension + get_filename_component(exec_name ${source_file} NAME_WE) + + # add an executable with the same name as the source file + add_executable(${exec_name} ${_EXCLUDE} ${source_file}) + + # add dependency on argparse + add_dependencies(${exec_name} argparse magic_enum) + + set_source_files_properties(${source_file} PROPERTIES LANGUAGE CXX + LINKER_LANGUAGE CXX) + target_include_directories( + ${exec_name} + PRIVATE ${CMAKE_BINARY_DIR} ${CMAKE_CURRENT_LIST_DIR}/../../include + ${ARGPARSE_INCLUDE_DIR} ${MAGICENUM_INCLUDE_DIR} ${MDSPAN_INCLUDE_DIR}) + + target_link_libraries(${exec_name} PUBLIC ${MPI_LIBS} stdexec) + + set_target_properties( + ${exec_name} + PROPERTIES CXX_STANDARD ${CXX_STANDARD} + CXX_EXTENSIONS NO + INSTALL_RPATH_USE_LINK_PATH ON) + + # installation + install(TARGETS ${exec_name} DESTINATION ${CMAKE_INSTALL_BINDIR}) +endforeach() diff --git a/apps/fft/fft-serial.cpp b/apps/fft/fft-serial.cpp new 
file mode 100644 index 0000000..21d66f6 --- /dev/null +++ b/apps/fft/fft-serial.cpp @@ -0,0 +1,138 @@ +/* + * MIT License + * + * Copyright (c) 2023 The Regents of the University of California, + * through Lawrence Berkeley National Laboratory (subject to receipt of any + * required approvals from the U.S. Dept. of Energy).All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */
+
+/*
+ * serial radix-2 FFT: recursive decimation-in-time reference implementation
+ */
+
+#include "fft.hpp"
+
+//
+// entry point: build x[n], zero-pad it to a power-of-two length, compute X[k] and optionally print/time it
+//
+int main(int argc, char* argv[])
+{
+    // parse params
+    fft_params_t args = argparse::parse<fft_params_t>(argc, argv);
+
+    // see if help wanted
+    if (args.help)
+    {
+        args.print(); // prints all variables
+        return 0;
+    }
+
+    // simulation variables
+    int N = args.N;
+    sig_type_t sig_type = args.sig;
+    int freq = args.freq; // parsed but not referenced anywhere below
+    bool print_sig = args.print_sig;
+    bool print_time = args.print_time;
+
+    // x[n] signal
+    //std::vector<data_t> test_sig{2,1,-1,5,0,3,0,-4};
+    //N = test_sig.size();
+
+    Timer timer; // NOTE(review): presumably starts timing here, so the figure covers generation and printing too
+
+    sig_t x_n(N, sig_type);
+
+    if (!isPowOf2(N))
+    {
+        N = ceilPowOf2(N);
+        std::cout << "log_2(N) != integer. Padding zeros for N = " << N << std::endl;
+
+        x_n.resize(N);
+    }
+
+    sig_t y_n(x_n);
+
+    if (print_sig)
+    {
+        std::cout << std::endl << "x[n] = ";
+        x_n.printSignal();
+        std::cout << std::endl;
+    }
+
+    // fft(x, lN, N): in-place lN-point FFT of x; N is the full transform length and selects the twiddle factors
+    std::function<void(data_t *, int, const int)> fft = [&](data_t *x, int lN, const int N)
+    {
+        // a 0- or 1-point transform is the identity; returning here keeps N/lN and the recursion well defined
+        if (lN < 2) return;
+
+        int stride = N/lN;
+
+        if (lN == 2)
+        {
+            const data_t t = x[1] * WNk(N, 0);
+            x[1] = x[0] - t;
+            x[0] += t;
+            return;
+        }
+
+        // vectors for left and right
+        std::vector<data_t> e(lN/2);
+        std::vector<data_t> o(lN/2);
+
+        // copy data into vectors
+        for (auto k = 0; k < lN/2; k++)
+        {
+            e[k] = x[2*k];
+            o[k] = x[2*k+1];
+        }
+
+        // compute N/2 pt FFT on even
+        fft(e.data(), lN/2, N);
+
+        // compute N/2 pt FFT on odd
+        fft(o.data(), lN/2, N);
+
+        // combine even and odd FFTs
+        for (int k = 0; k < lN/2; k++)
+        {
+            const data_t t = o[k] * WNk(N, k * stride); // twiddle applied once, reused by both outputs
+            x[k] = e[k] + t;
+            x[k+lN/2] = e[k] - t;
+        }
+    };
+
+    // recursive radix-2 FFT, computed in place on the copy y_n
+    fft(y_n.data(), N, N);
+
+    if (print_sig)
+    {
+        std::cout << "X[k] = ";
+        y_n.printSignal();
+        std::cout << std::endl;
+    }
+
+    auto elapsed = timer.stop();
+
+    if (print_time)
+        std::cout << "Elapsed Time: " << elapsed << " ms" << std::endl;
+
+    return 0;
+}
diff --git a/apps/fft/fft.hpp b/apps/fft/fft.hpp
new file mode
100644 index 0000000..80a7446 --- /dev/null +++ b/apps/fft/fft.hpp @@ -0,0 +1,213 @@ +/* + * MIT License + * + * Copyright (c) 2023 The Regents of the University of California, + * through Lawrence Berkeley National Laboratory (subject to receipt of any + * required approvals from the U.S. Dept. of Energy).All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */
+
+/*
+ * commons for the fft codes
+ */
+
+#pragma once
+
+#include <bit>
+#include <cmath>
+#include <complex>
+#include <cstdint>
+#include <cstdlib>
+#include <functional>
+#include <iomanip>
+#include <iostream>
+#include <vector>
+
+#include "argparse/argparse.hpp"
+#include "commons.hpp"
+
+namespace ex = stdexec;
+using namespace std::complex_literals;
+
+// data type
+using Real_t = double;
+using data_t = std::complex<Real_t>;
+
+enum class sig_type { square, sinusoid, sawtooth, triangle, sinc, box };
+using sig_type_t = sig_type;
+
+// fft radix
+constexpr int radix = 2;
+
+// parameters
+struct fft_params_t : public argparse::Args {
+    sig_type_t& sig = kwarg("sig", "input signal type: square, sinusoid, sawtooth, triangle, sinc, box").set_default(sig_type_t::box);
+    int& freq = kwarg("f,freq", "Signal frequency").set_default(1024);
+    int& N = kwarg("N", "N-point FFT").set_default(1024);
+    bool& print_sig = flag("p,print", "print x[n] and X(k)");
+
+#if defined(USE_OMP)
+    int& nthreads = kwarg("nthreads", "number of threads").set_default(1);
+#endif // USE_OMP
+
+    bool& help = flag("h, help", "print help");
+    bool& print_time = flag("t,time", "print fft time");
+};
+
+// true iff x is a positive power of two (x > 0 also keeps x - 1 from overflowing)
+inline bool isPowOf2(long long int x) {
+    return x > 0 && !(x & (x - 1));
+}
+
+// print the first len elements of vec as "[ a b c ]"
+template <typename T>
+void printVec(T &vec, int len)
+{
+    std::cout << "[ ";
+    for (int i = 0; i < len; i++)
+        std::cout << vec[i] << " ";
+
+    std::cout << "]" << std::endl;
+}
+
+// twiddle factor W_N^k = exp(-2*pi*i*k/N)
+inline std::complex<Real_t> WNk(int N, int k)
+{
+    return std::complex<Real_t>(std::exp(-2*M_PI*1/N*k*1i));
+}
+
+// smallest power of two that is >= v
+inline int ceilPowOf2(unsigned int v)
+{
+    return static_cast<int>(std::bit_ceil(v));
+}
+
+// floor(log2(x)) in exact integer arithmetic; yields -1 for x == 0
+inline int ilog2(uint32_t x)
+{
+    return static_cast<int>(std::bit_width(x)) - 1;
+}
+
+// owning buffer of complex samples y[n]; copy/move/destroy are the compiler-generated ones (Rule of Zero)
+class signal
+{
+public:
+    signal() = default;
+    signal(int N)
+    {
+        if (N <= 0)
+        {
+            std::cerr << "FATAL: N must be > 0. exiting.." << std::endl;
+            exit(1);
+        }
+        y.reserve(ceilPowOf2(N));
+        y.resize(N);
+    }
+
+    // takes over the contents of in; the caller's vector is left in a moved-from state
+    signal(std::vector<data_t> &in)
+    {
+        y = std::move(in);
+    }
+
+    signal(int N, sig_type type)
+    {
+        if (N <= 0)
+        {
+            std::cerr << "FATAL: N must be > 0. exiting.." << std::endl;
+            exit(1);
+        }
+        y.reserve(ceilPowOf2(N));
+        y.resize(N);
+        signalGenerator(type);
+    }
+
+    // overwrite all current samples with the requested waveform; exits on an unknown type
+    void signalGenerator(sig_type type=sig_type::box)
+    {
+        const int N = static_cast<int>(y.size());
+
+        switch (type) {
+        case sig_type::square:
+            for (int n = 0; n < N; ++n)
+                y[n] = (n < N / 4 || n > 3 * N/4) ? 1.0 : -1.0;
+            break;
+        case sig_type::sinusoid:
+            for (int n = 0; n < N; ++n)
+                y[n] = std::sin(2.0 * M_PI * n / N);
+            break;
+        case sig_type::sawtooth:
+            for (int n = 0; n < N; ++n)
+                y[n] = 2.0 * (static_cast<Real_t>(n) / N) - 1.0; // real division: int n / N is always 0 here
+            break;
+        case sig_type::triangle:
+            for (int n = 0; n < N; ++n)
+                y[n] = 2.0 * std::abs(2.0 * (static_cast<Real_t>(n) / N) - 1.0) - 1.0; // same real division
+            break;
+        case sig_type::sinc:
+            if (N > 0) y[0] = 1.0; // limit of sin(t)/t at t = 0; guarded for an empty signal
+            for (int n = 1; n < N; ++n)
+                y[n] = std::sin(2.0 * M_PI * n / N) / (2.0 * M_PI * n / N);
+            break;
+        case sig_type::box:
+            for (int n = 0; n < N; ++n)
+                y[n] = (n < N / 4 || n > 3 * N / 4) ? 1.0 : 0.0;
+            break;
+        default:
+            std::cerr << "FATAL: Unknown signal type. exiting.." << std::endl;
+            exit(1);
+        }
+    }
+
+    data_t *data() { return y.data(); }
+    int len() const { return static_cast<int>(y.size()); }
+
+    void resize(int N)
+    {
+        if (static_cast<std::size_t>(N) != y.size())
+            y.resize(N, 0);
+    }
+
+    data_t &operator[](int n)
+    {
+        return y[n];
+    }
+
+    data_t &operator()(int n)
+    {
+        return y[n];
+    }
+
+    void printSignal() const {
+        std::cout << std::fixed << std::setprecision(2);
+
+        std::cout << "[ ";
+        for (auto &el : y)
+            std::cout << el << " ";
+
+        std::cout << "]" << std::endl;
+    }
+
+private:
+    // y[n]
+    std::vector<data_t> y;
+};
+
+using sig_t = signal;
diff --git a/argparse b/argparse
new file mode 160000
index 0000000..9770626
--- /dev/null
+++ b/argparse
@@ -0,0 +1 @@
+Subproject commit 9770626123d491bc9d27851a150da20fc47fc994
diff --git a/externals/CMakeLists.txt b/externals/CMakeLists.txt
index 8c6216c..b6e9828 100644
--- a/externals/CMakeLists.txt
+++ b/externals/CMakeLists.txt
@@ -156,6 +156,9 @@ add_subdirectory(mdspan)
 message(STATUS "Adding externals/argparse...")
 add_subdirectory(argparse)
 
+message(STATUS "Adding externals/magic_enum...")
+add_subdirectory(magic_enum)
+
 set(MDSPAN_INCLUDE_DIR
     ${CMAKE_CURRENT_SOURCE_DIR}/mdspan/include
     CACHE PATH "mdspan include directory")
@@ -163,3 +166,7 @@ set(MDSPAN_INCLUDE_DIR
 set(ARGPARSE_INCLUDE_DIR
     ${CMAKE_CURRENT_SOURCE_DIR}/argparse/include
     CACHE PATH "argparse include directory")
+
+set(MAGICENUM_INCLUDE_DIR
+    ${CMAKE_CURRENT_SOURCE_DIR}/magic_enum/include
+    CACHE PATH "magic_enum include directory")
\ No newline at end of file
diff --git a/externals/argparse b/externals/argparse
index dee5935..9770626 160000
--- a/externals/argparse
+++ b/externals/argparse
@@ -1 +1 @@
-Subproject commit dee59359be9a2a023ceb59384c735b4e711cc18d
+Subproject commit 9770626123d491bc9d27851a150da20fc47fc994
diff --git a/externals/magic_enum b/externals/magic_enum
new file mode 160000
index 0000000..d67973d
--- /dev/null
+++ b/externals/magic_enum
@@ -0,0 +1 @@
+Subproject commit d67973d1181ff986ba63c756b47cc854f4d51d32
diff --git a/externals/mdspan b/externals/mdspan
index 124b860..f840358 160000
--- a/externals/mdspan
+++ b/externals/mdspan
@@ -1 +1 @@
-Subproject commit 124b860f458e5c06c9b96d7510dc35b7acdd642b
+Subproject commit f84035865a92241a5163d8d0e5100aea037892ca