Skip to content

Commit

Permalink
implement mpi-distribution of observe expected value computations
Browse files Browse the repository at this point in the history
Signed-off-by: Alex McCaskey <amccaskey@nvidia.com>
  • Loading branch information
amccaskey committed Jun 10, 2023
1 parent 7c4f762 commit 2a53df6
Show file tree
Hide file tree
Showing 3 changed files with 119 additions and 10 deletions.
9 changes: 9 additions & 0 deletions runtime/cudaq.h
Original file line number Diff line number Diff line change
Expand Up @@ -201,8 +201,17 @@ void initialize();
/// against MPI. Takes program arguments as input.
void initialize(int argc, char **argv);

/// @brief Return the rank of the calling process.
int rank();

/// @brief Return the number of MPI ranks.
int num_ranks();

/// @brief Return true if MPI is already initialized, false otherwise.
bool is_initialized();

/// @brief Sum the given local value across all MPI ranks; every rank
/// receives the same global sum.
double allreduce_double_add(double localValue);

/// @brief Finalize MPI. This function
/// is a no-op if CUDA Quantum has not been built
/// against MPI.
Expand Down
101 changes: 91 additions & 10 deletions runtime/cudaq/algorithms/observe.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,28 @@

namespace cudaq {

// Forward declarations of the CUDA Quantum MPI API (declared in
// runtime/cudaq.h) so the distributed observe overloads below can use it
// without pulling in the full cudaq.h header.
namespace mpi {
/// @brief Return the rank of the calling process.
int rank();
/// @brief Return the number of MPI ranks.
int num_ranks();
/// @brief Return true if MPI is already initialized, false otherwise.
bool is_initialized();
/// @brief Sum `localValue` across all ranks; every rank gets the global sum.
double allreduce_double_add(double localValue);
} // namespace mpi

/// @brief Return type for asynchronous observation.
using async_observe_result = async_result<observe_result>;

/// @brief Multi-GPU Multi-Node (MPI)
/// distribution type tag for observe
struct mgmn {};

/// @brief Multi-GPU Single-Node
/// distribution type tag for observe
struct mgsn {};

/// @brief Multi-Node, no GPU
/// distribution type tag for observe
struct mn {};

/// @brief Define a combined sample function validation concept.
/// These concepts provide much better error messages than old-school SFINAE
template <typename QuantumKernel, typename... Args>
Expand Down Expand Up @@ -176,25 +195,87 @@ observe_result observe(QuantumKernel &&kernel, spin_op H, Args &&...args) {
// Run this SHOTS times
auto &platform = cudaq::get_platform();
auto shots = platform.get_shots().value_or(-1);
auto kernelName = cudaq::getKernelName(kernel);
return details::runObservation(
[&kernel, ... args = std::forward<Args>(args)]() mutable {
kernel(args...);
},
H, platform, shots, kernelName)
.value();
}

// Does this platform expose more than 1 QPU
// If so, let's distribute the work among the QPUs
if (auto nQpus = platform.num_qpus(); nQpus > 1)
/// @brief Compute the expected value of `H` with respect to `kernel(Args...)`.
/// Distribute the work amongst available QPUs on the platform in parallel. This
/// distribution can occur on multi-gpu multi-node platforms, multi-gpu
/// single-node platforms, or multi-node no-gpu platforms. Programmers must
/// indicate the distribution type via the corresponding template types
/// (cudaq::mgmn, cudaq::mgsn, cudaq::mn).
/// @brief Compute the expected value of `H` with respect to `kernel(Args...)`.
/// Distribute the work amongst available QPUs on the platform in parallel. This
/// distribution can occur on multi-gpu multi-node platforms, multi-gpu
/// single-node platforms, or multi-node no-gpu platforms. Programmers must
/// indicate the distribution type via the corresponding template types
/// (cudaq::mgmn, cudaq::mgsn, cudaq::mn).
template <typename DistributionType, typename QuantumKernel, typename... Args>
  requires ObserveCallValid<QuantumKernel, Args...>
observe_result observe(std::size_t shots, QuantumKernel &&kernel, spin_op H,
                       Args &&...args) {
  auto &platform = cudaq::get_platform();
  auto nQpus = platform.num_qpus();
  // Use `if constexpr` for every distribution branch so that only the code
  // for the requested DistributionType is instantiated; a plain runtime
  // `else if` here would instantiate (and require compilation of) the MPI
  // branch even for single-node callers.
  if constexpr (std::is_same_v<DistributionType, mgsn>) {
    if (nQpus == 1)
      printf(
          "[cudaq::observe warning] distributed observe requested but only 1 "
          "QPU available. no speedup expected.\n");
    // Let's distribute the work among the QPUs on this node.
    return details::distributeComputations(
        [&kernel, ... args = std::forward<Args>(args)](std::size_t i,
                                                       spin_op &op) mutable {
          return observe_async(i, std::forward<QuantumKernel>(kernel), op,
                               std::forward<Args>(args)...);
        },
        H, nQpus);
  } else if constexpr (std::is_same_v<DistributionType, mgmn>) {
    // This is an MPI distribution, where each node has N GPUs.
    if (!mpi::is_initialized())
      throw std::runtime_error(
          "Cannot use mgmn or mn multi-node observe() without MPI.");

    // Note - For MGMN, we assume that nQpus == num visible GPUs for this local
    // rank.

    // Get the rank and the number of ranks.
    auto rank = mpi::rank();
    auto nRanks = mpi::num_ranks();

    // Each rank gets a subset of the spin terms.
    auto spins = H.distribute_terms(nRanks);

    // Get this rank's set of spins to compute.
    auto localH = spins[rank];

    // Distribute locally, i.e. to the local node's QPUs.
    // NOTE(review): kernel is forwarded once per term here, as in the
    // single-node path — confirm observe_async copies rvalue kernels.
    auto localRankResult = details::distributeComputations(
        [&kernel, ... args = std::forward<Args>(args)](std::size_t i,
                                                       spin_op &op) mutable {
          return observe_async(i, std::forward<QuantumKernel>(kernel), op,
                               std::forward<Args>(args)...);
        },
        localH, nQpus);

    // Combine the per-rank partial expectation values via an all-reduce (sum).
    auto exp_val = localRankResult.exp_val_z();
    auto globalExpVal = mpi::allreduce_double_add(exp_val);
    return observe_result(globalExpVal, H);

  } else {
    // cudaq::mn (multi-node, no GPU) and any other distribution types are
    // not yet supported.
    throw std::runtime_error("Not implemented.");
  }
}

/// @brief Distributed observe overload that pulls the shot count from the
/// current quantum_platform (falling back to -1, i.e. "not set") and then
/// delegates to the shots-taking distributed observe.
template <typename DistributionType, typename QuantumKernel, typename... Args>
  requires ObserveCallValid<QuantumKernel, Args...>
observe_result observe(QuantumKernel &&kernel, spin_op H, Args &&...args) {
  auto &currentPlatform = cudaq::get_platform();
  auto nShots = currentPlatform.get_shots().value_or(-1);
  return observe<DistributionType>(nShots, std::forward<QuantumKernel>(kernel),
                                   H, std::forward<Args>(args)...);
}

/// \brief Compute the expected value of `H` with respect to `kernel(Args...)`.
Expand Down
19 changes: 19 additions & 0 deletions runtime/cudaq/cudaq.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,12 +37,28 @@ void initialize(int argc, char **argv) {
if (pid == 0)
cudaq::info("MPI Enabled, nRanks = {}", np);
}
/// @brief Return the MPI_COMM_WORLD rank of the calling process.
int rank() {
  int processId{0};
  MPI_Comm_rank(MPI_COMM_WORLD, &processId);
  return processId;
}
/// @brief Return the total number of ranks in MPI_COMM_WORLD.
int num_ranks() {
  int worldSize{0};
  MPI_Comm_size(MPI_COMM_WORLD, &worldSize);
  return worldSize;
}
/// @brief Return true if MPI_Init has been called, false otherwise.
bool is_initialized() {
  int flag;
  MPI_Initialized(&flag);
  // The MPI standard only guarantees a logical (zero / nonzero) flag, not the
  // exact value 1, so compare against zero rather than `== 1`.
  return flag != 0;
}

/// @brief All-reduce: sum `localValue` over every rank in MPI_COMM_WORLD;
/// each rank receives the same global sum.
double allreduce_double_add(double localValue) {
  double result;
  MPI_Allreduce(&localValue, &result, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
  return result;
}

void finalize() {
int mpi_error = MPI_Finalize();
assert(mpi_error == MPI_SUCCESS);
Expand All @@ -53,6 +69,9 @@ namespace cudaq::mpi {
// Stub implementations used when CUDA Quantum is built without MPI support.
void initialize() {}
void initialize(int argc, char **argv) {}
// MPI is never initialized in a non-MPI build.
bool is_initialized() { return false; }
// Without MPI there is a single process: rank 0 of 1.
int rank() { return 0; }
int num_ranks() { return 1; }
// Without MPI there is exactly one process, so the "global" sum of a value
// over all ranks is the local value itself. (The previous stub returned 0.0,
// which silently zeroed out every observe() result in non-MPI builds.)
double allreduce_double_add(double value) { return value; }
// No-op: there is nothing to finalize in a non-MPI build.
void finalize() {}
} // namespace cudaq::mpi
#endif
Expand Down

0 comments on commit 2a53df6

Please sign in to comment.