Skip to content

Commit

Permalink
use get_num_multiprocessor
Browse files Browse the repository at this point in the history
  • Loading branch information
yhmtsai authored and pratikvn committed Dec 12, 2023
1 parent ff2c436 commit b550ed1
Showing 1 changed file with 4 additions and 3 deletions.
7 changes: 4 additions & 3 deletions cuda/solver/async_jacobi_kernels.cu
Original file line number Diff line number Diff line change
Expand Up @@ -88,9 +88,10 @@ void apply(std::shared_ptr<const DefaultExecutor> exec,
#if USE_DYNAMIC
int oscb = DYNAMIC_OSCB;
constexpr int subwarp_size = SUBWARP_SIZE;
int v100 = 80 * oscb; // V100 contains 80 SM
auto num_subwarp = v100 * default_block_size / subwarp_size;
int gridx = v100;
int num_blocks =
exec->get_num_multiprocessor() * oscb; // oscb blocks per multiprocessor reported by the executor
auto num_subwarp = num_blocks * default_block_size / subwarp_size;
int gridx = num_blocks;
if (num_subwarp > a->get_size()[0]) {
gridx = a->get_size()[0] * subwarp_size / default_block_size;
}
Expand Down

0 comments on commit b550ed1

Please sign in to comment.