diff --git a/catch/multiproc/hipMemCoherencyTstMProc.cc b/catch/multiproc/hipMemCoherencyTstMProc.cc index 576f8d8bc..6098cf9e4 100644 --- a/catch/multiproc/hipMemCoherencyTstMProc.cc +++ b/catch/multiproc/hipMemCoherencyTstMProc.cc @@ -81,13 +81,16 @@ static void TstCoherency(int *Ptr, bool HmmMem) { // Getting gpu frequency if (IsGfx11()) { - HIPCHECK(hipDeviceGetAttribute(&peak_clk, hipDeviceAttributeWallClockRate, 0)); + HIPCHECK(hipDeviceGetAttribute(&peak_clk, + hipDeviceAttributeWallClockRate, 0)); } else { - HIPCHECK(hipDeviceGetAttribute(&peak_clk, hipDeviceAttributeClockRate, 0)); + HIPCHECK(hipDeviceGetAttribute(&peak_clk, + hipDeviceAttributeClockRate, 0)); } if (!HmmMem) { - HIP_CHECK(hipHostGetDevicePointer(reinterpret_cast(&Dptr), Ptr, 0)); + HIP_CHECK(hipHostGetDevicePointer(reinterpret_cast(&Dptr), + Ptr, 0)); if (IsGfx11()) { CoherentTst_gfx11<<<1, 1, 0, strm>>>(Dptr, peak_clk); } else { @@ -122,58 +125,13 @@ static void TstCoherency(int *Ptr, bool HmmMem) { // The following test is failing on Nvidia platform hence disabled it for now #if HT_AMD TEST_CASE("Unit_malloc_CoherentTst") { - if ((setenv("HSA_XNACK", "1", 1)) != 0) { - WARN("Unable to turn on HSA_XNACK, hence terminating the Test case!"); - REQUIRE(false); - } - // The following code block is used to check for gfx906/8 so as to skip if - // any of the gpus available - int fd1[2]; // Used to store two ends of first pipe - pid_t p; - if (pipe(fd1) == -1) { - fprintf(stderr, "Pipe Failed"); - REQUIRE(false); - } - - /* GpuId[0] for gfx90a exists--> 1 for yes and 0 for no*/ - int GpuId[1] = {0}; - p = fork(); - - if (p < 0) { - fprintf(stderr, "fork Failed"); - REQUIRE(false); - } else if (p > 0) { // parent process - close(fd1[1]); // Close writing end of first pipe - // Wait for child to send a string - wait(NULL); - // Read string from child and close reading end. - read(fd1[0], GpuId, 2 * sizeof(int)); - close(fd1[0]); - if (GpuId[0] == 0) { - WARN("This test is applicable for MI200." - "Skipping the test!!"); - exit(0); - } - } else { // child process - close(fd1[0]); // Close read end of first pipe - hipDeviceProp_t prop; - HIPCHECK(hipGetDeviceProperties(&prop, 0)); - char *p = NULL; - - if (CheckIfFeatSupported(CTFeatures::CT_FEATURE_FINEGRAIN_HWSUPPORT, prop.gcnArchName)) { - WARN("gfx90a gpu found on this system!!"); - GpuId[0] = 1; - } - - // Write concatenated string and close writing end - write(fd1[1], GpuId, 2 * sizeof(int)); - close(fd1[1]); - exit(0); - } - - // Test Case execution begins from here - int stat = 0; - if (fork() == 0) { + hipDeviceProp_t prop; + HIPCHECK(hipGetDeviceProperties(&prop, 0)); + char *p = NULL; + p = strstr(prop.gcnArchName, "xnack+"); + if (p) { + // Test Case execution begins from here + int stat = 0; int managed = 0; HIPCHECK(hipDeviceGetAttribute(&managed, hipDeviceAttributeManagedMemory, 0)); @@ -185,23 +143,10 @@ TEST_CASE("Unit_malloc_CoherentTst") { Ptr = reinterpret_cast(malloc(SIZE)); TstCoherency(Ptr, HmmMem); free(Ptr); - if (YES_COHERENT) { - // exit() with code 10 which indicates pass - exit(10); - } else { - // exit() with code 9 which indicates fail - exit(9); - } - } else { - SUCCEED("GPU 0 doesn't support hipDeviceAttributeManagedMemory " - "attribute. Hence skipping the testing with Pass result.\n"); - } + REQUIRE(YES_COHERENT); + } } else { - wait(&stat); - int Result = WEXITSTATUS(stat); - if (Result != 10) { - REQUIRE(false); - } + HipTest::HIP_SKIP_TEST("GPU is not xnack enabled hence skipping the test...\n"); } } #endif @@ -212,55 +157,12 @@ TEST_CASE("Unit_malloc_CoherentTst") { // The following test is failing on Nvidia platform hence disabling it for now #if HT_AMD TEST_CASE("Unit_malloc_CoherentTstWthAdvise") { - if ((setenv("HSA_XNACK", "1", 1)) != 0) { - WARN("Unable to turn on HSA_XNACK, hence terminating the Test case!"); - REQUIRE(false); - } - // The following code block is used to check for gfx906/8 so as to skip if - // any of the gpus available - int fd1[2]; // Used to store two ends of first pipe - pid_t p; - if (pipe(fd1) == -1) { - fprintf(stderr, "Pipe Failed"); - REQUIRE(false); - } - - /* GpuId[0] for gfx90a exists--> 1 for yes and 0 for no */ - int GpuId[1] = {0}; - p = fork(); - - if (p < 0) { - fprintf(stderr, "fork Failed"); - REQUIRE(false); - } else if (p > 0) { // parent process - close(fd1[1]); // Close writing end of first pipe - // Wait for child to send a string - wait(NULL); - // Read string from child and close reading end. - read(fd1[0], GpuId, 2 * sizeof(int)); - close(fd1[0]); - if (GpuId[0] == 0) { - WARN("This test is applicable for MI200." - "Skipping the test!!"); - exit(0); - } - } else { // child process - close(fd1[0]); // Close read end of first pipe - hipDeviceProp_t prop; - HIPCHECK(hipGetDeviceProperties(&prop, 0)); - char *p = NULL; - p = strstr(prop.gcnArchName, "gfx90a"); - if (p) { - WARN("gfx90a gpu found on this system!!"); - GpuId[0] = 1; - } - // Write concatenated string and close writing end - write(fd1[1], GpuId, 2 * sizeof(int)); - close(fd1[1]); - exit(0); - } - int stat = 0; - if (fork() == 0) { + hipDeviceProp_t prop; + HIPCHECK(hipGetDeviceProperties(&prop, 0)); + char *p = NULL; + p = strstr(prop.gcnArchName, "xnack+"); + if (p) { + int stat = 0; int managed = 0; HIP_CHECK(hipDeviceGetAttribute(&managed, hipDeviceAttributeManagedMemory, 0)); @@ -275,25 +177,10 @@ TEST_CASE("Unit_malloc_CoherentTstWthAdvise") { SquareKrnl<<<1, 1, 0, strm>>>(Ptr); HIP_CHECK(hipStreamSynchronize(strm)); HIP_CHECK(hipStreamDestroy(strm)); - if (*Ptr == 16) { - // exit() with code 10 which indicates pass - free(Ptr); - exit(10); - } else { - // exit() with code 9 which indicates fail - free(Ptr); - exit(9); - } - } else { - SUCCEED("GPU 0 doesn't support hipDeviceAttributeManagedMemory " - "attribute. Hence skipping the testing with Pass result.\n"); + REQUIRE (*Ptr == 16); } } else { - wait(&stat); - int Result = WEXITSTATUS(stat); - if (Result != 10) { - REQUIRE(false); - } + HipTest::HIP_SKIP_TEST("GPU is not xnack enabled hence skipping the test...\n"); } } #endif @@ -303,55 +190,12 @@ TEST_CASE("Unit_malloc_CoherentTstWthAdvise") { // The following test is failing on Nvidia platform hence disabling it for now #if HT_AMD TEST_CASE("Unit_mmap_CoherentTst") { - if ((setenv("HSA_XNACK", "1", 1)) != 0) { - WARN("Unable to turn on HSA_XNACK, hence terminating the Test case!"); - REQUIRE(false); - } - // The following code block is used to check for gfx906/8 so as to skip if - // any of the gpus available - int fd1[2]; // Used to store two ends of first pipe - pid_t p; - if (pipe(fd1) == -1) { - fprintf(stderr, "Pipe Failed"); - REQUIRE(false); - } - - /* GpuId[0] for gfx90a exists--> 1 for yes and 0 for no */ - int GpuId[1] = {0}; - p = fork(); - - if (p < 0) { - fprintf(stderr, "fork Failed"); - REQUIRE(false); - } else if (p > 0) { // parent process - close(fd1[1]); // Close writing end of first pipe - // Wait for child to send a string - wait(NULL); - // Read string from child and close reading end. - read(fd1[0], GpuId, 2 * sizeof(int)); - close(fd1[0]); - if (GpuId[0] == 0) { - WARN("This test is not applicable for MI200." - "Skipping the test!!"); - exit(0); - } - } else { // child process - close(fd1[0]); // Close read end of first pipe - hipDeviceProp_t prop; - HIPCHECK(hipGetDeviceProperties(&prop, 0)); - char *p = NULL; - p = strstr(prop.gcnArchName, "gfx90a"); - if (p) { - WARN("gfx90a gpu found on this system!!"); - GpuId[0] = 1; - } - // Write concatenated string and close writing end - write(fd1[1], GpuId, 2 * sizeof(int)); - close(fd1[1]); - exit(0); - } - int stat = 0; - if (fork() == 0) { + hipDeviceProp_t prop; + HIPCHECK(hipGetDeviceProperties(&prop, 0)); + char *p = NULL; + p = strstr(prop.gcnArchName, "xnack+"); + if (p) { + int stat = 0; int managed = 0; HIP_CHECK(hipDeviceGetAttribute(&managed, hipDeviceAttributeManagedMemory, 0)); @@ -371,21 +215,10 @@ TEST_CASE("Unit_mmap_CoherentTst") { if (err != 0) { WARN("munmap failed\n"); } - if (YES_COHERENT) { - exit(10); - } else { - exit(9); - } - } else { - SUCCEED("GPU 0 doesn't support hipDeviceAttributeManagedMemory " - "attribute. Hence skipping the testing with Pass result.\n"); - } + REQUIRE(YES_COHERENT); + } } else { - wait(&stat); - int Result = WEXITSTATUS(stat); - if (Result != 10) { - REQUIRE(false); - } + HipTest::HIP_SKIP_TEST("GPU is not xnack enabled hence skipping the test...\n"); } } #endif @@ -395,55 +228,12 @@ TEST_CASE("Unit_mmap_CoherentTst") { // The following test is failing on Nvidia platform hence disabling it for now #if HT_AMD TEST_CASE("Unit_mmap_CoherentTstWthAdvise") { - if ((setenv("HSA_XNACK", "1", 1)) != 0) { - WARN("Unable to turn on HSA_XNACK, hence terminating the Test case!"); - REQUIRE(false); - } - // The following code block is used to check for gfx906/8 so as to skip if - // any of the gpus available - int fd1[2]; // Used to store two ends of first pipe - pid_t p; - if (pipe(fd1) == -1) { - fprintf(stderr, "Pipe Failed"); - REQUIRE(false); - } - - /* GpuId[0] for gfx90a exists--> 1 for yes and 0 for no */ - int GpuId[1] = {0}; - p = fork(); - - if (p < 0) { - fprintf(stderr, "fork Failed"); - REQUIRE(false); - } else if (p > 0) { // parent process - close(fd1[1]); // Close writing end of first pipe - // Wait for child to send a string - wait(NULL); - // Read string from child and close reading end. - read(fd1[0], GpuId, 2 * sizeof(int)); - close(fd1[0]); - if (GpuId[0] == 0) { - WARN("This test is applicable for MI200." - "Skipping the test!!"); - exit(0); - } - } else { // child process - close(fd1[0]); // Close read end of first pipe - hipDeviceProp_t prop; - HIPCHECK(hipGetDeviceProperties(&prop, 0)); - char *p = NULL; - p = strstr(prop.gcnArchName, "gfx90a"); - if (p) { - WARN("gfx90a gpu found on this system!!"); - GpuId[0] = 1; - } - // Write concatenated string and close writing end - write(fd1[1], GpuId, 2 * sizeof(int)); - close(fd1[1]); - exit(0); - } - int stat = 0; - if (fork() == 0) { + hipDeviceProp_t prop; + HIPCHECK(hipGetDeviceProperties(&prop, 0)); + char *p = NULL; + p = strstr(prop.gcnArchName, "xnack+"); + if (p) { + int stat = 0; int managed = 0; HIP_CHECK(hipDeviceGetAttribute(&managed, hipDeviceAttributeManagedMemory, 0)); @@ -466,26 +256,15 @@ TEST_CASE("Unit_mmap_CoherentTstWthAdvise") { bool IfTstPassed = false; if (*Ptr == 81) { IfTstPassed = true; - } + } int err = munmap(Ptr, SIZE); if (err != 0) { WARN("munmap failed\n"); } - if (IfTstPassed) { - exit(10); - } else { - exit(9); - } - } else { - SUCCEED("GPU 0 doesn't support hipDeviceAttributeManagedMemory " - "attribute. Hence skipping the testing with Pass result.\n"); - } + REQUIRE(IfTstPassed); + } } else { - wait(&stat); - int Result = WEXITSTATUS(stat); - if (Result != 10) { - REQUIRE(false); - } + HipTest::HIP_SKIP_TEST("GPU is not xnack enabled hence skipping the test...\n"); } } #endif diff --git a/catch/unit/memory/CMakeLists.txt b/catch/unit/memory/CMakeLists.txt index f09f0579f..c5ca3c4c9 100644 --- a/catch/unit/memory/CMakeLists.txt +++ b/catch/unit/memory/CMakeLists.txt @@ -108,7 +108,8 @@ set(TEST_SRC hipMemRangeGetAttributes.cc hipStreamAttachMemAsync.cc hipMemRangeGetAttributes_old.cc - hipMemGetAddressRange.cc) + hipMemGetAddressRange.cc + hipHmmOvrSubscriptionTst.cc) if(HIP_PLATFORM MATCHES "amd") set(TEST_SRC diff --git a/catch/unit/memory/hipHmmOvrSubscriptionTst.cc b/catch/unit/memory/hipHmmOvrSubscriptionTst.cc new file mode 100644 index 000000000..80a8e0d84 --- /dev/null +++ b/catch/unit/memory/hipHmmOvrSubscriptionTst.cc @@ -0,0 +1,104 @@ +/* +Copyright (c) 2021-Present Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +/* Test Case Description: This test case tests the working of OverSubscription + feature which is part of HMM.*/ + +#include + +#define INIT_VAL 2.5 +#define NUM_ELMS 268435456 // 268435456 * 4 = 1GB +#define ITERATIONS 10 +#define ONE_GB 1024 * 1024 * 1024 + +// Kernel function +__global__ void Square(int n, float *x) { + int index = blockIdx.x * blockDim.x + threadIdx.x; + int stride = blockDim.x * gridDim.x; + for (int i = index; i < n; i += stride) { + x[i] = x[i] + 10; + } +} + +static void OneGBMemTest(int dev) { + int DataMismatch = 0; + float *HmmAG = nullptr; + hipStream_t strm; + HIP_CHECK(hipStreamCreate(&strm)); + // Testing hipMemAttachGlobal Flag + HIP_CHECK(hipMallocManaged(&HmmAG, NUM_ELMS * sizeof(float), + hipMemAttachGlobal)); + + // Initializing HmmAG memory + for (int i = 0; i < NUM_ELMS; i++) { + HmmAG[i] = INIT_VAL; + } + + int blockSize = 256; + int numBlocks = (NUM_ELMS + blockSize - 1) / blockSize; + dim3 dimGrid(numBlocks, 1, 1); + dim3 dimBlock(blockSize, 1, 1); + HIP_CHECK(hipSetDevice(dev)); + for (int i = 0; i < ITERATIONS; ++i) { + Square<<>>(NUM_ELMS, HmmAG); + } + HIP_CHECK(hipStreamSynchronize(strm)); + for (int j = 0; j < NUM_ELMS; ++j) { + if (HmmAG[j] != (INIT_VAL + ITERATIONS * 10)) { + DataMismatch++; + break; + } + } + if (DataMismatch != 0) { + WARN("Data Mismatch observed when kernel launched on device: " << dev); + REQUIRE(false); + } + HIP_CHECK(hipFree(HmmAG)); + HIP_CHECK(hipStreamDestroy(strm)); +} + +TEST_CASE("Unit_HMM_OverSubscriptionTst") { + // Checking if xnack is enabled + hipDeviceProp_t prop; + HIP_CHECK(hipGetDeviceProperties(&prop, 0)); + char *p = NULL; + p = strstr(prop.gcnArchName, "xnack+"); + if (p) { + size_t FreeMem, TotGpuMem; + HIP_CHECK(hipMemGetInfo(&FreeMem, &TotGpuMem)); + int NumGB = (TotGpuMem/(ONE_GB)); + int TotalThreads = (NumGB + 10); + WARN("Launching " << TotalThreads); + WARN(" processes to test OverSubscription."); + + std::thread Thrds[TotalThreads]; + + for (int k = 0; k < TotalThreads; ++k) { + Thrds[k] = std::thread(OneGBMemTest, 0); + } + for (int k = 0; k < TotalThreads; ++k) { + Thrds[k].join(); + } + } else { + HipTest::HIP_SKIP_TEST("GPU is not xnack enabled hence skipping the test...\n"); + } +}