diff --git a/platform/windows/syspovtask.cpp b/platform/windows/syspovtask.cpp index 003f9da16..e1d47a90a 100644 --- a/platform/windows/syspovtask.cpp +++ b/platform/windows/syspovtask.cpp @@ -34,9 +34,9 @@ //****************************************************************************** #include "syspovtask.h" - +#if defined (_WIN32) || defined (_WIN64) || defined (WIN32) || defined (WIN64) #include - +#endif // this must be the last file included #include "base/povdebug.h" @@ -59,22 +59,6 @@ void WinMemThreadCleanup(); namespace vfePlatform { -// TODO - Maybe move this somewhere else as well. -static unsigned int GetNumberofCPUs (void) -{ - SYSTEM_INFO sysinfo; - static unsigned int result = 0; - - // we cache the result, since this function is called on each thread startup - // TODO - this isn't ideal on systems with hot-pluggable CPUs - if (result != 0) - return result; - - GetSystemInfo (&sysinfo) ; - result = sysinfo.dwNumberOfProcessors; - return result; -} - } // end of namespace vfeplatform @@ -91,12 +75,6 @@ using namespace vfePlatform; void Task::Initialize () { - // NB This is not thread-safe, but we currently don't care. - static volatile unsigned int count = 0; - unsigned int numCPUs = GetNumberofCPUs(); - // TODO - if numCPUs > 64, we need to do more than this - if (numCPUs > 1) - SetThreadIdealProcessor (GetCurrentThread(), (count++) % numCPUs); #ifndef _CONSOLE povwin::WinMemThreadStartup(); #endif diff --git a/source/backend/support/processorGroups.cpp b/source/backend/support/processorGroups.cpp new file mode 100644 index 000000000..494e035fb --- /dev/null +++ b/source/backend/support/processorGroups.cpp @@ -0,0 +1,105 @@ +#if defined (_WIN32) || defined (_WIN64) || defined (WIN32) || defined (WIN64) + +#include +#include +#include "processorGroups.h" + +namespace pov +{ + namespace processorGroups + { + // used to dynamically load processor group utility functions from Kernel32.dll at runtime. 
+        // The pointer types must carry WINAPI: without it a 32-bit x86 build calls the exports with the wrong convention.
+        typedef WORD (WINAPI *PFN_GETACTIVEPROCESSORGROUPCOUNT)(void);
+        typedef DWORD (WINAPI *PFN_GETACTIVEPROCESSORCOUNT)(WORD);
+        typedef BOOL (WINAPI *PFN_SETTHREADGROUPAFFINITY)(HANDLE, const GROUP_AFFINITY*, PGROUP_AFFINITY);
+        static bool g_Initialized = false;
+        static HMODULE g_Kernel32 = NULL;
+        static PFN_GETACTIVEPROCESSORGROUPCOUNT g_GetActiveProcessorGroupCount = NULL;
+        static PFN_GETACTIVEPROCESSORCOUNT g_GetActiveProcessorCount = NULL;
+        static PFN_SETTHREADGROUPAFFINITY g_SetThreadGroupAffinity = NULL;
+
+        void Initialize()
+        {
+            // Loads Kernel32.dll and resolves the optional processor group entry points.
+            // Every caller takes the lock, so g_Initialized is only read and written under it;
+            // an unlocked early-out on a plain bool would be a data race.
+            static std::mutex mutex;
+            std::lock_guard<std::mutex> lock(mutex);
+            if (g_Initialized)
+                return;
+
+            // Default all pointers to null.
+            g_Kernel32 = NULL;
+            g_GetActiveProcessorGroupCount = NULL;
+            g_GetActiveProcessorCount = NULL;
+            g_SetThreadGroupAffinity = NULL;
+
+            // A narrow literal is passed, so call the ANSI variant explicitly. Entry points that
+            // do not exist stay NULL, which callers treat as "no processor group support".
+            g_Kernel32 = LoadLibraryA("Kernel32.dll");
+            if (g_Kernel32 != NULL)
+            {
+                g_GetActiveProcessorGroupCount = reinterpret_cast<PFN_GETACTIVEPROCESSORGROUPCOUNT>(GetProcAddress(g_Kernel32, "GetActiveProcessorGroupCount"));
+                g_GetActiveProcessorCount = reinterpret_cast<PFN_GETACTIVEPROCESSORCOUNT>(GetProcAddress(g_Kernel32, "GetActiveProcessorCount"));
+                g_SetThreadGroupAffinity = reinterpret_cast<PFN_SETTHREADGROUPAFFINITY>(GetProcAddress(g_Kernel32, "SetThreadGroupAffinity"));
+            }
+
+            g_Initialized = true;
+        }
+
+        int GetNumberOfProcessors()
+        {
+            // Safety check.
+            Initialize();
+
+            // Check to see if processor group support is present, else default to old Windows 7 function.
+ bool hasProcessorGroupsSupport = (g_Kernel32 && g_GetActiveProcessorGroupCount && g_GetActiveProcessorCount && g_SetThreadGroupAffinity); + if (hasProcessorGroupsSupport) return g_GetActiveProcessorCount(ALL_PROCESSOR_GROUPS); + else + { + SYSTEM_INFO sysInfo = {}; + GetSystemInfo(&sysInfo); + return sysInfo.dwNumberOfProcessors; + } + } + + void SetThreadAffinity(int index) + { + // Skip setting thread affinity if incoming value was never set or isn't positive (Task class defaults taskIndex to -1). + if (index == -1) + return; + + // Safety check. + Initialize(); + + // Check to see if processor group support is present. + bool hasProcessorGroupsSupport = (g_Kernel32 && g_GetActiveProcessorGroupCount && g_GetActiveProcessorCount && g_SetThreadGroupAffinity); + if (hasProcessorGroupsSupport) + { + // Since thread index may continue to increase unbounded, wrap it back around within the range of available processors. + int processorIndex = index % g_GetActiveProcessorCount(ALL_PROCESSOR_GROUPS); + + // Determine which processor group to bind the thread to. + auto totalProcessors = 0U; + for (auto i = 0U; i < g_GetActiveProcessorGroupCount(); ++i) + { + totalProcessors += g_GetActiveProcessorCount(i); + if (totalProcessors >= processorIndex) + { + GROUP_AFFINITY groupAffinity = { ~0ULL, static_cast(i), { 0, 0, 0 } }; + g_SetThreadGroupAffinity(GetCurrentThread(), &groupAffinity, nullptr); + break; + } + } + } + } + + void Deinitialize() + { + if (g_Kernel32) + FreeLibrary(g_Kernel32); + } + } +} +#endif diff --git a/source/backend/support/processorGroups.h b/source/backend/support/processorGroups.h new file mode 100644 index 000000000..b90ce32a0 --- /dev/null +++ b/source/backend/support/processorGroups.h @@ -0,0 +1,35 @@ +#if defined (_WIN32) || defined (_WIN64) || defined (WIN32) || defined (WIN64) +#ifndef POVRAY_BACKEND_PROCESSORGROUPS_H +#define POVRAY_BACKEND_PROCESSORGROUPS_H + +// This module enables support for processor groups in Windows 8 and above. 
+// The implementation only requires a thread index be passed into SetThreadAffinity +// which will then be used to select an appropriate logical processor on a system with +// one or more processor groups. +// +// The module is meant to support earlier versions of Windows by dynamically loading appropriate +// dlls and entry points at runtime if they exist. If support for processor groups is not present +// (e.g. Windows 7) then the older SetThreadAffinityMask will be used. +namespace pov +{ + namespace processorGroups + { + // Initializes internal state (e.g. loading Kernel32.dll and getting required entry points). + // This does not have to be called explicitly. The utility functions below will also call Initialize + // if it has not already been called. + extern void Initialize(); + + // Returns the total number of available processors. + extern int GetNumberOfProcessors(); + + // Sets the thread affinity for the calling thread given the specified index. + // Here index can pertain to any linearly increasing value (e.g. thread index, task index). + extern void SetThreadAffinity(int index); + + // Frees and deinitializes internal state. + extern void Deinitialize(); + } +} + +#endif +#endif \ No newline at end of file diff --git a/source/backend/support/task.cpp b/source/backend/support/task.cpp index 90296980c..0eb8e6ab4 100644 --- a/source/backend/support/task.cpp +++ b/source/backend/support/task.cpp @@ -58,6 +58,10 @@ #include "backend/control/messagefactory.h" #include "backend/scene/backendscenedata.h" +#if defined (_WIN32) || defined (_WIN64) || defined (WIN32) || defined (_WIN32) +// Support for processor groups. 
+#include "processorGroups.h" +#endif // this must be the last file included #include "base/povdebug.h" @@ -77,7 +81,8 @@ Task::Task(ThreadData *td, const boost::function1& f) : realTime(-1), cpuTime(-1), taskThread(nullptr), - povmsContext(nullptr) + povmsContext(nullptr), + taskIndex(-1) { if (td == nullptr) throw POV_EXCEPTION_STRING("Internal error: TaskData is NULL in Task constructor"); @@ -171,7 +176,10 @@ POV_LONG Task::ElapsedThreadCPUTime() const } void Task::TaskThread(const boost::function0& completion) -{ +{ +#if defined (_WIN32) || defined (_WIN64) || defined (WIN32) || defined (WIN64) + processorGroups::SetThreadAffinity(taskIndex); +#endif int result; if((result = POVMS_OpenContext(&povmsContext)) != kNoErr) diff --git a/source/backend/support/task.h b/source/backend/support/task.h index cf0916b55..2966ae178 100644 --- a/source/backend/support/task.h +++ b/source/backend/support/task.h @@ -92,6 +92,9 @@ class Task inline POVMSContext GetPOVMSContext() { return povmsContext; } + /// assigned task index. + int taskIndex; + protected: struct StopThreadException final {}; // TODO - consider subclassing from std::exception hierarchy. 
@@ -126,7 +129,7 @@ class Task /// task thread std::thread *taskThread; /// POVMS message receiving context - POVMSContext povmsContext; + POVMSContext povmsContext; inline void FatalErrorHandler(const Exception& e) { diff --git a/source/backend/support/taskqueue.cpp b/source/backend/support/taskqueue.cpp index 80db09b93..e4e64db52 100644 --- a/source/backend/support/taskqueue.cpp +++ b/source/backend/support/taskqueue.cpp @@ -34,6 +34,7 @@ //****************************************************************************** // Unit header file must be the first file included within POV-Ray *.cpp files (pulls in config) #include "backend/support/taskqueue.h" +#include "base/types.h" // C++ variants of C standard header files @@ -59,7 +60,7 @@ namespace pov using std::list; using std::shared_ptr; -TaskQueue::TaskQueue() : failed(kNoError) +TaskQueue::TaskQueue() : failed(kNoError), nextTaskIndex(0) { } @@ -169,6 +170,7 @@ ThreadData *TaskQueue::AppendTask(Task *task) Notify(); + task->taskIndex = nextTaskIndex++; return task->GetDataPtr(); } @@ -195,7 +197,6 @@ void TaskQueue::AppendFunction(const boost::function1& fn) std::lock_guard lock(queueMutex); queuedTasks.push(TaskEntry(fn)); - Notify(); } @@ -229,7 +230,7 @@ bool TaskQueue::Process() switch(queuedTasks.front().GetEntryType()) { case TaskEntry::kTask: - { + { activeTasks.push_back(queuedTasks.front()); queuedTasks.front().GetTask()->Start(boost::bind(&TaskQueue::Notify, this)); queuedTasks.pop(); diff --git a/source/backend/support/taskqueue.h b/source/backend/support/taskqueue.h index a5a0ae457..24ef8de69 100644 --- a/source/backend/support/taskqueue.h +++ b/source/backend/support/taskqueue.h @@ -128,6 +128,8 @@ class TaskQueue final int failed; /// wait for data in queue or related operation to be processed std::condition_variable_any processCondition; + /// index that AppendTask hands to the next appended task (post-incremented on each append). 
+ int nextTaskIndex; TaskQueue(const TaskQueue&) = delete; TaskQueue& operator=(const TaskQueue&) = delete; diff --git a/windows/pvengine.cpp b/windows/pvengine.cpp index 2aa31190c..fc882ad65 100644 --- a/windows/pvengine.cpp +++ b/windows/pvengine.cpp @@ -90,6 +90,7 @@ #include "backend/control/benchmark.h" #include "pvdisplay.h" #include "backend/povray.h" +#include "backend/support/processorGroups.h" #ifdef RTR_SUPPORT #include "rtrsupport.h" @@ -5123,7 +5124,7 @@ void GenerateDumpMeta(bool brief) s += sprintf(s, "installdate=%s\n", InstalledOn) ; } s += sprintf(s, "cpuarchitecture=%u\n", (DWORD) sysinfo.wProcessorArchitecture) ; - s += sprintf(s, "numberofcpus=%u\n", sysinfo.dwNumberOfProcessors) ; + s += sprintf(s, "numberofcpus=%u\n", processorGroups::GetNumberOfProcessors()) ; s += sprintf(s, "processortype=%u\n", sysinfo.dwProcessorType) ; s += sprintf(s, "processorlevel=%u\n", (DWORD) sysinfo.wProcessorLevel) ; s += sprintf(s, "processorrevision=%u\n", (DWORD) sysinfo.wProcessorRevision) ; @@ -5430,10 +5431,8 @@ int PASCAL WinMain (HINSTANCE hInst, HINSTANCE hPrev, LPSTR szCmdLine, int sw) hInstance = hInst ; hMainThread = GetCurrentThread () ; - - GetSystemInfo (&sysinfo) ; - ThreadCount = sysinfo.dwNumberOfProcessors ; - NumberOfCPUs = sysinfo.dwNumberOfProcessors ; + + ThreadCount = NumberOfCPUs = processorGroups::GetNumberOfProcessors(); while (*s == ' ' || *s == '\t') s++ ; diff --git a/windows/pvupdate.cpp b/windows/pvupdate.cpp index ad2af3dbc..9ca71d0ff 100644 --- a/windows/pvupdate.cpp +++ b/windows/pvupdate.cpp @@ -43,7 +43,9 @@ #include #include "pvengine.h" - +#if defined (_WIN32) || defined (_WIN64) || defined (WIN32) || defined (WIN64) +#include "backend/support/processorGroups.h" +#endif // this must be the last file included #include "syspovdebug.h" @@ -155,7 +157,7 @@ int IsUpdateAvailable (bool SendSysinfo, char *CurrentVersion, std::string& NewV strcpy (s, "&NoInfo=false\n") ; GetSystemInfo (&sysinfo) ; s += sprintf (s, 
"CPUArchitecture=0x%04x\n", (DWORD) sysinfo.wProcessorArchitecture) ; - s += sprintf (s, "NumberOfCPUs=0x%04x\n", sysinfo.dwNumberOfProcessors) ; + s += sprintf (s, "NumberOfCPUs=0x%04x\n", pov::processorGroups::GetNumberOfProcessors()) ; s += sprintf (s, "ProcessorType=0x%04x\n", sysinfo.dwProcessorType) ; s += sprintf (s, "ProcessorLevel=0x%04x\n", (DWORD) sysinfo.wProcessorLevel) ; s += sprintf (s, "ProcessorRevision=0x%04x\n", (DWORD) sysinfo.wProcessorRevision) ; diff --git a/windows/vs2015/povbackend.vcxproj b/windows/vs2015/povbackend.vcxproj index 8707cf355..2e063fcac 100644 --- a/windows/vs2015/povbackend.vcxproj +++ b/windows/vs2015/povbackend.vcxproj @@ -413,6 +413,7 @@ + @@ -452,6 +453,7 @@ + diff --git a/windows/vs2015/povbackend.vcxproj.filters b/windows/vs2015/povbackend.vcxproj.filters index abbf2109b..04ec61168 100644 --- a/windows/vs2015/povbackend.vcxproj.filters +++ b/windows/vs2015/povbackend.vcxproj.filters @@ -114,6 +114,9 @@ Backend Source\Control + + Backend Source\Support + @@ -209,5 +212,8 @@ Backend Headers\Control + + Backend Headers\Support + \ No newline at end of file