From f21b2b83e6480720c2a4c439ba8c06858c7dcaad Mon Sep 17 00:00:00 2001 From: haowhsu Date: Wed, 2 Jul 2025 00:26:53 +0800 Subject: [PATCH] Qualcomm AI Engine Direct - gpu support part1 - rename folders in backends/qualcomm/runtime/backends - add gpu infra --- backends/qualcomm/README.md | 2 +- .../qualcomm/runtime/backends/CMakeLists.txt | 69 ++++++++++++------- .../runtime/backends/QnnBackendFactory.cpp | 64 +++++++++++++++-- .../runtime/backends/QnnBackendFactory.h | 14 ++-- .../runtime/backends/QnnDeviceCommon.h | 2 +- .../qualcomm/runtime/backends/QnnDlcManager.h | 2 +- .../runtime/backends/gpu/GpuBackend.cpp | 62 +++++++++++++++++ .../runtime/backends/gpu/GpuBackend.h | 40 +++++++++++ .../backends/gpu/GpuBackendCustomConfig.cpp | 44 ++++++++++++ .../backends/gpu/GpuBackendCustomConfig.h | 41 +++++++++++ .../runtime/backends/gpu/GpuContext.cpp | 50 ++++++++++++++ .../runtime/backends/gpu/GpuContext.h | 39 +++++++++++ .../backends/gpu/GpuContextCustomConfig.h | 48 +++++++++++++ .../qualcomm/runtime/backends/gpu/GpuDevice.h | 29 ++++++++ .../runtime/backends/gpu/GpuGraph.cpp | 49 +++++++++++++ .../qualcomm/runtime/backends/gpu/GpuGraph.h | 37 ++++++++++ .../backends/gpu/GpuGraphCustomConfig.cpp | 44 ++++++++++++ .../backends/gpu/GpuGraphCustomConfig.h | 40 +++++++++++ .../gpu/aarch64/GpuContextCustomConfig.cpp | 30 ++++++++ .../gpu/x86_64/GpuContextCustomConfig.cpp | 22 ++++++ .../backends/{htpbackend => htp}/HtpBackend.h | 0 .../{htpbackend => htp}/HtpBackendCache.cpp | 2 +- .../{htpbackend => htp}/HtpBackendCache.h | 0 .../{htpbackend => htp}/HtpContext.cpp | 2 +- .../backends/{htpbackend => htp}/HtpContext.h | 2 +- .../HtpContextCustomConfig.h | 0 .../{htpbackend => htp}/HtpDevice.cpp | 2 +- .../backends/{htpbackend => htp}/HtpDevice.h | 6 +- .../HtpDeviceCustomConfig.h | 0 .../HtpDevicePlatformInfoConfig.h | 0 .../backends/{htpbackend => htp}/HtpGraph.cpp | 2 +- .../backends/{htpbackend => htp}/HtpGraph.h | 2 +- .../HtpGraphCustomConfig.cpp | 2 +- 
.../HtpGraphCustomConfig.h | 0 .../aarch64/HtpContextCustomConfig.cpp | 4 +- .../aarch64/HtpDeviceCustomConfig.cpp | 2 +- .../aarch64/HtpDevicePlatformInfoConfig.cpp | 2 +- .../aarch64/HtpGraphCustomConfig.cpp | 2 +- .../x86_64/HtpContextCustomConfig.cpp | 2 +- .../x86_64/HtpDeviceCustomConfig.cpp | 2 +- .../x86_64/HtpDevicePlatformInfoConfig.cpp | 2 +- .../x86_64/HtpGraphCustomConfig.cpp | 2 +- .../backends/{irbackend => ir}/IrBackend.h | 0 .../backends/{irbackend => ir}/IrContext.h | 0 .../{irbackend => ir}/aarch64/IrContext.cpp | 2 +- .../aarch64/QnnDlcManager.cpp | 0 .../{irbackend => ir}/x86_64/IrContext.cpp | 2 +- .../x86_64/QnnDlcManager.cpp | 2 +- backends/qualcomm/runtime/targets.bzl | 15 ++-- .../serialization/qc_compiler_spec.fbs | 48 ++++++++++++- backends/qualcomm/serialization/qc_schema.py | 34 ++++++++- backends/qualcomm/tests/test_qnn_delegate.py | 20 +++++- backends/qualcomm/tests/utils.py | 12 +++- backends/qualcomm/utils/utils.py | 42 +++++++++++ examples/qualcomm/utils.py | 48 ++++++++----- 55 files changed, 901 insertions(+), 90 deletions(-) create mode 100644 backends/qualcomm/runtime/backends/gpu/GpuBackend.cpp create mode 100644 backends/qualcomm/runtime/backends/gpu/GpuBackend.h create mode 100644 backends/qualcomm/runtime/backends/gpu/GpuBackendCustomConfig.cpp create mode 100644 backends/qualcomm/runtime/backends/gpu/GpuBackendCustomConfig.h create mode 100644 backends/qualcomm/runtime/backends/gpu/GpuContext.cpp create mode 100644 backends/qualcomm/runtime/backends/gpu/GpuContext.h create mode 100644 backends/qualcomm/runtime/backends/gpu/GpuContextCustomConfig.h create mode 100644 backends/qualcomm/runtime/backends/gpu/GpuDevice.h create mode 100644 backends/qualcomm/runtime/backends/gpu/GpuGraph.cpp create mode 100644 backends/qualcomm/runtime/backends/gpu/GpuGraph.h create mode 100644 backends/qualcomm/runtime/backends/gpu/GpuGraphCustomConfig.cpp create mode 100644 backends/qualcomm/runtime/backends/gpu/GpuGraphCustomConfig.h create 
mode 100644 backends/qualcomm/runtime/backends/gpu/aarch64/GpuContextCustomConfig.cpp create mode 100644 backends/qualcomm/runtime/backends/gpu/x86_64/GpuContextCustomConfig.cpp rename backends/qualcomm/runtime/backends/{htpbackend => htp}/HtpBackend.h (100%) rename backends/qualcomm/runtime/backends/{htpbackend => htp}/HtpBackendCache.cpp (96%) rename backends/qualcomm/runtime/backends/{htpbackend => htp}/HtpBackendCache.h (100%) rename backends/qualcomm/runtime/backends/{htpbackend => htp}/HtpContext.cpp (94%) rename backends/qualcomm/runtime/backends/{htpbackend => htp}/HtpContext.h (94%) rename backends/qualcomm/runtime/backends/{htpbackend => htp}/HtpContextCustomConfig.h (100%) rename backends/qualcomm/runtime/backends/{htpbackend => htp}/HtpDevice.cpp (99%) rename backends/qualcomm/runtime/backends/{htpbackend => htp}/HtpDevice.h (92%) rename backends/qualcomm/runtime/backends/{htpbackend => htp}/HtpDeviceCustomConfig.h (100%) rename backends/qualcomm/runtime/backends/{htpbackend => htp}/HtpDevicePlatformInfoConfig.h (100%) rename backends/qualcomm/runtime/backends/{htpbackend => htp}/HtpGraph.cpp (93%) rename backends/qualcomm/runtime/backends/{htpbackend => htp}/HtpGraph.h (93%) rename backends/qualcomm/runtime/backends/{htpbackend => htp}/HtpGraphCustomConfig.cpp (97%) rename backends/qualcomm/runtime/backends/{htpbackend => htp}/HtpGraphCustomConfig.h (100%) rename backends/qualcomm/runtime/backends/{htpbackend => htp}/aarch64/HtpContextCustomConfig.cpp (87%) rename backends/qualcomm/runtime/backends/{htpbackend => htp}/aarch64/HtpDeviceCustomConfig.cpp (84%) rename backends/qualcomm/runtime/backends/{htpbackend => htp}/aarch64/HtpDevicePlatformInfoConfig.cpp (83%) rename backends/qualcomm/runtime/backends/{htpbackend => htp}/aarch64/HtpGraphCustomConfig.cpp (85%) rename backends/qualcomm/runtime/backends/{htpbackend => htp}/x86_64/HtpContextCustomConfig.cpp (90%) rename backends/qualcomm/runtime/backends/{htpbackend => 
htp}/x86_64/HtpDeviceCustomConfig.cpp (90%) rename backends/qualcomm/runtime/backends/{htpbackend => htp}/x86_64/HtpDevicePlatformInfoConfig.cpp (96%) rename backends/qualcomm/runtime/backends/{htpbackend => htp}/x86_64/HtpGraphCustomConfig.cpp (85%) rename backends/qualcomm/runtime/backends/{irbackend => ir}/IrBackend.h (100%) rename backends/qualcomm/runtime/backends/{irbackend => ir}/IrContext.h (100%) rename backends/qualcomm/runtime/backends/{irbackend => ir}/aarch64/IrContext.cpp (88%) rename backends/qualcomm/runtime/backends/{irbackend => ir}/aarch64/QnnDlcManager.cpp (100%) rename backends/qualcomm/runtime/backends/{irbackend => ir}/x86_64/IrContext.cpp (94%) rename backends/qualcomm/runtime/backends/{irbackend => ir}/x86_64/QnnDlcManager.cpp (98%) diff --git a/backends/qualcomm/README.md b/backends/qualcomm/README.md index 7c5853b3a6f..4fe319d40fd 100644 --- a/backends/qualcomm/README.md +++ b/backends/qualcomm/README.md @@ -51,7 +51,7 @@ backends/qualcomm | | # Meanwhile, this is also the runtime responsbile for executing compiled | | # models on a device. | └── backends # Backends supported by QNN. -| └── htpbackend +| └── gpu / htp | ├── aarch64 # Configuration required to run on device. (Device Part). | └── x86_64 # Configuration required to compile graph on host. (AoT Part). ├── scripts # Misc supporting scripts, not related to core functionality. 
diff --git a/backends/qualcomm/runtime/backends/CMakeLists.txt b/backends/qualcomm/runtime/backends/CMakeLists.txt index 2497aa48340..276eb3be409 100644 --- a/backends/qualcomm/runtime/backends/CMakeLists.txt +++ b/backends/qualcomm/runtime/backends/CMakeLists.txt @@ -43,57 +43,72 @@ target_sources( ${CMAKE_CURRENT_LIST_DIR}/QnnProfiler.cpp ) -# qnn_device -set(HOST_ARCHITECTURE - ${CMAKE_CURRENT_LIST_DIR}/htpbackend/${CMAKE_SYSTEM_PROCESSOR} +set(HOST_ARCHITECTURE_GPU + ${CMAKE_CURRENT_LIST_DIR}/gpu/${CMAKE_SYSTEM_PROCESSOR} +) +set(HOST_ARCHITECTURE_HTP + ${CMAKE_CURRENT_LIST_DIR}/htp/${CMAKE_SYSTEM_PROCESSOR} +) +set(HOST_ARCHITECTURE_IR + ${CMAKE_CURRENT_LIST_DIR}/ir/${CMAKE_SYSTEM_PROCESSOR} ) +# qnn_device target_sources( qnn_device PUBLIC ${CMAKE_CURRENT_LIST_DIR}/QnnDeviceCommon.h - ${CMAKE_CURRENT_LIST_DIR}/htpbackend/HtpDevice.h + ${CMAKE_CURRENT_LIST_DIR}/gpu/GpuDevice.h + ${CMAKE_CURRENT_LIST_DIR}/htp/HtpDevice.h PRIVATE ${CMAKE_CURRENT_LIST_DIR}/QnnDeviceCommon.cpp - ${CMAKE_CURRENT_LIST_DIR}/htpbackend/HtpDevice.cpp - ${CMAKE_CURRENT_LIST_DIR}/htpbackend/HtpDevicePlatformInfoConfig.h - ${CMAKE_CURRENT_LIST_DIR}/htpbackend/HtpDeviceCustomConfig.h + ${CMAKE_CURRENT_LIST_DIR}/htp/HtpDevice.cpp + ${CMAKE_CURRENT_LIST_DIR}/htp/HtpDevicePlatformInfoConfig.h + ${CMAKE_CURRENT_LIST_DIR}/htp/HtpDeviceCustomConfig.h # When offline prepare context cache in x86 host we have to provide # platform infomation and SocModel to Qnn - ${HOST_ARCHITECTURE}/HtpDevicePlatformInfoConfig.cpp - ${HOST_ARCHITECTURE}/HtpDeviceCustomConfig.cpp + ${HOST_ARCHITECTURE_HTP}/HtpDevicePlatformInfoConfig.cpp + ${HOST_ARCHITECTURE_HTP}/HtpDeviceCustomConfig.cpp ) # qnn_context target_sources( qnn_context PUBLIC ${CMAKE_CURRENT_LIST_DIR}/QnnContextCommon.h - ${CMAKE_CURRENT_LIST_DIR}/htpbackend/HtpContext.h - ${CMAKE_CURRENT_LIST_DIR}/irbackend/IrContext.h + ${CMAKE_CURRENT_LIST_DIR}/gpu/GpuContext.h + ${CMAKE_CURRENT_LIST_DIR}/htp/HtpContext.h + ${CMAKE_CURRENT_LIST_DIR}/ir/IrContext.h 
PRIVATE ${CMAKE_CURRENT_LIST_DIR}/QnnContextCommon.cpp - ${CMAKE_CURRENT_LIST_DIR}/htpbackend/HtpContext.cpp - ${CMAKE_CURRENT_LIST_DIR}/htpbackend/HtpContextCustomConfig.h - ${HOST_ARCHITECTURE}/HtpContextCustomConfig.cpp - ${CMAKE_CURRENT_LIST_DIR}/irbackend/${CMAKE_SYSTEM_PROCESSOR}/IrContext.cpp + ${CMAKE_CURRENT_LIST_DIR}/gpu/GpuContext.cpp + ${CMAKE_CURRENT_LIST_DIR}/gpu/GpuContextCustomConfig.h + ${HOST_ARCHITECTURE_GPU}/GpuContextCustomConfig.cpp + ${CMAKE_CURRENT_LIST_DIR}/htp/HtpContext.cpp + ${CMAKE_CURRENT_LIST_DIR}/htp/HtpContextCustomConfig.h + ${HOST_ARCHITECTURE_HTP}/HtpContextCustomConfig.cpp + ${HOST_ARCHITECTURE_IR}/IrContext.cpp ) # qnn_backend_cache target_sources( qnn_backend_cache PUBLIC ${CMAKE_CURRENT_LIST_DIR}/QnnBackendCache.h - ${CMAKE_CURRENT_LIST_DIR}/htpbackend/HtpBackendCache.h + ${CMAKE_CURRENT_LIST_DIR}/htp/HtpBackendCache.h PRIVATE ${CMAKE_CURRENT_LIST_DIR}/QnnBackendCache.cpp - ${CMAKE_CURRENT_LIST_DIR}/htpbackend/HtpBackendCache.cpp + ${CMAKE_CURRENT_LIST_DIR}/htp/HtpBackendCache.cpp ) # qnn_graph target_sources( qnn_graph PUBLIC ${CMAKE_CURRENT_LIST_DIR}/QnnGraphCommon.h - ${CMAKE_CURRENT_LIST_DIR}/htpbackend/HtpGraph.h + ${CMAKE_CURRENT_LIST_DIR}/gpu/GpuGraph.h + ${CMAKE_CURRENT_LIST_DIR}/htp/HtpGraph.h PRIVATE ${CMAKE_CURRENT_LIST_DIR}/QnnGraphCommon.cpp - ${CMAKE_CURRENT_LIST_DIR}/htpbackend/HtpGraph.cpp - ${CMAKE_CURRENT_LIST_DIR}/htpbackend/HtpGraphCustomConfig.h - ${CMAKE_CURRENT_LIST_DIR}/htpbackend/HtpGraphCustomConfig.cpp - ${HOST_ARCHITECTURE}/HtpGraphCustomConfig.cpp + ${CMAKE_CURRENT_LIST_DIR}/gpu/GpuGraph.cpp + ${CMAKE_CURRENT_LIST_DIR}/gpu/GpuGraphCustomConfig.h + ${CMAKE_CURRENT_LIST_DIR}/gpu/GpuGraphCustomConfig.cpp + ${CMAKE_CURRENT_LIST_DIR}/htp/HtpGraph.cpp + ${CMAKE_CURRENT_LIST_DIR}/htp/HtpGraphCustomConfig.h + ${CMAKE_CURRENT_LIST_DIR}/htp/HtpGraphCustomConfig.cpp + ${HOST_ARCHITECTURE_HTP}/HtpGraphCustomConfig.cpp ) # qnn_op_package_manager @@ -107,9 +122,13 @@ target_sources( target_sources( qnn_backend 
PUBLIC ${CMAKE_CURRENT_LIST_DIR}/QnnBackendCommon.h - ${CMAKE_CURRENT_LIST_DIR}/htpbackend/HtpBackend.h - ${CMAKE_CURRENT_LIST_DIR}/irbackend/IrBackend.h + ${CMAKE_CURRENT_LIST_DIR}/gpu/GpuBackend.h + ${CMAKE_CURRENT_LIST_DIR}/htp/HtpBackend.h + ${CMAKE_CURRENT_LIST_DIR}/ir/IrBackend.h PRIVATE ${CMAKE_CURRENT_LIST_DIR}/QnnBackendCommon.cpp + ${CMAKE_CURRENT_LIST_DIR}/gpu/GpuBackend.cpp + ${CMAKE_CURRENT_LIST_DIR}/gpu/GpuBackendCustomConfig.h + ${CMAKE_CURRENT_LIST_DIR}/gpu/GpuBackendCustomConfig.cpp ) # qnn_mem_manager @@ -137,5 +156,5 @@ target_sources( target_sources( qnn_dlc_manager PUBLIC ${CMAKE_CURRENT_LIST_DIR}/QnnDlcManager.h - PRIVATE ${CMAKE_CURRENT_LIST_DIR}/irbackend/${CMAKE_SYSTEM_PROCESSOR}/QnnDlcManager.cpp + PRIVATE ${HOST_ARCHITECTURE_IR}/QnnDlcManager.cpp ) diff --git a/backends/qualcomm/runtime/backends/QnnBackendFactory.cpp b/backends/qualcomm/runtime/backends/QnnBackendFactory.cpp index 2fbb2243d8d..11e8dfb0913 100644 --- a/backends/qualcomm/runtime/backends/QnnBackendFactory.cpp +++ b/backends/qualcomm/runtime/backends/QnnBackendFactory.cpp @@ -51,8 +51,16 @@ std::unique_ptr QnnBackendFactory::Create( EnumNameQnnExecuTorchHtpPdSession(htp_options->pd_session())); QNN_EXECUTORCH_LOG_INFO( "use_conv_hmx in htp_options: %d", htp_options->use_conv_hmx()); + QNN_EXECUTORCH_LOG_INFO( + "use_dlbc in htp_options: %d", htp_options->use_dlbc()); QNN_EXECUTORCH_LOG_INFO( "use_fold_relu in htp_options: %d", htp_options->use_fold_relu()); + QNN_EXECUTORCH_LOG_INFO( + "use_multi_contexts in htp_options: %d", + htp_options->use_multi_contexts()); + QNN_EXECUTORCH_LOG_INFO( + "use_weight_sharing in htp_options: %d", + htp_options->use_weight_sharing()); } backend_params->qnn_backend_ptr_ = std::make_unique(implementation, logger); @@ -78,19 +86,67 @@ std::unique_ptr QnnBackendFactory::Create( options->profile_level(), options->soc_info(), htp_options); - backend_params->qnn_mem_manager_ptr_ = std::make_unique( + } break; + case 
QnnExecuTorchBackendType::kGpuBackend: { + auto gpu_options = options->backend_options()->gpu_options(); + if (options->log_level() >= QnnExecuTorchLogLevel::kLogLevelInfo) { + QNN_EXECUTORCH_LOG_INFO( + "performance_mode in gpu_options: %s", + EnumNameQnnExecuTorchGpuPerformanceMode( + gpu_options->performance_mode())); + QNN_EXECUTORCH_LOG_INFO( + "precision in gpu_options: %s", + EnumNameQnnExecuTorchGpuPrecision(gpu_options->precision())); + QNN_EXECUTORCH_LOG_INFO( + "use_memory_optimizations in gpu_options: %d", + gpu_options->use_memory_optimizations()); + QNN_EXECUTORCH_LOG_INFO( + "use_node_optimizations in gpu_options: %d", + gpu_options->use_node_optimizations()); + QNN_EXECUTORCH_LOG_INFO( + "use_queue_recording in gpu_options: %d", + gpu_options->use_queue_recording()); + QNN_EXECUTORCH_LOG_INFO( + "use_weight_sharing in gpu_options: %d", + gpu_options->use_weight_sharing()); + } + backend_params->qnn_backend_ptr_ = + std::make_unique(implementation, logger, gpu_options); + + backend_params->qnn_device_ptr_ = + std::make_unique(implementation, logger); + + backend_params->qnn_backend_cache_ptr_ = + std::make_unique(qnn_context_blob); + + backend_params->qnn_context_ptr_ = std::make_unique( implementation, + backend_params->qnn_backend_ptr_.get(), + backend_params->qnn_device_ptr_.get(), + backend_params->qnn_backend_cache_ptr_.get(), + qnn_dlc_manager, + gpu_options); + + backend_params->qnn_graph_ptr_ = std::make_unique( + implementation, + backend_params->qnn_backend_ptr_.get(), backend_params->qnn_context_ptr_.get(), - options->log_level()); - backend_params->backend_init_state_ = BackendInitializeState::INITIALIZED; + options->profile_level(), + gpu_options); } break; - case QnnExecuTorchBackendType::kGpuBackend: case QnnExecuTorchBackendType::kDspBackend: case QnnExecuTorchBackendType::kUndefinedBackend: default: return nullptr; } + backend_params->qnn_mem_manager_ptr_ = std::make_unique( + implementation, + backend_params->qnn_context_ptr_.get(), 
+ options->log_level()); + + backend_params->backend_init_state_ = BackendInitializeState::INITIALIZED; + if (backend_params->qnn_backend_ptr_->VerifyQNNSDKVersion() == Error::Ok) { return backend_params; } diff --git a/backends/qualcomm/runtime/backends/QnnBackendFactory.h b/backends/qualcomm/runtime/backends/QnnBackendFactory.h index 3d78a36b9f0..c6c112ccf2c 100644 --- a/backends/qualcomm/runtime/backends/QnnBackendFactory.h +++ b/backends/qualcomm/runtime/backends/QnnBackendFactory.h @@ -17,11 +17,15 @@ #include #include #include -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include +#include +#include +#include +#include #include namespace executorch { diff --git a/backends/qualcomm/runtime/backends/QnnDeviceCommon.h b/backends/qualcomm/runtime/backends/QnnDeviceCommon.h index 85de00f8623..f0f1b5b0fbd 100644 --- a/backends/qualcomm/runtime/backends/QnnDeviceCommon.h +++ b/backends/qualcomm/runtime/backends/QnnDeviceCommon.h @@ -29,7 +29,7 @@ class QnnDevice { return handle_; } - executorch::runtime::Error Configure(); + virtual executorch::runtime::Error Configure(); protected: virtual executorch::runtime::Error MakeConfig( diff --git a/backends/qualcomm/runtime/backends/QnnDlcManager.h b/backends/qualcomm/runtime/backends/QnnDlcManager.h index a57906df4e3..940c73e518a 100644 --- a/backends/qualcomm/runtime/backends/QnnDlcManager.h +++ b/backends/qualcomm/runtime/backends/QnnDlcManager.h @@ -10,7 +10,7 @@ #include #include -#include +#include #include "QnnWrapperUtils.hpp" namespace executorch { diff --git a/backends/qualcomm/runtime/backends/gpu/GpuBackend.cpp b/backends/qualcomm/runtime/backends/gpu/GpuBackend.cpp new file mode 100644 index 00000000000..2332193d30d --- /dev/null +++ b/backends/qualcomm/runtime/backends/gpu/GpuBackend.cpp @@ -0,0 +1,62 @@ +/* + * Copyright (c) Qualcomm Innovation Center, Inc. + * All rights reserved. 
+ * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. + */ + +#include + +#include "GPU/QnnGpuCommon.h" + +namespace executorch { +namespace backends { +namespace qnn { + +using executorch::runtime::Error; + +GpuBackend::GpuBackend( + const QnnImplementation& implementation, + QnnLogger* logger, + const QnnExecuTorchGpuBackendOptions* gpu_options) + : QnnBackend(implementation, logger) { + gpu_backend_custom_config_ = + std::make_unique(gpu_options); +} + +Qnn_Version_t GpuBackend::GetExpectedBackendVersion() const { + Qnn_Version_t backend_version; + backend_version.major = QNN_GPU_API_VERSION_MAJOR; + backend_version.minor = QNN_GPU_API_VERSION_MINOR; + backend_version.patch = QNN_GPU_API_VERSION_PATCH; + return backend_version; +} + +bool GpuBackend::IsProfileEventTypeParentOfNodeTime( + QnnProfile_EventType_t event_type) { + return (event_type == QNN_PROFILE_EVENTTYPE_EXECUTE); +} + +Error GpuBackend::MakeConfig(std::vector& config) { + const std::vector& backend_custom_config = + gpu_backend_custom_config_->CreateBackendCustomConfig(); + + uint32_t num_custom_configs = backend_custom_config.size(); + backend_config_.resize(num_custom_configs); + // +1 for null terminated + config.reserve(num_custom_configs + 1); + + for (std::size_t i = 0; i < num_custom_configs; ++i) { + backend_config_[i].option = QNN_BACKEND_CONFIG_OPTION_CUSTOM; + backend_config_[i].customConfig = backend_custom_config[i]; + config.push_back(&backend_config_[i]); + } + + config.push_back(nullptr); + return Error::Ok; +} + +} // namespace qnn +} // namespace backends +} // namespace executorch diff --git a/backends/qualcomm/runtime/backends/gpu/GpuBackend.h b/backends/qualcomm/runtime/backends/gpu/GpuBackend.h new file mode 100644 index 00000000000..f0a2de2fc8c --- /dev/null +++ b/backends/qualcomm/runtime/backends/gpu/GpuBackend.h @@ -0,0 +1,40 @@ +/* + * Copyright (c) Qualcomm Innovation Center, Inc. 
+ * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. + */ + +#pragma once + +#include + +namespace executorch { +namespace backends { +namespace qnn { + +class GpuBackend : public QnnBackend { + public: + GpuBackend( + const QnnImplementation& implementation, + QnnLogger* logger, + const QnnExecuTorchGpuBackendOptions* gpu_options); + + Qnn_Version_t GetExpectedBackendVersion() const override; + + bool IsProfileEventTypeParentOfNodeTime( + QnnProfile_EventType_t event_type) override; + + protected: + executorch::runtime::Error MakeConfig( + std::vector& config) override; + + private: + std::vector backend_config_; + std::unique_ptr gpu_backend_custom_config_; +}; + +} // namespace qnn +} // namespace backends +} // namespace executorch diff --git a/backends/qualcomm/runtime/backends/gpu/GpuBackendCustomConfig.cpp b/backends/qualcomm/runtime/backends/gpu/GpuBackendCustomConfig.cpp new file mode 100644 index 00000000000..60e289493d0 --- /dev/null +++ b/backends/qualcomm/runtime/backends/gpu/GpuBackendCustomConfig.cpp @@ -0,0 +1,44 @@ +/* + * Copyright (c) Qualcomm Innovation Center, Inc. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. 
+ */ + +#include + +namespace executorch { +namespace backends { +namespace qnn { + +GpuBackendCustomConfig::GpuBackendCustomConfig( + const QnnExecuTorchGpuBackendOptions* gpu_options) + : gpu_options_(gpu_options) {} + +QnnGpuBackend_CustomConfig_t* +GpuBackendCustomConfig::AllocBackendCustomConfig() { + gpu_backend_config_.emplace_back( + std::make_unique()); + gpu_backend_config_.back()->option = QNN_GPU_BACKEND_CONFIG_OPTION_UNDEFINED; + return gpu_backend_config_.back().get(); +} + +std::vector +GpuBackendCustomConfig::CreateBackendCustomConfig() { + std::vector ret; + QnnGpuBackend_CustomConfig_t* p_custom_config = nullptr; + + if (gpu_options_->use_weight_sharing()) { + p_custom_config = AllocBackendCustomConfig(); + p_custom_config->option = + QNN_GPU_BACKEND_CONFIG_OPTION_WEIGHT_SHARING_ENABLED; + p_custom_config->weightSharingEnabled = 1; + ret.push_back(static_cast(p_custom_config)); + } + return ret; +} + +} // namespace qnn +} // namespace backends +} // namespace executorch diff --git a/backends/qualcomm/runtime/backends/gpu/GpuBackendCustomConfig.h b/backends/qualcomm/runtime/backends/gpu/GpuBackendCustomConfig.h new file mode 100644 index 00000000000..150235a82e6 --- /dev/null +++ b/backends/qualcomm/runtime/backends/gpu/GpuBackendCustomConfig.h @@ -0,0 +1,41 @@ +/* + * Copyright (c) Qualcomm Innovation Center, Inc. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. 
+ */ + +#pragma once + +#include +#include + +#include +#include + +#include "GPU/QnnGpuBackend.h" + +namespace executorch { +namespace backends { +namespace qnn { + +using namespace qnn_delegate; + +class GpuBackendCustomConfig { + public: + explicit GpuBackendCustomConfig( + const QnnExecuTorchGpuBackendOptions* gpu_options); + + std::vector CreateBackendCustomConfig(); + + private: + QnnGpuBackend_CustomConfig_t* AllocBackendCustomConfig(); + std::vector> + gpu_backend_config_; + const QnnExecuTorchGpuBackendOptions* gpu_options_; +}; + +} // namespace qnn +} // namespace backends +} // namespace executorch diff --git a/backends/qualcomm/runtime/backends/gpu/GpuContext.cpp b/backends/qualcomm/runtime/backends/gpu/GpuContext.cpp new file mode 100644 index 00000000000..d3816fc560e --- /dev/null +++ b/backends/qualcomm/runtime/backends/gpu/GpuContext.cpp @@ -0,0 +1,50 @@ +/* + * Copyright (c) Qualcomm Innovation Center, Inc. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. 
+ */ + +#include + +namespace executorch { +namespace backends { +namespace qnn { + +using executorch::runtime::Error; + +GpuContext::GpuContext( + const QnnImplementation& implementation, + QnnBackend* backend, + QnnDevice* device, + QnnBackendCache* cache, + QnnDlcManager* qnn_dlc_manager, + const QnnExecuTorchGpuBackendOptions* gpu_options) + : QnnContext(implementation, backend, device, cache, qnn_dlc_manager) { + gpu_context_custom_config_ = + std::make_unique(gpu_options); +} + +Error GpuContext::MakeConfig(std::vector& config) { + const std::vector& context_custom_config = + gpu_context_custom_config_->CreateContextCustomConfig(); + + uint32_t num_custom_configs = context_custom_config.size(); + context_config_.resize(num_custom_configs); + // +1 for null terminated + config.reserve(num_custom_configs + 1); + + for (std::size_t i = 0; i < num_custom_configs; ++i) { + context_config_[i].option = QNN_CONTEXT_CONFIG_OPTION_CUSTOM; + context_config_[i].customConfig = context_custom_config[i]; + config.push_back(&context_config_[i]); + } + + config.push_back(nullptr); + return Error::Ok; +} + +} // namespace qnn +} // namespace backends +} // namespace executorch diff --git a/backends/qualcomm/runtime/backends/gpu/GpuContext.h b/backends/qualcomm/runtime/backends/gpu/GpuContext.h new file mode 100644 index 00000000000..873117c0e50 --- /dev/null +++ b/backends/qualcomm/runtime/backends/gpu/GpuContext.h @@ -0,0 +1,39 @@ +/* + * Copyright (c) Qualcomm Innovation Center, Inc. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. 
+ */ + +#pragma once + +#include + +namespace executorch { +namespace backends { +namespace qnn { + +class QnnDlcManager; +class GpuContext : public QnnContext { + public: + GpuContext( + const QnnImplementation& implementation, + QnnBackend* backend, + QnnDevice* device, + QnnBackendCache* cache, + QnnDlcManager* qnn_dlc_manager, + const QnnExecuTorchGpuBackendOptions* gpu_options); + + protected: + executorch::runtime::Error MakeConfig( + std::vector& config) override; + + private: + std::vector context_config_; + std::unique_ptr gpu_context_custom_config_; +}; + +} // namespace qnn +} // namespace backends +} // namespace executorch diff --git a/backends/qualcomm/runtime/backends/gpu/GpuContextCustomConfig.h b/backends/qualcomm/runtime/backends/gpu/GpuContextCustomConfig.h new file mode 100644 index 00000000000..c88e94d045f --- /dev/null +++ b/backends/qualcomm/runtime/backends/gpu/GpuContextCustomConfig.h @@ -0,0 +1,48 @@ +/* + * Copyright (c) Qualcomm Innovation Center, Inc. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. 
+ */ + +#pragma once + +#include +#include + +#include +#include + +#include "GPU/QnnGpuContext.h" + +namespace executorch { +namespace backends { +namespace qnn { + +using namespace qnn_delegate; + +class GpuContextCustomConfig { + public: + explicit GpuContextCustomConfig( + const QnnExecuTorchGpuBackendOptions* gpu_options) + : gpu_options_(gpu_options) {} + + std::vector CreateContextCustomConfig(); + + private: + QnnGpuContext_CustomConfig_t* AllocContextCustomConfig() { + gpu_context_config_.emplace_back( + std::make_unique()); + gpu_context_config_.back()->option = + QNN_GPU_CONTEXT_CONFIG_OPTION_UNDEFINED; + return gpu_context_config_.back().get(); + } + std::vector> + gpu_context_config_; + const QnnExecuTorchGpuBackendOptions* gpu_options_; +}; + +} // namespace qnn +} // namespace backends +} // namespace executorch diff --git a/backends/qualcomm/runtime/backends/gpu/GpuDevice.h b/backends/qualcomm/runtime/backends/gpu/GpuDevice.h new file mode 100644 index 00000000000..20d6568ecc3 --- /dev/null +++ b/backends/qualcomm/runtime/backends/gpu/GpuDevice.h @@ -0,0 +1,29 @@ +/* + * Copyright (c) Qualcomm Innovation Center, Inc. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. 
+ */ +#pragma once + +#include + +namespace executorch { +namespace backends { +namespace qnn { + +class GpuDevice : public QnnDevice { + public: + GpuDevice(const QnnImplementation& implementation, QnnLogger* logger) + : QnnDevice(implementation, logger){}; + + // GPU backend does not support device creation + executorch::runtime::Error Configure() override { + return executorch::runtime::Error::Ok; + } +}; + +} // namespace qnn +} // namespace backends +} // namespace executorch diff --git a/backends/qualcomm/runtime/backends/gpu/GpuGraph.cpp b/backends/qualcomm/runtime/backends/gpu/GpuGraph.cpp new file mode 100644 index 00000000000..d626ac47c7d --- /dev/null +++ b/backends/qualcomm/runtime/backends/gpu/GpuGraph.cpp @@ -0,0 +1,49 @@ +/* + * Copyright (c) Qualcomm Innovation Center, Inc. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. + */ + +#include + +namespace executorch { +namespace backends { +namespace qnn { + +using executorch::runtime::Error; + +GpuGraph::GpuGraph( + const QnnImplementation& implementation, + QnnBackend* backend, + QnnContext* context, + const QnnExecuTorchProfileLevel& profile_level, + const QnnExecuTorchGpuBackendOptions* gpu_options) + : QnnGraph(implementation, backend, context, profile_level) { + gpu_graph_custom_config_ = + std::make_unique(gpu_options); +} + +Error GpuGraph::MakeConfig(std::vector& config) { + const std::vector& graph_custom_config = + gpu_graph_custom_config_->CreateGraphCustomConfig(); + + uint32_t num_custom_configs = graph_custom_config.size(); + graph_config_.resize(num_custom_configs); + // +1 for null terminated + config.reserve(num_custom_configs + 1); + + for (std::size_t i = 0; i < num_custom_configs; ++i) { + graph_config_[i].option = QNN_GRAPH_CONFIG_OPTION_CUSTOM; + graph_config_[i].customConfig = graph_custom_config[i]; + config.push_back(&graph_config_[i]); + } + + 
config.push_back(nullptr); + return Error::Ok; +} + +} // namespace qnn +} // namespace backends +} // namespace executorch diff --git a/backends/qualcomm/runtime/backends/gpu/GpuGraph.h b/backends/qualcomm/runtime/backends/gpu/GpuGraph.h new file mode 100644 index 00000000000..c2b5bf2832d --- /dev/null +++ b/backends/qualcomm/runtime/backends/gpu/GpuGraph.h @@ -0,0 +1,37 @@ +/* + * Copyright (c) Qualcomm Innovation Center, Inc. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. + */ + +#pragma once + +#include + +namespace executorch { +namespace backends { +namespace qnn { + +class GpuGraph : public QnnGraph { + public: + GpuGraph( + const QnnImplementation& implementation, + QnnBackend* backend, + QnnContext* context, + const QnnExecuTorchProfileLevel& profile_level, + const QnnExecuTorchGpuBackendOptions* gpu_options); + + protected: + executorch::runtime::Error MakeConfig( + std::vector& config) override; + + private: + std::vector graph_config_; + std::unique_ptr gpu_graph_custom_config_; +}; + +} // namespace qnn +} // namespace backends +} // namespace executorch diff --git a/backends/qualcomm/runtime/backends/gpu/GpuGraphCustomConfig.cpp b/backends/qualcomm/runtime/backends/gpu/GpuGraphCustomConfig.cpp new file mode 100644 index 00000000000..17f094db805 --- /dev/null +++ b/backends/qualcomm/runtime/backends/gpu/GpuGraphCustomConfig.cpp @@ -0,0 +1,44 @@ +/* + * Copyright (c) Qualcomm Innovation Center, Inc. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. 
+ */ + +#include + +namespace executorch { +namespace backends { +namespace qnn { + +GpuGraphCustomConfig::GpuGraphCustomConfig( + const QnnExecuTorchGpuBackendOptions* gpu_options) + : gpu_options_(gpu_options) {} + +QnnGpuGraph_CustomConfig_t* GpuGraphCustomConfig::AllocGraphCustomConfig() { + gpu_graph_config_.emplace_back( + std::make_unique()); + return gpu_graph_config_.back().get(); +} + +std::vector +GpuGraphCustomConfig::CreateGraphCustomConfig() { + std::vector ret; + QnnGpuGraph_CustomConfig_t* p_custom_config = nullptr; + + p_custom_config = AllocGraphCustomConfig(); + p_custom_config->precision = + static_cast(gpu_options_->precision()); + p_custom_config->disableMemoryOptimizations = + !gpu_options_->use_memory_optimizations(); + p_custom_config->disableNodeOptimizations = + !gpu_options_->use_node_optimizations(); + p_custom_config->disableQueueRecording = !gpu_options_->use_queue_recording(); + ret.push_back(static_cast(p_custom_config)); + return ret; +} + +} // namespace qnn +} // namespace backends +} // namespace executorch diff --git a/backends/qualcomm/runtime/backends/gpu/GpuGraphCustomConfig.h b/backends/qualcomm/runtime/backends/gpu/GpuGraphCustomConfig.h new file mode 100644 index 00000000000..a47cd1a3345 --- /dev/null +++ b/backends/qualcomm/runtime/backends/gpu/GpuGraphCustomConfig.h @@ -0,0 +1,40 @@ +/* + * Copyright (c) Qualcomm Innovation Center, Inc. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. 
+ */ + +#pragma once + +#include +#include + +#include +#include + +#include "GPU/QnnGpuGraph.h" + +namespace executorch { +namespace backends { +namespace qnn { + +using namespace qnn_delegate; + +class GpuGraphCustomConfig { + public: + explicit GpuGraphCustomConfig( + const QnnExecuTorchGpuBackendOptions* gpu_options); + + std::vector CreateGraphCustomConfig(); + + private: + QnnGpuGraph_CustomConfig_t* AllocGraphCustomConfig(); + std::vector> gpu_graph_config_; + const QnnExecuTorchGpuBackendOptions* gpu_options_; +}; + +} // namespace qnn +} // namespace backends +} // namespace executorch diff --git a/backends/qualcomm/runtime/backends/gpu/aarch64/GpuContextCustomConfig.cpp b/backends/qualcomm/runtime/backends/gpu/aarch64/GpuContextCustomConfig.cpp new file mode 100644 index 00000000000..b4f200897ba --- /dev/null +++ b/backends/qualcomm/runtime/backends/gpu/aarch64/GpuContextCustomConfig.cpp @@ -0,0 +1,30 @@ +/* + * Copyright (c) Qualcomm Innovation Center, Inc. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. 
+ */ + +#include + +namespace executorch { +namespace backends { +namespace qnn { + +std::vector +GpuContextCustomConfig::CreateContextCustomConfig() { + std::vector ret; + QnnGpuContext_CustomConfig_t* p_custom_config = nullptr; + + p_custom_config = AllocContextCustomConfig(); + p_custom_config->option = QNN_GPU_CONTEXT_CONFIG_OPTION_PERF_HINT; + p_custom_config->perfHint = + static_cast(gpu_options_->performance_mode()); + ret.push_back(static_cast(p_custom_config)); + return ret; +} + +} // namespace qnn +} // namespace backends +} // namespace executorch diff --git a/backends/qualcomm/runtime/backends/gpu/x86_64/GpuContextCustomConfig.cpp b/backends/qualcomm/runtime/backends/gpu/x86_64/GpuContextCustomConfig.cpp new file mode 100644 index 00000000000..69784c1797f --- /dev/null +++ b/backends/qualcomm/runtime/backends/gpu/x86_64/GpuContextCustomConfig.cpp @@ -0,0 +1,22 @@ +/* + * Copyright (c) Qualcomm Innovation Center, Inc. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. 
+ */ + +#include + +namespace executorch { +namespace backends { +namespace qnn { + +std::vector +GpuContextCustomConfig::CreateContextCustomConfig() { + return {}; +} + +} // namespace qnn +} // namespace backends +} // namespace executorch diff --git a/backends/qualcomm/runtime/backends/htpbackend/HtpBackend.h b/backends/qualcomm/runtime/backends/htp/HtpBackend.h similarity index 100% rename from backends/qualcomm/runtime/backends/htpbackend/HtpBackend.h rename to backends/qualcomm/runtime/backends/htp/HtpBackend.h diff --git a/backends/qualcomm/runtime/backends/htpbackend/HtpBackendCache.cpp b/backends/qualcomm/runtime/backends/htp/HtpBackendCache.cpp similarity index 96% rename from backends/qualcomm/runtime/backends/htpbackend/HtpBackendCache.cpp rename to backends/qualcomm/runtime/backends/htp/HtpBackendCache.cpp index 030b5666daf..3038a100d03 100644 --- a/backends/qualcomm/runtime/backends/htpbackend/HtpBackendCache.cpp +++ b/backends/qualcomm/runtime/backends/htp/HtpBackendCache.cpp @@ -5,7 +5,7 @@ * This source code is licensed under the BSD-style license found in the * LICENSE file in the root directory of this source tree. 
*/ -#include +#include #include "HTP/QnnHtpSystemContext.h" namespace executorch { diff --git a/backends/qualcomm/runtime/backends/htpbackend/HtpBackendCache.h b/backends/qualcomm/runtime/backends/htp/HtpBackendCache.h similarity index 100% rename from backends/qualcomm/runtime/backends/htpbackend/HtpBackendCache.h rename to backends/qualcomm/runtime/backends/htp/HtpBackendCache.h diff --git a/backends/qualcomm/runtime/backends/htpbackend/HtpContext.cpp b/backends/qualcomm/runtime/backends/htp/HtpContext.cpp similarity index 94% rename from backends/qualcomm/runtime/backends/htpbackend/HtpContext.cpp rename to backends/qualcomm/runtime/backends/htp/HtpContext.cpp index 50d299b55e9..0056a2c0917 100644 --- a/backends/qualcomm/runtime/backends/htpbackend/HtpContext.cpp +++ b/backends/qualcomm/runtime/backends/htp/HtpContext.cpp @@ -7,7 +7,7 @@ */ #include -#include +#include #include "HTP/QnnHtpCommon.h" diff --git a/backends/qualcomm/runtime/backends/htpbackend/HtpContext.h b/backends/qualcomm/runtime/backends/htp/HtpContext.h similarity index 94% rename from backends/qualcomm/runtime/backends/htpbackend/HtpContext.h rename to backends/qualcomm/runtime/backends/htp/HtpContext.h index 88660db080a..ff937593434 100644 --- a/backends/qualcomm/runtime/backends/htpbackend/HtpContext.h +++ b/backends/qualcomm/runtime/backends/htp/HtpContext.h @@ -10,7 +10,7 @@ #include #include -#include +#include namespace executorch { namespace backends { diff --git a/backends/qualcomm/runtime/backends/htpbackend/HtpContextCustomConfig.h b/backends/qualcomm/runtime/backends/htp/HtpContextCustomConfig.h similarity index 100% rename from backends/qualcomm/runtime/backends/htpbackend/HtpContextCustomConfig.h rename to backends/qualcomm/runtime/backends/htp/HtpContextCustomConfig.h diff --git a/backends/qualcomm/runtime/backends/htpbackend/HtpDevice.cpp b/backends/qualcomm/runtime/backends/htp/HtpDevice.cpp similarity index 99% rename from 
backends/qualcomm/runtime/backends/htpbackend/HtpDevice.cpp rename to backends/qualcomm/runtime/backends/htp/HtpDevice.cpp index 46ba3117269..9ce0c5e5aac 100644 --- a/backends/qualcomm/runtime/backends/htpbackend/HtpDevice.cpp +++ b/backends/qualcomm/runtime/backends/htp/HtpDevice.cpp @@ -7,7 +7,7 @@ */ #include -#include +#include #include "HTP/QnnHtpCommon.h" #include "Saver/QnnSaverCommon.h" diff --git a/backends/qualcomm/runtime/backends/htpbackend/HtpDevice.h b/backends/qualcomm/runtime/backends/htp/HtpDevice.h similarity index 92% rename from backends/qualcomm/runtime/backends/htpbackend/HtpDevice.h rename to backends/qualcomm/runtime/backends/htp/HtpDevice.h index f75e15fc77c..daf4753cb69 100644 --- a/backends/qualcomm/runtime/backends/htpbackend/HtpDevice.h +++ b/backends/qualcomm/runtime/backends/htp/HtpDevice.h @@ -8,8 +8,8 @@ #pragma once #include -#include -#include +#include +#include #include #include "HTP/QnnHtpDevice.h" @@ -37,7 +37,7 @@ class HtpDevice : public QnnDevice { } ~HtpDevice(); - // Defines Qnn performance mode vote types for htpbackend + // Defines Qnn performance mode vote types for htp enum PerformanceModeVoteType { kNoVote = 0, kUpVote = 1, diff --git a/backends/qualcomm/runtime/backends/htpbackend/HtpDeviceCustomConfig.h b/backends/qualcomm/runtime/backends/htp/HtpDeviceCustomConfig.h similarity index 100% rename from backends/qualcomm/runtime/backends/htpbackend/HtpDeviceCustomConfig.h rename to backends/qualcomm/runtime/backends/htp/HtpDeviceCustomConfig.h diff --git a/backends/qualcomm/runtime/backends/htpbackend/HtpDevicePlatformInfoConfig.h b/backends/qualcomm/runtime/backends/htp/HtpDevicePlatformInfoConfig.h similarity index 100% rename from backends/qualcomm/runtime/backends/htpbackend/HtpDevicePlatformInfoConfig.h rename to backends/qualcomm/runtime/backends/htp/HtpDevicePlatformInfoConfig.h diff --git a/backends/qualcomm/runtime/backends/htpbackend/HtpGraph.cpp b/backends/qualcomm/runtime/backends/htp/HtpGraph.cpp 
similarity index 93% rename from backends/qualcomm/runtime/backends/htpbackend/HtpGraph.cpp rename to backends/qualcomm/runtime/backends/htp/HtpGraph.cpp index 29dcf0a58c3..6208febe61a 100644 --- a/backends/qualcomm/runtime/backends/htpbackend/HtpGraph.cpp +++ b/backends/qualcomm/runtime/backends/htp/HtpGraph.cpp @@ -6,7 +6,7 @@ * LICENSE file in the root directory of this source tree. */ -#include +#include namespace executorch { namespace backends { namespace qnn { diff --git a/backends/qualcomm/runtime/backends/htpbackend/HtpGraph.h b/backends/qualcomm/runtime/backends/htp/HtpGraph.h similarity index 93% rename from backends/qualcomm/runtime/backends/htpbackend/HtpGraph.h rename to backends/qualcomm/runtime/backends/htp/HtpGraph.h index c3add50d08b..db24a64cdfd 100644 --- a/backends/qualcomm/runtime/backends/htpbackend/HtpGraph.h +++ b/backends/qualcomm/runtime/backends/htp/HtpGraph.h @@ -8,7 +8,7 @@ #pragma once #include -#include +#include #include diff --git a/backends/qualcomm/runtime/backends/htpbackend/HtpGraphCustomConfig.cpp b/backends/qualcomm/runtime/backends/htp/HtpGraphCustomConfig.cpp similarity index 97% rename from backends/qualcomm/runtime/backends/htpbackend/HtpGraphCustomConfig.cpp rename to backends/qualcomm/runtime/backends/htp/HtpGraphCustomConfig.cpp index d43f8320285..17b8438880d 100644 --- a/backends/qualcomm/runtime/backends/htpbackend/HtpGraphCustomConfig.cpp +++ b/backends/qualcomm/runtime/backends/htp/HtpGraphCustomConfig.cpp @@ -7,7 +7,7 @@ */ #include #include -#include +#include namespace executorch { namespace backends { namespace qnn { diff --git a/backends/qualcomm/runtime/backends/htpbackend/HtpGraphCustomConfig.h b/backends/qualcomm/runtime/backends/htp/HtpGraphCustomConfig.h similarity index 100% rename from backends/qualcomm/runtime/backends/htpbackend/HtpGraphCustomConfig.h rename to backends/qualcomm/runtime/backends/htp/HtpGraphCustomConfig.h diff --git 
a/backends/qualcomm/runtime/backends/htpbackend/aarch64/HtpContextCustomConfig.cpp b/backends/qualcomm/runtime/backends/htp/aarch64/HtpContextCustomConfig.cpp similarity index 87% rename from backends/qualcomm/runtime/backends/htpbackend/aarch64/HtpContextCustomConfig.cpp rename to backends/qualcomm/runtime/backends/htp/aarch64/HtpContextCustomConfig.cpp index 04a5d844dd0..676795797f8 100644 --- a/backends/qualcomm/runtime/backends/htpbackend/aarch64/HtpContextCustomConfig.cpp +++ b/backends/qualcomm/runtime/backends/htp/aarch64/HtpContextCustomConfig.cpp @@ -6,8 +6,8 @@ * LICENSE file in the root directory of this source tree. */ -#include -#include +#include +#include namespace executorch { namespace backends { diff --git a/backends/qualcomm/runtime/backends/htpbackend/aarch64/HtpDeviceCustomConfig.cpp b/backends/qualcomm/runtime/backends/htp/aarch64/HtpDeviceCustomConfig.cpp similarity index 84% rename from backends/qualcomm/runtime/backends/htpbackend/aarch64/HtpDeviceCustomConfig.cpp rename to backends/qualcomm/runtime/backends/htp/aarch64/HtpDeviceCustomConfig.cpp index 81ac4a14372..8207f5071ba 100644 --- a/backends/qualcomm/runtime/backends/htpbackend/aarch64/HtpDeviceCustomConfig.cpp +++ b/backends/qualcomm/runtime/backends/htp/aarch64/HtpDeviceCustomConfig.cpp @@ -5,7 +5,7 @@ * This source code is licensed under the BSD-style license found in the * LICENSE file in the root directory of this source tree. 
*/ -#include +#include namespace executorch { namespace backends { namespace qnn { diff --git a/backends/qualcomm/runtime/backends/htpbackend/aarch64/HtpDevicePlatformInfoConfig.cpp b/backends/qualcomm/runtime/backends/htp/aarch64/HtpDevicePlatformInfoConfig.cpp similarity index 83% rename from backends/qualcomm/runtime/backends/htpbackend/aarch64/HtpDevicePlatformInfoConfig.cpp rename to backends/qualcomm/runtime/backends/htp/aarch64/HtpDevicePlatformInfoConfig.cpp index c191791fa63..91221a78fd6 100644 --- a/backends/qualcomm/runtime/backends/htpbackend/aarch64/HtpDevicePlatformInfoConfig.cpp +++ b/backends/qualcomm/runtime/backends/htp/aarch64/HtpDevicePlatformInfoConfig.cpp @@ -5,7 +5,7 @@ * This source code is licensed under the BSD-style license found in the * LICENSE file in the root directory of this source tree. */ -#include +#include namespace executorch { namespace backends { namespace qnn { diff --git a/backends/qualcomm/runtime/backends/htpbackend/aarch64/HtpGraphCustomConfig.cpp b/backends/qualcomm/runtime/backends/htp/aarch64/HtpGraphCustomConfig.cpp similarity index 85% rename from backends/qualcomm/runtime/backends/htpbackend/aarch64/HtpGraphCustomConfig.cpp rename to backends/qualcomm/runtime/backends/htp/aarch64/HtpGraphCustomConfig.cpp index 096fda7b059..faac23edc12 100644 --- a/backends/qualcomm/runtime/backends/htpbackend/aarch64/HtpGraphCustomConfig.cpp +++ b/backends/qualcomm/runtime/backends/htp/aarch64/HtpGraphCustomConfig.cpp @@ -6,7 +6,7 @@ * LICENSE file in the root directory of this source tree. 
*/ -#include +#include namespace executorch { namespace backends { diff --git a/backends/qualcomm/runtime/backends/htpbackend/x86_64/HtpContextCustomConfig.cpp b/backends/qualcomm/runtime/backends/htp/x86_64/HtpContextCustomConfig.cpp similarity index 90% rename from backends/qualcomm/runtime/backends/htpbackend/x86_64/HtpContextCustomConfig.cpp rename to backends/qualcomm/runtime/backends/htp/x86_64/HtpContextCustomConfig.cpp index 1fc2940eaa7..4850afa14a2 100644 --- a/backends/qualcomm/runtime/backends/htpbackend/x86_64/HtpContextCustomConfig.cpp +++ b/backends/qualcomm/runtime/backends/htp/x86_64/HtpContextCustomConfig.cpp @@ -6,7 +6,7 @@ * LICENSE file in the root directory of this source tree. */ -#include +#include namespace executorch { namespace backends { diff --git a/backends/qualcomm/runtime/backends/htpbackend/x86_64/HtpDeviceCustomConfig.cpp b/backends/qualcomm/runtime/backends/htp/x86_64/HtpDeviceCustomConfig.cpp similarity index 90% rename from backends/qualcomm/runtime/backends/htpbackend/x86_64/HtpDeviceCustomConfig.cpp rename to backends/qualcomm/runtime/backends/htp/x86_64/HtpDeviceCustomConfig.cpp index 154433c10b0..9afbf489bc1 100644 --- a/backends/qualcomm/runtime/backends/htpbackend/x86_64/HtpDeviceCustomConfig.cpp +++ b/backends/qualcomm/runtime/backends/htp/x86_64/HtpDeviceCustomConfig.cpp @@ -5,7 +5,7 @@ * This source code is licensed under the BSD-style license found in the * LICENSE file in the root directory of this source tree. 
*/ -#include +#include namespace executorch { namespace backends { namespace qnn { diff --git a/backends/qualcomm/runtime/backends/htpbackend/x86_64/HtpDevicePlatformInfoConfig.cpp b/backends/qualcomm/runtime/backends/htp/x86_64/HtpDevicePlatformInfoConfig.cpp similarity index 96% rename from backends/qualcomm/runtime/backends/htpbackend/x86_64/HtpDevicePlatformInfoConfig.cpp rename to backends/qualcomm/runtime/backends/htp/x86_64/HtpDevicePlatformInfoConfig.cpp index b025f0b2aa6..15c677e8a68 100644 --- a/backends/qualcomm/runtime/backends/htpbackend/x86_64/HtpDevicePlatformInfoConfig.cpp +++ b/backends/qualcomm/runtime/backends/htp/x86_64/HtpDevicePlatformInfoConfig.cpp @@ -6,7 +6,7 @@ * LICENSE file in the root directory of this source tree. */ #include -#include +#include namespace executorch { namespace backends { namespace qnn { diff --git a/backends/qualcomm/runtime/backends/htpbackend/x86_64/HtpGraphCustomConfig.cpp b/backends/qualcomm/runtime/backends/htp/x86_64/HtpGraphCustomConfig.cpp similarity index 85% rename from backends/qualcomm/runtime/backends/htpbackend/x86_64/HtpGraphCustomConfig.cpp rename to backends/qualcomm/runtime/backends/htp/x86_64/HtpGraphCustomConfig.cpp index 330ca43e20b..ec01f2bbfdd 100644 --- a/backends/qualcomm/runtime/backends/htpbackend/x86_64/HtpGraphCustomConfig.cpp +++ b/backends/qualcomm/runtime/backends/htp/x86_64/HtpGraphCustomConfig.cpp @@ -6,7 +6,7 @@ * LICENSE file in the root directory of this source tree. 
*/ -#include +#include namespace executorch { namespace backends { diff --git a/backends/qualcomm/runtime/backends/irbackend/IrBackend.h b/backends/qualcomm/runtime/backends/ir/IrBackend.h similarity index 100% rename from backends/qualcomm/runtime/backends/irbackend/IrBackend.h rename to backends/qualcomm/runtime/backends/ir/IrBackend.h diff --git a/backends/qualcomm/runtime/backends/irbackend/IrContext.h b/backends/qualcomm/runtime/backends/ir/IrContext.h similarity index 100% rename from backends/qualcomm/runtime/backends/irbackend/IrContext.h rename to backends/qualcomm/runtime/backends/ir/IrContext.h diff --git a/backends/qualcomm/runtime/backends/irbackend/aarch64/IrContext.cpp b/backends/qualcomm/runtime/backends/ir/aarch64/IrContext.cpp similarity index 88% rename from backends/qualcomm/runtime/backends/irbackend/aarch64/IrContext.cpp rename to backends/qualcomm/runtime/backends/ir/aarch64/IrContext.cpp index 44ce8de8f46..12a27b19ccd 100644 --- a/backends/qualcomm/runtime/backends/irbackend/aarch64/IrContext.cpp +++ b/backends/qualcomm/runtime/backends/ir/aarch64/IrContext.cpp @@ -9,7 +9,7 @@ #include #include -#include +#include namespace executorch { namespace backends { diff --git a/backends/qualcomm/runtime/backends/irbackend/aarch64/QnnDlcManager.cpp b/backends/qualcomm/runtime/backends/ir/aarch64/QnnDlcManager.cpp similarity index 100% rename from backends/qualcomm/runtime/backends/irbackend/aarch64/QnnDlcManager.cpp rename to backends/qualcomm/runtime/backends/ir/aarch64/QnnDlcManager.cpp diff --git a/backends/qualcomm/runtime/backends/irbackend/x86_64/IrContext.cpp b/backends/qualcomm/runtime/backends/ir/x86_64/IrContext.cpp similarity index 94% rename from backends/qualcomm/runtime/backends/irbackend/x86_64/IrContext.cpp rename to backends/qualcomm/runtime/backends/ir/x86_64/IrContext.cpp index f167aae9319..cf5df3de8e9 100644 --- a/backends/qualcomm/runtime/backends/irbackend/x86_64/IrContext.cpp +++ 
b/backends/qualcomm/runtime/backends/ir/x86_64/IrContext.cpp @@ -9,7 +9,7 @@ #include #include -#include +#include namespace executorch { namespace backends { namespace qnn { diff --git a/backends/qualcomm/runtime/backends/irbackend/x86_64/QnnDlcManager.cpp b/backends/qualcomm/runtime/backends/ir/x86_64/QnnDlcManager.cpp similarity index 98% rename from backends/qualcomm/runtime/backends/irbackend/x86_64/QnnDlcManager.cpp rename to backends/qualcomm/runtime/backends/ir/x86_64/QnnDlcManager.cpp index 050a679e62a..c645190e5d3 100644 --- a/backends/qualcomm/runtime/backends/irbackend/x86_64/QnnDlcManager.cpp +++ b/backends/qualcomm/runtime/backends/ir/x86_64/QnnDlcManager.cpp @@ -6,7 +6,7 @@ * LICENSE file in the root directory of this source tree. */ #include -#include +#include namespace executorch { namespace backends { diff --git a/backends/qualcomm/runtime/targets.bzl b/backends/qualcomm/runtime/targets.bzl index 1bd82f8f913..a32ca579452 100644 --- a/backends/qualcomm/runtime/targets.bzl +++ b/backends/qualcomm/runtime/targets.bzl @@ -44,10 +44,12 @@ def define_common_targets(): [ "*.cpp", "backends/*.cpp", - "backends/irbackend/*.cpp", - "backends/htpbackend/*.cpp", - ] + (["backends/htpbackend/x86_64/*.cpp"] if include_aot_qnn_lib else ["backends/htpbackend/aarch64/*.cpp"]) + ( - ["backends/irbackend/x86_64/*.cpp"] if include_aot_qnn_lib else ["backends/irbackend/aarch64/*.cpp"] + "backends/gpu/*.cpp", + "backends/htp/*.cpp", + "backends/ir/*.cpp", + ] + (["backends/gpu/x86_64/*.cpp"] if include_aot_qnn_lib else ["backends/gpu/aarch64/*.cpp"]) + ( + ["backends/htp/x86_64/*.cpp"] if include_aot_qnn_lib else ["backends/htp/aarch64/*.cpp"]) + ( + ["backends/ir/x86_64/*.cpp"] if include_aot_qnn_lib else ["backends/ir/aarch64/*.cpp"] ), exclude = ["Logging.cpp"], ), @@ -55,8 +57,9 @@ def define_common_targets(): [ "*.h", "backends/*.h", - "backends/irbackend/*.h", - "backends/htpbackend/*.h", + "backends/gpu/*.h", + "backends/htp/*.h", + "backends/ir/*.h", ], 
exclude = ["Logging.h"], ), diff --git a/backends/qualcomm/serialization/qc_compiler_spec.fbs b/backends/qualcomm/serialization/qc_compiler_spec.fbs index 8aeaa060a50..183cc8c53d6 100644 --- a/backends/qualcomm/serialization/qc_compiler_spec.fbs +++ b/backends/qualcomm/serialization/qc_compiler_spec.fbs @@ -54,6 +54,50 @@ table SocInfo { htp_info:HtpInfo; } +/// Defines performance modes available for GPU backend. +enum QnnExecuTorchGpuPerformanceMode: int { + kGpuPerfHintHigh = 0, + kGpuPerfHintNormal, + kGpuPerfHintLow, +} + +/// Defines the optimization levels of the graph tensors that are not input nor +/// output tensors. This enum controls the trade-off between performance and +/// accuracy. +enum QnnExecuTorchGpuPrecision: int { + kGpuPrecisionFp32 = 0, + kGpuPrecisionFp16, + kGpuPrecisionHybrid, + kGpuPrecisionUserProvided, +} + +/// Specifies the backend options for the GPU backend. +table QnnExecuTorchGpuBackendOptions { + /// kGpuPerfHintHigh - best inference latency at the expense of power consumption. + /// kGpuPerfHintNormal - balanced performance dependent upon power management. + /// kGpuPerfHintLow - lowest power consumption at the expense of inference latency. + performance_mode:QnnExecuTorchGpuPerformanceMode; + + /// kGpuPrecisionFp32 - best accuracy at the expense of performance. + /// kGpuPrecisionFp16 - best performance at the expense of accuracy. + /// kGpuPrecisionHybrid - good trade-off between performance and accuracy. + /// kGpuPrecisionUserProvided - backend will not optimize NATIVE tensor data types. + precision:QnnExecuTorchGpuPrecision; + + /// Backend will share NATIVE tensor memory based upon analysis of the network topology. + use_memory_optimizations:bool; + + /// Backend will fuse compatible operations into one operation to improve performance. + use_node_optimizations:bool; + + /// Backend will use queue recording to improve performance. 
+ use_queue_recording:bool; + + /// When multiple graphs appear inside the same context, + /// weights could be reused across all graphs. + use_weight_sharing:bool; +} + /// Defines performance modes available for HTP backend. enum QnnExecuTorchHtpPerformanceMode: int { kHtpDefault = 0, @@ -165,7 +209,6 @@ enum QnnExecuTorchOpPackagePlatform: int { AARCH64_ANDROID, } - table QnnExecuTorchOpPackageInfo { /// The name of the op package. op_package_name:string; @@ -190,7 +233,6 @@ table QnnExecuTorchOpPackageInfo { platform:QnnExecuTorchOpPackagePlatform; } - table QnnExecuTorchOpPackageOptions { /// An array of QnnExecuTorchOpPackageInfo structures. op_package_infos:[QnnExecuTorchOpPackageInfo]; @@ -203,6 +245,8 @@ table QnnExecuTorchBackendOptions { backend_type:QnnExecuTorchBackendType; htp_options:QnnExecuTorchHtpBackendOptions; + + gpu_options:QnnExecuTorchGpuBackendOptions; } table QnnExecuTorchOptions { diff --git a/backends/qualcomm/serialization/qc_schema.py b/backends/qualcomm/serialization/qc_schema.py index f3b9e2cc1a5..7123ce541f4 100644 --- a/backends/qualcomm/serialization/qc_schema.py +++ b/backends/qualcomm/serialization/qc_schema.py @@ -10,7 +10,7 @@ from dataclasses import dataclass, field from enum import IntEnum, unique -from typing import List +from typing import List, Optional @dataclass @@ -72,6 +72,35 @@ class SocInfo: } +@unique +class QnnExecuTorchGpuPerformanceMode(IntEnum): + kGpuPerfHintHigh = 0 + kGpuPerfHintNormal = 1 + kGpuPerfHintLow = 2 + + +@unique +class QnnExecuTorchGpuPrecision(IntEnum): + kGpuPrecisionFp32 = 0 + kGpuPrecisionFp16 = 1 + kGpuPrecisionHybrid = 2 + kGpuPrecisionUserProvided = 3 + + +@dataclass +class QnnExecuTorchGpuBackendOptions: + performance_mode: QnnExecuTorchGpuPerformanceMode = ( + QnnExecuTorchGpuPerformanceMode.kGpuPerfHintHigh + ) + precision: QnnExecuTorchGpuPrecision = ( + QnnExecuTorchGpuPrecision.kGpuPrecisionUserProvided + ) + use_memory_optimizations: bool = True + use_node_optimizations: bool = True 
+ use_queue_recording: bool = True + use_weight_sharing: bool = False + + @unique class QnnExecuTorchHtpPerformanceMode(IntEnum): kHtpDefault = 0 @@ -142,7 +171,8 @@ class QnnExecuTorchProfileLevel(IntEnum): @dataclass class QnnExecuTorchBackendOptions: backend_type: QnnExecuTorchBackendType - htp_options: QnnExecuTorchHtpBackendOptions + htp_options: Optional[QnnExecuTorchHtpBackendOptions] = None + gpu_options: Optional[QnnExecuTorchGpuBackendOptions] = None @unique diff --git a/backends/qualcomm/tests/test_qnn_delegate.py b/backends/qualcomm/tests/test_qnn_delegate.py index 4a0edaf471d..89d9cbbff4d 100644 --- a/backends/qualcomm/tests/test_qnn_delegate.py +++ b/backends/qualcomm/tests/test_qnn_delegate.py @@ -27,6 +27,7 @@ generate_context_binary, ModuleQConfig, prepare_pt2e, + QnnExecuTorchBackendType, QuantDtype, TestQNN, validate_context_binary, @@ -44,6 +45,7 @@ capture_program, dump_context_from_pte, from_context_binary, + generate_gpu_compiler_spec, generate_htp_compiler_spec, generate_qnn_executorch_compiler_spec, PyQnnManagerAdaptor, @@ -95,9 +97,16 @@ class TestQNNFloatingPointOperator(TestQNN): # TODO: refactor to support different backends def setUp(self): + match self.get_backend_type(): + case QnnExecuTorchBackendType.kHtpBackend: + backend_options = generate_htp_compiler_spec(use_fp16=True) + case QnnExecuTorchBackendType.kGpuBackend: + backend_options = generate_gpu_compiler_spec() + case _: + raise ValueError("Backend is not implemented yet") + TestQNN.atol = 1e-1 TestQNN.rtol = 1e-1 - backend_options = generate_htp_compiler_spec(use_fp16=True) TestQNN.compiler_specs = generate_qnn_executorch_compiler_spec( soc_model=self.chipset_table[TestQNN.model], backend_options=backend_options, @@ -5758,13 +5767,17 @@ def setup_environment(): default="", type=str, ) - + parser.add_argument( + "--backend", + help="Backend to be deployed ('htp'/'gpu' are currently supported).", + default="htp", + type=str, + ) parser.add_argument( "--pre_gen_pte", help="Run 
the pre-generated pte in the given directory.", type=str, ) - parser.add_argument( "--llama_artifacts", help="A folder that contains: weight, tokenizer, and params.", @@ -5793,6 +5806,7 @@ def setup_environment(): TestQNN.pre_gen_pte = args.pre_gen_pte TestQNN.llama_artifacts = args.llama_artifacts TestQNN.op_package_dir = args.op_package_dir + TestQNN.backend = args.backend return sys.argv[:1] + ns_args diff --git a/backends/qualcomm/tests/utils.py b/backends/qualcomm/tests/utils.py index 2e923b92250..05b0318f8c1 100644 --- a/backends/qualcomm/tests/utils.py +++ b/backends/qualcomm/tests/utils.py @@ -18,7 +18,10 @@ from executorch.backends.qualcomm.builders.node_visitor import dq_ops from executorch.backends.qualcomm.qnn_preprocess import QnnBackend from executorch.backends.qualcomm.quantizer.quantizer import ModuleQConfig, QuantDtype -from executorch.backends.qualcomm.serialization.qc_schema import QcomChipset +from executorch.backends.qualcomm.serialization.qc_schema import ( + QcomChipset, + QnnExecuTorchBackendType, +) from executorch.backends.qualcomm.utils.constants import ( QCOM_DTYPE, QCOM_PASS_ACTIVATE_KEY, @@ -178,6 +181,7 @@ class TestQNN(unittest.TestCase): compiler_specs: List[CompileSpec] = None chipset_table = get_soc_to_chipset_map() error_only = False + oss_repo: str = "" ip = "localhost" port = 8080 executorch_root: str = "" @@ -185,8 +189,10 @@ class TestQNN(unittest.TestCase): image_dataset: str = "" sentence_dataset: str = "" pretrained_weight: str = "" + model_name: str = "" enable_profile: bool = False op_package_dir: str = "" + backend: str = "" online_prepare: bool = False use_8a8w: str = "8a8w" use_16a16w: str = "16a16w" @@ -240,6 +246,9 @@ def _save_model_and_expected_output( return input_list, ref_outputs, pte_fname + def get_backend_type(self): + return getattr(QnnExecuTorchBackendType, f"k{self.backend.title()}Backend") + def required_envs(self, conditions=None) -> bool: conditions = [] if conditions is None else conditions return 
all( @@ -421,6 +430,7 @@ def validate_intermediate_tensor(): dump_intermediate_outputs=( True if expected_intermediate_events != -1 else False ), + backend=self.get_backend_type(), expected_input_shape=( (tensor.shape for tensor in processed_inputs) if check_io_shape diff --git a/backends/qualcomm/utils/utils.py b/backends/qualcomm/utils/utils.py index 3471b0155bd..bb663077734 100644 --- a/backends/qualcomm/utils/utils.py +++ b/backends/qualcomm/utils/utils.py @@ -33,6 +33,8 @@ QcomChipset, QnnExecuTorchBackendOptions, QnnExecuTorchBackendType, + QnnExecuTorchGpuBackendOptions, + QnnExecuTorchGpuPrecision, QnnExecuTorchHtpBackendOptions, QnnExecuTorchHtpPerformanceMode, QnnExecuTorchHtpPrecision, @@ -925,6 +927,46 @@ def draw_graph(title, path, graph_module: torch.fx.GraphModule): f.write(graph.get_dot_graph().create_svg()) +def generate_gpu_compiler_spec( + precision: QnnExecuTorchGpuPrecision = QnnExecuTorchGpuPrecision.kGpuPrecisionUserProvided, + use_memory_optimizations: bool = True, + use_node_optimizations: bool = True, + use_queue_recording: bool = True, + use_weight_sharing: bool = False, +) -> QnnExecuTorchBackendOptions: + """ + Helper function generating backend options for QNN GPU + + Args: + precision: kGpuPrecisionFp32 - best accuracy at the expense of performance. + kGpuPrecisionFp16 - best performance at the expense of accuracy. + kGpuPrecisionHybrid - good trade-off between performance and accuracy. + kGpuPrecisionUserProvided - backend will not optimize NATIVE tensor data types. + use_memory_optimizations: If true, backend will share NATIVE tensor memory + based upon analysis of the network topology. + use_node_optimizations: If true, backend will fuse compatible operations into + one operation to improve performance. + use_queue_recording: If true, backend will use queue recording to improve performance. + use_weight_sharing: Used with multiple_graphs, where model size will be + reduced when operations have the same weights across multiple graphs. 
+ + Returns: + QnnExecuTorchBackendOptions: backend options for QNN GPU. + """ + # TODO: enable power config mechanism in runtime and make this as an option + gpu_options = QnnExecuTorchGpuBackendOptions() + gpu_options.precision = precision + gpu_options.use_memory_optimizations = use_memory_optimizations + gpu_options.use_node_optimizations = use_node_optimizations + gpu_options.use_queue_recording = use_queue_recording + gpu_options.use_weight_sharing = use_weight_sharing + + return QnnExecuTorchBackendOptions( + backend_type=QnnExecuTorchBackendType.kGpuBackend, + gpu_options=gpu_options, + ) + + def generate_htp_compiler_spec( use_fp16: bool, use_dlbc: bool = False, diff --git a/examples/qualcomm/utils.py b/examples/qualcomm/utils.py index e70510b0b70..b7c35722b8b 100755 --- a/examples/qualcomm/utils.py +++ b/examples/qualcomm/utils.py @@ -28,6 +28,7 @@ ) from executorch.backends.qualcomm.serialization.qc_schema import ( QcomChipset, + QnnExecuTorchBackendType, QnnExecuTorchOpPackageOptions, ) from executorch.backends.qualcomm.utils.utils import ( @@ -78,6 +79,7 @@ def __init__( shared_buffer=False, dump_intermediate_outputs=False, runner="examples/qualcomm/executor_runner/qnn_executor_runner", + backend=QnnExecuTorchBackendType.kHtpBackend, expected_input_shape=None, expected_output_shape=None, ): @@ -97,6 +99,7 @@ def __init__( self.error_only = error_only self.shared_buffer = shared_buffer self.runner = runner + self.backend = backend self.expected_input_shape = expected_input_shape self.expected_output_shape = expected_output_shape self.extra_cmds = "" @@ -117,23 +120,34 @@ def push(self, inputs=None, input_list=None, files=None): self._adb(["shell", f"mkdir -p {self.workspace}"]) # necessary artifacts - artifacts = [ - *self.pte_path, - f"{self.qnn_sdk}/lib/aarch64-android/libQnnHtp.so", - ( - f"{self.qnn_sdk}/lib/hexagon-v{self.htp_arch}/" - f"unsigned/libQnnHtpV{self.htp_arch}Skel.so" - ), - ( - f"{self.qnn_sdk}/lib/aarch64-android/" - 
f"libQnnHtpV{self.htp_arch}Stub.so" - ), - f"{self.qnn_sdk}/lib/aarch64-android/libQnnHtpPrepare.so", - f"{self.qnn_sdk}/lib/aarch64-android/libQnnSystem.so", - f"{self.build_path}/{self.runner}", - f"{self.build_path}/backends/qualcomm/libqnn_executorch_backend.so", - f"{self.qnn_sdk}/lib/aarch64-android/libQnnModelDlc.so", - ] + artifacts = { + QnnExecuTorchBackendType.kHtpBackend: [ + f"{self.qnn_sdk}/lib/aarch64-android/libQnnHtp.so", + ( + f"{self.qnn_sdk}/lib/hexagon-v{self.htp_arch}/" + f"unsigned/libQnnHtpV{self.htp_arch}Skel.so" + ), + ( + f"{self.qnn_sdk}/lib/aarch64-android/" + f"libQnnHtpV{self.htp_arch}Stub.so" + ), + f"{self.qnn_sdk}/lib/aarch64-android/libQnnHtpPrepare.so", + ], + QnnExecuTorchBackendType.kGpuBackend: [ + f"{self.qnn_sdk}/lib/aarch64-android/libQnnGpu.so", + ], + }[self.backend] + + artifacts.extend( + [ + *self.pte_path, + f"{self.qnn_sdk}/lib/aarch64-android/libQnnSystem.so", + f"{self.build_path}/{self.runner}", + f"{self.build_path}/backends/qualcomm/libqnn_executorch_backend.so", + f"{self.qnn_sdk}/lib/aarch64-android/libQnnModelDlc.so", + ] + ) + input_list_file, input_files = generate_inputs( self.working_dir, self.input_list_filename, inputs, input_list )