Skip to content

Qualcomm AI Engine Direct - gpu support part1 #12165

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion backends/qualcomm/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ backends/qualcomm
| | # Meanwhile, this is also the runtime responsible for executing compiled
| | # models on a device.
| └── backends # Backends supported by QNN.
| └── htpbackend
| └── gpu / htp
| ├── aarch64 # Configuration required to run on device. (Device Part).
| └── x86_64 # Configuration required to compile graph on host. (AoT Part).
├── scripts # Misc supporting scripts, not related to core functionality.
Expand Down
69 changes: 44 additions & 25 deletions backends/qualcomm/runtime/backends/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -43,57 +43,72 @@ target_sources(
${CMAKE_CURRENT_LIST_DIR}/QnnProfiler.cpp
)

# qnn_device
set(HOST_ARCHITECTURE
${CMAKE_CURRENT_LIST_DIR}/htpbackend/${CMAKE_SYSTEM_PROCESSOR}
set(HOST_ARCHITECTURE_GPU
${CMAKE_CURRENT_LIST_DIR}/gpu/${CMAKE_SYSTEM_PROCESSOR}
)
set(HOST_ARCHITECTURE_HTP
${CMAKE_CURRENT_LIST_DIR}/htp/${CMAKE_SYSTEM_PROCESSOR}
)
set(HOST_ARCHITECTURE_IR
${CMAKE_CURRENT_LIST_DIR}/ir/${CMAKE_SYSTEM_PROCESSOR}
)

# qnn_device
target_sources(
qnn_device
PUBLIC ${CMAKE_CURRENT_LIST_DIR}/QnnDeviceCommon.h
${CMAKE_CURRENT_LIST_DIR}/htpbackend/HtpDevice.h
${CMAKE_CURRENT_LIST_DIR}/gpu/GpuDevice.h
${CMAKE_CURRENT_LIST_DIR}/htp/HtpDevice.h
PRIVATE ${CMAKE_CURRENT_LIST_DIR}/QnnDeviceCommon.cpp
${CMAKE_CURRENT_LIST_DIR}/htpbackend/HtpDevice.cpp
${CMAKE_CURRENT_LIST_DIR}/htpbackend/HtpDevicePlatformInfoConfig.h
${CMAKE_CURRENT_LIST_DIR}/htpbackend/HtpDeviceCustomConfig.h
${CMAKE_CURRENT_LIST_DIR}/htp/HtpDevice.cpp
${CMAKE_CURRENT_LIST_DIR}/htp/HtpDevicePlatformInfoConfig.h
${CMAKE_CURRENT_LIST_DIR}/htp/HtpDeviceCustomConfig.h
# When offline prepare context cache in x86 host we have to provide
# platform information and SocModel to Qnn
${HOST_ARCHITECTURE}/HtpDevicePlatformInfoConfig.cpp
${HOST_ARCHITECTURE}/HtpDeviceCustomConfig.cpp
${HOST_ARCHITECTURE_HTP}/HtpDevicePlatformInfoConfig.cpp
${HOST_ARCHITECTURE_HTP}/HtpDeviceCustomConfig.cpp
)

# qnn_context
target_sources(
qnn_context
PUBLIC ${CMAKE_CURRENT_LIST_DIR}/QnnContextCommon.h
${CMAKE_CURRENT_LIST_DIR}/htpbackend/HtpContext.h
${CMAKE_CURRENT_LIST_DIR}/irbackend/IrContext.h
${CMAKE_CURRENT_LIST_DIR}/gpu/GpuContext.h
${CMAKE_CURRENT_LIST_DIR}/htp/HtpContext.h
${CMAKE_CURRENT_LIST_DIR}/ir/IrContext.h
PRIVATE ${CMAKE_CURRENT_LIST_DIR}/QnnContextCommon.cpp
${CMAKE_CURRENT_LIST_DIR}/htpbackend/HtpContext.cpp
${CMAKE_CURRENT_LIST_DIR}/htpbackend/HtpContextCustomConfig.h
${HOST_ARCHITECTURE}/HtpContextCustomConfig.cpp
${CMAKE_CURRENT_LIST_DIR}/irbackend/${CMAKE_SYSTEM_PROCESSOR}/IrContext.cpp
${CMAKE_CURRENT_LIST_DIR}/gpu/GpuContext.cpp
${CMAKE_CURRENT_LIST_DIR}/gpu/GpuContextCustomConfig.h
${HOST_ARCHITECTURE_GPU}/GpuContextCustomConfig.cpp
${CMAKE_CURRENT_LIST_DIR}/htp/HtpContext.cpp
${CMAKE_CURRENT_LIST_DIR}/htp/HtpContextCustomConfig.h
${HOST_ARCHITECTURE_HTP}/HtpContextCustomConfig.cpp
${HOST_ARCHITECTURE_IR}/IrContext.cpp
)

# qnn_backend_cache
target_sources(
qnn_backend_cache
PUBLIC ${CMAKE_CURRENT_LIST_DIR}/QnnBackendCache.h
${CMAKE_CURRENT_LIST_DIR}/htpbackend/HtpBackendCache.h
${CMAKE_CURRENT_LIST_DIR}/htp/HtpBackendCache.h
PRIVATE ${CMAKE_CURRENT_LIST_DIR}/QnnBackendCache.cpp
${CMAKE_CURRENT_LIST_DIR}/htpbackend/HtpBackendCache.cpp
${CMAKE_CURRENT_LIST_DIR}/htp/HtpBackendCache.cpp
)

# qnn_graph
target_sources(
qnn_graph
PUBLIC ${CMAKE_CURRENT_LIST_DIR}/QnnGraphCommon.h
${CMAKE_CURRENT_LIST_DIR}/htpbackend/HtpGraph.h
${CMAKE_CURRENT_LIST_DIR}/gpu/GpuGraph.h
${CMAKE_CURRENT_LIST_DIR}/htp/HtpGraph.h
PRIVATE ${CMAKE_CURRENT_LIST_DIR}/QnnGraphCommon.cpp
${CMAKE_CURRENT_LIST_DIR}/htpbackend/HtpGraph.cpp
${CMAKE_CURRENT_LIST_DIR}/htpbackend/HtpGraphCustomConfig.h
${CMAKE_CURRENT_LIST_DIR}/htpbackend/HtpGraphCustomConfig.cpp
${HOST_ARCHITECTURE}/HtpGraphCustomConfig.cpp
${CMAKE_CURRENT_LIST_DIR}/gpu/GpuGraph.cpp
${CMAKE_CURRENT_LIST_DIR}/gpu/GpuGraphCustomConfig.h
${CMAKE_CURRENT_LIST_DIR}/gpu/GpuGraphCustomConfig.cpp
${CMAKE_CURRENT_LIST_DIR}/htp/HtpGraph.cpp
${CMAKE_CURRENT_LIST_DIR}/htp/HtpGraphCustomConfig.h
${CMAKE_CURRENT_LIST_DIR}/htp/HtpGraphCustomConfig.cpp
${HOST_ARCHITECTURE_HTP}/HtpGraphCustomConfig.cpp
)

# qnn_op_package_manager
Expand All @@ -107,9 +122,13 @@ target_sources(
target_sources(
qnn_backend
PUBLIC ${CMAKE_CURRENT_LIST_DIR}/QnnBackendCommon.h
${CMAKE_CURRENT_LIST_DIR}/htpbackend/HtpBackend.h
${CMAKE_CURRENT_LIST_DIR}/irbackend/IrBackend.h
${CMAKE_CURRENT_LIST_DIR}/gpu/GpuBackend.h
${CMAKE_CURRENT_LIST_DIR}/htp/HtpBackend.h
${CMAKE_CURRENT_LIST_DIR}/ir/IrBackend.h
PRIVATE ${CMAKE_CURRENT_LIST_DIR}/QnnBackendCommon.cpp
${CMAKE_CURRENT_LIST_DIR}/gpu/GpuBackend.cpp
${CMAKE_CURRENT_LIST_DIR}/gpu/GpuBackendCustomConfig.h
${CMAKE_CURRENT_LIST_DIR}/gpu/GpuBackendCustomConfig.cpp
)

# qnn_mem_manager
Expand Down Expand Up @@ -137,5 +156,5 @@ target_sources(
target_sources(
qnn_dlc_manager
PUBLIC ${CMAKE_CURRENT_LIST_DIR}/QnnDlcManager.h
PRIVATE ${CMAKE_CURRENT_LIST_DIR}/irbackend/${CMAKE_SYSTEM_PROCESSOR}/QnnDlcManager.cpp
PRIVATE ${HOST_ARCHITECTURE_IR}/QnnDlcManager.cpp
)
64 changes: 60 additions & 4 deletions backends/qualcomm/runtime/backends/QnnBackendFactory.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -51,8 +51,16 @@ std::unique_ptr<BackendConfigParameters> QnnBackendFactory::Create(
EnumNameQnnExecuTorchHtpPdSession(htp_options->pd_session()));
QNN_EXECUTORCH_LOG_INFO(
"use_conv_hmx in htp_options: %d", htp_options->use_conv_hmx());
QNN_EXECUTORCH_LOG_INFO(
"use_dlbc in htp_options: %d", htp_options->use_dlbc());
QNN_EXECUTORCH_LOG_INFO(
"use_fold_relu in htp_options: %d", htp_options->use_fold_relu());
QNN_EXECUTORCH_LOG_INFO(
"use_multi_contexts in htp_options: %d",
htp_options->use_multi_contexts());
QNN_EXECUTORCH_LOG_INFO(
"use_weight_sharing in htp_options: %d",
htp_options->use_weight_sharing());
}
backend_params->qnn_backend_ptr_ =
std::make_unique<HtpBackend>(implementation, logger);
Expand All @@ -78,19 +86,67 @@ std::unique_ptr<BackendConfigParameters> QnnBackendFactory::Create(
options->profile_level(),
options->soc_info(),
htp_options);
backend_params->qnn_mem_manager_ptr_ = std::make_unique<QnnMemManager>(
} break;
case QnnExecuTorchBackendType::kGpuBackend: {
auto gpu_options = options->backend_options()->gpu_options();
if (options->log_level() >= QnnExecuTorchLogLevel::kLogLevelInfo) {
QNN_EXECUTORCH_LOG_INFO(
"performance_mode in gpu_options: %s",
EnumNameQnnExecuTorchGpuPerformanceMode(
gpu_options->performance_mode()));
QNN_EXECUTORCH_LOG_INFO(
"precision in gpu_options: %s",
EnumNameQnnExecuTorchGpuPrecision(gpu_options->precision()));
QNN_EXECUTORCH_LOG_INFO(
"use_memory_optimizations in gpu_options: %d",
gpu_options->use_memory_optimizations());
QNN_EXECUTORCH_LOG_INFO(
"use_node_optimizations in gpu_options: %d",
gpu_options->use_node_optimizations());
QNN_EXECUTORCH_LOG_INFO(
"use_queue_recording in gpu_options: %d",
gpu_options->use_queue_recording());
QNN_EXECUTORCH_LOG_INFO(
"use_weight_sharing in gpu_options: %d",
gpu_options->use_weight_sharing());
}
backend_params->qnn_backend_ptr_ =
std::make_unique<GpuBackend>(implementation, logger, gpu_options);

backend_params->qnn_device_ptr_ =
std::make_unique<GpuDevice>(implementation, logger);

backend_params->qnn_backend_cache_ptr_ =
std::make_unique<QnnBackendCache>(qnn_context_blob);

backend_params->qnn_context_ptr_ = std::make_unique<GpuContext>(
implementation,
backend_params->qnn_backend_ptr_.get(),
backend_params->qnn_device_ptr_.get(),
backend_params->qnn_backend_cache_ptr_.get(),
qnn_dlc_manager,
gpu_options);

backend_params->qnn_graph_ptr_ = std::make_unique<GpuGraph>(
implementation,
backend_params->qnn_backend_ptr_.get(),
backend_params->qnn_context_ptr_.get(),
options->log_level());
backend_params->backend_init_state_ = BackendInitializeState::INITIALIZED;
options->profile_level(),
gpu_options);
} break;
case QnnExecuTorchBackendType::kGpuBackend:
case QnnExecuTorchBackendType::kDspBackend:
case QnnExecuTorchBackendType::kUndefinedBackend:
default:
return nullptr;
}

backend_params->qnn_mem_manager_ptr_ = std::make_unique<QnnMemManager>(
implementation,
backend_params->qnn_context_ptr_.get(),
options->log_level());

backend_params->backend_init_state_ = BackendInitializeState::INITIALIZED;

if (backend_params->qnn_backend_ptr_->VerifyQNNSDKVersion() == Error::Ok) {
return backend_params;
}
Expand Down
14 changes: 9 additions & 5 deletions backends/qualcomm/runtime/backends/QnnBackendFactory.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,15 @@
#include <executorch/backends/qualcomm/runtime/backends/QnnImplementation.h>
#include <executorch/backends/qualcomm/runtime/backends/QnnLogger.h>
#include <executorch/backends/qualcomm/runtime/backends/QnnMemManager.h>
#include <executorch/backends/qualcomm/runtime/backends/htpbackend/HtpBackend.h>
#include <executorch/backends/qualcomm/runtime/backends/htpbackend/HtpBackendCache.h>
#include <executorch/backends/qualcomm/runtime/backends/htpbackend/HtpContext.h>
#include <executorch/backends/qualcomm/runtime/backends/htpbackend/HtpDevice.h>
#include <executorch/backends/qualcomm/runtime/backends/htpbackend/HtpGraph.h>
#include <executorch/backends/qualcomm/runtime/backends/gpu/GpuBackend.h>
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm slightly worried about the runtime size increase, that usually is a requirement for production. Do we know how much size increase with this PR? If I have a model runs on HTP only, can the runtime include HTP only?

#include <executorch/backends/qualcomm/runtime/backends/gpu/GpuContext.h>
#include <executorch/backends/qualcomm/runtime/backends/gpu/GpuDevice.h>
#include <executorch/backends/qualcomm/runtime/backends/gpu/GpuGraph.h>
#include <executorch/backends/qualcomm/runtime/backends/htp/HtpBackend.h>
#include <executorch/backends/qualcomm/runtime/backends/htp/HtpBackendCache.h>
#include <executorch/backends/qualcomm/runtime/backends/htp/HtpContext.h>
#include <executorch/backends/qualcomm/runtime/backends/htp/HtpDevice.h>
#include <executorch/backends/qualcomm/runtime/backends/htp/HtpGraph.h>

#include <memory>
namespace executorch {
Expand Down
2 changes: 1 addition & 1 deletion backends/qualcomm/runtime/backends/QnnDeviceCommon.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ class QnnDevice {
return handle_;
}

executorch::runtime::Error Configure();
virtual executorch::runtime::Error Configure();

protected:
virtual executorch::runtime::Error MakeConfig(
Expand Down
2 changes: 1 addition & 1 deletion backends/qualcomm/runtime/backends/QnnDlcManager.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@

#include <QnnTypes.h>
#include <executorch/backends/qualcomm/runtime/backends/QnnBackendFactory.h>
#include <executorch/backends/qualcomm/runtime/backends/irbackend/IrContext.h>
#include <executorch/backends/qualcomm/runtime/backends/ir/IrContext.h>

#include "QnnWrapperUtils.hpp"
namespace executorch {
Expand Down
62 changes: 62 additions & 0 deletions backends/qualcomm/runtime/backends/gpu/GpuBackend.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
/*
* Copyright (c) Qualcomm Innovation Center, Inc.
* All rights reserved.
*
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree.
*/

#include <executorch/backends/qualcomm/runtime/backends/gpu/GpuBackend.h>

#include "GPU/QnnGpuCommon.h"

namespace executorch {
namespace backends {
namespace qnn {

using executorch::runtime::Error;

// Constructs the GPU backend wrapper.
//
// @param implementation Loaded QNN library entry points (owned by caller).
// @param logger         QNN logger handle (owned by caller, may outlive us).
// @param gpu_options    Flatbuffer-backed GPU options used to build backend
//                       custom configs; assumed to outlive this object —
//                       TODO confirm against factory lifetime.
GpuBackend::GpuBackend(
    const QnnImplementation& implementation,
    QnnLogger* logger,
    const QnnExecuTorchGpuBackendOptions* gpu_options)
    : QnnBackend(implementation, logger),
      // Initialize in the member-initializer list rather than assigning in
      // the body: avoids a default-construct-then-assign of the unique_ptr.
      gpu_backend_custom_config_(
          std::make_unique<GpuBackendCustomConfig>(gpu_options)) {}

// Reports the QNN GPU API version this backend was compiled against, so the
// runtime can verify it matches the loaded QNN SDK.
Qnn_Version_t GpuBackend::GetExpectedBackendVersion() const {
  Qnn_Version_t expected_version;
  expected_version.major = QNN_GPU_API_VERSION_MAJOR;
  expected_version.minor = QNN_GPU_API_VERSION_MINOR;
  expected_version.patch = QNN_GPU_API_VERSION_PATCH;
  return expected_version;
}

// A profiling event is a parent of per-node timing only when it is the
// graph-execute event; all other event types carry no node children here.
bool GpuBackend::IsProfileEventTypeParentOfNodeTime(
    QnnProfile_EventType_t event_type) {
  return event_type == QNN_PROFILE_EVENTTYPE_EXECUTE;
}

// Builds the null-terminated QnnBackend_Config_t* array handed to the QNN
// API from the GPU custom configs.
//
// @param config Out-parameter; filled with pointers into backend_config_
//               (which this object owns) followed by a nullptr terminator.
// @return Error::Ok always; kept fallible to match the QnnBackend interface.
Error GpuBackend::MakeConfig(std::vector<const QnnBackend_Config_t*>& config) {
  const std::vector<QnnBackend_CustomConfig_t>& backend_custom_config =
      gpu_backend_custom_config_->CreateBackendCustomConfig();

  // Use size_t: assigning size() to uint32_t silently narrows on 64-bit.
  const std::size_t num_custom_configs = backend_custom_config.size();
  // backend_config_ must stay alive as long as `config` is used, since
  // `config` stores pointers into it.
  backend_config_.resize(num_custom_configs);
  // +1 for the null terminator expected by the QNN API.
  config.reserve(num_custom_configs + 1);

  for (std::size_t i = 0; i < num_custom_configs; ++i) {
    backend_config_[i].option = QNN_BACKEND_CONFIG_OPTION_CUSTOM;
    backend_config_[i].customConfig = backend_custom_config[i];
    config.push_back(&backend_config_[i]);
  }

  config.push_back(nullptr);
  return Error::Ok;
}

} // namespace qnn
} // namespace backends
} // namespace executorch
40 changes: 40 additions & 0 deletions backends/qualcomm/runtime/backends/gpu/GpuBackend.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
/*
* Copyright (c) Qualcomm Innovation Center, Inc.
* All rights reserved.
*
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree.
*/

#pragma once

#include <executorch/backends/qualcomm/runtime/backends/gpu/GpuBackendCustomConfig.h>

namespace executorch {
namespace backends {
namespace qnn {

// QNN GPU backend implementation of the QnnBackend interface. Translates the
// GPU flatbuffer options into QNN backend custom configs and reports the
// expected QNN GPU API version.
class GpuBackend : public QnnBackend {
 public:
  // `gpu_options` drives GpuBackendCustomConfig; assumed to outlive this
  // object — TODO confirm against the factory's option lifetime.
  GpuBackend(
      const QnnImplementation& implementation,
      QnnLogger* logger,
      const QnnExecuTorchGpuBackendOptions* gpu_options);

  // Version of the QNN GPU API this code was built against, used for SDK
  // compatibility checks.
  Qnn_Version_t GetExpectedBackendVersion() const override;

  // True only for the graph-execute profiling event (the sole parent of
  // per-node timing events on GPU).
  bool IsProfileEventTypeParentOfNodeTime(
      QnnProfile_EventType_t event_type) override;

 protected:
  // Fills `config` with pointers into backend_config_, null-terminated, as
  // required by the QNN backend-create API.
  executorch::runtime::Error MakeConfig(
      std::vector<const QnnBackend_Config_t*>& config) override;

 private:
  // Owns the QnnBackend_Config_t storage that MakeConfig hands out pointers
  // to; must outlive any use of those pointers.
  std::vector<QnnBackend_Config_t> backend_config_;
  std::unique_ptr<GpuBackendCustomConfig> gpu_backend_custom_config_;
};

} // namespace qnn
} // namespace backends
} // namespace executorch
Loading
Loading