Skip to content

Commit

Permalink
Allow TLAS viewing in TraceRays cmd
Browse files Browse the repository at this point in the history
  • Loading branch information
nyorain committed Jun 16, 2024
1 parent 47cbb87 commit 47ff94e
Show file tree
Hide file tree
Showing 12 changed files with 165 additions and 80 deletions.
2 changes: 1 addition & 1 deletion .launch
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
/home/jan/code/iro/build/npt
--vil /home/jan/art/assets/sponza-new/main.gltf --no-validation
--vil /home/jan/art/assets/sponza/Sponza.gltf --no-validation
3 changes: 3 additions & 0 deletions docs/own/todo.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,9 @@ urgent, bugs:
- [ ] maybe show full image size on hover?
- [ ] also fix mip/layer selector that sometimes automatically resets itself
(seen with slice 3D selector e.g. npt surfel lookup tex)
- [ ] finish submissions in order, see CommandHookSubmission::finish
comment for accelStruct captures
- [ ] immediately free HookedRecords that are not to be re-used in finish.

- [ ] test more on laptop, intel gpu
- [ ] seems like we do some nasty stuff in the histogram shaders,
Expand Down
8 changes: 3 additions & 5 deletions src/accelStruct.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,10 @@ namespace vil {

// util
AccelStruct& accelStructAtLocked(Device& dev, VkDeviceAddress address) {
assertOwnedOrShared(dev.mutex);
auto it = dev.accelStructAddresses.find(address);
dlg_assertm(it != dev.accelStructAddresses.end(),
auto* accelStruct = tryAccelStructAtLocked(dev, address);
dlg_assertm(accelStruct,
"Couldn't find VkAccelerationStructure at address {}", address);
dlg_assert(it->second);
return *it->second;
return *accelStruct;
}

AccelStruct& accelStructAt(Device& dev, VkDeviceAddress address) {
Expand Down
26 changes: 18 additions & 8 deletions src/commandHook/hook.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -774,15 +774,16 @@ VkCommandBuffer CommandHook::doHook(CommandRecord& record,
auto foundCompletedIt = completed_.end();
auto completedCount = 0u;
auto hookNeededForCmd = bool(!dstCommand.empty());

if(allowReuse && (!hookAccelStructBuilds || !record.buildsAccelStructs)) {
// PERF: we might be able to make accel-struct-state re-using work.
// When no-one else references it (besides record and accelStruct itself).
// TODO: when re-use is not allowed, clear out finished records
// immediately somehow! Otherwise we might just grow and grow
// when a single record is resubmitted.
bool allowRecordReuse = this->allowReuse &&
(!hookAccelStructBuilds || !record.buildsAccelStructs);

if(allowRecordReuse) {
for(auto& hookRecord : record.hookRecords) {
// we can't use this record since it didn't hook a command and
// was just use for accelStruct data copying
if(hookNeededForCmd == hookRecord->hcommand.empty()) {
continue;
}

// the record is currently pending on the device
if(hookRecord->writer) {
continue;
Expand All @@ -793,6 +794,15 @@ VkCommandBuffer CommandHook::doHook(CommandRecord& record,
continue;
}

// We want to capture an acceleration structure.
// CommandHookSubmission::{activate, finish} might write into
// the dst state, assumes it wasn't used before.
// PERF: we could make this work in theory. Might need
// more complicated state re-using logic
if(!hookRecord->accelStructCaptures.empty()) {
continue;
}

if(hookRecord->state->refCount > 1u) {
// We can't reuse this hook record, its state is still needded
// somewhere, e.g. referenced in gui or our completed list.
Expand Down
30 changes: 18 additions & 12 deletions src/commandHook/record.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -605,7 +605,7 @@ void CommandHookRecord::hookRecord(Command* cmd, RecordInfo& info) {
}

void CommandHookRecord::copyDs(Command& bcmd, RecordInfo& info,
const DescriptorCopyOp& copyDesc,
const DescriptorCopyOp& copyDesc, unsigned dstID,
CommandHookState::CopiedDescriptor& dst,
IntrusivePtr<DescriptorSetCow>& dstCow) {
auto& dev = *record->dev;
Expand Down Expand Up @@ -752,30 +752,36 @@ void CommandHookRecord::copyDs(Command& bcmd, RecordInfo& info,
} else if(cat == DescriptorCategory::accelStruct) {
auto& elem = accelStructs(ds, bindingID)[elemID];

using CapturedAccelStruct = CommandHookState::CapturedAccelerationStruct;
using CapturedAccelStruct = CommandHookState::CapturedAccelStruct;
auto& dstCapture = dst.data.emplace<CapturedAccelStruct>();

// We only have to store the state now if the tlas has been
// previously built in this record. Otherwise we cannot know
// its state at execution time right now. It will get captured
// in CommandHookSubmission::activate
// in CommandHookSubmission::finish
auto tlasBuildPtr = lastAccelStructBuild(elem.accelStruct->deviceAddress);
if(tlasBuildPtr) {
dstCapture.tlas = std::move(tlasBuildPtr);
auto& inis = std::get<AccelInstances>(dstCapture.tlas->data);

// NOTE: only capture the blases built by this record.
// The rest will be captured in CommandHookSubmission::activate.
// The rest will be captured in CommandHookSubmission::finish.
// For BLASes not built in this record, we cannot tell their
// pending state at execution time for sure.
for(auto ini : inis.instances) {
auto blasAddress = ini.accelerationStructureReference;
auto builtStatePtr = lastAccelStructBuild(blasAddress);
if(builtStatePtr) {
dstCapture.blases.insert({blasAddress, std::move(builtStatePtr)});
// We just write everything we build into the map, we cannot
// know which blases are referenced by the tlas at this point on cpu.
// Yes, this might include blas builds *after* the TLAS was build.
// But when a blas was built that is inside the tlas,
// the tlas would be invalid at this point (application error).
for(auto& cmd : accelStructBuilds) {
for(auto& build : cmd.builds) {
dlg_assert_or(build.dst && build.state, continue);
auto blasAddress = build.dst->deviceAddress;
dstCapture.blases.insert({blasAddress, build.state});
}
}
}

accelStructCaptures.push_back({dstID, elem.accelStruct});
} else if(cat == DescriptorCategory::bufferView) {
// TODO: copy as buffer or image? maybe best to copy
// as buffer but then create bufferView on our own?
Expand Down Expand Up @@ -1127,7 +1133,7 @@ void CommandHookRecord::beforeDstOutsideRp(Command& bcmd, RecordInfo& info) {
for(auto [i, dc] : enumerate(info.ops.descriptorCopies)) {
if(dc.before) {
IntrusivePtr<DescriptorSetCow> tmpCow; // TODO: due to dsState removal
copyDs(bcmd, info, dc, state->copiedDescriptors[i], tmpCow);
copyDs(bcmd, info, dc, i, state->copiedDescriptors[i], tmpCow);
}
}

Expand Down Expand Up @@ -1222,7 +1228,7 @@ void CommandHookRecord::afterDstOutsideRp(Command& bcmd, RecordInfo& info) {
for(auto [i, dc] : enumerate(info.ops.descriptorCopies)) {
if(!dc.before) {
IntrusivePtr<DescriptorSetCow> tmpCow; // TODO: due to dsState removal
copyDs(bcmd, info, dc, state->copiedDescriptors[i], tmpCow);
copyDs(bcmd, info, dc, i, state->copiedDescriptors[i], tmpCow);
}
}

Expand Down
2 changes: 1 addition & 1 deletion src/commandHook/record.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -153,7 +153,7 @@ struct CommandHookRecord {
// = Copying =
void copyTransfer(Command& bcmd, RecordInfo&, bool isBefore);
void copyDs(Command& bcmd, RecordInfo&,
const DescriptorCopyOp&,
const DescriptorCopyOp&, unsigned copyDstID,
CommandHookState::CopiedDescriptor& dst,
IntrusivePtr<DescriptorSetCow>& dstCow);
void copyAttachment(const Command& bcmd, const RecordInfo&,
Expand Down
4 changes: 2 additions & 2 deletions src/commandHook/state.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ struct AttachmentCopyOp {

// Collection of data we got out of a submission/command.
struct CommandHookState {
struct CapturedAccelerationStruct {
struct CapturedAccelStruct {
IntrusivePtr<AccelStructState> tlas;
std::unordered_map<u64, IntrusivePtr<AccelStructState>> blases;
};
Expand All @@ -69,7 +69,7 @@ struct CommandHookState {
CopiedImage,
OwnBuffer,
CopiedImageToBuffer,
CapturedAccelerationStruct> data;
CapturedAccelStruct> data;
};

struct CopiedAttachment {
Expand Down
117 changes: 69 additions & 48 deletions src/commandHook/submission.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,40 +35,6 @@ CommandHookSubmission::~CommandHookSubmission() {
}

void CommandHookSubmission::activate() {
auto& dev = *record->hook->dev_;

// NOTE: the order (first capture/first update pending) should not
// matter here. All acceleration structures that are build by this
// command buffer should have already been entered into all captures.

// capture pending accelStruct states
for(auto& capture : record->accelStructCaptures) {
dlg_assert(capture.id < record->state->copiedDescriptors.size());
dlg_assert(capture.accelStruct);
dlg_assert(capture.accelStruct->effectiveType ==
VK_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL_KHR);

using CapturedAccelStruct = CommandHookState::CapturedAccelerationStruct;

auto& dstDescriptor = record->state->copiedDescriptors[capture.id];
auto& dstCapture = std::get<CapturedAccelStruct>(dstDescriptor.data);
if (!dstCapture.tlas) { // already set when built during commandBuffer
dstCapture.tlas = capture.accelStruct->pendingState;
}

auto& tlasState = *dstCapture.tlas;
auto& inis = std::get<AccelInstances>(tlasState.data);

for(auto ini : inis.instances) {
auto address = ini.accelerationStructureReference;
auto& accelStruct = accelStructAtLocked(dev, address);
// NOTE: we use insert here by design. When the blas was
// built by the given commandBuffer, it was already
// inserted during record-hook-recording.
dstCapture.blases.insert({address, accelStruct.pendingState});
}
}

// set AccelStruct::pendingState for all build accelStructs.
for(auto& buildCmd : record->accelStructBuilds) {
for(auto& build : buildCmd.builds) {
Expand All @@ -82,24 +48,12 @@ void CommandHookSubmission::finish(Submission& subm) {
ZoneScoped;
dlg_assert(record->writer == &subm);

// Notify all accel struct builds that they have finished.
// We are guaranteed by the standard that all accelStructs build
// by the submission are still valid at this point.
for(auto& buildCmd : record->accelStructBuilds) {
for(auto& build : buildCmd.builds) {
dlg_assert(build.state);
build.state->built = true;

if(build.dst->pendingState == build.state) {
build.dst->lastValid = build.dst->pendingState;
}
}
}

// In this case the hook was invalidated, no longer interested in results.
// Since we are the only submission left to the record, it can be
// destroyed.
if(!record->hook) {
finishAccelStructBuilds();

record->writer = nullptr;
dlg_assert(!contains(record->record->hookRecords, record));
delete record;
Expand All @@ -112,6 +66,7 @@ void CommandHookSubmission::finish(Submission& subm) {
// when the record has no state, we don't have to transmit anything
if(!record->state) {
dlg_assert(record->hcommand.empty());
finishAccelStructBuilds();
return;
}

Expand Down Expand Up @@ -169,6 +124,53 @@ void CommandHookSubmission::finish(Submission& subm) {
}
}

// capture pending accelStruct states
// TODO: logically, we should do this in activate(). Here, the blas
// might already have gotten new states. But in activate() we might not
// know the instances on CPU side, as the TLAS might still be building.
// On the other hand, doing this here should be *relatively* safe as
// the tlas is invalidated when a blas is rebuilt.
// We should guarantee that submissions are always finished in order
// (submission order?) for stronger guarantees here.
// Alternatively, we could capture a map of ALL blases to their state
// in activate (meh).
auto& dev = *record->hook->dev_;
for(auto& capture : record->accelStructCaptures) {
dlg_assert(capture.id < record->state->copiedDescriptors.size());
dlg_assert(capture.accelStruct);
dlg_assert(capture.accelStruct->effectiveType ==
VK_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL_KHR);

using CapturedAccelStruct = CommandHookState::CapturedAccelStruct;

auto& dstDescriptor = record->state->copiedDescriptors[capture.id];
auto& dstCapture = std::get<CapturedAccelStruct>(dstDescriptor.data);
if(!dstCapture.tlas) { // already set when built during commandBuffer
dstCapture.tlas = capture.accelStruct->pendingState;
}

auto& tlasState = *dstCapture.tlas;
// TODO: this can probably fail due to missing finish ordering.
dlg_assert(tlasState.built);

auto& inis = std::get<AccelInstances>(tlasState.data);
for(auto ini : inis.instances) {
auto address = ini.accelerationStructureReference;
if(address) {
auto* accelStruct = tryAccelStructAtLocked(dev, address);
dlg_assertm(accelStruct, "Invalid BLAS in TLAS");
if(accelStruct) {
// NOTE: we use insert here by design. When the blas was
// built by the given commandBuffer, it was already
// inserted during record-hook-recording.
dstCapture.blases.insert({address, accelStruct->lastValid});
}
}
}
}

finishAccelStructBuilds();

CompletedHook* dstCompleted {};
if(record->localCapture) {
if(record->localCapture->flags & LocalCaptureBits::once) {
Expand Down Expand Up @@ -240,5 +242,24 @@ void CommandHookSubmission::transmitTiming() {
record->state->neededTime = diff;
}

void CommandHookSubmission::finishAccelStructBuilds() {
// Notify all accel struct builds that they have finished.
// We are guaranteed by the standard that all accelStructs build
// by the submission are still valid at this point.
for(auto& buildCmd : record->accelStructBuilds) {
for(auto& build : buildCmd.builds) {
dlg_assert(build.state);
build.state->built = true;

// TODO: needed?
build.state->buffer.invalidateMap();

if(build.dst->pendingState == build.state) {
build.dst->lastValid = build.dst->pendingState;
}
}
}
}

} // namespace vil

2 changes: 2 additions & 0 deletions src/commandHook/submission.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,8 @@ struct CommandHookSubmission {
void finish(Submission&);
void transmitTiming();
void transmitIndirect();

void finishAccelStructBuilds();
};

} // namespace vil
41 changes: 39 additions & 2 deletions src/gui/command.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1027,7 +1027,45 @@ void CommandViewer::displayDs(Draw& draw) {
} else if(dsCat == DescriptorCategory::accelStruct) {
auto& elem = accelStructs(dsState, bindingID)[elemID];
refButtonExpect(gui, elem.accelStruct);
// TODO: show data of acceleration structure

// content
if(!hookState) {
ImGui::Text("Waiting for a submission...");
return;
}

if(!copiedData) {
dlg_error("couldn't find copied descriptor data");
ImGui::Text("Error copying descriptor. See log output");
return;
}

using CapturedAccelStruct = CommandHookState::CapturedAccelStruct;
auto* capture = std::get_if<CapturedAccelStruct>(&copiedData->data);
if(!capture || !capture->tlas) {
imGuiText("Error capturing TLAS. See log output");
return;
}

dlg_assert(capture->tlas->built);
auto resolveBlas = [&](u64 address) -> AccelStructStatePtr {
auto it = capture->blases.find(address);
if(it == capture->blases.end()) {
dlg_error("Invalid blas address {}", address);
return {};
}
return it->second;
};

auto& instances = std::get<AccelInstances>(capture->tlas->data);

// TODO: also display other instance information
dlg_trace("{} inis", instances.instances.size());
dlg_trace(" [0]: {}", instances.instances[0].accelerationStructureReference);
dlg_trace(" [1]: {}", instances.instances[1].accelerationStructureReference);
dlg_trace(" [2]: {}", instances.instances[2].accelerationStructureReference);

vertexViewer_.displayInstances(draw, instances, gui_->dt(), resolveBlas);
} else if(dsCat == DescriptorCategory::inlineUniformBlock) {
auto blockData = inlineUniformBlock(dsState, bindingID);
imGuiText("Inline Uniform Block, Size {}", blockData.size());
Expand Down Expand Up @@ -1323,7 +1361,6 @@ void CommandViewer::displayVertexViewer(Draw& draw) {
viewData_.mesh.output = true;
updateHook();
} else {
vertexViewer_.updateInput(gui_->dt());
vertexViewer_.displayOutput(draw, *drawCmd, *hookState, gui_->dt());
}

Expand Down
1 change: 1 addition & 0 deletions src/gui/resources.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1431,6 +1431,7 @@ void ResourceGui::drawDesc(Draw& draw, AccelStruct& accelStruct) {
auto blasResolver = [&](u64 address) -> std::pair<AccelStruct*, AccelStructStatePtr> {
assertOwnedOrShared(dev.mutex);
auto* blas = tryAccelStructAtLocked(dev, address);
dlg_assert(blas);
if(!blas) {
return {nullptr, nullptr};
}
Expand Down
Loading

0 comments on commit 47ff94e

Please sign in to comment.