// Fixed capacity of the packer: the largest number of attribute specs accepted,
// and the length of the per-attribute and per-source-buffer arrays. The compute
// shader hard-codes the same value (attributes[8] and eight SrcBufferN bindings),
// so the two must be changed together.
36 static constexpr int MAX_PACKED_ATTRIBUTES = 8;
41 const halp::dynamic_gpu_geometry&
mesh,
42 std::span<const packed_attribute_spec> specs)
44 if(specs.empty() || specs.size() > MAX_PACKED_ATTRIBUTES)
46 qDebug() <<
"PackedExtractionStrategy: Invalid attribute count:" << specs.size();
50 m_vertexCount =
mesh.vertices;
51 m_hasIndexBuffer =
mesh.index.buffer >= 0;
57 std::fill(std::begin(m_srcBufferMapping), std::end(m_srcBufferMapping), -1);
60 for(
const auto& spec : specs)
62 auto lookup = findAttribute(
mesh, spec.location);
65 qDebug() <<
"PackedExtractionStrategy: Attribute not found:"
66 << magic_enum::enum_name(spec.location);
70 auto& info = m_attributes[m_attributeCount];
71 info.is_active =
true;
72 info.src_buffer_index = lookup->input->buffer;
73 info.src_stride = lookup->binding->stride;
74 info.src_offset = lookup->attribute->byte_offset
75 +
static_cast<int32_t
>(lookup->input->byte_offset);
76 info.element_count = attributeFormatComponents(lookup->attribute->format);
77 info.is_float = isFloatFormat(lookup->attribute->format);
78 info.output_components
79 = (spec.pad_to_vec4 && info.element_count < 4 && info.is_float)
82 info.output_offset = m_outputStride;
84 m_outputStride += info.output_components *
sizeof(float);
87 int mappedIndex = m_srcBufferMapping[info.src_buffer_index];
90 if(m_srcBufferCount >= MAX_PACKED_ATTRIBUTES)
92 qDebug() <<
"PackedExtractionStrategy: Too many source buffers";
95 m_srcBufferMapping[info.src_buffer_index] = m_srcBufferCount;
96 m_srcBuffers[m_srcBufferCount]
97 =
static_cast<QRhiBuffer*
>(
mesh.buffers[info.src_buffer_index].handle);
104 if(m_attributeCount == 0)
106 qDebug() <<
"PackedExtractionStrategy: No valid attributes found";
113 if(
mesh.index.buffer < 0
114 ||
mesh.index.buffer >=
static_cast<int>(
mesh.buffers.size()))
116 qDebug() <<
"PackedExtractionStrategy: Invalid index buffer";
119 m_indexBuffer =
static_cast<QRhiBuffer*
>(
mesh.buffers[
mesh.index.buffer].handle);
120 m_indexOffset =
static_cast<int32_t
>(
mesh.index.byte_offset);
121 m_indexFormat32 = (
mesh.index.format == halp::index_format::uint32);
124 m_outputSize =
static_cast<int64_t
>(m_vertexCount) * m_outputStride;
126 if(m_outputSize == 0)
128 qDebug() <<
"PackedExtractionStrategy: Zero output size";
133 m_outputBuffer = rhi.newBuffer(
134 QRhiBuffer::Static, QRhiBuffer::StorageBuffer | QRhiBuffer::VertexBuffer,
135 static_cast<quint32
>(m_outputSize));
136 m_outputBuffer->setName(
"GeometryPacker::m_outputBuffer");
138 if(!m_outputBuffer || !m_outputBuffer->create())
140 qDebug() <<
"PackedExtractionStrategy: Failed to create output buffer";
144 return createPipeline(renderState, rhi);
// Per-frame refresh routine (its name and return type are on a line that is not
// part of this excerpt; the number that starts each statement is the original
// file line, so gaps in the numbering mark missing lines).
//
// Visible behavior: resizes the output buffer when the vertex count changed,
// then re-resolves the source buffer, stride and offset of every attribute and
// re-reads the index buffer binding from `mesh`. The output layout
// (m_outputStride and the per-attribute output fields) is not recomputed in the
// visible lines.
148 QRhi& rhi,
const halp::dynamic_gpu_geometry&
mesh,
149 std::span<const packed_attribute_spec> specs)
// Output size for the new vertex count, using the stride fixed at init time.
152 const int64_t newSize =
static_cast<int64_t
>(
mesh.vertices) * m_outputStride;
154 if(newSize != m_outputSize ||
mesh.vertices != m_vertexCount)
156 m_vertexCount =
mesh.vertices;
157 m_outputSize = newSize;
// NOTE(review): the result of create() is ignored here, unlike at init time —
// confirm a failed re-creation is handled elsewhere.
161 m_outputBuffer->setSize(
static_cast<quint32
>(m_outputSize));
162 m_outputBuffer->create();
// Rebuild the mesh-buffer -> source-slot mapping from scratch.
167 std::fill(std::begin(m_srcBufferMapping), std::end(m_srcBufferMapping), -1);
168 m_srcBufferCount = 0;
171 for(
const auto& spec : specs)
// Guards against walking past the attributes recorded at init time (attrIdx is
// declared, and the action taken is, on lines not shown).
173 if(attrIdx >= m_attributeCount)
176 auto lookup = findAttribute(
mesh, spec.location);
180 auto& info = m_attributes[attrIdx];
181 info.src_buffer_index = lookup->input->buffer;
182 info.src_stride = lookup->binding->stride;
183 info.src_offset = lookup->attribute->byte_offset
184 +
static_cast<int32_t
>(lookup->input->byte_offset);
// NOTE(review): src_buffer_index indexes m_srcBufferMapping (32 entries) and
// mesh.buffers without a visible range check — confirm it is validated upstream.
186 int mappedIndex = m_srcBufferMapping[info.src_buffer_index];
189 m_srcBufferMapping[info.src_buffer_index] = m_srcBufferCount;
190 m_srcBuffers[m_srcBufferCount]
191 =
static_cast<QRhiBuffer*
>(
mesh.buffers[info.src_buffer_index].handle);
// Re-read the index buffer only when the slot is inside mesh.buffers. The upper
// bound mirrors the check done at init time and prevents an out-of-range read
// if the mesh now reports a slot beyond its buffer list.
199 if(m_hasIndexBuffer &&
mesh.index.buffer >= 0
&& mesh.index.buffer < static_cast<int>(mesh.buffers.size()))
201 m_indexBuffer =
static_cast<QRhiBuffer*
>(
mesh.buffers[
mesh.index.buffer].handle);
202 m_indexOffset =
static_cast<int32_t
>(
mesh.index.byte_offset);
203 m_indexFormat32 = (
mesh.index.format == halp::index_format::uint32);
// Drops the resources held by the packer and resets the cached pointers.
// Only part of the body is visible in this excerpt (gaps in the leading line
// numbers mark missing lines).
210 void release()
noexcept
213 m_pipeline =
nullptr;
// The uniform buffer is deleted here and its pointer cleared.
218 delete m_uniformBuffer;
219 m_uniformBuffer =
nullptr;
// Source and index buffer pointers come from the mesh's buffer handles; in the
// visible lines they are only cleared, never deleted.
225 std::fill(std::begin(m_srcBuffers), std::end(m_srcBuffers),
nullptr);
226 m_indexBuffer =
nullptr;
// Records the compute pass that packs the source attributes into the output
// buffer.
//
// rhi  used to obtain a fresh resource update batch after the pass.
// cb   command buffer the pass is recorded into.
// res  in/out: the batch carrying the uniform upload; it is handed to
//      beginComputePass() and then replaced by a new batch for the caller.
//
// Only part of the body is visible in this excerpt (gaps in the leading line
// numbers mark missing lines, including the declaration of `params`).
229 void runCompute(QRhi& rhi, QRhiCommandBuffer& cb, QRhiResourceUpdateBatch*& res)
// Nothing to do when not dirty, when there are no vertices, or when there is no
// pipeline (the statement taken in that case is on a line not shown).
231 if(!m_dirty || m_vertexCount == 0 || !m_pipeline)
// CPU-side mirror of the shader's AttributeParams struct.
235 struct alignas(16) AttributeParams
237 uint32_t srcBufferIndex;
238 uint32_t srcStrideBytes;
239 uint32_t srcOffsetBytes;
240 uint32_t elementCount;
241 uint32_t outputComponents;
242 uint32_t outputOffsetBytes;
// CPU-side mirror of the shader's Params uniform block.
247 struct alignas(64) Params
249 uint32_t vertexCount;
250 uint32_t attributeCount;
251 uint32_t outputStrideBytes;
252 uint32_t hasIndexBuffer;
253 uint32_t indexOffsetBytes;
// The attribute array is forced to start at byte 64, which the static_assert
// below pins down.
// NOTE(review): this offset has to match where the std140 block places
// `attributes` in the shader — confirm against the full shader source.
256 alignas(64) AttributeParams attributes[MAX_PACKED_ATTRIBUTES];
258 static_assert(offsetof(Params, attributes[0]) == 64);
260 params.vertexCount =
static_cast<uint32_t
>(m_vertexCount);
261 params.attributeCount =
static_cast<uint32_t
>(m_attributeCount);
262 params.outputStrideBytes =
static_cast<uint32_t
>(m_outputStride);
263 params.hasIndexBuffer = m_hasIndexBuffer ? 1u : 0u;
264 params.indexOffsetBytes =
static_cast<uint32_t
>(m_indexOffset);
265 params.index32Bit = m_indexFormat32 ? 1u : 0u;
// Every slot up to the fixed capacity is filled, not just the active ones.
// NOTE(review): m_srcBufferMapping is indexed with info.src_buffer_index for
// slots at or beyond m_attributeCount too — unless guarded on lines not shown,
// confirm unused slots hold a valid index.
267 for(
int i = 0; i < MAX_PACKED_ATTRIBUTES; ++i)
269 const auto& info = m_attributes[i];
270 auto& ap = params.attributes[i];
275 =
static_cast<uint32_t
>(m_srcBufferMapping[info.src_buffer_index]);
276 ap.srcStrideBytes =
static_cast<uint32_t
>(info.src_stride);
277 ap.srcOffsetBytes =
static_cast<uint32_t
>(info.src_offset);
278 ap.elementCount =
static_cast<uint32_t
>(info.element_count);
279 ap.outputComponents =
static_cast<uint32_t
>(info.output_components);
280 ap.outputOffsetBytes =
static_cast<uint32_t
>(info.output_offset);
// Queue the uniform upload. The last argument is the address of `params`; the
// excerpt had it corrupted into a pilcrow ("&para" decoded as an HTML entity).
289 res->updateDynamicBuffer(m_uniformBuffer, 0,
sizeof(params), &params);
291 cb.beginComputePass(res);
292 cb.setComputePipeline(m_pipeline);
293 cb.setShaderResources(m_srb);
// One invocation per output vertex, rounded up to whole workgroups of 256 to
// match local_size_x in the shader.
295 const int workgroups = (m_vertexCount + 255) / 256;
296 cb.dispatch(workgroups, 1, 1);
// The batch passed to beginComputePass() has been handed over; give the caller
// a fresh one through the in/out parameter.
300 res = rhi.nextResourceUpdateBatch();
308 .buffer = m_outputBuffer,
310 .size = m_outputSize,
// Size in bytes of one packed output vertex: the running sum of
// output_components * sizeof(float) over the attributes added to the layout.
314 [[nodiscard]] int32_t outputStride()
const noexcept {
return m_outputStride; }
// Number of attributes currently recorded in the packed layout.
316 [[nodiscard]] int32_t attributeCount()
const noexcept {
return m_attributeCount; }
320 return m_attributes[index];
// Compile-time trait: always true, since this strategy produces its output
// through a compute dispatch (see runCompute).
323 [[nodiscard]]
static constexpr bool needsCompute()
noexcept {
return true; }
328 m_uniformBuffer = rhi.newBuffer(QRhiBuffer::Dynamic, QRhiBuffer::UniformBuffer, 512);
329 m_uniformBuffer->setName(
"GeometryPacker::m_uniformBuffer");
331 if(!m_uniformBuffer || !m_uniformBuffer->create())
333 qDebug() <<
"PackedExtractionStrategy: UBO creation failed";
337 static const QString shaderCode = QStringLiteral(R
"(#version 450
339layout(local_size_x = 256) in;
341struct AttributeParams {
346 uint outputComponents;
347 uint outputOffsetBytes;
352layout(std140, binding = 0) uniform Params {
355 uint outputStrideBytes;
357 uint indexOffsetBytes;
360 AttributeParams attributes[8];
363layout(std430, binding = 1) readonly buffer SrcBuffer0 { uint src0[]; };
364layout(std430, binding = 2) readonly buffer SrcBuffer1 { uint src1[]; };
365layout(std430, binding = 3) readonly buffer SrcBuffer2 { uint src2[]; };
366layout(std430, binding = 4) readonly buffer SrcBuffer3 { uint src3[]; };
367layout(std430, binding = 5) readonly buffer SrcBuffer4 { uint src4[]; };
368layout(std430, binding = 6) readonly buffer SrcBuffer5 { uint src5[]; };
369layout(std430, binding = 7) readonly buffer SrcBuffer6 { uint src6[]; };
370layout(std430, binding = 8) readonly buffer SrcBuffer7 { uint src7[]; };
372layout(std430, binding = 9) readonly buffer IndexBuffer { uint index_data[]; };
374layout(std430, binding = 10) writeonly buffer DstBuffer { uint dst_data[]; };
376uint readSrcData(uint bufferIndex, uint wordIndex)
380 case 0: return src0[wordIndex];
381 case 1: return src1[wordIndex];
382 case 2: return src2[wordIndex];
383 case 3: return src3[wordIndex];
384 case 4: return src4[wordIndex];
385 case 5: return src5[wordIndex];
386 case 6: return src6[wordIndex];
387 case 7: return src7[wordIndex];
392uint readIndex(uint i)
396 uint wordIndex = (indexOffsetBytes / 4) + i;
397 return index_data[wordIndex];
401 uint bytePos = indexOffsetBytes + i * 2;
402 uint wordIndex = bytePos / 4;
403 uint word = index_data[wordIndex];
404 uint shift = (bytePos % 4) * 8;
405 return (word >> shift) & 0xFFFFu;
411 uint outputIdx = gl_GlobalInvocationID.x;
412 if (outputIdx >= vertexCount)
415 // Get source vertex index (may differ from output index if indexed)
416 uint srcVertexIdx = hasIndexBuffer != 0 ? readIndex(outputIdx) : outputIdx;
418 // Output base position (in uints, since outputStrideBytes is in bytes)
419 uint dstBase = (outputIdx * outputStrideBytes) / 4;
421 // Process each attribute
422 for (uint a = 0; a < attributeCount; ++a)
424 AttributeParams attr = attributes[a];
425 if (attr.isActive == 0)
429 uint srcBase = (srcVertexIdx * attr.srcStrideBytes + attr.srcOffsetBytes) / 4;
431 // Destination position for this attribute
432 uint attrDstBase = dstBase + (attr.outputOffsetBytes / 4);
435 for (uint i = 0; i < attr.elementCount; ++i)
437 uint value = readSrcData(attr.srcBufferIndex, srcBase + i);
438 dst_data[attrDstBase + i] = value;
441 // Pad to vec4 if needed
442 if (attr.outputComponents > attr.elementCount)
444 for (uint i = attr.elementCount; i < attr.outputComponents; ++i)
446 // w = 1.0f (0x3f800000), others = 0
447 dst_data[attrDstBase + i] = (i == 3) ? 0x3f800000u : 0u;
455 if(!shader.isValid())
457 qDebug() <<
"PackedExtractionStrategy: Shader compilation failed";
461 m_srb = rhi.newShaderResourceBindings();
466 qDebug() <<
"PackedExtractionStrategy: SRB creation failed";
470 m_pipeline = rhi.newComputePipeline();
471 m_pipeline->setShaderResourceBindings(m_srb);
472 m_pipeline->setShaderStage({QRhiShaderStage::Compute, shader});
474 if(!m_pipeline->create())
476 qDebug() <<
"PackedExtractionStrategy: Pipeline creation failed";
// Rebuilds the shader resource binding list and installs it on m_srb.
// Binding slots follow the shader: 0 = uniform block, 1..8 = source buffers,
// 9 = index buffer, 10 = output buffer. Only part of the body is visible in
// this excerpt (gaps in the leading line numbers mark missing lines).
484 void updateBindings()
// Eleven bindings are produced (1 + 8 + 1 + 1), within the inline capacity of 12.
489 QVarLengthArray<QRhiShaderResourceBinding, 12> bindings;
493 QRhiShaderResourceBinding::uniformBuffer(
494 0, QRhiShaderResourceBinding::ComputeStage, m_uniformBuffer));
// Every shader slot needs some buffer bound, so a stand-in is chosen for the
// unused ones: one of the source buffers (the selection condition is on lines
// not shown), falling back to the output buffer.
497 QRhiBuffer* placeholderBuffer =
nullptr;
498 for(
int i = 0; i < m_srcBufferCount; ++i)
502 placeholderBuffer = m_srcBuffers[i];
506 if(!placeholderBuffer && m_outputBuffer)
508 placeholderBuffer = m_outputBuffer;
// All eight source slots are bound; a slot gets its real buffer only when it is
// in use and non-null (the alternative operand is on a line not shown —
// presumably the placeholder; verify).
511 for(
int i = 0; i < MAX_PACKED_ATTRIBUTES; ++i)
513 QRhiBuffer* buf = (i < m_srcBufferCount && m_srcBuffers[i]) ? m_srcBuffers[i]
516 QRhiShaderResourceBinding::bufferLoad(
517 1 + i, QRhiShaderResourceBinding::ComputeStage, buf));
// Non-indexed meshes still bind something at slot 9: the placeholder.
522 = m_hasIndexBuffer && m_indexBuffer ? m_indexBuffer : placeholderBuffer;
524 QRhiShaderResourceBinding::bufferLoad(
525 9, QRhiShaderResourceBinding::ComputeStage, idxBuf));
// The output buffer is the only store (write) binding.
529 QRhiShaderResourceBinding::bufferStore(
530 10, QRhiShaderResourceBinding::ComputeStage, m_outputBuffer));
532 m_srb->setBindings(bindings.cbegin(), bindings.cend());
// Distinct source buffers referenced by the attributes, in slot order; the
// pointers are taken from the mesh's buffer handles.
536 QRhiBuffer* m_srcBuffers[MAX_PACKED_ATTRIBUTES]{};
// Mesh buffer index -> slot in m_srcBuffers; filled with -1 before each rebuild.
537 int m_srcBufferMapping[32]{};
// Number of slots of m_srcBuffers currently in use.
538 int m_srcBufferCount{};
// Index buffer taken from the mesh's buffer handles (used when m_hasIndexBuffer).
540 QRhiBuffer* m_indexBuffer{};
// Destination of the packed vertices; created through rhi.newBuffer().
541 QRhiBuffer* m_outputBuffer{};
542 QRhiShaderResourceBindings* m_srb{};
543 QRhiComputePipeline* m_pipeline{};
// Dynamic uniform buffer receiving the Params block; deleted in release().
544 QRhiBuffer* m_uniformBuffer{};
// Number of attributes recorded in m_attributes.
547 int32_t m_attributeCount{};
548 int32_t m_vertexCount{};
// Bytes per packed output vertex.
549 int32_t m_outputStride{};
// Byte offset of the first index inside m_indexBuffer.
550 int32_t m_indexOffset{};
// m_vertexCount * m_outputStride, kept in 64 bits.
551 int64_t m_outputSize{};
// True when the mesh reported a non-negative index buffer slot.
553 bool m_hasIndexBuffer{
false};
// True for 32-bit indices; otherwise the shader unpacks 16-bit indices.
554 bool m_indexFormat32{
true};