Loading...
Searching...
No Matches
V210Compute.hpp
1#pragma once
2#include <Gfx/Graph/Utils.hpp>
4#include <Gfx/Graph/encoders/ComputeEncoder.hpp>
5#include <Gfx/Graph/encoders/GPUVideoEncoder.hpp>
6
7#include <private/qrhi_p.h>
8
9namespace score::gfx
10{
11
31{
// %1 = colorMatrixOut(...) shader defining convert_from_rgb(vec3)
//
// GLSL compute shader packing an RGBA texture into v210 (10-bit 4:2:2).
// One invocation handles one group of 6 horizontally adjacent pixels of one
// row and writes the 4 resulting 32-bit words into the output SSBO.
//
// Bindings:
//   0: source texture (read with texelFetch, so no filtering applies)
//   1: output storage buffer, addressed in 32-bit words
//   2: Params uniform block, std140, exactly 32 bytes
//
// The trailing padding of Params is a vector on purpose: std140 rounds the
// stride of every array element up to 16 bytes, so a two-element uint array
// would grow the block to 64 bytes while the host side only provides 32.
static constexpr const char* compute_shader = R"_(#version 450
layout(local_size_x = 16, local_size_y = 1, local_size_z = 1) in;

layout(binding = 0) uniform sampler2D src_tex;
layout(std430, binding = 1) writeonly buffer V210Buf {
  uint v210[];
};
layout(std140, binding = 2) uniform Params {
  ivec2 src_size;
  ivec2 _pad0;
  uint line_stride_words; // bytes-per-row / 4
  uint groups_per_row;    // == src_width / 6
  uvec2 _pad1;            // pads the block to 32 bytes
};

vec2 flip_y(vec2 tc) {
#if defined(QSHADER_MSL) || defined(QSHADER_HLSL)
  return tc;
#else
  return vec2(tc.x, 1.0 - tc.y);
#endif
}
)_" "%1" R"_(

uvec3 to_yuv10(vec3 rgb) {
  vec3 yuv = clamp(convert_from_rgb(rgb), 0.0, 1.0);
  return uvec3(yuv * 1023.0 + 0.5);
}

void main() {
  uint group_x = gl_GlobalInvocationID.x;
  uint y = gl_GlobalInvocationID.y;
  if (group_x >= groups_per_row || int(y) >= src_size.y)
    return;

  // Y-flip on backends that need it (matches the fragment encoders).
#if defined(QSHADER_MSL) || defined(QSHADER_HLSL)
  int src_y = int(y);
#else
  int src_y = src_size.y - 1 - int(y);
#endif

  uint x0 = group_x * 6u;
  uvec3 a = to_yuv10(texelFetch(src_tex, ivec2(int(x0     ), src_y), 0).rgb);
  uvec3 b = to_yuv10(texelFetch(src_tex, ivec2(int(x0 + 1u), src_y), 0).rgb);
  uvec3 c = to_yuv10(texelFetch(src_tex, ivec2(int(x0 + 2u), src_y), 0).rgb);
  uvec3 d = to_yuv10(texelFetch(src_tex, ivec2(int(x0 + 3u), src_y), 0).rgb);
  uvec3 e = to_yuv10(texelFetch(src_tex, ivec2(int(x0 + 4u), src_y), 0).rgb);
  uvec3 f = to_yuv10(texelFetch(src_tex, ivec2(int(x0 + 5u), src_y), 0).rgb);

  // 4:2:2 chroma: average each horizontal pixel pair.
  uint cb01 = (a.y + b.y) >> 1;
  uint cr01 = (a.z + b.z) >> 1;
  uint cb23 = (c.y + d.y) >> 1;
  uint cr23 = (c.z + d.z) >> 1;
  uint cb45 = (e.y + f.y) >> 1;
  uint cr45 = (e.z + f.z) >> 1;

  // v210 packing: 6 pixels = 4 little-endian 32-bit words.
  uint w0 = cb01 | (a.x << 10) | (cr01 << 20);
  uint w1 = b.x | (cb23 << 10) | (c.x << 20);
  uint w2 = cr23 | (d.x << 10) | (cb45 << 20);
  uint w3 = e.x | (cr45 << 10) | (f.x << 20);

  uint base = y * line_stride_words + group_x * 4u;
  v210[base + 0u] = w0;
  v210[base + 1u] = w1;
  v210[base + 2u] = w2;
  v210[base + 3u] = w3;
}
)_";
104
105 QRhiBuffer* m_paramsUBO{};
106 QRhiSampler* m_sampler{};
107 QRhiShaderResourceBindings* m_srb{};
108 QRhiComputePipeline* m_pipeline{};
109 int m_width{};
110 int m_height{};
111 int m_groupsPerRow{};
112 uint32_t m_lineStrideBytes{};
113
114 // Output buffer is owned by the caller. Pass it in init().
115 bool init(
116 QRhi& rhi, const RenderState& state, QRhiTexture* inputRGBA, int width,
117 int height, QRhiBuffer* outputBuffer,
118 const QString& colorConversion = colorMatrixOut()) override
119 {
120 if(!outputBuffer || width % 6 != 0)
121 return false;
122 if(!rhi.isFeatureSupported(QRhi::Compute))
123 return false;
124
125 m_width = width;
126 m_height = height;
127 m_groupsPerRow = width / 6;
128 m_lineStrideBytes = ((width + 47) / 48) * 128;
129
130 // Params UBO (std140, 32 bytes).
131 m_paramsUBO = rhi.newBuffer(QRhiBuffer::Dynamic, QRhiBuffer::UniformBuffer, 32);
132 m_paramsUBO->setName("V210ComputeEncoder::params");
133 if(!m_paramsUBO->create())
134 return false;
135
136 m_sampler = rhi.newSampler(
137 QRhiSampler::Nearest, QRhiSampler::Nearest, QRhiSampler::None,
138 QRhiSampler::ClampToEdge, QRhiSampler::ClampToEdge);
139 if(!m_sampler->create())
140 return false;
141
142 m_srb = rhi.newShaderResourceBindings();
143 m_srb->setBindings({
144 QRhiShaderResourceBinding::sampledTexture(
145 0, QRhiShaderResourceBinding::ComputeStage, inputRGBA, m_sampler),
146 QRhiShaderResourceBinding::bufferStore(
147 1, QRhiShaderResourceBinding::ComputeStage, outputBuffer),
148 QRhiShaderResourceBinding::uniformBuffer(
149 2, QRhiShaderResourceBinding::ComputeStage, m_paramsUBO),
150 });
151 if(!m_srb->create())
152 return false;
153
154 QShader cs = makeCompute(
155 state, QString::fromLatin1(compute_shader).arg(colorConversion));
156 m_pipeline = rhi.newComputePipeline();
157 m_pipeline->setShaderStage({QRhiShaderStage::Compute, cs});
158 m_pipeline->setShaderResourceBindings(m_srb);
159 if(!m_pipeline->create())
160 return false;
161
162 return true;
163 }
164
165 // Update params UBO + dispatch the compute pass. Caller has wrapped this
166 // inside an offscreen frame; the resource update batch must already be
167 // active (typical pattern: pass the batch in, encoder writes the UBO).
168 void exec(
169 QRhi& rhi, QRhiCommandBuffer& cb, QRhiResourceUpdateBatch* res) override
170 {
171 struct alignas(16) ParamsData
172 {
173 int32_t srcW, srcH;
174 int32_t pad0[2];
175 uint32_t lineStrideWords;
176 uint32_t groupsPerRow;
177 uint32_t pad1[2];
178 } p{
179 m_width, m_height,
180 {0, 0},
181 m_lineStrideBytes / 4,
182 static_cast<uint32_t>(m_groupsPerRow),
183 {0, 0}};
184 res->updateDynamicBuffer(m_paramsUBO, 0, sizeof(p), &p);
185
186 cb.beginComputePass(res);
187 cb.setComputePipeline(m_pipeline);
188 cb.setShaderResources(m_srb);
189 cb.dispatch(
190 (m_groupsPerRow + 15) / 16, // local_size_x = 16
191 m_height,
192 1);
193 cb.endComputePass();
194 }
195
196 void release() override
197 {
198 delete m_pipeline; m_pipeline = nullptr;
199 delete m_srb; m_srb = nullptr;
200 delete m_sampler; m_sampler = nullptr;
201 delete m_paramsUBO; m_paramsUBO = nullptr;
202 }
203};
204
205} // namespace score::gfx
GPU shader generation for RGB->YUV color space conversion (output/encoding).
Graphics rendering pipeline for ossia score.
Definition Filter/PreviewWidget.hpp:12
QShader makeCompute(const RenderState &v, QString compute)
Compile a compute shader.
Definition score-plugin-gfx/Gfx/Graph/Utils.cpp:674
Base interface for compute-shader RGBA -> AJA-format encoders.
Definition ComputeEncoder.hpp:36
Global state associated to a rendering context.
Definition RenderState.hpp:37
Compute-shader RGBA -> v210 encoder targeting an external SSBO.
Definition V210Compute.hpp:31