Optimize Lod and Grad texture sampling
If the LOD is identical across all SIMD lanes, then we can get away with
performing a single texture sample call. The added cost of a Reactor If
is balanced by potentially removing 3 sample calls altogether. Grad
sampling still samples each lane separately.
Bug: b/133868964
Bug: b/163791974
Tests: dEQP-VK.texture.subgroup_lod.texelfetch
Tests: dEQP-VK.glsl.texture_functions.texelfetch.*
Tests: dEQP-VK.glsl.texture_functions.texturelod.*
Tests: dEQP-VK.glsl.texture_functions.texturegrad.*
Change-Id: Ib637653f78d3d5aa149352648c710d5f48526ede
Reviewed-on: https://swiftshader-review.googlesource.com/c/SwiftShader/+/55908
Tested-by: Sean Risser <srisser@google.com>
Commit-Queue: Sean Risser <srisser@google.com>
Reviewed-by: Nicolas Capens <nicolascapens@google.com>
diff --git a/src/Pipeline/SpirvShaderSampling.cpp b/src/Pipeline/SpirvShaderSampling.cpp
index 6aedc33..ef3bb5b 100644
--- a/src/Pipeline/SpirvShaderSampling.cpp
+++ b/src/Pipeline/SpirvShaderSampling.cpp
@@ -173,15 +173,18 @@
// For explicit-lod instructions the LOD can be different per SIMD lane. SamplerCore currently assumes
// a single LOD per four elements, so we sample the image again for each LOD separately.
- if(samplerFunction.method == Lod || samplerFunction.method == Grad) // TODO(b/133868964): Also handle divergent Bias and Fetch with Lod.
+ // TODO(b/133868964): Pass down 4-component lodOrBias, dsx, and dsy to sampleTexture.
+ if(samplerFunction.method == Lod || samplerFunction.method == Grad)
{
+ // Only perform per-lane sampling if LOD diverges or we're doing Grad sampling.
+ Bool perLaneSampling = samplerFunction.method == Grad || lodOrBias.x != lodOrBias.y ||
+ lodOrBias.x != lodOrBias.z || lodOrBias.x != lodOrBias.w;
auto lod = Pointer<Float>(&lodOrBias);
-
- For(Int i = 0, i < SIMD::Width, i++)
+ Int i = 0;
+ Do
{
SIMD::Float dPdx;
SIMD::Float dPdy;
-
dPdx.x = Pointer<Float>(&dsx.x)[i];
dPdx.y = Pointer<Float>(&dsx.y)[i];
dPdx.z = Pointer<Float>(&dsx.z)[i];
@@ -192,12 +195,26 @@
Vector4f sample = s.sampleTexture(texture, uvwa, dRef, lod[i], dPdx, dPdy, offset, sampleId, samplerFunction);
- Pointer<Float> rgba = out;
- rgba[0 * SIMD::Width + i] = Pointer<Float>(&sample.x)[i];
- rgba[1 * SIMD::Width + i] = Pointer<Float>(&sample.y)[i];
- rgba[2 * SIMD::Width + i] = Pointer<Float>(&sample.z)[i];
- rgba[3 * SIMD::Width + i] = Pointer<Float>(&sample.w)[i];
+ If(perLaneSampling)
+ {
+ Pointer<Float> rgba = out;
+ rgba[0 * SIMD::Width + i] = Pointer<Float>(&sample.x)[i];
+ rgba[1 * SIMD::Width + i] = Pointer<Float>(&sample.y)[i];
+ rgba[2 * SIMD::Width + i] = Pointer<Float>(&sample.z)[i];
+ rgba[3 * SIMD::Width + i] = Pointer<Float>(&sample.w)[i];
+ i++;
+ }
+ Else
+ {
+ Pointer<SIMD::Float> rgba = out;
+ rgba[0] = sample.x;
+ rgba[1] = sample.y;
+ rgba[2] = sample.z;
+ rgba[3] = sample.w;
+ i = SIMD::Width;
+ }
}
+ Until(i == SIMD::Width);
}
else
{