Revert "Support large kernels on GPU in matrix convolution effect"
This reverts commit 1ed4391fe7d218caf21754de0932b5e96043621e.
Reason for revert: It looks like some bad images showed up at gold.skia.org and the ProcessorCloneTest is crashing on Windows bots:
https://logs.chromium.org/logs/skia/4bfabe0bad476911/+/steps/dm/0/stdout
Original change's description:
> Support large kernels on GPU in matrix convolution effect
>
> Currently matrix convolution falls back to CPU execution for large kernels, due to the argument limit for fragment shaders.
>
> Now for large kernels, we store them in a texture and sample them in a shader to sidestep the limit.
>
> Change-Id: Icc069a701ea8e9cd0adf75f4bfd149fd22e31afd
> Bug: skia:8449
> Reviewed-on: https://skia-review.googlesource.com/c/skia/+/263495
> Reviewed-by: Michael Ludwig <michaelludwig@google.com>
> Commit-Queue: Adlai Holler <adlai@google.com>
TBR=robertphillips@google.com,michaelludwig@google.com,adlai@google.com
Change-Id: Iaf4858131046a343481bcf0fd9cc3919d9fc2bda
No-Presubmit: true
No-Tree-Checks: true
No-Try: true
Bug: skia:8449
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/287736
Reviewed-by: Brian Salomon <bsalomon@google.com>
Commit-Queue: Brian Salomon <bsalomon@google.com>
diff --git a/gm/matrixconvolution.cpp b/gm/matrixconvolution.cpp
index 6164dbe..5897ffb 100644
--- a/gm/matrixconvolution.cpp
+++ b/gm/matrixconvolution.cpp
@@ -79,7 +79,7 @@
return SkImageFilters::MatrixConvolution({3,3}, kernel.data(), /* gain */ 0.3f, /* bias */ SkIntToScalar(100), kernelOffset, tileMode, convolveAlpha, nullptr, cropRect);
}
case kLarge_KernelFixture: {
- // Intentionally go over the uniform kernel size limit of 25.
+ // Intentionally go over the MAX_KERNEL_SIZE limit and trigger CPU fallback.
// All 1s except center value, which is -47 (sum of 1).
std::vector<SkScalar> kernel(49, SkIntToScalar(1));
kernel[24] = SkIntToScalar(-47);
diff --git a/src/core/SkGpuBlurUtils.cpp b/src/core/SkGpuBlurUtils.cpp
index 8dd06d1..7ff294c 100644
--- a/src/core/SkGpuBlurUtils.cpp
+++ b/src/core/SkGpuBlurUtils.cpp
@@ -128,9 +128,8 @@
SkIPoint kernelOffset = SkIPoint::Make(radiusX, radiusY);
GrPaint paint;
auto wm = SkTileModeToWrapMode(mode);
- auto conv = GrMatrixConvolutionEffect::MakeGaussian(context, std::move(srcView), srcBounds,
- size, 1.0, 0.0, kernelOffset, wm, true,
- sigmaX, sigmaY,
+ auto conv = GrMatrixConvolutionEffect::MakeGaussian(std::move(srcView), srcBounds, size, 1.0,
+ 0.0, kernelOffset, wm, true, sigmaX, sigmaY,
*renderTargetContext->caps());
paint.addColorFragmentProcessor(std::move(conv));
paint.setPorterDuffXPFactory(SkBlendMode::kSrc);
@@ -448,8 +447,7 @@
if (scaleFactorX == 1 && scaleFactorY == 1) {
// For really small blurs (certainly no wider than 5x5 on desktop GPUs) it is faster to just
// launch a single non separable kernel vs two launches.
- const int kernelSize = (2 * radiusX + 1) * (2 * radiusY + 1);
- if (sigmaX > 0 && sigmaY > 0 && kernelSize <= GrMatrixConvolutionEffect::kMaxUniformSize) {
+ if (sigmaX > 0 && sigmaY > 0 && (2 * radiusX + 1) * (2 * radiusY + 1) <= MAX_KERNEL_SIZE) {
// Apply the proxy offset to src bounds and offset directly
return convolve_gaussian_2d(context, std::move(srcView), srcColorType, srcBounds,
dstBounds, radiusX, radiusY, sigmaX, sigmaY, mode,
diff --git a/src/effects/imagefilters/SkMatrixConvolutionImageFilter.cpp b/src/effects/imagefilters/SkMatrixConvolutionImageFilter.cpp
index 0bac7eb..8b242c8 100644
--- a/src/effects/imagefilters/SkMatrixConvolutionImageFilter.cpp
+++ b/src/effects/imagefilters/SkMatrixConvolutionImageFilter.cpp
@@ -391,7 +391,9 @@
}
#if SK_SUPPORT_GPU
- if (ctx.gpuBacked()) {
+ // Note: if the kernel is too big, the GPU path falls back to SW
+ if (ctx.gpuBacked() &&
+ fKernelSize.width() * fKernelSize.height() <= MAX_KERNEL_SIZE) {
auto context = ctx.getContext();
// Ensure the input is in the destination color space. Typically applyCropRect will have
@@ -412,8 +414,7 @@
// Map srcBounds from input's logical image domain to that of the proxy
srcBounds.offset(input->subset().x(), input->subset().y());
- auto fp = GrMatrixConvolutionEffect::Make(context,
- std::move(inputView),
+ auto fp = GrMatrixConvolutionEffect::Make(std::move(inputView),
srcBounds,
fKernelSize,
fKernel,
diff --git a/src/gpu/GrFragmentProcessor.h b/src/gpu/GrFragmentProcessor.h
index efc4c9a..194bb29 100644
--- a/src/gpu/GrFragmentProcessor.h
+++ b/src/gpu/GrFragmentProcessor.h
@@ -490,8 +490,6 @@
TextureSampler(GrSurfaceProxyView, GrSamplerState = {});
- TextureSampler(TextureSampler&&) = default;
- TextureSampler& operator=(TextureSampler&&) = default;
TextureSampler& operator=(const TextureSampler&) = delete;
bool operator==(const TextureSampler& that) const {
diff --git a/src/gpu/effects/GrMatrixConvolutionEffect.cpp b/src/gpu/effects/GrMatrixConvolutionEffect.cpp
index c108d34..8bb5a56 100644
--- a/src/gpu/effects/GrMatrixConvolutionEffect.cpp
+++ b/src/gpu/effects/GrMatrixConvolutionEffect.cpp
@@ -6,10 +6,6 @@
*/
#include "src/gpu/effects/GrMatrixConvolutionEffect.h"
-#include "src/gpu/GrBitmapTextureMaker.h"
-#include "src/gpu/GrContextPriv.h"
-#include "src/gpu/GrProxyProvider.h"
-#include "src/gpu/GrRecordingContextPriv.h"
#include "src/gpu/GrTexture.h"
#include "src/gpu/GrTextureProxy.h"
#include "src/gpu/effects/GrTextureEffect.h"
@@ -34,151 +30,52 @@
UniformHandle fKernelOffsetUni;
UniformHandle fGainUni;
UniformHandle fBiasUni;
- UniformHandle fKernelBiasUni;
typedef GrGLSLFragmentProcessor INHERITED;
};
-GrMatrixConvolutionEffect::KernelWrapper GrMatrixConvolutionEffect::KernelWrapper::Make(
- GrRecordingContext* context, SkISize size, const SkScalar* values) {
- if (nullptr == context || nullptr == values || size.isEmpty()) {
- return {};
- }
- const int length = size.area();
- // Small kernel -> just fill the array.
- KernelWrapper result(size);
- if (length <= kMaxUniformSize) {
- for (int i = 0; i < length; i++) {
- result.fArray[i] = SkScalarToFloat(values[i]);
- }
- return result;
- }
-
- ScalableSampler& scalableSampler = result.fScalableSampler;
- // Determine min and max values to figure out inner gain & bias.
- SkScalar min = values[0];
- SkScalar max = values[0];
- for (int i = 1; i < length; i++) {
- if (values[i] < min) {
- min = values[i];
- }
- if (values[i] > max) {
- max = values[i];
- }
- }
- // Treat near-0 gain (i.e. box blur) as 1, and let the kernelBias
- // move everything up to the final value.
- const SkScalar computedGain = max - min;
- scalableSampler.fGain = SkScalarNearlyZero(computedGain) ? 1.0f : SkScalarToFloat(computedGain);
- // Inner bias is pre-inner-gain so we divide that out.
- scalableSampler.fBias = SkScalarToFloat(min) / scalableSampler.fGain;
-
- static const GrUniqueKey::Domain kDomain = GrUniqueKey::GenerateDomain();
- GrUniqueKey key;
- GrUniqueKey::Builder builder(&key, kDomain, length, "Matrix Convolution Kernel");
- // Texture cache key is the exact content of the kernel.
- static_assert(sizeof(float) == 4);
- for (int i = 0; i < length; i++) {
- builder[i] = *(const uint32_t*)&values[i];
- }
- builder.finish();
-
- // Find or create a texture.
- GrProxyProvider* proxyProvider = context->priv().proxyProvider();
- GrSurfaceProxyView view;
- if (sk_sp<GrTextureProxy> kernelProxy = proxyProvider->findOrCreateProxyByUniqueKey(key)) {
- GrSwizzle swizzle =
- context->priv().caps()->getReadSwizzle(kernelProxy->backendFormat(),
- GrColorType::kAlpha_8);
- view = {std::move(kernelProxy), kTopLeft_GrSurfaceOrigin, swizzle};
- } else {
- SkBitmap bm;
- if (!bm.tryAllocPixels(SkImageInfo::MakeA8(GrNextPow2(length), 1))) {
- return {};
- }
- for (int i = 0; i < length; i++) {
- *(bm.getAddr8(i, 0)) =
- SkScalarRoundToInt((values[i] - min) / scalableSampler.fGain * 255);
- }
- bm.setImmutable();
- GrBitmapTextureMaker maker(context, bm, GrImageTexGenPolicy::kNew_Uncached_Budgeted);
- view = maker.view(GrMipMapped::kNo);
- if (!view) {
- return {};
- }
- proxyProvider->assignUniqueKeyToProxy(key, view.asTextureProxy());
- }
- scalableSampler.fSampler = { std::move(view) };
- return result;
-}
-
-bool GrMatrixConvolutionEffect::KernelWrapper::operator==(const KernelWrapper& k) const {
- return fSize == k.fSize &&
- (this->isSampled() ? fScalableSampler == k.fScalableSampler : fArray == k.fArray);
-}
-
-bool GrMatrixConvolutionEffect::KernelWrapper::ScalableSampler::operator==(
- const ScalableSampler& k) const {
- return fSampler == k.fSampler && fGain == k.fGain && fBias == k.fBias;
-}
-
void GrGLMatrixConvolutionEffect::emitCode(EmitArgs& args) {
const GrMatrixConvolutionEffect& mce = args.fFp.cast<GrMatrixConvolutionEffect>();
- int kernelWidth = mce.kernelSize().width();
- int kernelHeight = mce.kernelSize().height();
+ int kWidth = mce.kernelSize().width();
+ int kHeight = mce.kernelSize().height();
- int arrayCount = (kernelWidth * kernelHeight + 3) / 4;
- SkASSERT(4 * arrayCount >= kernelWidth * kernelHeight);
+ int arrayCount = (kWidth * kHeight + 3) / 4;
+ SkASSERT(4 * arrayCount >= kWidth * kHeight);
GrGLSLUniformHandler* uniformHandler = args.fUniformHandler;
- if (mce.kernelIsSampled()) {
- fKernelBiasUni = uniformHandler->addUniform(&mce, kFragment_GrShaderFlag,
- kFloat_GrSLType, "KernelBias");
- } else {
- fKernelUni = uniformHandler->addUniformArray(&mce, kFragment_GrShaderFlag,
- kFloat4_GrSLType, "Kernel", arrayCount);
- }
+ fKernelUni = uniformHandler->addUniformArray(&mce, kFragment_GrShaderFlag, kHalf4_GrSLType,
+ "Kernel",
+ arrayCount);
fKernelOffsetUni = uniformHandler->addUniform(&mce, kFragment_GrShaderFlag, kHalf2_GrSLType,
"KernelOffset");
- fGainUni = uniformHandler->addUniform(&mce, kFragment_GrShaderFlag, kFloat_GrSLType, "Gain");
- fBiasUni = uniformHandler->addUniform(&mce, kFragment_GrShaderFlag, kFloat_GrSLType, "Bias");
+ fGainUni = uniformHandler->addUniform(&mce, kFragment_GrShaderFlag, kHalf_GrSLType, "Gain");
+ fBiasUni = uniformHandler->addUniform(&mce, kFragment_GrShaderFlag, kHalf_GrSLType, "Bias");
const char* kernelOffset = uniformHandler->getUniformCStr(fKernelOffsetUni);
+ const char* kernel = uniformHandler->getUniformCStr(fKernelUni);
const char* gain = uniformHandler->getUniformCStr(fGainUni);
const char* bias = uniformHandler->getUniformCStr(fBiasUni);
GrGLSLFPFragmentBuilder* fragBuilder = args.fFragBuilder;
SkString coords2D = fragBuilder->ensureCoords2D(args.fTransformedCoords[0].fVaryingPoint,
mce.sampleMatrix());
- fragBuilder->codeAppend("float4 sum = float4(0, 0, 0, 0);");
+ fragBuilder->codeAppend("half4 sum = half4(0, 0, 0, 0);");
fragBuilder->codeAppendf("float2 coord = %s - %s;", coords2D.c_str(), kernelOffset);
- fragBuilder->codeAppend("float4 c;");
+ fragBuilder->codeAppend("half4 c;");
- for (int y = 0; y < kernelHeight; y++) {
- for (int x = 0; x < kernelWidth; x++) {
+ const char* kVecSuffix[4] = { ".x", ".y", ".z", ".w" };
+ for (int y = 0; y < kHeight; y++) {
+ for (int x = 0; x < kWidth; x++) {
GrGLSLShaderBuilder::ShaderBlock block(fragBuilder);
- int offset = y*kernelWidth + x;
+ int offset = y*kWidth + x;
- if (mce.kernelIsSampled()) {
- const char* kernelBias = uniformHandler->getUniformCStr(fKernelBiasUni);
- float xCoord = offset / (float)GrNextPow2(mce.kernelSize().area());
-
- fragBuilder->codeAppend("float k = ");
- fragBuilder->appendTextureLookup(args.fTexSamplers[0],
- SkSL::String::printf("half2(%f, 0.5)", xCoord).c_str());
- fragBuilder->codeAppendf(".w + %s;", kernelBias);
- } else {
- static constexpr const char* kVecSuffix[4] = { ".x", ".y", ".z", ".w" };
- const char* kernel = uniformHandler->getUniformCStr(fKernelUni);
- fragBuilder->codeAppendf("float k = %s[%d]%s;", kernel, offset / 4,
- kVecSuffix[offset & 0x3]);
- }
-
+ fragBuilder->codeAppendf("half k = %s[%d]%s;", kernel, offset / 4,
+ kVecSuffix[offset & 0x3]);
SkSL::String coord;
coord.appendf("coord + half2(%d, %d)", x, y);
auto sample = this->invokeChild(0, args, coord);
- fragBuilder->codeAppendf("float4 c = %s;", sample.c_str());
+ fragBuilder->codeAppendf("half4 c = %s;", sample.c_str());
if (!mce.convolveAlpha()) {
fragBuilder->codeAppend("c.rgb /= c.a;");
fragBuilder->codeAppend("c.rgb = saturate(c.rgb);");
@@ -187,16 +84,15 @@
}
}
if (mce.convolveAlpha()) {
- fragBuilder->codeAppendf("%s = half4(sum * %s + %s);", args.fOutputColor, gain, bias);
+ fragBuilder->codeAppendf("%s = sum * %s + %s;", args.fOutputColor, gain, bias);
fragBuilder->codeAppendf("%s.a = saturate(%s.a);", args.fOutputColor, args.fOutputColor);
fragBuilder->codeAppendf("%s.rgb = clamp(%s.rgb, 0.0, %s.a);",
args.fOutputColor, args.fOutputColor, args.fOutputColor);
} else {
auto sample = this->invokeChild(0, args, coords2D.c_str());
fragBuilder->codeAppendf("c = %s;", sample.c_str());
- fragBuilder->codeAppendf("%s.a = half(c.a);", args.fOutputColor);
- fragBuilder->codeAppendf("%s.rgb = half3(saturate(sum.rgb * %s + %s));",
- args.fOutputColor, gain, bias);
+ fragBuilder->codeAppendf("%s.a = c.a;", args.fOutputColor);
+ fragBuilder->codeAppendf("%s.rgb = saturate(sum.rgb * %s + %s);", args.fOutputColor, gain, bias);
fragBuilder->codeAppendf("%s.rgb *= %s.a;", args.fOutputColor, args.fOutputColor);
}
fragBuilder->codeAppendf("%s *= %s;\n", args.fOutputColor, args.fInputColor);
@@ -215,22 +111,17 @@
const GrFragmentProcessor& processor) {
const GrMatrixConvolutionEffect& conv = processor.cast<GrMatrixConvolutionEffect>();
pdman.set2fv(fKernelOffsetUni, 1, conv.kernelOffset().ptr());
- float totalGain = conv.gain();
- if (conv.kernelIsSampled()) {
- totalGain *= conv.kernelSampleGain();
- pdman.set1f(fKernelBiasUni, conv.kernelSampleBias());
- } else {
- int kernelCount = conv.kernelSize().area();
- int arrayCount = (kernelCount + 3) / 4;
- SkASSERT(4 * arrayCount >= kernelCount);
- pdman.set4fv(fKernelUni, arrayCount, conv.kernel());
- }
+ int kernelCount = conv.kernelSize().width() * conv.kernelSize().height();
+ int arrayCount = (kernelCount + 3) / 4;
+ SkASSERT(4 * arrayCount >= kernelCount);
+ pdman.set4fv(fKernelUni, arrayCount, conv.kernel());
+ pdman.set1f(fGainUni, conv.gain());
pdman.set1f(fBiasUni, conv.bias());
- pdman.set1f(fGainUni, totalGain);
}
GrMatrixConvolutionEffect::GrMatrixConvolutionEffect(std::unique_ptr<GrFragmentProcessor> child,
- KernelWrapper kernel,
+ const SkISize& kernelSize,
+ const SkScalar* kernel,
SkScalar gain,
SkScalar bias,
const SkIPoint& kernelOffset,
@@ -238,14 +129,14 @@
// To advertise either the modulation or opaqueness optimizations we'd have to examine the
// parameters.
: INHERITED(kGrMatrixConvolutionEffect_ClassID, kNone_OptimizationFlags)
- , fKernel(std::move(kernel))
+ , fKernelSize(kernelSize)
, fGain(SkScalarToFloat(gain))
, fBias(SkScalarToFloat(bias) / 255.0f)
, fConvolveAlpha(convolveAlpha) {
child->setSampledWithExplicitCoords();
this->registerChildProcessor(std::move(child));
- if (fKernel.isSampled()) {
- this->setTextureSamplerCnt(1);
+ for (int i = 0; i < kernelSize.width() * kernelSize.height(); i++) {
+ fKernel[i] = SkScalarToFloat(kernel[i]);
}
fKernelOffset = {static_cast<float>(kernelOffset.x()),
static_cast<float>(kernelOffset.y())};
@@ -254,7 +145,7 @@
GrMatrixConvolutionEffect::GrMatrixConvolutionEffect(const GrMatrixConvolutionEffect& that)
: INHERITED(kGrMatrixConvolutionEffect_ClassID, kNone_OptimizationFlags)
- , fKernel(that.fKernel)
+ , fKernelSize(that.fKernelSize)
, fGain(that.fGain)
, fBias(that.fBias)
, fKernelOffset(that.fKernelOffset)
@@ -262,9 +153,7 @@
auto child = that.childProcessor(0).clone();
child->setSampledWithExplicitCoords();
this->registerChildProcessor(std::move(child));
- if (fKernel.isSampled()) {
- this->setTextureSamplerCnt(1);
- }
+ std::copy_n(that.fKernel, fKernelSize.width() * fKernelSize.height(), fKernel);
this->addCoordTransform(&fCoordTransform);
}
@@ -283,18 +172,14 @@
bool GrMatrixConvolutionEffect::onIsEqual(const GrFragmentProcessor& sBase) const {
const GrMatrixConvolutionEffect& s = sBase.cast<GrMatrixConvolutionEffect>();
- return fKernel == s.fKernel &&
+ return fKernelSize == s.kernelSize() &&
+ std::equal(fKernel, fKernel + fKernelSize.area(), s.fKernel) &&
fGain == s.gain() &&
fBias == s.bias() &&
fKernelOffset == s.kernelOffset() &&
fConvolveAlpha == s.convolveAlpha();
}
-const GrFragmentProcessor::TextureSampler& GrMatrixConvolutionEffect::onTextureSampler(
- int index) const {
- return IthTextureSampler(index, fKernel.scalableSampler().fSampler);
-}
-
static void fill_in_1D_gaussian_kernel_with_stride(float* kernel, int size, int stride,
float twoSigmaSqrd) {
SkASSERT(!SkScalarNearlyZero(twoSigmaSqrd, SK_ScalarNearlyZero));
@@ -319,6 +204,7 @@
static void fill_in_2D_gaussian_kernel(float* kernel, int width, int height,
SkScalar sigmaX, SkScalar sigmaY) {
+ SkASSERT(width * height <= MAX_KERNEL_SIZE);
const float twoSigmaSqrdX = 2.0f * SkScalarToFloat(SkScalarSquare(sigmaX));
const float twoSigmaSqrdY = 2.0f * SkScalarToFloat(SkScalarSquare(sigmaY));
@@ -374,8 +260,7 @@
}
}
-std::unique_ptr<GrFragmentProcessor> GrMatrixConvolutionEffect::Make(GrRecordingContext* context,
- GrSurfaceProxyView srcView,
+std::unique_ptr<GrFragmentProcessor> GrMatrixConvolutionEffect::Make(GrSurfaceProxyView srcView,
const SkIRect& srcBounds,
const SkISize& kernelSize,
const SkScalar* kernel,
@@ -385,19 +270,14 @@
GrSamplerState::WrapMode wm,
bool convolveAlpha,
const GrCaps& caps) {
- auto kw = KernelWrapper::Make(context, kernelSize, kernel);
- if (!kw.isValid()) {
- return nullptr;
- }
GrSamplerState sampler(wm, GrSamplerState::Filter::kNearest);
auto child = GrTextureEffect::MakeSubset(std::move(srcView), kPremul_SkAlphaType, SkMatrix::I(),
sampler, SkRect::Make(srcBounds), caps);
return std::unique_ptr<GrFragmentProcessor>(new GrMatrixConvolutionEffect(
- std::move(child), std::move(kw), gain, bias, kernelOffset, convolveAlpha));
+ std::move(child), kernelSize, kernel, gain, bias, kernelOffset, convolveAlpha));
}
std::unique_ptr<GrFragmentProcessor> GrMatrixConvolutionEffect::MakeGaussian(
- GrRecordingContext* context,
GrSurfaceProxyView srcView,
const SkIRect& srcBounds,
const SkISize& kernelSize,
@@ -409,11 +289,11 @@
SkScalar sigmaX,
SkScalar sigmaY,
const GrCaps& caps) {
- SkAutoSTMalloc<32, float> kernel(kernelSize.area());
- fill_in_2D_gaussian_kernel(kernel.get(), kernelSize.width(), kernelSize.height(),
- sigmaX, sigmaY);
- return Make(context, std::move(srcView), srcBounds, kernelSize, kernel.get(),
- gain, bias, kernelOffset, wm, convolveAlpha, caps);
+ float kernel[MAX_KERNEL_SIZE];
+
+ fill_in_2D_gaussian_kernel(kernel, kernelSize.width(), kernelSize.height(), sigmaX, sigmaY);
+ return Make(std::move(srcView), srcBounds, kernelSize, kernel, gain, bias, kernelOffset, wm,
+ convolveAlpha, caps);
}
GR_DEFINE_FRAGMENT_PROCESSOR_TEST(GrMatrixConvolutionEffect);
@@ -422,9 +302,8 @@
std::unique_ptr<GrFragmentProcessor> GrMatrixConvolutionEffect::TestCreate(GrProcessorTestData* d) {
auto [view, ct, at] = d->randomView();
- static constexpr size_t kMaxTestKernelSize = 2 * kMaxUniformSize;
- int width = d->fRandom->nextRangeU(1, kMaxTestKernelSize);
- int height = d->fRandom->nextRangeU(1, kMaxTestKernelSize / width);
+ int width = d->fRandom->nextRangeU(1, MAX_KERNEL_SIZE);
+ int height = d->fRandom->nextRangeU(1, MAX_KERNEL_SIZE / width);
SkISize kernelSize = SkISize::Make(width, height);
std::unique_ptr<SkScalar[]> kernel(new SkScalar[width * height]);
for (int i = 0; i < width * height; i++) {
@@ -446,8 +325,8 @@
auto wm = static_cast<GrSamplerState::WrapMode>(
d->fRandom->nextULessThan(GrSamplerState::kWrapModeCount));
bool convolveAlpha = d->fRandom->nextBool();
- return GrMatrixConvolutionEffect::Make(d->context()->priv().asRecordingContext(),
- std::move(view),
+
+ return GrMatrixConvolutionEffect::Make(std::move(view),
bounds,
kernelSize,
kernel.get(),
diff --git a/src/gpu/effects/GrMatrixConvolutionEffect.h b/src/gpu/effects/GrMatrixConvolutionEffect.h
index 125fa7f..5ff61cc 100644
--- a/src/gpu/effects/GrMatrixConvolutionEffect.h
+++ b/src/gpu/effects/GrMatrixConvolutionEffect.h
@@ -9,17 +9,14 @@
#define GrMatrixConvolutionEffect_DEFINED
#include "src/gpu/GrFragmentProcessor.h"
-#include <array>
-#include <new>
+
+// A little bit less than the minimum # uniforms required by DX9SM2 (32).
+// Allows for a 5x5 kernel (or 25x1, for that matter).
+#define MAX_KERNEL_SIZE 25
class GrMatrixConvolutionEffect : public GrFragmentProcessor {
public:
- // A little bit less than the minimum # uniforms required by DX9SM2 (32).
- // Allows for a 5x5 kernel (or 25x1, for that matter).
- static constexpr int kMaxUniformSize = 25;
-
- static std::unique_ptr<GrFragmentProcessor> Make(GrRecordingContext*,
- GrSurfaceProxyView srcView,
+ static std::unique_ptr<GrFragmentProcessor> Make(GrSurfaceProxyView srcView,
const SkIRect& srcBounds,
const SkISize& kernelSize,
const SkScalar* kernel,
@@ -30,8 +27,7 @@
bool convolveAlpha,
const GrCaps&);
- static std::unique_ptr<GrFragmentProcessor> MakeGaussian(GrRecordingContext*,
- GrSurfaceProxyView srcView,
+ static std::unique_ptr<GrFragmentProcessor> MakeGaussian(GrSurfaceProxyView srcView,
const SkIRect& srcBounds,
const SkISize& kernelSize,
SkScalar gain,
@@ -44,12 +40,9 @@
const GrCaps&);
const SkIRect& bounds() const { return fBounds; }
- SkISize kernelSize() const { return fKernel.size(); }
+ const SkISize& kernelSize() const { return fKernelSize; }
const SkV2 kernelOffset() const { return fKernelOffset; }
- bool kernelIsSampled() const { return fKernel.isSampled(); }
- const float *kernel() const { return fKernel.array().data(); }
- float kernelSampleGain() const { return fKernel.scalableSampler().fGain; }
- float kernelSampleBias() const { return fKernel.scalableSampler().fBias; }
+ const float* kernel() const { return fKernel; }
float gain() const { return fGain; }
float bias() const { return fBias; }
bool convolveAlpha() const { return fConvolveAlpha; }
@@ -59,72 +52,11 @@
std::unique_ptr<GrFragmentProcessor> clone() const override;
private:
- /**
- * Small kernels are represented as float-arrays and uploaded as uniforms.
- * Large kernels go over the uniform limit and are uploaded as textures and sampled.
- */
- class KernelWrapper {
- public:
- struct ScalableSampler {
- TextureSampler fSampler;
- // Applied before any other math.
- float fBias = 0.0f;
- // Premultiplied in with user gain to save time.
- float fGain = 1.0f;
- bool operator==(const ScalableSampler&) const;
- };
- static KernelWrapper Make(GrRecordingContext*, SkISize, const float* values);
-
- KernelWrapper(KernelWrapper&& that) : fSize(that.fSize) {
- if (that.isSampled()) {
- new (&fScalableSampler) ScalableSampler(std::move(that.fScalableSampler));
- } else {
- new (&fArray) std::array<float, kMaxUniformSize>(std::move(that.fArray));
- }
- }
- KernelWrapper(const KernelWrapper& that) : fSize(that.fSize) {
- if (that.isSampled()) {
- new (&fScalableSampler) ScalableSampler(that.fScalableSampler);
- } else {
- new (&fArray) std::array<float, kMaxUniformSize>(that.fArray);
- }
- }
- ~KernelWrapper() {
- if (this->isSampled()) {
- fScalableSampler.~ScalableSampler();
- }
- }
-
- bool isValid() const { return !fSize.isEmpty(); }
- SkISize size() const { return fSize; }
- bool isSampled() const { return fSize.area() > kMaxUniformSize; }
- const std::array<float, kMaxUniformSize>& array() const {
- SkASSERT(!this->isSampled());
- return fArray;
- }
- const ScalableSampler& scalableSampler() const {
- SkASSERT(this->isSampled());
- return fScalableSampler;
- }
- bool operator==(const KernelWrapper&) const;
-
- private:
- KernelWrapper() : fSize({}) {}
- KernelWrapper(SkISize size) : fSize(size) {
- if (this->isSampled()) {
- new (&fScalableSampler) ScalableSampler;
- }
- }
-
- SkISize fSize;
- union {
- std::array<float, kMaxUniformSize> fArray;
- ScalableSampler fScalableSampler;
- };
- };
-
- GrMatrixConvolutionEffect(std::unique_ptr<GrFragmentProcessor> child,
- KernelWrapper kernel,
+ // srcProxy is the texture that is going to be convolved
+ // srcBounds is the subset of 'srcProxy' that will be used (e.g., for clamp mode)
+ GrMatrixConvolutionEffect(std::unique_ptr<GrFragmentProcessor>,
+ const SkISize& kernelSize,
+ const SkScalar* kernel,
SkScalar gain,
SkScalar bias,
const SkIPoint& kernelOffset,
@@ -138,13 +70,12 @@
bool onIsEqual(const GrFragmentProcessor&) const override;
- const GrFragmentProcessor::TextureSampler& onTextureSampler(int index) const override;
-
// We really just want the unaltered local coords, but the only way to get that right now is
// an identity coord transform.
GrCoordTransform fCoordTransform = {};
SkIRect fBounds;
- KernelWrapper fKernel;
+ SkISize fKernelSize;
+ float fKernel[MAX_KERNEL_SIZE];
float fGain;
float fBias;
SkV2 fKernelOffset;