Interpreter: Support returns from runStriped

Change-Id: Id84c3fb35cb61fa839691471d03a44152964bedb
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/268941
Reviewed-by: Ethan Nicholas <ethannicholas@google.com>
Commit-Queue: Brian Osman <brianosman@google.com>
diff --git a/src/sksl/SkSLInterpreter.h b/src/sksl/SkSLInterpreter.h
index 4ed391a..5e4db17 100644
--- a/src/sksl/SkSLInterpreter.h
+++ b/src/sksl/SkSLInterpreter.h
@@ -274,11 +274,12 @@
      *   ...
      *   args[argCount - 1] points to an array of N values, the last argument for each invocation
      *
-     * All values in 'args', 'outReturn', and 'uniforms' are 32-bit values (typically floats,
+     * All values in 'args', 'outResult', and 'uniforms' are 32-bit values (typically floats,
      * but possibly int32_t or uint32_t, depending on the types used in the SkSL).
      * Any 'out' or 'inout' parameters will result in the 'args' array being modified.
      */
-    bool runStriped(const ByteCodeFunction* f, int count, float* args[]) {
+    bool runStriped(const ByteCodeFunction* f, int count, float* args[],
+                    float* outResult[] = nullptr) {
         SkASSERT(f);
         Vector* stack = fMemory + MEMORY_SIZE;
         int stackCount = f->fStackSlotCount + f->fParameterSlotCount;
@@ -287,6 +288,7 @@
         VectorI maskStack[MASK_STACK_SIZE];
         VectorI loopStack[LOOP_STACK_SIZE];
         VectorI continueStack[LOOP_STACK_SIZE];
+        Vector* innerResult = nullptr;
         Context context(fMemory, stack, condStack, maskStack, loopStack, continueStack);
         for (int i = 0; i < count; i += width) {
             int lanes = std::min(width, count - i);
@@ -301,7 +303,7 @@
             for (int j = 0; j < f->fParameterSlotCount; ++j) {
                 memcpy(stack + j, &args[j][i], size);
             }
-            if (!this->innerRun(f, context, i, nullptr)) {
+            if (!this->innerRun(f, context, i, &innerResult)) {
                 return false;
             }
             int slot = 0;
@@ -313,6 +315,11 @@
                 }
                 slot += p.fSlotCount;
             }
+            if (outResult) {
+                for (int j = 0; j < f->fReturnSlotCount; ++j) {
+                    memcpy(&outResult[j][i], &innerResult[j], size);
+                }
+            }
         }
         return true;
     }
diff --git a/tests/SkSLInterpreterTest.cpp b/tests/SkSLInterpreterTest.cpp
index 1cb8a63..f2cf0be 100644
--- a/tests/SkSLInterpreterTest.cpp
+++ b/tests/SkSLInterpreterTest.cpp
@@ -842,6 +842,57 @@
     REPORTER_ASSERT(r, out->fFloat[0] = -1.0f);
 }
 
+DEF_TEST(SkSLInterpreterRunStripedReturn, r) {  // Exercises runStriped's outResult argument.
+    const char* src =  // One function returning a single float, one returning a float2.
+        "float  prod(float2 v) { return v.x * v.y; }\n"
+        "float2 swap(float2 v) { return v.yx; }\n";
+    // Compile the source and lower it to byte code; both steps are expected to succeed.
+    SkSL::Compiler compiler;
+    SkSL::Program::Settings settings;
+    auto program =
+            compiler.convertProgram(SkSL::Program::kGeneric_Kind, SkSL::String(src), settings);
+    REPORTER_ASSERT(r, program);
+
+    auto byteCode = compiler.toByteCode(*program);
+    REPORTER_ASSERT(r, !compiler.errorCount());
+
+    auto prod = byteCode->getFunction("prod");
+    auto swap = byteCode->getFunction("swap");
+
+    REPORTER_ASSERT(r, prod);
+    REPORTER_ASSERT(r, swap);
+
+    SkSL::Interpreter<4> interpreter(std::move(byteCode));
+    float inX[4] = { 1, 2, 3, 4 };
+    float inY[4] = { 5, 6, 7, 8 };
+    float outX[4], outY[4];
+    // One array per 32-bit slot: in = {v.x, v.y} per invocation, out = return slots 0 and 1.
+    float* in[] = { inX, inY };
+    float* out[] = { outX, outY };
+    // Run with 2 and then 4 invocations; entries at index >= count must be left at zero.
+    for (int count : { 2, 4 }) {
+        memset(outX, 0, sizeof(outX));  // Zero marks entries that runStriped did not write.
+        memset(outY, 0, sizeof(outY));
+        // prod returns one slot, so only outX may be written; outY must remain zeroed.
+        bool success = interpreter.runStriped(prod, count, in, out);
+        REPORTER_ASSERT(r, success);
+        for (int i = 0; i < 4; ++i) {
+            REPORTER_ASSERT(r, outX[i] == (i < count ? inX[i] * inY[i] : 0.0f));
+            REPORTER_ASSERT(r, outY[i] == 0.0f);
+        }
+        // Re-zero the outputs so the swap checks cannot pass on values left over from prod.
+        memset(outX, 0, sizeof(outX));
+        memset(outY, 0, sizeof(outY));
+        // swap returns v.yx: slot 0 (outX) receives inY and slot 1 (outY) receives inX.
+        success = interpreter.runStriped(swap, count, in, out);
+        REPORTER_ASSERT(r, success);
+        for (int i = 0; i < 4; ++i) {
+            REPORTER_ASSERT(r, outX[i] == (i < count ? inY[i] : 0.0f));
+            REPORTER_ASSERT(r, outY[i] == (i < count ? inX[i] : 0.0f));
+        }
+    }
+}
+
 DEF_TEST(SkSLInterpreterOutParams, r) {
     test(r,
          "void oneAlpha(inout half4 color) { color.a = 1; }"