test and fix that we cover the right inputs

At head, after the JIT has run we redo the remaining n%8 tail from the start
of the buffers, not continuing from (n/8)*8 like we'd want.

Change-Id: I1a3d24cdffc843bbe6f3e01a163b6e3a20fdd0ca
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/220556
Reviewed-by: Brian Osman <brianosman@google.com>
Reviewed-by: Herb Derby <herb@google.com>
Commit-Queue: Mike Klein <mtklein@google.com>
diff --git a/src/core/SkVM.cpp b/src/core/SkVM.cpp
index bfc7607..4f597f1 100644
--- a/src/core/SkVM.cpp
+++ b/src/core/SkVM.cpp
@@ -402,11 +402,12 @@
                    body_ends = 0,
                    tail_ends = 0;
 
+            // 8 float values in a ymm register.
+            static constexpr int K = 8;
+
             JIT(const std::vector<Program::Instruction>& instructions, int regs, int loop,
                 size_t strides[], int nargs)
             {
-                // 8 float values in a ymm register.
-                constexpr int K = 8;
 
             #if defined(SK_BUILD_FOR_WIN)
                 // TODO  Windows ABI?
@@ -617,7 +618,7 @@
         #endif
         }
 
-        if (n >= 8) {
+        if (n >= JIT::K) {
             bool ran = true;
             switch (nargs) {
                 case 0: fJIT->getCode<void(*)(int              )>()(n                  ); break;
@@ -626,7 +627,16 @@
                 default: ran = false; break;
             }
             if (ran) {
-                n &= 7;
+                // Step n and arguments forward to where the JIT stopped.
+                const int jit_stopped = (n / JIT::K) * JIT::K;
+                n -= jit_stopped;
+
+                void**        arg    = args;
+                const size_t* stride = strides;
+                for (; *arg; arg++, stride++) {
+                    *arg = (void*)( (char*)*arg + jit_stopped * *stride );
+                }
+                SkASSERT(arg == args + nargs);
             }
         }
     #endif
diff --git a/tests/SkVMTest.cpp b/tests/SkVMTest.cpp
index 7d5759e..e6819de 100644
--- a/tests/SkVMTest.cpp
+++ b/tests/SkVMTest.cpp
@@ -169,3 +169,30 @@
         }
     }
 }
+
+DEF_TEST(SkVM_LoopCounts, r) {
+    // Check that eval(N, ...) updates exactly the first N values, for every N from 0 through 64.

+    int buf[64];
+    for (int N = 0; N <= (int)SK_ARRAY_COUNT(buf); N++) {
+        for (int i = 0; i < (int)SK_ARRAY_COUNT(buf); i++) {
+            buf[i] = i;  // Reset so every N starts from buf[i] == i.
+        }

+        // Build a program that does buf[i] += 1.
+        skvm::Builder b;
+        b.store32(b.arg(0),
+                  b.add(b.splat(1),
+                        b.load32(b.arg(0))));

+        skvm::Program program = b.done();
+        program.eval(N, buf);

+        for (int i = 0; i < N; i++) {
+            REPORTER_ASSERT(r, buf[i] == i+1);  // Each of the first N was incremented exactly once.
+        }
+        for (int i = N; i < (int)SK_ARRAY_COUNT(buf); i++) {
+            REPORTER_ASSERT(r, buf[i] == i);  // Values at or past N must be left untouched.
+        }
+    }
+}