Merge "Runtime support for mapping kernels with multiple input allocations"
diff --git a/rsov/driver/rsovScript.cpp b/rsov/driver/rsovScript.cpp
index 9037370..ef0d437 100644
--- a/rsov/driver/rsovScript.cpp
+++ b/rsov/driver/rsovScript.cpp
@@ -148,14 +148,14 @@
                                const void *usr, uint32_t usrLen,
                                const RsScriptCall *sc) {
   // TODO: Handle kernel without input Allocation
-  // TODO: Handle multi-input kernel
-  rsAssert(ains && inLen == 1);
-
-  RSoVAllocation *inputAllocation =
-      static_cast<RSoVAllocation *>(ains[0]->mHal.drv);
+  rsAssert(ains);
+  std::vector<RSoVAllocation *> inputAllocations(inLen);
+  for (uint32_t i = 0; i < inLen; ++i) {
+    inputAllocations[i] = static_cast<RSoVAllocation *>(ains[i]->mHal.drv);
+  }
   RSoVAllocation *outputAllocation =
       static_cast<RSoVAllocation *>(aout->mHal.drv);
-  runForEach(slot, inputAllocation, outputAllocation);
+  runForEach(slot, inLen, inputAllocations, outputAllocation);
 }
 
 void RSoVScript::invokeReduce(uint32_t slot, const Allocation **ains,
@@ -226,39 +226,37 @@
   return 0;
 }
 
-void RSoVScript::InitDescriptorAndPipelineLayouts() {
-  VkDescriptorSetLayoutBinding layout_bindings[] = {
+void RSoVScript::InitDescriptorAndPipelineLayouts(uint32_t inLen) {
+  // TODO: global variables
+  // TODO: kernels with zero output allocations
+  std::vector<VkDescriptorSetLayoutBinding> layout_bindings{
       {
-          .binding = 2,
-          .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
-          .descriptorCount = 1,
-          .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
-          .pImmutableSamplers = nullptr,
-      },
-      {
+          // for the output allocation
           .binding = 1,
           .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
           .descriptorCount = 1,
           .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
           .pImmutableSamplers = nullptr,
       },
-#ifdef SUPPORT_GLOBAL_VARIABLES
-      {
-          .binding = 0,
-          .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
-          .descriptorCount = 1,
-          .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
-          .pImmutableSamplers = nullptr,
-      }
-#endif
   };
 
+  // initialize descriptors for input allocations
+  for (uint32_t i = 0; i < inLen; ++i) {
+    layout_bindings.push_back({
+        .binding = i + 2,  // input allocations start from binding #2
+        .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
+        .descriptorCount = 1,
+        .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
+        .pImmutableSamplers = nullptr,
+    });
+  }
+
   VkDescriptorSetLayoutCreateInfo descriptor_layout = {
       .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
       .pNext = nullptr,
       .flags = 0,
-      .bindingCount = NELEM(layout_bindings),
-      .pBindings = layout_bindings,
+      .bindingCount = static_cast<uint32_t>(layout_bindings.size()),
+      .pBindings = layout_bindings.data(),
   };
 
   VkResult res;
@@ -271,13 +269,7 @@
                         "vkCreateDescriptorSetLayout() returns %d", res);
   }
   rsAssert(res == VK_SUCCESS);
-  /*
-    VkPushConstantRange pushConstantRange[] = { {
-    .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
-    .offset = 0,
-    .size = 16
-    } };
-  */
+
   /* Now use the descriptor layout to create a pipeline layout */
   VkPipelineLayoutCreateInfo pPipelineLayoutCreateInfo = {
       .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
@@ -357,8 +349,9 @@
   ALOGV("%s succeeded.", __FUNCTION__);
 }
 
-void RSoVScript::InitDescriptorSet(const RSoVAllocation *inputAllocation,
-                                   RSoVAllocation *outputAllocation) {
+void RSoVScript::InitDescriptorSet(
+    const std::vector<RSoVAllocation *> &inputAllocations,
+    RSoVAllocation *outputAllocation) {
   VkResult res;
 
   VkDescriptorSetAllocateInfo alloc_info = {
@@ -373,16 +366,9 @@
   res = vkAllocateDescriptorSets(mDevice, &alloc_info, mDescSet.data());
   rsAssert(res == VK_SUCCESS);
 
-  const VkWriteDescriptorSet writes[] = {
-      {
-          .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
-          .dstSet = mDescSet[0],
-          .dstBinding = 2,
-          .dstArrayElement = 0,
-          .descriptorCount = 1,
-          .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
-          .pBufferInfo = inputAllocation->getBufferInfo(),
-      },
+  // TODO: support setting up the binding(s) of global variables
+  uint32_t nBindings = inputAllocations.size() + 1;  // input + output.
+  std::vector<VkWriteDescriptorSet> writes{
       {
           .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
           .dstSet = mDescSet[0],
@@ -392,20 +378,20 @@
           .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
           .pBufferInfo = outputAllocation->getBufferInfo(),
       },
-#ifdef SUPPORT_GLOBAL_VARIABLES
-      {
-          .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
-          .dstSet = mDescSet[0],
-          .dstBinding = 0,
-          .dstArrayElement = 0,
-          .descriptorCount = 1,
-          .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
-          .pBufferInfo = somebuffer_info,
-      },
-#endif
   };
+  for (uint32_t i = 0; i < inputAllocations.size(); ++i) {
+    writes.push_back({
+        .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
+        .dstSet = mDescSet[0],
+        .dstBinding = 2 + i,  // input allocations start from binding #2
+        .dstArrayElement = 0,
+        .descriptorCount = 1,
+        .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
+        .pBufferInfo = inputAllocations[i]->getBufferInfo(),
+    });
+  }
 
-  vkUpdateDescriptorSets(mDevice, NELEM(writes), writes, 0, NULL);
+  vkUpdateDescriptorSets(mDevice, writes.size(), writes.data(), 0, NULL);
 
   ALOGV("%s succeeded.", __FUNCTION__);
 }
@@ -431,15 +417,16 @@
   ALOGV("%s succeeded.", __FUNCTION__);
 }
 
-void RSoVScript::runForEach(uint32_t slot,
-                            const RSoVAllocation *inputAllocation,
-                            RSoVAllocation *outputAllocation) {
+void RSoVScript::runForEach(
+    uint32_t slot, uint32_t inLen,
+    const std::vector<RSoVAllocation *> &inputAllocations,
+    RSoVAllocation *outputAllocation) {
   VkResult res;
 
-  InitDescriptorAndPipelineLayouts();
+  InitDescriptorAndPipelineLayouts(inLen);
   InitShader(slot);
   InitDescriptorPool();
-  InitDescriptorSet(inputAllocation, outputAllocation);
+  InitDescriptorSet(inputAllocations, outputAllocation);
   // InitPipelineCache();
   InitPipeline();
 
@@ -470,10 +457,10 @@
 
   vkCmdBindDescriptorSets(cmd, VK_PIPELINE_BIND_POINT_COMPUTE, mPipelineLayout,
                           0, mDescSet.size(), mDescSet.data(), 0, nullptr);
-
-  const uint32_t width = inputAllocation->getWidth();
-  const uint32_t height = rsMax(inputAllocation->getHeight(), 1U);
-  const uint32_t depth = rsMax(inputAllocation->getDepth(), 1U);
+  // Assuming all input allocations are of the same dimensionality
+  const uint32_t width = inputAllocations[0]->getWidth();
+  const uint32_t height = rsMax(inputAllocations[0]->getHeight(), 1U);
+  const uint32_t depth = rsMax(inputAllocations[0]->getDepth(), 1U);
   vkCmdDispatch(cmd, width, height, depth);
 
   res = vkEndCommandBuffer(cmd);
diff --git a/rsov/driver/rsovScript.h b/rsov/driver/rsovScript.h
index 8532c5d..d522a51 100644
--- a/rsov/driver/rsovScript.h
+++ b/rsov/driver/rsovScript.h
@@ -99,14 +99,15 @@
   RsdCpuReference::CpuScript *getCpuScript() const { return mCpuScript; }
 
  private:
-  void InitDescriptorAndPipelineLayouts();
+  void InitDescriptorAndPipelineLayouts(uint32_t inLen);
   void InitShader(uint32_t slot);
   void InitDescriptorPool();
-  void InitDescriptorSet(const RSoVAllocation *inputAllocation,
+  void InitDescriptorSet(const std::vector<RSoVAllocation *> &inputAllocations,
                          RSoVAllocation *outputAllocation);
   void InitPipelineCache();
   void InitPipeline();
-  void runForEach(uint32_t slot, const RSoVAllocation *input,
+  void runForEach(uint32_t slot, uint32_t inLen,
+                  const std::vector<RSoVAllocation *> &input,
                   RSoVAllocation *output);
 
   static constexpr int CPU_SCRIPT_MAGIC_NUMBER = 0x60000;
diff --git a/rsov/tests/RSoVTest/src/com/android/rs/rsov/test/RSoVTestCore.java b/rsov/tests/RSoVTest/src/com/android/rs/rsov/test/RSoVTestCore.java
index aace1ed..40bd307 100644
--- a/rsov/tests/RSoVTest/src/com/android/rs/rsov/test/RSoVTestCore.java
+++ b/rsov/tests/RSoVTest/src/com/android/rs/rsov/test/RSoVTestCore.java
@@ -56,7 +56,8 @@
 
         unitTests.add(new UT_invert(this, mCtx));
         unitTests.add(new UT_modulo(this, mCtx));
-        unitTests.add(new UT_multi(this, mCtx));
+        unitTests.add(new UT_multi_kernel(this, mCtx));
+        unitTests.add(new UT_multi_input(this, mCtx));
 
         UnitTest[] uta = new UnitTest[unitTests.size()];
         uta = unitTests.toArray(uta);
diff --git a/rsov/tests/RSoVTest/src/com/android/rs/rsov/test/UT_multi_input.java b/rsov/tests/RSoVTest/src/com/android/rs/rsov/test/UT_multi_input.java
new file mode 100644
index 0000000..ebb3853
--- /dev/null
+++ b/rsov/tests/RSoVTest/src/com/android/rs/rsov/test/UT_multi_input.java
@@ -0,0 +1,129 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.android.rs.rsov.test;
+
+import android.content.Context;
+import android.renderscript.Allocation;
+import android.renderscript.Element;
+import android.renderscript.RenderScript;
+import android.renderscript.Type;
+import android.util.Log;
+
+/**
+ * Unit test for a forEach kernel that takes two input allocations.
+ *
+ * Fills two float allocations with random values, runs the sum2 kernel from
+ * multi_input.rs over them, and checks that the inputs are left unchanged and
+ * that every output element equals the sum of the corresponding inputs.
+ */
+public class UT_multi_input extends UnitTest {
+    private Allocation Ain0;
+    private Allocation Ain1;
+
+    private Allocation Out0;
+
+    // Number of elements in each (one-dimensional) allocation.
+    private final int Xdim = 100;
+    // Maximum absolute difference accepted between expected and actual sums.
+    private final float tolerance = 1e-6f;
+
+    protected UT_multi_input(RSoVTestCore rstc, Context ctx) {
+        super(rstc, "Foreach Multi-input", ctx);
+    }
+
+    /**
+     * Creates the two input allocations and the output allocation, all typed
+     * as F32 with Xdim elements.
+     *
+     * @param RS the RenderScript context used to create the allocations
+     * @param s  the script instance (not used here)
+     */
+    private void initializeGlobals(RenderScript RS, ScriptC_multi_input s) {
+        Type.Builder floatBuilder = new Type.Builder(RS, Element.F32(RS));
+
+        floatBuilder.setX(Xdim);
+
+        Ain0 = Allocation.createTyped(RS, floatBuilder.create());
+        Ain1 = Allocation.createTyped(RS, floatBuilder.create());
+        Out0 = Allocation.createTyped(RS, floatBuilder.create());
+    }
+
+    /**
+     * Runs the kernel and reports the result through passTest()/failTest().
+     */
+    public void run() {
+        RenderScript pRS = RenderScript.create(mCtx);
+        ScriptC_multi_input s = new ScriptC_multi_input(pRS);
+
+        initializeGlobals(pRS, s);
+
+        float a[] = new float[Xdim];
+        float b[] = new float[Xdim];
+
+        java.util.Random rand = new java.util.Random();
+
+        for (int i = 0; i < Xdim; i++) {
+            a[i] = rand.nextFloat();
+            b[i] = rand.nextFloat();
+        }
+
+        Ain0.copyFrom(a);
+        Ain1.copyFrom(b);
+
+        s.forEach_sum2(Ain0, Ain1, Out0);
+
+        float out0[] = new float[Xdim];
+        float ain0[] = new float[Xdim];
+        float ain1[] = new float[Xdim];
+        Ain0.copyTo(ain0);
+        Ain1.copyTo(ain1);
+        Out0.copyTo(out0);
+
+        pRS.finish();
+        pRS.destroy();
+
+        boolean failed = false;
+        for (int i = 0; i < Xdim; i++) {
+            // The kernel must not modify its inputs.
+            if (ain0[i] != a[i]) {
+                Log.e(name, "Ain0 was " + a[i] + " but changed to " + ain0[i]);
+                failed = true;
+                break;
+            }
+            if (ain1[i] != b[i]) {
+                Log.e(name, "Ain1 was " + b[i] + " but changed to " + ain1[i]);
+                failed = true;
+                break;
+            }
+            float expected = a[i] + b[i];
+            // Compare the magnitude of the error so that results that are too
+            // large fail as well as results that are too small. Written as a
+            // negated "<=" so that a NaN output is also rejected.
+            if (!(Math.abs(expected - out0[i]) <= tolerance)) {
+                Log.e(name, "expects " + expected + " got " + out0[i]);
+                failed = true;
+                break;
+            }
+        }
+
+        if (failed) {
+            failTest();
+        } else {
+            passTest();
+        }
+    }
+}
diff --git a/rsov/tests/RSoVTest/src/com/android/rs/rsov/test/UT_multi.java b/rsov/tests/RSoVTest/src/com/android/rs/rsov/test/UT_multi_kernel.java
similarity index 92%
rename from rsov/tests/RSoVTest/src/com/android/rs/rsov/test/UT_multi.java
rename to rsov/tests/RSoVTest/src/com/android/rs/rsov/test/UT_multi_kernel.java
index 191bfa3..980e6e5 100644
--- a/rsov/tests/RSoVTest/src/com/android/rs/rsov/test/UT_multi.java
+++ b/rsov/tests/RSoVTest/src/com/android/rs/rsov/test/UT_multi_kernel.java
@@ -23,14 +23,14 @@
 import android.renderscript.Type;
 import android.util.Log;
 
-public class UT_multi extends UnitTest {
-    protected UT_multi(RSoVTestCore rstc, Context ctx) {
-        super(rstc, "multi", ctx);
+public class UT_multi_kernel extends UnitTest {
+    protected UT_multi_kernel(RSoVTestCore rstc, Context ctx) {
+        super(rstc, "multi_kernel", ctx);
     }
 
     private boolean Test(int width, int height, int depth) {
         RenderScript pRS = RenderScript.create(mCtx);
-        ScriptC_multi s = new ScriptC_multi(pRS);
+        ScriptC_multi_kernel s = new ScriptC_multi_kernel(pRS);
 
         Type.Builder typeBuilder = new Type.Builder(pRS, Element.F32_4(pRS));
         typeBuilder.setX(width);
diff --git a/rsov/tests/RSoVTest/src/com/android/rs/rsov/test/multi_input.rs b/rsov/tests/RSoVTest/src/com/android/rs/rsov/test/multi_input.rs
new file mode 100644
index 0000000..a05fa5b
--- /dev/null
+++ b/rsov/tests/RSoVTest/src/com/android/rs/rsov/test/multi_input.rs
@@ -0,0 +1,24 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma version(1)
+
+#pragma rs java_package_name(com.android.rs.rsov.test)
+
+float RS_KERNEL sum2(float in0, float in1, uint32_t x) {  // x is not referenced in the body
+    return in0 + in1;  // sum of the two input values
+}
+
diff --git a/rsov/tests/RSoVTest/src/com/android/rs/rsov/test/multi.rs b/rsov/tests/RSoVTest/src/com/android/rs/rsov/test/multi_kernel.rs
similarity index 100%
rename from rsov/tests/RSoVTest/src/com/android/rs/rsov/test/multi.rs
rename to rsov/tests/RSoVTest/src/com/android/rs/rsov/test/multi_kernel.rs