Merge "Runtime support for mapping kernels with multiple input allocations"
diff --git a/rsov/driver/rsovScript.cpp b/rsov/driver/rsovScript.cpp
index 9037370..ef0d437 100644
--- a/rsov/driver/rsovScript.cpp
+++ b/rsov/driver/rsovScript.cpp
@@ -148,14 +148,14 @@
const void *usr, uint32_t usrLen,
const RsScriptCall *sc) {
// TODO: Handle kernel without input Allocation
- // TODO: Handle multi-input kernel
- rsAssert(ains && inLen == 1);
-
- RSoVAllocation *inputAllocation =
- static_cast<RSoVAllocation *>(ains[0]->mHal.drv);
+ rsAssert(ains);
+ std::vector<RSoVAllocation *> inputAllocations(inLen);
+ for (uint32_t i = 0; i < inLen; ++i) {
+ inputAllocations[i] = static_cast<RSoVAllocation *>(ains[i]->mHal.drv);
+ }
RSoVAllocation *outputAllocation =
static_cast<RSoVAllocation *>(aout->mHal.drv);
- runForEach(slot, inputAllocation, outputAllocation);
+ runForEach(slot, inLen, inputAllocations, outputAllocation);
}
void RSoVScript::invokeReduce(uint32_t slot, const Allocation **ains,
@@ -226,39 +226,37 @@
return 0;
}
-void RSoVScript::InitDescriptorAndPipelineLayouts() {
- VkDescriptorSetLayoutBinding layout_bindings[] = {
+void RSoVScript::InitDescriptorAndPipelineLayouts(uint32_t inLen) {
+ // TODO: global variables
+ // TODO: kernels with zero output allocations
+ std::vector<VkDescriptorSetLayoutBinding> layout_bindings{
{
- .binding = 2,
- .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
- .descriptorCount = 1,
- .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
- .pImmutableSamplers = nullptr,
- },
- {
+ // for the output allocation
.binding = 1,
.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
.descriptorCount = 1,
.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
.pImmutableSamplers = nullptr,
},
-#ifdef SUPPORT_GLOBAL_VARIABLES
- {
- .binding = 0,
- .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
- .descriptorCount = 1,
- .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
- .pImmutableSamplers = nullptr,
- }
-#endif
};
+ // initialize descriptors for input allocations
+ for (uint32_t i = 0; i < inLen; ++i) {
+ layout_bindings.push_back({
+ .binding = i + 2, // input allocations start from binding #2
+ .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
+ .descriptorCount = 1,
+ .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
+ .pImmutableSamplers = nullptr,
+ });
+ }
+
VkDescriptorSetLayoutCreateInfo descriptor_layout = {
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
- .bindingCount = NELEM(layout_bindings),
- .pBindings = layout_bindings,
+ .bindingCount = static_cast<uint32_t>(layout_bindings.size()),
+ .pBindings = layout_bindings.data(),
};
VkResult res;
@@ -271,13 +269,7 @@
"vkCreateDescriptorSetLayout() returns %d", res);
}
rsAssert(res == VK_SUCCESS);
- /*
- VkPushConstantRange pushConstantRange[] = { {
- .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
- .offset = 0,
- .size = 16
- } };
- */
+
/* Now use the descriptor layout to create a pipeline layout */
VkPipelineLayoutCreateInfo pPipelineLayoutCreateInfo = {
.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
@@ -357,8 +349,9 @@
ALOGV("%s succeeded.", __FUNCTION__);
}
-void RSoVScript::InitDescriptorSet(const RSoVAllocation *inputAllocation,
- RSoVAllocation *outputAllocation) {
+void RSoVScript::InitDescriptorSet(
+ const std::vector<RSoVAllocation *> &inputAllocations,
+ RSoVAllocation *outputAllocation) {
VkResult res;
VkDescriptorSetAllocateInfo alloc_info = {
@@ -373,16 +366,9 @@
res = vkAllocateDescriptorSets(mDevice, &alloc_info, mDescSet.data());
rsAssert(res == VK_SUCCESS);
- const VkWriteDescriptorSet writes[] = {
- {
- .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
- .dstSet = mDescSet[0],
- .dstBinding = 2,
- .dstArrayElement = 0,
- .descriptorCount = 1,
- .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
- .pBufferInfo = inputAllocation->getBufferInfo(),
- },
+ // TODO: support setting up the binding(s) of global variables
+ uint32_t nBindings = inputAllocations.size() + 1; // input + output.
+ std::vector<VkWriteDescriptorSet> writes{
{
.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
.dstSet = mDescSet[0],
@@ -392,20 +378,20 @@
.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
.pBufferInfo = outputAllocation->getBufferInfo(),
},
-#ifdef SUPPORT_GLOBAL_VARIABLES
- {
- .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
- .dstSet = mDescSet[0],
- .dstBinding = 0,
- .dstArrayElement = 0,
- .descriptorCount = 1,
- .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
- .pBufferInfo = somebuffer_info,
- },
-#endif
};
+ for (uint32_t i = 0; i < inputAllocations.size(); ++i) {
+ writes.push_back({
+ .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
+ .dstSet = mDescSet[0],
+ .dstBinding = 2 + i, // input allocations start from binding #2
+ .dstArrayElement = 0,
+ .descriptorCount = 1,
+ .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
+ .pBufferInfo = inputAllocations[i]->getBufferInfo(),
+ });
+ }
- vkUpdateDescriptorSets(mDevice, NELEM(writes), writes, 0, NULL);
+ vkUpdateDescriptorSets(mDevice, writes.size(), writes.data(), 0, NULL);
ALOGV("%s succeeded.", __FUNCTION__);
}
@@ -431,15 +417,16 @@
ALOGV("%s succeeded.", __FUNCTION__);
}
-void RSoVScript::runForEach(uint32_t slot,
- const RSoVAllocation *inputAllocation,
- RSoVAllocation *outputAllocation) {
+void RSoVScript::runForEach(
+ uint32_t slot, uint32_t inLen,
+ const std::vector<RSoVAllocation *> &inputAllocations,
+ RSoVAllocation *outputAllocation) {
VkResult res;
- InitDescriptorAndPipelineLayouts();
+ InitDescriptorAndPipelineLayouts(inLen);
InitShader(slot);
InitDescriptorPool();
- InitDescriptorSet(inputAllocation, outputAllocation);
+ InitDescriptorSet(inputAllocations, outputAllocation);
// InitPipelineCache();
InitPipeline();
@@ -470,10 +457,10 @@
vkCmdBindDescriptorSets(cmd, VK_PIPELINE_BIND_POINT_COMPUTE, mPipelineLayout,
0, mDescSet.size(), mDescSet.data(), 0, nullptr);
-
- const uint32_t width = inputAllocation->getWidth();
- const uint32_t height = rsMax(inputAllocation->getHeight(), 1U);
- const uint32_t depth = rsMax(inputAllocation->getDepth(), 1U);
+ // Assuming all input allocations are of the same dimensionality
+ const uint32_t width = inputAllocations[0]->getWidth();
+ const uint32_t height = rsMax(inputAllocations[0]->getHeight(), 1U);
+ const uint32_t depth = rsMax(inputAllocations[0]->getDepth(), 1U);
vkCmdDispatch(cmd, width, height, depth);
res = vkEndCommandBuffer(cmd);
diff --git a/rsov/driver/rsovScript.h b/rsov/driver/rsovScript.h
index 8532c5d..d522a51 100644
--- a/rsov/driver/rsovScript.h
+++ b/rsov/driver/rsovScript.h
@@ -99,14 +99,15 @@
RsdCpuReference::CpuScript *getCpuScript() const { return mCpuScript; }
private:
- void InitDescriptorAndPipelineLayouts();
+ void InitDescriptorAndPipelineLayouts(uint32_t inLen);
void InitShader(uint32_t slot);
void InitDescriptorPool();
- void InitDescriptorSet(const RSoVAllocation *inputAllocation,
+ void InitDescriptorSet(const std::vector<RSoVAllocation *> &inputAllocations,
RSoVAllocation *outputAllocation);
void InitPipelineCache();
void InitPipeline();
- void runForEach(uint32_t slot, const RSoVAllocation *input,
+ void runForEach(uint32_t slot, uint32_t inLen,
+ const std::vector<RSoVAllocation *> &input,
RSoVAllocation *output);
static constexpr int CPU_SCRIPT_MAGIC_NUMBER = 0x60000;
diff --git a/rsov/tests/RSoVTest/src/com/android/rs/rsov/test/RSoVTestCore.java b/rsov/tests/RSoVTest/src/com/android/rs/rsov/test/RSoVTestCore.java
index aace1ed..40bd307 100644
--- a/rsov/tests/RSoVTest/src/com/android/rs/rsov/test/RSoVTestCore.java
+++ b/rsov/tests/RSoVTest/src/com/android/rs/rsov/test/RSoVTestCore.java
@@ -56,7 +56,8 @@
unitTests.add(new UT_invert(this, mCtx));
unitTests.add(new UT_modulo(this, mCtx));
- unitTests.add(new UT_multi(this, mCtx));
+ unitTests.add(new UT_multi_kernel(this, mCtx));
+ unitTests.add(new UT_multi_input(this, mCtx));
UnitTest[] uta = new UnitTest[unitTests.size()];
uta = unitTests.toArray(uta);
diff --git a/rsov/tests/RSoVTest/src/com/android/rs/rsov/test/UT_multi_input.java b/rsov/tests/RSoVTest/src/com/android/rs/rsov/test/UT_multi_input.java
new file mode 100644
index 0000000..ebb3853
--- /dev/null
+++ b/rsov/tests/RSoVTest/src/com/android/rs/rsov/test/UT_multi_input.java
@@ -0,0 +1,108 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.android.rs.rsov.test;
+
+import android.content.Context;
+import android.renderscript.Allocation;
+import android.renderscript.Element;
+import android.renderscript.RenderScript;
+import android.renderscript.Type;
+import android.util.Log;
+
+/** Runs sum2 over two float inputs; verifies the output and that both inputs are unchanged. */
+public class UT_multi_input extends UnitTest {
+    private Allocation Ain0;
+    private Allocation Ain1;
+    private Allocation Out0;
+
+    private final int Xdim = 100;  // cells per allocation
+    private final float tolerance = 1e-6f;  // max accepted absolute error
+
+    protected UT_multi_input(RSoVTestCore rstc, Context ctx) {
+        super(rstc, "Foreach Multi-input", ctx);
+    }
+
+    /** Creates the input and output allocations, each an F32 type of width Xdim. */
+    private void initializeGlobals(RenderScript RS, ScriptC_multi_input s) {
+        Type.Builder floatBuilder = new Type.Builder(RS, Element.F32(RS));
+        floatBuilder.setX(Xdim);
+
+        Ain0 = Allocation.createTyped(RS, floatBuilder.create());
+        Ain1 = Allocation.createTyped(RS, floatBuilder.create());
+        Out0 = Allocation.createTyped(RS, floatBuilder.create());
+    }
+
+    /** Fills both inputs with random floats, launches sum2, then reports pass or fail. */
+    public void run() {
+        RenderScript pRS = RenderScript.create(mCtx);
+        ScriptC_multi_input s = new ScriptC_multi_input(pRS);
+
+        initializeGlobals(pRS, s);
+
+        float a[] = new float[Xdim];
+        float b[] = new float[Xdim];
+        java.util.Random rand = new java.util.Random();
+        for (int i = 0; i < Xdim; i++) {
+            a[i] = rand.nextFloat();
+            b[i] = rand.nextFloat();
+        }
+
+        Ain0.copyFrom(a);
+        Ain1.copyFrom(b);
+
+        s.forEach_sum2(Ain0, Ain1, Out0);
+
+        // Read the inputs back as well, so an accidental write to them is detected.
+        float out0[] = new float[Xdim];
+        float ain0[] = new float[Xdim];
+        float ain1[] = new float[Xdim];
+        Ain0.copyTo(ain0);
+        Ain1.copyTo(ain1);
+        Out0.copyTo(out0);
+
+        pRS.finish();
+        pRS.destroy();
+
+        boolean failed = false;
+        for (int i = 0; i < Xdim; i++) {
+            if (ain0[i] != a[i]) {
+                Log.e(name, "Ain0 was " + a[i] + " but changed to " + ain0[i]);
+                failed = true;
+                break;
+            }
+            if (ain1[i] != b[i]) {
+                Log.e(name, "Ain1 was " + b[i] + " but changed to " + ain1[i]);
+                failed = true;
+                break;
+            }
+            float expected = a[i] + b[i];
+            // Compare the magnitude of the error: a signed difference would accept any
+            // output larger than the expected sum. The negated form also rejects NaN.
+            if (!(Math.abs(expected - out0[i]) <= tolerance)) {
+                Log.e(name, "expects " + expected + " got " + out0[i]);
+                failed = true;
+                break;
+            }
+        }
+
+        if (failed) {
+            failTest();
+        } else {
+            passTest();
+        }
+    }
+}
diff --git a/rsov/tests/RSoVTest/src/com/android/rs/rsov/test/UT_multi.java b/rsov/tests/RSoVTest/src/com/android/rs/rsov/test/UT_multi_kernel.java
similarity index 92%
rename from rsov/tests/RSoVTest/src/com/android/rs/rsov/test/UT_multi.java
rename to rsov/tests/RSoVTest/src/com/android/rs/rsov/test/UT_multi_kernel.java
index 191bfa3..980e6e5 100644
--- a/rsov/tests/RSoVTest/src/com/android/rs/rsov/test/UT_multi.java
+++ b/rsov/tests/RSoVTest/src/com/android/rs/rsov/test/UT_multi_kernel.java
@@ -23,14 +23,14 @@
import android.renderscript.Type;
import android.util.Log;
-public class UT_multi extends UnitTest {
- protected UT_multi(RSoVTestCore rstc, Context ctx) {
- super(rstc, "multi", ctx);
+public class UT_multi_kernel extends UnitTest {
+ protected UT_multi_kernel(RSoVTestCore rstc, Context ctx) {
+ super(rstc, "multi_kernel", ctx);
}
private boolean Test(int width, int height, int depth) {
RenderScript pRS = RenderScript.create(mCtx);
- ScriptC_multi s = new ScriptC_multi(pRS);
+ ScriptC_multi_kernel s = new ScriptC_multi_kernel(pRS);
Type.Builder typeBuilder = new Type.Builder(pRS, Element.F32_4(pRS));
typeBuilder.setX(width);
diff --git a/rsov/tests/RSoVTest/src/com/android/rs/rsov/test/multi_input.rs b/rsov/tests/RSoVTest/src/com/android/rs/rsov/test/multi_input.rs
new file mode 100644
index 0000000..a05fa5b
--- /dev/null
+++ b/rsov/tests/RSoVTest/src/com/android/rs/rsov/test/multi_input.rs
@@ -0,0 +1,24 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma version(1)
+
+#pragma rs java_package_name(com.android.rs.rsov.test)
+
+// Kernel with two inputs: returns the sum of in0 and in1. The x argument is not read.
+float RS_KERNEL sum2(float in0, float in1, uint32_t x) {
+ return in0 + in1;
+}
diff --git a/rsov/tests/RSoVTest/src/com/android/rs/rsov/test/multi.rs b/rsov/tests/RSoVTest/src/com/android/rs/rsov/test/multi_kernel.rs
similarity index 100%
rename from rsov/tests/RSoVTest/src/com/android/rs/rsov/test/multi.rs
rename to rsov/tests/RSoVTest/src/com/android/rs/rsov/test/multi_kernel.rs