Snap for 4396223 from 1af33897ebdc15c41c94f93535df8904065d211b to oc-mr1-release

Change-Id: Icc95a2df808220c564540cb4c41e1bdc90ae93fe
diff --git a/nn/common/CpuExecutor.cpp b/nn/common/CpuExecutor.cpp
index 79f9255..9c6df76 100644
--- a/nn/common/CpuExecutor.cpp
+++ b/nn/common/CpuExecutor.cpp
@@ -79,6 +79,19 @@
     return true;
 }
 
+bool setRunTimePoolInfosFromHidlMemories(std::vector<RunTimePoolInfo>* poolInfos,
+                                         const hidl_vec<hidl_memory>& pools) {
+    poolInfos->resize(pools.size());
+    for (size_t i = 0; i < pools.size(); i++) {
+        auto& poolInfo = (*poolInfos)[i];
+        if (!poolInfo.set(pools[i])) {
+            LOG(ERROR) << "Could not map pool";
+            return false;
+        }
+    }
+    return true;
+}
+
 // Updates the RunTimeOperandInfo with the newly calculated shape.
 // Allocate the buffer if we need to.
 static bool setInfoAndAllocateIfNeeded(RunTimeOperandInfo* info, const Shape& shape) {
@@ -113,14 +126,15 @@
 // Ignore the .pools entry in model and request.  This will have been taken care of
 // by the caller.
 int CpuExecutor::run(const Model& model, const Request& request,
-                     const std::vector<RunTimePoolInfo>& runTimePoolInfos) {
+                     const std::vector<RunTimePoolInfo>& modelPoolInfos,
+                     const std::vector<RunTimePoolInfo>& requestPoolInfos) {
     VLOG(CPUEXE) << "CpuExecutor::run()";
     // VLOG(CPUEXE) << "model: " << toString(model);
     VLOG(CPUEXE) << "request: " << toString(request);
 
     mModel = &model;
     mRequest = &request; // TODO check if mRequest is needed
-    initializeRunTimeInfo(runTimePoolInfos);
+    initializeRunTimeInfo(modelPoolInfos, requestPoolInfos);
     // The model has serialized the operations in execution order.
     for (const auto& operation : model.operations) {
         int n = executeOperation(operation);
@@ -128,7 +142,10 @@
             return n;
         }
     }
-    for (auto runtimeInfo : runTimePoolInfos) {
+    for (auto runtimeInfo : modelPoolInfos) {
+        runtimeInfo.update();
+    }
+    for (auto runtimeInfo : requestPoolInfos) {
         runtimeInfo.update();
     }
     mModel = nullptr;
@@ -137,7 +154,8 @@
     return ANEURALNETWORKS_NO_ERROR;
 }
 
-bool CpuExecutor::initializeRunTimeInfo(const std::vector<RunTimePoolInfo>& runTimePoolInfos) {
+bool CpuExecutor::initializeRunTimeInfo(const std::vector<RunTimePoolInfo>& modelPoolInfos,
+                                        const std::vector<RunTimePoolInfo>& requestPoolInfos) {
     VLOG(CPUEXE) << "CpuExecutor::initializeRunTimeInfo";
     const size_t count = mModel->operands.size();
     mOperands.resize(count);
@@ -163,8 +181,8 @@
                 break;
             case OperandLifeTime::CONSTANT_REFERENCE: {
                 auto poolIndex = from.location.poolIndex;
-                nnAssert(poolIndex < runTimePoolInfos.size());
-                auto& r = runTimePoolInfos[poolIndex];
+                nnAssert(poolIndex < modelPoolInfos.size());
+                auto& r = modelPoolInfos[poolIndex];
                 to.buffer = r.buffer + from.location.offset;
                 to.numberOfUsesLeft = 0;
                 break;
@@ -183,7 +201,7 @@
 
     // Adjust the runtime info for the arguments passed to the model,
     // modifying the buffer location, and possibly the dimensions.
-    auto updateForArguments = [this, &runTimePoolInfos](const std::vector<uint32_t>& indexes,
+    auto updateForArguments = [this, &requestPoolInfos](const std::vector<uint32_t>& indexes,
                                   const hidl_vec<RequestArgument>& arguments) {
         nnAssert(indexes.size() == arguments.size());
         for (size_t i = 0; i < indexes.size(); i++) {
@@ -203,8 +221,8 @@
                 nnAssert(to.buffer == nullptr);
             } else {
                 auto poolIndex = from.location.poolIndex;
-                nnAssert(poolIndex < runTimePoolInfos.size());
-                auto& r = runTimePoolInfos[poolIndex];
+                nnAssert(poolIndex < requestPoolInfos.size());
+                auto& r = requestPoolInfos[poolIndex];
                 to.buffer = r.buffer + from.location.offset;
             }
         }
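
With this change CpuExecutor::run() takes two pool vectors: CONSTANT_REFERENCE
operands resolve their poolIndex against modelPoolInfos, while request inputs
and outputs resolve against requestPoolInfos. A minimal caller sketch using
only names introduced in this patch (error handling elided):

    std::vector<RunTimePoolInfo> modelPools, requestPools;
    if (!setRunTimePoolInfosFromHidlMemories(&modelPools, model.pools) ||
        !setRunTimePoolInfosFromHidlMemories(&requestPools, request.pools)) {
        return ANEURALNETWORKS_UNMAPPABLE;  // a pool could not be mapped
    }
    CpuExecutor executor;
    // Constants are read from modelPools; arguments are read from and written
    // to requestPools, which run() syncs back via RunTimePoolInfo::update().
    int n = executor.run(model, request, modelPools, requestPools);
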
diff --git a/nn/common/Utils.cpp b/nn/common/Utils.cpp
index f73b12c..2456267 100644
--- a/nn/common/Utils.cpp
+++ b/nn/common/Utils.cpp
@@ -245,6 +245,16 @@
     return extra;
 }
 
+void logModelToInfo(const Model& model) {
+    LOG(INFO) << "Model start";
+    LOG(INFO) << "operands" << toString(model.operands);
+    LOG(INFO) << "operations" << toString(model.operations);
+    LOG(INFO) << "inputIndexes" << toString(model.inputIndexes);
+    LOG(INFO) << "outputIndexes" << toString(model.outputIndexes);
+    LOG(INFO) << "operandValues size" << model.operandValues.size();
+    LOG(INFO) << "pools" << toString(model.pools);
+}
+
 // Validates the type. The used dimensions can be underspecified.
 int validateOperandType(const ANeuralNetworksOperandType& type, const char* tag,
                         bool allowPartial) {
diff --git a/nn/common/include/CpuExecutor.h b/nn/common/include/CpuExecutor.h
index dd92eaf..b765efc 100644
--- a/nn/common/include/CpuExecutor.h
+++ b/nn/common/include/CpuExecutor.h
@@ -55,10 +55,7 @@
     uint32_t numberOfUsesLeft;
 
     Shape shape() const {
-        return Shape{.type = type,
-                     .dimensions = dimensions,
-                     .scale = scale,
-                     .offset = zeroPoint};
+        return Shape{.type = type, .dimensions = dimensions, .scale = scale, .offset = zeroPoint};
     }
 };
 
@@ -72,6 +69,9 @@
     bool update();
 };
 
+bool setRunTimePoolInfosFromHidlMemories(std::vector<RunTimePoolInfo>* poolInfos,
+                                         const hidl_vec<hidl_memory>& pools);
+
 // This class is used to execute a model on the CPU.
 class CpuExecutor {
 public:
@@ -80,17 +80,17 @@
     // The model must outlive the executor.  We prevent it from being modified
     // while this is executing.
     int run(const Model& model, const Request& request,
-            const std::vector<RunTimePoolInfo>& runTimePoolInfos);
+            const std::vector<RunTimePoolInfo>& modelPoolInfos,
+            const std::vector<RunTimePoolInfo>& requestPoolInfos);
 
 private:
-    bool initializeRunTimeInfo(const std::vector<RunTimePoolInfo>& runTimePoolInfos);
+    bool initializeRunTimeInfo(const std::vector<RunTimePoolInfo>& modelPoolInfos,
+                               const std::vector<RunTimePoolInfo>& requestPoolInfos);
     // Runs one operation of the graph.
     int executeOperation(const Operation& entry);
     // Decrement the usage count for the operands listed.  Frees the memory
     // allocated for any temporary variable with a count of zero.
     void freeNoLongerUsedOperands(const std::vector<uint32_t>& inputs);
-    void setLocationAndUses(RunTimeOperandInfo* to, const DataLocation& location,
-                            const std::vector<RunTimePoolInfo>& runTimePoolInfos);
 
     // The model and the request that we'll execute. Only valid while run()
     // is being executed.
diff --git a/nn/common/include/Utils.h b/nn/common/include/Utils.h
index da03559..3eebf26 100644
--- a/nn/common/include/Utils.h
+++ b/nn/common/include/Utils.h
@@ -106,6 +106,9 @@
 // to determine what this should be.
 uint32_t alignBytesNeeded(uint32_t index, size_t length);
 
+// Does a detailed LOG(INFO) of the model.
+void logModelToInfo(const Model& model);
+
 inline void setFromIntList(hidl_vec<uint32_t>* vec, uint32_t count, const uint32_t* data) {
     vec->resize(count);
     for (uint32_t i = 0; i < count; i++) {
diff --git a/nn/driver/sample/SampleDriver.cpp b/nn/driver/sample/SampleDriver.cpp
index 902d4e8..faeecae 100644
--- a/nn/driver/sample/SampleDriver.cpp
+++ b/nn/driver/sample/SampleDriver.cpp
@@ -31,7 +31,10 @@
 
 Return<ErrorStatus> SampleDriver::prepareModel(const Model& model,
                                                const sp<IPreparedModelCallback>& callback) {
-    VLOG(DRIVER) << "prepareModel(" << toString(model) << ")"; // TODO errror
+    if (VLOG_IS_ON(DRIVER)) {
+        VLOG(DRIVER) << "prepareModel";
+        logModelToInfo(model);
+    }
     if (callback.get() == nullptr) {
         LOG(ERROR) << "invalid callback passed to prepareModel";
         return ErrorStatus::INVALID_ARGUMENT;
@@ -42,9 +45,12 @@
     }
 
     // TODO: make asynchronous later
-    sp<IPreparedModel> preparedModel = new SamplePreparedModel(model);
+    sp<SamplePreparedModel> preparedModel = new SamplePreparedModel(model);
+    if (!preparedModel->initialize()) {
+        callback->notify(ErrorStatus::INVALID_ARGUMENT, nullptr);
+        return ErrorStatus::INVALID_ARGUMENT;
+    }
     callback->notify(ErrorStatus::NONE, preparedModel);
-
     return ErrorStatus::NONE;
 }
 
@@ -64,27 +70,20 @@
     return 1;
 }
 
-static bool mapPools(std::vector<RunTimePoolInfo>* poolInfos, const hidl_vec<hidl_memory>& pools) {
-    poolInfos->resize(pools.size());
-    for (size_t i = 0; i < pools.size(); i++) {
-        auto& poolInfo = (*poolInfos)[i];
-        if (!poolInfo.set(pools[i])) {
-            return false;
-        }
-    }
-    return true;
+bool SamplePreparedModel::initialize() {
+    return setRunTimePoolInfosFromHidlMemories(&mPoolInfos, mModel.pools);
 }
 
 void SamplePreparedModel::asyncExecute(const Request& request,
                                        const sp<IExecutionCallback>& callback) {
-    std::vector<RunTimePoolInfo> poolInfo;
-    if (!mapPools(&poolInfo, request.pools)) {
+    std::vector<RunTimePoolInfo> requestPoolInfos;
+    if (!setRunTimePoolInfosFromHidlMemories(&requestPoolInfos, request.pools)) {
         callback->notify(ErrorStatus::GENERAL_FAILURE);
         return;
     }
 
     CpuExecutor executor;
-    int n = executor.run(mModel, request, poolInfo);
+    int n = executor.run(mModel, request, mPoolInfos, requestPoolInfos);
     VLOG(DRIVER) << "executor.run returned " << n;
     ErrorStatus executionStatus =
             n == ANEURALNETWORKS_NO_ERROR ? ErrorStatus::NONE : ErrorStatus::GENERAL_FAILURE;
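
The driver now maps the model's constant pools once, in
SamplePreparedModel::initialize() at prepare time, and maps only request.pools
on each execution. A sketch of the resulting lifecycle (the request and
callback objects are illustrative):

    // Prepare once: initialize() maps model.pools into mPoolInfos.
    sp<SamplePreparedModel> prepared = new SamplePreparedModel(model);
    if (!prepared->initialize()) {
        callback->notify(ErrorStatus::INVALID_ARGUMENT, nullptr);
        return ErrorStatus::INVALID_ARGUMENT;
    }
    // Execute any number of times: each call maps only that request's
    // pools and reuses the prepare-time mPoolInfos mapping.
    prepared->execute(requestA, executionCallbackA);
    prepared->execute(requestB, executionCallbackB);
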
diff --git a/nn/driver/sample/SampleDriver.h b/nn/driver/sample/SampleDriver.h
index 51581fe..7e95c95 100644
--- a/nn/driver/sample/SampleDriver.h
+++ b/nn/driver/sample/SampleDriver.h
@@ -17,6 +17,7 @@
 #ifndef ANDROID_ML_NN_SAMPLE_DRIVER_SAMPLE_DRIVER_H
 #define ANDROID_ML_NN_SAMPLE_DRIVER_SAMPLE_DRIVER_H
 
+#include "CpuExecutor.h"
 #include "HalInterfaces.h"
 #include "NeuralNetworks.h"
 
@@ -52,12 +53,15 @@
           : // Make a copy of the model, as we need to preserve it.
             mModel(model) {}
     ~SamplePreparedModel() override {}
+    bool initialize();
     Return<ErrorStatus> execute(const Request& request,
                                 const sp<IExecutionCallback>& callback) override;
 
 private:
     void asyncExecute(const Request& request, const sp<IExecutionCallback>& callback);
+
     Model mModel;
+    std::vector<RunTimePoolInfo> mPoolInfos;
 };
 
 } // namespace sample_driver
diff --git a/nn/runtime/ExecutionBuilder.cpp b/nn/runtime/ExecutionBuilder.cpp
index 56dc723..077e068 100644
--- a/nn/runtime/ExecutionBuilder.cpp
+++ b/nn/runtime/ExecutionBuilder.cpp
@@ -96,8 +96,7 @@
         mModel(compilation->mModel),
         mPlan(&compilation->mPlan),
         mInputs(mModel->inputCount()),
-        mOutputs(mModel->outputCount()),
-        mMemories(mModel->getMemories()) {
+        mOutputs(mModel->outputCount()) {
     VLOG(EXECUTION) << "ExecutionBuilder::ExecutionBuilder";
 }
 
@@ -600,10 +599,11 @@
 }
 
 static void asyncStartComputeOnCpu(const Model& model, const Request& request,
-                                   const std::vector<RunTimePoolInfo>& runTimePoolInfos,
+                                   const std::vector<RunTimePoolInfo>& modelPoolInfos,
+                                   const std::vector<RunTimePoolInfo>& requestPoolInfos,
                                    const sp<IExecutionCallback>& executionCallback) {
     CpuExecutor executor;
-    int err = executor.run(model, request, runTimePoolInfos);
+    int err = executor.run(model, request, modelPoolInfos, requestPoolInfos);
     ErrorStatus status = err == ANEURALNETWORKS_NO_ERROR ?
             ErrorStatus::NONE : ErrorStatus::GENERAL_FAILURE;
     executionCallback->notify(status);
@@ -622,23 +622,30 @@
     sp<ExecutionCallback> executionCallback = new ExecutionCallback();
     *synchronizationCallback = nullptr;
 
-    std::vector<RunTimePoolInfo> runTimePoolInfos;
+    std::vector<RunTimePoolInfo> modelPoolInfos;
+    if (!setRunTimePoolInfosFromHidlMemories(&modelPoolInfos, model.pools)) {
+        return ANEURALNETWORKS_UNMAPPABLE;
+    }
+
+    std::vector<RunTimePoolInfo> requestPoolInfos;
     uint32_t count = mMemories.size();
-    runTimePoolInfos.resize(count);
+    requestPoolInfos.resize(count);
     for (uint32_t i = 0; i < count; i++) {
         const Memory* mem = mMemories[i];
-        runTimePoolInfos[i].set(mem->getHidlMemory());
+        if (!requestPoolInfos[i].set(mem->getHidlMemory())) {
+            return ANEURALNETWORKS_UNMAPPABLE;
+        }
     }
     // Create as many pools as there are inputs / outputs.
-    auto fixPointerArguments = [&runTimePoolInfos](std::vector<ModelArgumentInfo>& argumentInfos) {
+    auto fixPointerArguments = [&requestPoolInfos](std::vector<ModelArgumentInfo>& argumentInfos) {
         for (ModelArgumentInfo& argumentInfo : argumentInfos) {
             if (argumentInfo.state == ModelArgumentInfo::POINTER) {
                 RunTimePoolInfo runTimeInfo = {
                             .buffer = static_cast<uint8_t*>(argumentInfo.buffer)};
                 argumentInfo.locationAndLength.poolIndex =
-                            static_cast<uint32_t>(runTimePoolInfos.size());
+                            static_cast<uint32_t>(requestPoolInfos.size());
                 argumentInfo.locationAndLength.offset = 0;
-                runTimePoolInfos.push_back(runTimeInfo);
+                requestPoolInfos.push_back(runTimeInfo);
             }
         }
     };
@@ -651,7 +658,8 @@
 
     // TODO: should model be moved with a std::cref?
     std::thread thread(asyncStartComputeOnCpu, model, std::move(request),
-                       std::move(runTimePoolInfos), executionCallback);
+                       std::move(modelPoolInfos), std::move(requestPoolInfos),
+                       executionCallback);
     executionCallback->bind_thread(std::move(thread));
 
     *synchronizationCallback = executionCallback;
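
fixPointerArguments gives each POINTER argument a one-entry pool of its own:
the user's buffer becomes the pool base, the argument's poolIndex refers to
the appended entry, and its offset is zero, so CpuExecutor later computes
to.buffer = r.buffer + 0, i.e. the user's pointer. Conceptually (a sketch;
userBuffer is hypothetical):

    float userBuffer[16];  // application-owned input or output buffer
    // After fixPointerArguments handles this argument, with k being the
    // number of pools before the append:
    //   requestPoolInfos[k].buffer               == (uint8_t*) userBuffer
    //   argumentInfo.locationAndLength.poolIndex == k
    //   argumentInfo.locationAndLength.offset    == 0
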
diff --git a/nn/runtime/ExecutionPlan.cpp b/nn/runtime/ExecutionPlan.cpp
index d2f74d5..009fc33 100644
--- a/nn/runtime/ExecutionPlan.cpp
+++ b/nn/runtime/ExecutionPlan.cpp
@@ -360,9 +360,11 @@
 void ExecutionStep::dump() const {
     Model model;
     mSubModel->setHidlModel(&model);
-    VLOG(COMPILATION) << "ExecutionStep#" << mIndex
-                      << " for " << (mDevice == nullptr ? "CPU" : mDevice->getName())
-                      << " submodel: " << toString(model);
+    if (VLOG_IS_ON(COMPILATION)) {
+        VLOG(COMPILATION) << "ExecutionStep#" << mIndex
+                          << " for " << (mDevice == nullptr ? "CPU" : mDevice->getName());
+        logModelToInfo(model);
+    }
 }
 
 int ExecutionPlan::CompoundBody::finish(const ModelBuilder* fromModel) {
@@ -750,8 +752,8 @@
     if (VLOG_IS_ON(COMPILATION)) {
         Model model;
         setHidlModel(&model);
-        VLOG(COMPILATION) << "ModelBuilder::partitionTheWork: original model: "
-                          << toString(model);
+        VLOG(COMPILATION) << "ModelBuilder::partitionTheWork: original model: ";
+        logModelToInfo(model);
         plan->dump();
     }
     return n;
diff --git a/nn/runtime/Memory.cpp b/nn/runtime/Memory.cpp
index 9b05dbf..5660e02 100644
--- a/nn/runtime/Memory.cpp
+++ b/nn/runtime/Memory.cpp
@@ -109,12 +109,14 @@
 }
 
 uint32_t MemoryTracker::add(const Memory* memory) {
+    VLOG(MODEL) << __func__ << " for " << memory;
     // See if we already have this memory. If so,
     // return its index.
     auto i = mKnown.find(memory);
     if (i != mKnown.end()) {
         return i->second;
     }
+    VLOG(MODEL) << "It's new";
     // It's a new one.  Save it and assign an index to it.
     size_t next = mKnown.size();
     if (next > 0xFFFFFFFF) {
diff --git a/nn/runtime/ModelBuilder.cpp b/nn/runtime/ModelBuilder.cpp
index 2274b89..f446bee 100644
--- a/nn/runtime/ModelBuilder.cpp
+++ b/nn/runtime/ModelBuilder.cpp
@@ -58,6 +58,7 @@
 }
 
 int ModelBuilder::setOperandValue(uint32_t index, const void* buffer, size_t length) {
+    VLOG(MODEL) << __func__ << " for operand " << index << " size " << length;
     if (index >= operandCount()) {
         LOG(ERROR) << "ANeuralNetworksModel_setOperandValue setting operand " << index << " of "
                    << operandCount();
@@ -76,25 +77,81 @@
                             .offset = 0,
                             .length = 0};
     } else {
+        if (length > 0xFFFFFFFF) {
+            LOG(ERROR) << "ANeuralNetworksModel_setOperandValue value length of " << length
+                       << " exceeds max size";
+            return ANEURALNETWORKS_BAD_DATA;
+        }
+        uint32_t valueLength = static_cast<uint32_t>(length);
         uint32_t neededLength = sizeOfData(operand.type, operand.dimensions);
-        if (neededLength != length) {
-            LOG(ERROR) << "ANeuralNetworksModel_setOperandValue setting " << length
+        if (neededLength != valueLength) {
+            LOG(ERROR) << "ANeuralNetworksModel_setOperandValue setting " << valueLength
                        << " bytes when needing " << neededLength;
             return ANEURALNETWORKS_BAD_DATA;
         }
-        uint32_t existingSize = static_cast<uint32_t>(mOperandValues.size());
-        uint32_t extraBytes = alignBytesNeeded(existingSize, length);
-        mOperandValues.resize(existingSize + extraBytes + length);
-        operand.lifetime = OperandLifeTime::CONSTANT_COPY;
-        operand.location = {
-                    .poolIndex = 0, .offset = existingSize + extraBytes, .length = neededLength};
-        memcpy(&mOperandValues[operand.location.offset], buffer, length);
+        if (valueLength <= ANEURALNETWORKS_MAX_SIZE_OF_IMMEDIATELY_COPIED_VALUES) {
+            uint32_t existingSize = static_cast<uint32_t>(mSmallOperandValues.size());
+            uint32_t extraBytes = alignBytesNeeded(existingSize, valueLength);
+            mSmallOperandValues.resize(existingSize + extraBytes + valueLength);
+            operand.lifetime = OperandLifeTime::CONSTANT_COPY;
+            operand.location = {
+                .poolIndex = 0, .offset = existingSize + extraBytes, .length = neededLength};
+            memcpy(&mSmallOperandValues[operand.location.offset], buffer, valueLength);
+            VLOG(MODEL) << "Copied small value to offset " << operand.location.offset;
+        } else {
+            VLOG(MODEL) << "Saving large value";
+            operand.lifetime = OperandLifeTime::CONSTANT_REFERENCE;
+            // The values for poolIndex and offset will be set when the model is finished.
+            operand.location = {.poolIndex = 0, .offset = 0, .length = valueLength};
+            // We keep track of the buffers. We'll allocate the shared memory only
+            // once we know the total size, to avoid needless copies.
+            mLargeOperandValues.push_back(LargeValue{.operandIndex = index, .buffer = buffer});
+        }
+    }
+    return ANEURALNETWORKS_NO_ERROR;
+}
+
+int ModelBuilder::copyLargeValuesToSharedMemory() {
+    VLOG(MODEL) << __func__ << " has " << mLargeOperandValues.size() << " values.";
+    if (!mLargeOperandValues.empty()) {
+        // Calculate the size of the shared memory needed for all the large values.
+        // Also set the offset for each value within the memory.
+        size_t poolSize = 0;
+        for (LargeValue& l: mLargeOperandValues) {
+            Operand& operand = mOperands[l.operandIndex];
+            nnAssert(operand.lifetime == OperandLifeTime::CONSTANT_REFERENCE);
+            poolSize += alignBytesNeeded(poolSize, operand.location.length);
+            operand.location.offset = poolSize;
+            poolSize += operand.location.length;
+        }
+
+        // Allocate the shared memory.
+        int n = mLargeValueMemory.create(poolSize);
+        if (n != ANEURALNETWORKS_NO_ERROR) {
+            return n;
+        }
+        uint8_t* memoryPointer = nullptr;
+        n = mLargeValueMemory.getPointer(&memoryPointer);
+        if (n != ANEURALNETWORKS_NO_ERROR) {
+            return n;
+        }
+        uint32_t poolIndex = mMemories.add(&mLargeValueMemory);
+        VLOG(MODEL) << "Allocated large value pool of size " << poolSize << " at index "
+                    << poolIndex;
+
+        // Copy the values to this memory.
+        for (LargeValue& l: mLargeOperandValues) {
+            Operand& operand = mOperands[l.operandIndex];
+            operand.location.poolIndex = poolIndex;
+            memcpy(memoryPointer + operand.location.offset, l.buffer, operand.location.length);
+        }
     }
     return ANEURALNETWORKS_NO_ERROR;
 }
 
 int ModelBuilder::setOperandValueFromMemory(uint32_t index, const Memory* memory, uint32_t offset,
                                             size_t length) {
+    VLOG(MODEL) << __func__ << " for operand " << index << " offset " << offset << " size " << length;
     if (index >= operandCount()) {
         LOG(ERROR) << "ANeuralNetworksModel_setOperandValueFromMemory setting operand " << index
                    << " of " << operandCount();
@@ -223,8 +280,14 @@
         return ANEURALNETWORKS_BAD_STATE;
     }
 
+    int n = copyLargeValuesToSharedMemory();
+    if (n != ANEURALNETWORKS_NO_ERROR) {
+        return n;
+    }
+
     // We sort the operations so that they will be in the appropriate
     // order for a single-threaded, op at a time execution.
+    // TODO: we don't need this if we always run the partitioner.
     sortIntoRunOrder();
     mCompletedModel = true;
     return ANEURALNETWORKS_NO_ERROR;
@@ -282,7 +345,7 @@
     model->operations = mOperations;
     model->inputIndexes = mInputIndexes;
     model->outputIndexes = mOutputIndexes;
-    model->operandValues = mOperandValues;
+    model->operandValues = mSmallOperandValues;
 
     uint32_t count = mMemories.size();
     model->pools.resize(count);
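
A worked example of the new small/large split, assuming
alignBytesNeeded(offset, length) returns the padding required before placing
the next value (its exact rule is defined in Utils.h):

    // A: 4-byte scalar   -> <= 128 bytes, appended to mSmallOperandValues
    //                       (CONSTANT_COPY, poolIndex 0).
    // B: 200-byte tensor -> > 128 bytes, deferred to mLargeOperandValues
    //                       (CONSTANT_REFERENCE).
    // C: 300-byte tensor -> > 128 bytes, deferred to mLargeOperandValues.
    //
    // At finish(), copyLargeValuesToSharedMemory() lays out the deferred
    // values:
    //   B.offset = 0;                                  poolSize = 200
    //   C.offset = 200 + alignBytesNeeded(200, 300);   poolSize = C.offset + 300
    // It then creates one shared memory of poolSize bytes, registers it in
    // mMemories, and memcpy's each buffer to its offset.
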
diff --git a/nn/runtime/ModelBuilder.h b/nn/runtime/ModelBuilder.h
index edb6466..d5ab078 100644
--- a/nn/runtime/ModelBuilder.h
+++ b/nn/runtime/ModelBuilder.h
@@ -78,7 +78,7 @@
     const MemoryTracker& getMemories() const { return mMemories; }
     const std::vector<Operation>& getOperations() const { return mOperations; }
     const uint8_t* getPointerToOperandValue(uint32_t offset) const {
-        return mOperandValues.data() + offset;
+        return mSmallOperandValues.data() + offset;
     }
 
     int partitionTheWork(const std::vector<std::shared_ptr<Device>>& devices,
@@ -99,12 +99,9 @@
     // Sorts the operations to be in the correct order for single threaded
     // node-at-a-time execution.
     void sortIntoRunOrder();
-    /*
-    int32_t getOperandIndex(const ArrayInfo& info, uint32_t listIndex) const {
-        nnAssert(listIndex < info.count);
-        return mOperandIndexes[info.offset + listIndex];
-    }
-    */
+
+    // Copies the large values to shared memory, if there are any.
+    int copyLargeValuesToSharedMemory();
 
     // The operations of the graph.
     std::vector<Operation> mOperations;
@@ -118,11 +115,18 @@
 
     MemoryTracker mMemories;
 
-    // The value of the operands that are defined at model
+    // The values of the small operands that are defined at model
     // creation time.
-    // TODO We are copying all the values.  Once we support memory
-    // pools, revisit.
-    std::vector<uint8_t> mOperandValues;
+    std::vector<uint8_t> mSmallOperandValues;
+
+    struct LargeValue {
+        uint32_t operandIndex;
+        const void* buffer;
+    };
+    // Operand index and buffer pointer for all the large operand values of this model.
+    std::vector<LargeValue> mLargeOperandValues;
+    // The shared memory region that will contain the large values.
+    Memory mLargeValueMemory;
 
     // Once the model has been finished, we should not allow further
     // modifications to the model.
diff --git a/nn/runtime/NeuralNetworks.cpp b/nn/runtime/NeuralNetworks.cpp
index 979ca7f..3766e3b 100644
--- a/nn/runtime/NeuralNetworks.cpp
+++ b/nn/runtime/NeuralNetworks.cpp
@@ -36,83 +36,86 @@
 // Make sure the constants defined in the header files have not changed values.
 // IMPORTANT: When adding new values, update kNumberOfDataTypes or kNumberOfDataTypesOEM
 // in Utils.h.
-static_assert(ANEURALNETWORKS_FLOAT32 == 0, "ANEURALNETWORKS_FLOAT32 may have changed");
-static_assert(ANEURALNETWORKS_INT32 == 1, "ANEURALNETWORKS_INT32 may have changed");
-static_assert(ANEURALNETWORKS_UINT32 == 2, "ANEURALNETWORKS_UINT32 may have changed");
+static_assert(ANEURALNETWORKS_FLOAT32 == 0, "ANEURALNETWORKS_FLOAT32 has changed");
+static_assert(ANEURALNETWORKS_INT32 == 1, "ANEURALNETWORKS_INT32 has changed");
+static_assert(ANEURALNETWORKS_UINT32 == 2, "ANEURALNETWORKS_UINT32 has changed");
 static_assert(ANEURALNETWORKS_TENSOR_FLOAT32 == 3,
-              "ANEURALNETWORKS_TENSOR_FLOAT32 may have changed");
-static_assert(ANEURALNETWORKS_TENSOR_INT32 == 4, "ANEURALNETWORKS_TENSOR_INT32 may have changed");
+              "ANEURALNETWORKS_TENSOR_FLOAT32 has changed");
+static_assert(ANEURALNETWORKS_TENSOR_INT32 == 4, "ANEURALNETWORKS_TENSOR_INT32 has changed");
 static_assert(ANEURALNETWORKS_TENSOR_QUANT8_ASYMM == 5,
-              "ANEURALNETWORKS_TENSOR_QUANT8_ASYMM may have changed");
-static_assert(ANEURALNETWORKS_OEM_SCALAR == 10000, "ANEURALNETWORKS_OEM_SCALAR may have changed");
+              "ANEURALNETWORKS_TENSOR_QUANT8_ASYMM has changed");
+static_assert(ANEURALNETWORKS_OEM_SCALAR == 10000, "ANEURALNETWORKS_OEM_SCALAR has changed");
 static_assert(ANEURALNETWORKS_TENSOR_OEM_BYTE == 10001,
-              "ANEURALNETWORKS_TENSOR_OEM_BYTE may have changed");
+              "ANEURALNETWORKS_TENSOR_OEM_BYTE has changed");
 
 // IMPORTANT: When adding new values, update kNumberOfOperationTypes or
 // kNumberOfOperationTypesOEM in Utils.h.
-static_assert(ANEURALNETWORKS_ADD == 0, "ANEURALNETWORKS_ADD may have changed");
+static_assert(ANEURALNETWORKS_ADD == 0, "ANEURALNETWORKS_ADD has changed");
 static_assert(ANEURALNETWORKS_AVERAGE_POOL_2D == 1,
-              "ANEURALNETWORKS_AVERAGE_POOL_2D may have changed");
-static_assert(ANEURALNETWORKS_CONCATENATION == 2, "ANEURALNETWORKS_CONCATENATION may have changed");
-static_assert(ANEURALNETWORKS_CONV_2D == 3, "ANEURALNETWORKS_CONV_2D may have changed");
+              "ANEURALNETWORKS_AVERAGE_POOL_2D has changed");
+static_assert(ANEURALNETWORKS_CONCATENATION == 2, "ANEURALNETWORKS_CONCATENATION has changed");
+static_assert(ANEURALNETWORKS_CONV_2D == 3, "ANEURALNETWORKS_CONV_2D has changed");
 static_assert(ANEURALNETWORKS_DEPTHWISE_CONV_2D == 4,
-              "ANEURALNETWORKS_DEPTHWISE_CONV_2D may have changed");
+              "ANEURALNETWORKS_DEPTHWISE_CONV_2D has changed");
 static_assert(ANEURALNETWORKS_DEPTH_TO_SPACE == 5,
-              "ANEURALNETWORKS_DEPTH_TO_SPACE may have changed");
-static_assert(ANEURALNETWORKS_DEQUANTIZE == 6, "ANEURALNETWORKS_DEQUANTIZE may have changed");
+              "ANEURALNETWORKS_DEPTH_TO_SPACE has changed");
+static_assert(ANEURALNETWORKS_DEQUANTIZE == 6, "ANEURALNETWORKS_DEQUANTIZE has changed");
 static_assert(ANEURALNETWORKS_EMBEDDING_LOOKUP == 7,
-              "ANEURALNETWORKS_EMBEDDING_LOOKUP may have changed");
-static_assert(ANEURALNETWORKS_FLOOR == 8, "ANEURALNETWORKS_FLOOR may have changed");
+              "ANEURALNETWORKS_EMBEDDING_LOOKUP has changed");
+static_assert(ANEURALNETWORKS_FLOOR == 8, "ANEURALNETWORKS_FLOOR has changed");
 static_assert(ANEURALNETWORKS_FULLY_CONNECTED == 9,
-              "ANEURALNETWORKS_FULLY_CONNECTED may have changed");
+              "ANEURALNETWORKS_FULLY_CONNECTED has changed");
 static_assert(ANEURALNETWORKS_HASHTABLE_LOOKUP == 10,
-              "ANEURALNETWORKS_HASHTABLE_LOOKUP may have changed");
+              "ANEURALNETWORKS_HASHTABLE_LOOKUP has changed");
 static_assert(ANEURALNETWORKS_L2_NORMALIZATION == 11,
-              "ANEURALNETWORKS_L2_NORMALIZATION may have changed");
-static_assert(ANEURALNETWORKS_L2_POOL_2D == 12, "ANEURALNETWORKS_L2_POOL may have changed");
+              "ANEURALNETWORKS_L2_NORMALIZATION has changed");
+static_assert(ANEURALNETWORKS_L2_POOL_2D == 12, "ANEURALNETWORKS_L2_POOL has changed");
 static_assert(ANEURALNETWORKS_LOCAL_RESPONSE_NORMALIZATION == 13,
-              "ANEURALNETWORKS_LOCAL_RESPONSE_NORMALIZATION may have changed");
-static_assert(ANEURALNETWORKS_LOGISTIC == 14, "ANEURALNETWORKS_LOGISTIC may have changed");
+              "ANEURALNETWORKS_LOCAL_RESPONSE_NORMALIZATION has changed");
+static_assert(ANEURALNETWORKS_LOGISTIC == 14, "ANEURALNETWORKS_LOGISTIC has changed");
 static_assert(ANEURALNETWORKS_LSH_PROJECTION == 15,
-              "ANEURALNETWORKS_LSH_PROJECTION may have changed");
-static_assert(ANEURALNETWORKS_LSTM == 16, "ANEURALNETWORKS_LSTM may have changed");
-static_assert(ANEURALNETWORKS_MAX_POOL_2D == 17, "ANEURALNETWORKS_MAX_POOL may have changed");
-static_assert(ANEURALNETWORKS_MUL == 18, "ANEURALNETWORKS_MUL may have changed");
-static_assert(ANEURALNETWORKS_RELU == 19, "ANEURALNETWORKS_RELU may have changed");
-static_assert(ANEURALNETWORKS_RELU1 == 20, "ANEURALNETWORKS_RELU1 may have changed");
-static_assert(ANEURALNETWORKS_RELU6 == 21, "ANEURALNETWORKS_RELU6 may have changed");
-static_assert(ANEURALNETWORKS_RESHAPE == 22, "ANEURALNETWORKS_RESHAPE may have changed");
+              "ANEURALNETWORKS_LSH_PROJECTION has changed");
+static_assert(ANEURALNETWORKS_LSTM == 16, "ANEURALNETWORKS_LSTM has changed");
+static_assert(ANEURALNETWORKS_MAX_POOL_2D == 17, "ANEURALNETWORKS_MAX_POOL has changed");
+static_assert(ANEURALNETWORKS_MUL == 18, "ANEURALNETWORKS_MUL has changed");
+static_assert(ANEURALNETWORKS_RELU == 19, "ANEURALNETWORKS_RELU has changed");
+static_assert(ANEURALNETWORKS_RELU1 == 20, "ANEURALNETWORKS_RELU1 has changed");
+static_assert(ANEURALNETWORKS_RELU6 == 21, "ANEURALNETWORKS_RELU6 has changed");
+static_assert(ANEURALNETWORKS_RESHAPE == 22, "ANEURALNETWORKS_RESHAPE has changed");
 static_assert(ANEURALNETWORKS_RESIZE_BILINEAR == 23,
-              "ANEURALNETWORKS_RESIZE_BILINEAR may have changed");
-static_assert(ANEURALNETWORKS_RNN == 24, "ANEURALNETWORKS_RNN may have changed");
-static_assert(ANEURALNETWORKS_SOFTMAX == 25, "ANEURALNETWORKS_SOFTMAX may have changed");
+              "ANEURALNETWORKS_RESIZE_BILINEAR has changed");
+static_assert(ANEURALNETWORKS_RNN == 24, "ANEURALNETWORKS_RNN has changed");
+static_assert(ANEURALNETWORKS_SOFTMAX == 25, "ANEURALNETWORKS_SOFTMAX has changed");
 static_assert(ANEURALNETWORKS_SPACE_TO_DEPTH == 26,
-              "ANEURALNETWORKS_SPACE_TO_DEPTH may have changed");
-static_assert(ANEURALNETWORKS_SVDF == 27, "ANEURALNETWORKS_SVDF may have changed");
-static_assert(ANEURALNETWORKS_TANH == 28, "ANEURALNETWORKS_TANH may have changed");
+              "ANEURALNETWORKS_SPACE_TO_DEPTH has changed");
+static_assert(ANEURALNETWORKS_SVDF == 27, "ANEURALNETWORKS_SVDF has changed");
+static_assert(ANEURALNETWORKS_TANH == 28, "ANEURALNETWORKS_TANH has changed");
 static_assert(ANEURALNETWORKS_OEM_OPERATION == 10000,
-              "ANEURALNETWORKS_OEM_OPERATION may have changed");
+              "ANEURALNETWORKS_OEM_OPERATION has changed");
 
-static_assert(ANEURALNETWORKS_FUSED_NONE == 0, "ANEURALNETWORKS_FUSED_NONE may have changed");
-static_assert(ANEURALNETWORKS_FUSED_RELU == 1, "ANEURALNETWORKS_FUSED_RELU may have changed");
-static_assert(ANEURALNETWORKS_FUSED_RELU1 == 2, "ANEURALNETWORKS_FUSED_RELU1 may have changed");
-static_assert(ANEURALNETWORKS_FUSED_RELU6 == 3, "ANEURALNETWORKS_FUSED_RELU6 may have changed");
+static_assert(ANEURALNETWORKS_FUSED_NONE == 0, "ANEURALNETWORKS_FUSED_NONE has changed");
+static_assert(ANEURALNETWORKS_FUSED_RELU == 1, "ANEURALNETWORKS_FUSED_RELU has changed");
+static_assert(ANEURALNETWORKS_FUSED_RELU1 == 2, "ANEURALNETWORKS_FUSED_RELU1 has changed");
+static_assert(ANEURALNETWORKS_FUSED_RELU6 == 3, "ANEURALNETWORKS_FUSED_RELU6 has changed");
 
 static_assert(ANEURALNETWORKS_PREFER_LOW_POWER == 0,
-              "ANEURALNETWORKS_PREFER_LOW_POWER may have changed");
+              "ANEURALNETWORKS_PREFER_LOW_POWER has changed");
 static_assert(ANEURALNETWORKS_PREFER_FAST_SINGLE_ANSWER == 1,
-              "ANEURALNETWORKS_PREFER_FAST_SINGLE_ANSWER may have changed");
+              "ANEURALNETWORKS_PREFER_FAST_SINGLE_ANSWER has changed");
 static_assert(ANEURALNETWORKS_PREFER_SUSTAINED_SPEED == 2,
-              "ANEURALNETWORKS_PREFER_SUSTAINED_SPEED may have changed");
+              "ANEURALNETWORKS_PREFER_SUSTAINED_SPEED has changed");
 
-static_assert(ANEURALNETWORKS_NO_ERROR == 0, "ANEURALNETWORKS_NO_ERROR may have changed");
-static_assert(ANEURALNETWORKS_OUT_OF_MEMORY == 1, "ANEURALNETWORKS_OUT_OF_MEMORY may have changed");
-static_assert(ANEURALNETWORKS_INCOMPLETE == 2, "ANEURALNETWORKS_INCOMPLETE may have changed");
+static_assert(ANEURALNETWORKS_NO_ERROR == 0, "ANEURALNETWORKS_NO_ERROR has changed");
+static_assert(ANEURALNETWORKS_OUT_OF_MEMORY == 1, "ANEURALNETWORKS_OUT_OF_MEMORY has changed");
+static_assert(ANEURALNETWORKS_INCOMPLETE == 2, "ANEURALNETWORKS_INCOMPLETE has changed");
 static_assert(ANEURALNETWORKS_UNEXPECTED_NULL == 3,
-              "ANEURALNETWORKS_UNEXPECTED_NULL may have changed");
-static_assert(ANEURALNETWORKS_BAD_DATA == 4, "ANEURALNETWORKS_BAD_DATA may have changed");
-static_assert(ANEURALNETWORKS_OP_FAILED == 5, "ANEURALNETWORKS_OP_FAILED may have changed");
-static_assert(ANEURALNETWORKS_BAD_STATE == 6, "ANEURALNETWORKS_BAD_STATE may have changed");
+              "ANEURALNETWORKS_UNEXPECTED_NULL has changed");
+static_assert(ANEURALNETWORKS_BAD_DATA == 4, "ANEURALNETWORKS_BAD_DATA has changed");
+static_assert(ANEURALNETWORKS_OP_FAILED == 5, "ANEURALNETWORKS_OP_FAILED has changed");
+static_assert(ANEURALNETWORKS_BAD_STATE == 6, "ANEURALNETWORKS_BAD_STATE has changed");
+
+static_assert(ANEURALNETWORKS_MAX_SIZE_OF_IMMEDIATELY_COPIED_VALUES == 128,
+              "ANEURALNETWORKS_MAX_SIZE_OF_IMMEDIATELY_COPIED_VALUES has changed");
 
 // Make sure that the constants are compatible with the values defined in
 // hardware/interfaces/neuralnetworks/1.0/types.hal.
diff --git a/nn/runtime/include/NeuralNetworks.h b/nn/runtime/include/NeuralNetworks.h
index 3d93ef0..beaf6be 100644
--- a/nn/runtime/include/NeuralNetworks.h
+++ b/nn/runtime/include/NeuralNetworks.h
@@ -154,8 +154,10 @@
      * * 2: An INT32 value, specifying the padding on the right, in the ‘width’ dimension.
      * * 3: An INT32 value, specifying the padding on the top, in the ‘height’ dimension.
      * * 4: An INT32 value, specifying the padding on the bottom, in the ‘height’ dimension.
-     * * 5: An INT32 value, specifying the output stride in the ‘width’ dimension.
-     * * 6: An INT32 value, specifying the output stride in the ‘height’ dimension.
+     * * 5: An INT32 value, specifying the stride when walking through input
+     *      in the ‘width’ dimension.
+     * * 6: An INT32 value, specifying the stride when walking through input
+     *      in the ‘height’ dimension.
      * * 7: An INT32 value, specifying the filter width.
      * * 8: An INT32 value, specifying the filter height.
      * * 9: An INT32 value, and has to be one of the {@link FuseCode} values.
@@ -165,8 +167,10 @@
      * * 0: A 4-D tensor, of shape [batches, height, width, depth], specifying the input.
      * * 1: An INT32 value, specifying the implicit padding scheme, has to be one of the
      *      {@link PaddingCode} values.
-     * * 2: An INT32 value, specifying the output stride in the ‘width’ dimension.
-     * * 3: An INT32 value, specifying the output stride in the ‘height’ dimension.
+     * * 2: An INT32 value, specifying the stride when walking through input
+     *      in the ‘width’ dimension.
+     * * 3: An INT32 value, specifying the stride when walking through input
+     *      in the ‘height’ dimension.
      * * 4: An INT32 value, specifying the filter width.
      * * 5: An INT32 value, specifying the filter height.
      * * 6: An INT32 value, and has to be one of the {@link FuseCode} values.
@@ -238,8 +242,10 @@
      * * 4: An INT32 value, specifying the padding on the right, in the ‘width’ dimension.
      * * 5: An INT32 value, specifying the padding on the top, in the ‘height’ dimension.
      * * 6: An INT32 value, specifying the padding on the bottom, in the ‘height’ dimension.
-     * * 7: An INT32 value, specifying the output stride in the ‘width’ dimension.
-     * * 8: An INT32 value, specifying the output stride in the ‘height’ dimension.
+     * * 7: An INT32 value, specifying the stride when walking through input
+     *      in the ‘width’ dimension.
+     * * 8: An INT32 value, specifying the stride when walking through input
+     *      in the ‘height’ dimension.
      * * 9: An INT32 value, and has to be one of the {@link FuseCode} values.
      *      Specifies the activation to invoke on the result of each addition.
      *
@@ -255,8 +261,10 @@
      *      bias_scale == input_scale * filter_scale.
      * * 3: An INT32 value, specifying the implicit padding scheme, has to be one of the
      *      {@link PaddingCode} values.
-     * * 4: An INT32 value, specifying the output stride in the ‘width’ dimension.
-     * * 5: An INT32 value, specifying the output stride in the ‘height’ dimension.
+     * * 4: An INT32 value, specifying the stride when walking through input
+     *      in the ‘width’ dimension.
+     * * 5: An INT32 value, specifying the stride when walking through input
+     *      in the ‘height’ dimension.
      * * 6: An INT32 value, and has to be one of the {@link FuseCode} values.
      *      Specifies the activation to invoke on the result of each addition.
      *
@@ -308,8 +316,10 @@
      * * 4: An INT32 value, specifying the padding on the right, in the ‘width’ dimension.
      * * 5: An INT32 value, specifying the padding on the top, in the ‘height’ dimension.
      * * 6: An INT32 value, specifying the padding on the bottom, in the ‘height’ dimension.
-     * * 7: An INT32 value, specifying the output stride in the ‘width’ dimension.
-     * * 8: An INT32 value, specifying the output stride in the ‘height’ dimension.
+     * * 7: An INT32 value, specifying the stride when walking through input
+     *      in the ‘width’ dimension.
+     * * 8: An INT32 value, specifying the stride when walking through input
+     *      in the ‘height’ dimension.
      * * 9: An INT32 value, specifying the depthwise multiplier.
      * * 10: An INT32 value, and has to be one of the {@link FuseCode} values.
      *       Specifies the activation to invoke on the result of each addition.
@@ -326,8 +336,10 @@
      *      bias_scale == input_scale * filter_scale.
      * * 3: An INT32 value, specifying the implicit padding scheme, has to be one of the
      *      {@link PaddingCode} values.
-     * * 4: An INT32 value, specifying the output stride in the ‘width’ dimension.
-     * * 5: An INT32 value, specifying the output stride in the ‘height’ dimension.
+     * * 4: An INT32 value, specifying the stride when walking through input
+     *      in the ‘width’ dimension.
+     * * 5: An INT32 value, specifying the stride when walking through input
+     *      in the ‘height’ dimension.
      * * 6: An INT32 value, specifying the depthwise multiplier.
      * * 7: An INT32 value, and has to be one of the {@link FuseCode} values.
      *       Specifies the activation to invoke on the result of each addition.
@@ -561,8 +573,10 @@
      * * 2: An INT32 value, specifying the padding on the right, in the ‘width’ dimension.
      * * 3: An INT32 value, specifying the padding on the top, in the ‘height’ dimension.
      * * 4: An INT32 value, specifying the padding on the bottom, in the ‘height’ dimension.
-     * * 5: An INT32 value, specifying the output stride in the ‘width’ dimension.
-     * * 6: An INT32 value, specifying the output stride in the ‘height’ dimension.
+     * * 5: An INT32 value, specifying the stride when walking through input
+     *      in the ‘width’ dimension.
+     * * 6: An INT32 value, specifying the stride when walking through input
+     *      in the ‘height’ dimension.
      * * 7: An INT32 value, specifying the filter width.
      * * 8: An INT32 value, specifying the filter height.
      * * 9: An INT32 value, and has to be one of the {@link FuseCode} values.
@@ -572,8 +586,10 @@
      * * 0: A 4-D tensor, of shape [batches, height, width, depth], specifying the input.
      * * 1: An INT32 value, specifying the implicit padding scheme, has to be one of the
      *      {@link PaddingCode} values.
-     * * 2: An INT32 value, specifying the output stride in the ‘width’ dimension.
-     * * 3: An INT32 value, specifying the output stride in the ‘height’ dimension.
+     * * 2: An INT32 value, specifying the stride when walking through input
+     *      in the ‘width’ dimension.
+     * * 3: An INT32 value, specifying the stride when walking through input
+     *      in the ‘height’ dimension.
      * * 4: An INT32 value, specifying the filter width.
      * * 5: An INT32 value, specifying the filter height.
      * * 6: An INT32 value, and has to be one of the {@link FuseCode} values.
@@ -794,8 +810,10 @@
      * * 2: An INT32 value, specifying the padding on the right, in the ‘width’ dimension.
      * * 3: An INT32 value, specifying the padding on the top, in the ‘height’ dimension.
      * * 4: An INT32 value, specifying the padding on the bottom, in the ‘height’ dimension.
-     * * 5: An INT32 value, specifying the output stride in the ‘width’ dimension.
-     * * 6: An INT32 value, specifying the output stride in the ‘height’ dimension.
+     * * 5: An INT32 value, specifying the stride when walking through input
+     *      in the ‘width’ dimension.
+     * * 6: An INT32 value, specifying the stride when walking through input
+     *      in the ‘height’ dimension.
      * * 7: An INT32 value, specifying the filter width.
      * * 8: An INT32 value, specifying the filter height.
      * * 9: An INT32 value, and has to be one of the {@link FuseCode} values.
@@ -805,8 +823,10 @@
      * * 0: A 4-D tensor, of shape [batches, height, width, depth], specifying the input.
      * * 1: An INT32 value, specifying the implicit padding scheme, has to be one of the
      *      {@link PaddingCode} values.
-     * * 2: An INT32 value, specifying the output stride in the ‘width’ dimension.
-     * * 3: An INT32 value, specifying the output stride in the ‘height’ dimension.
+     * * 2: An INT32 value, specifying the stride when walking through input
+     *      in the ‘width’ dimension.
+     * * 3: An INT32 value, specifying the stride when walking through input
+     *      in the ‘height’ dimension.
      * * 4: An INT32 value, specifying the filter width.
      * * 5: An INT32 value, specifying the filter height.
      * * 6: An INT32 value, and has to be one of the {@link FuseCode} values.
@@ -1073,7 +1093,8 @@
      *
      * Specifically, for rank 1, this layer implements the operation:
      *
-     *    memory = push(conv1d(inputs, weights_feature, feature_dim, "VALID"));
+     *    memory = push(conv1d(inputs, weights_feature, feature_dim,
+     *                  "ANEURALNETWORKS_PADDING_VALID"));
      *    outputs = activation(memory * weights_time + bias);
      *
      * Where:
@@ -1163,9 +1184,30 @@
  *
  */
 typedef enum {
-    /** SAME padding. */
+    /**
+     * SAME padding.
+     * Padding on both ends is the "same":
+     *     padding_to_beginning =  total_padding / 2
+     *     padding_to_end       = (total_padding + 1) / 2
+     * i.e., for an even amount of total padding, both ends receive exactly
+     * the same amount; for an odd amount, the end receives one more than
+     * the beginning.
+     *
+     * total_padding is a function of the input, stride, and filter size.
+     * It can be computed as follows:
+     *     out_size = (input + stride - 1) / stride
+     *     needed_input = (out_size - 1) * stride + filter_size
+     *     total_padding = max(0, needed_input - input)
+     * The computation is the same for the horizontal and vertical directions.
+     */
     ANEURALNETWORKS_PADDING_SAME = 1,
-    /** VALID padding. */
+
+    /**
+     * VALID padding.
+     * No padding. When the input size is not evenly divisible by
+     * the filter size, the input cells at the end that cannot fill a
+     * whole filter tile are simply ignored.
+     */
     ANEURALNETWORKS_PADDING_VALID = 2,
 } PaddingCode;
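
A worked instance of the SAME formula above, for input = 10, stride = 2,
filter_size = 3 (plain arithmetic, not part of the header):

    out_size             = (10 + 2 - 1) / 2  = 5
    needed_input         = (5 - 1) * 2 + 3   = 11
    total_padding        = max(0, 11 - 10)   = 1
    padding_to_beginning = 1 / 2             = 0
    padding_to_end       = (1 + 1) / 2       = 1

Under VALID padding the same input yields out_size = (10 - 3) / 2 + 1 = 4,
and the last input column, which cannot fill a whole filter tile, is ignored.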
 
@@ -1205,6 +1247,15 @@
 } ResultCode;
 
 /**
+ * For {@link ANeuralNetworksModel_setOperandValue}, values with a
+ * length smaller than or equal to this value will be immediately
+ * copied into the model. The size is in bytes.
+ */
+enum {
+    ANEURALNETWORKS_MAX_SIZE_OF_IMMEDIATELY_COPIED_VALUES = 128
+};
+
+/**
  * ANeuralNetworksMemory is an opaque type that represents memory.
  *
  * This type is used to represent shared memory, memory mapped files,
@@ -1468,13 +1519,18 @@
 /**
  * Sets an operand to a constant value.
  *
- * For scalar values, the content of buffer is copied into the model.
+ * Values of length smaller than or equal to
+ * {@link ANEURALNETWORKS_MAX_SIZE_OF_IMMEDIATELY_COPIED_VALUES}
+ * are immediately copied into the model.
  *
- * For tensor values, a pointer to the buffer is stored within the model.
- * The application is responsible for not changing the content of this region
- * until all executions using this model have completed. As the data may
- * be copied during processing, modifying the data after this call yields
- * undefined results.
+ * For values of length greater than {@link ANEURALNETWORKS_MAX_SIZE_OF_IMMEDIATELY_COPIED_VALUES},
+ * a pointer to the buffer is stored within the model. The application is responsible
+ * for not changing the content of this region until all executions using this model
+ * have completed. As the data may be copied during processing, modifying the data
+ * after this call yields undefined results.
+ *
+ * For large tensors, using {@link ANeuralNetworksModel_setOperandValueFromMemory}
+ * is likely to be more efficient.
  *
  * To indicate that an optional operand should be considered missing,
  * pass nullptr for buffer and 0 for length.
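
A usage sketch of the copy-vs-reference rule described above (the model
handle and operand indices are illustrative):

    // 4 bytes <= ANEURALNETWORKS_MAX_SIZE_OF_IMMEDIATELY_COPIED_VALUES:
    // the value is copied into the model, so 'scalar' may go out of scope
    // once the call returns.
    int32_t scalar = 4;
    ANeuralNetworksModel_setOperandValue(model, 1, &scalar, sizeof(scalar));

    // 4096 bytes > the threshold: only the pointer is stored, so 'weights'
    // must stay valid and unmodified until all executions have completed.
    static float weights[1024];
    ANeuralNetworksModel_setOperandValue(model, 2, weights, sizeof(weights));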