Merge "Replace parentheses with curly braces to fix vector doc bug."
diff --git a/cpp/rsDispatch.cpp b/cpp/rsDispatch.cpp
index f09f539..10adcf2 100644
--- a/cpp/rsDispatch.cpp
+++ b/cpp/rsDispatch.cpp
@@ -411,6 +411,11 @@
             LOG_API("Couldn't initialize dispatchTab.Allocation3DRead");
             return false;
         }
+        dispatchTab.ScriptForEachMulti = (ScriptForEachMultiFnPtr)dlsym(handle, "rsScriptForEachMulti");
+        if (dispatchTab.ScriptForEachMulti == NULL) {
+            LOG_API("Couldn't initialize dispatchTab.ScriptForEachMulti");
+            return false;
+        }
     }
 
     return true;
diff --git a/cpp/rsDispatch.h b/cpp/rsDispatch.h
index d77a4d9..740457a 100644
--- a/cpp/rsDispatch.h
+++ b/cpp/rsDispatch.h
@@ -93,6 +93,7 @@
 typedef void (*ScriptGroupSetOutputFnPtr) (RsContext, RsScriptGroup, RsScriptKernelID, RsAllocation);
 typedef void (*ScriptGroupSetInputFnPtr) (RsContext, RsScriptGroup, RsScriptKernelID, RsAllocation);
 typedef void (*ScriptGroupExecuteFnPtr) (RsContext, RsScriptGroup);
+typedef void (*ScriptForEachMultiFnPtr) (RsContext, RsScript, uint32_t, RsAllocation *, size_t, RsAllocation, const void *, size_t, const RsScriptCall *, size_t);
 typedef void (*AllocationIoSendFnPtr) (RsContext, RsAllocation);
 typedef void (*AllocationIoReceiveFnPtr) (RsContext, RsAllocation);
 typedef void * (*AllocationGetPointerFnPtr) (RsContext, RsAllocation, uint32_t lod, RsAllocationCubemapFace face, uint32_t z, uint32_t array, size_t *stride, size_t stride_len);
@@ -176,6 +177,7 @@
     ScriptGroupSetOutputFnPtr ScriptGroupSetOutput;
     ScriptGroupSetInputFnPtr ScriptGroupSetInput;
     ScriptGroupExecuteFnPtr ScriptGroupExecute;
+    ScriptForEachMultiFnPtr ScriptForEachMulti;
     AllocationIoSendFnPtr AllocationIoSend;
     AllocationIoReceiveFnPtr AllocationIoReceive;
     AllocationGetPointerFnPtr AllocationGetPointer;
diff --git a/cpu_ref/rsCpuCore.cpp b/cpu_ref/rsCpuCore.cpp
index 5fe7801..911a0f0 100644
--- a/cpu_ref/rsCpuCore.cpp
+++ b/cpu_ref/rsCpuCore.cpp
@@ -646,11 +646,9 @@
     case RS_SCRIPT_INTRINSIC_ID_RESIZE:
         i = rsdIntrinsic_Resize(this, s, e);
         break;
-#if !defined(RS_COMPATIBILITY_LIB)
     case RS_SCRIPT_INTRINSIC_ID_BLAS:
         i = rsdIntrinsic_BLAS(this, s, e);
         break;
-#endif
 
     default:
         rsAssert(0);
diff --git a/cpu_ref/rsCpuExecutable.cpp b/cpu_ref/rsCpuExecutable.cpp
index e2c27b5..867a2cd 100644
--- a/cpu_ref/rsCpuExecutable.cpp
+++ b/cpu_ref/rsCpuExecutable.cpp
@@ -20,27 +20,6 @@
 
 namespace {
 
-// Create a len length string containing random characters from [A-Za-z0-9].
-static std::string getRandomString(size_t len) {
-    char buf[len + 1];
-    for (size_t i = 0; i < len; i++) {
-        uint32_t r = arc4random() & 0xffff;
-        r %= 62;
-        if (r < 26) {
-            // lowercase
-            buf[i] = 'a' + r;
-        } else if (r < 52) {
-            // uppercase
-            buf[i] = 'A' + (r - 26);
-        } else {
-            // Use a number
-            buf[i] = '0' + (r - 52);
-        }
-    }
-    buf[len] = '\0';
-    return std::string(buf);
-}
-
 // Check if a path exists and attempt to create it if it doesn't.
 static bool ensureCacheDirExists(const char *path) {
     if (access(path, R_OK | W_OK | X_OK) == 0) {
@@ -151,7 +130,8 @@
 
 void* SharedLibraryUtils::loadSharedLibrary(const char *cacheDir,
                                             const char *resName,
-                                            const char *nativeLibDir) {
+                                            const char *nativeLibDir,
+                                            bool* alreadyLoaded) {
     void *loaded = nullptr;
 
 #if defined(RS_COMPATIBILITY_LIB) && defined(__LP64__)
@@ -162,7 +142,7 @@
 
     // We should check if we can load the library from the standard app
     // location for shared libraries first.
-    loaded = loadSOHelper(scriptSOName.c_str(), cacheDir, resName);
+    loaded = loadSOHelper(scriptSOName.c_str(), cacheDir, resName, alreadyLoaded);
 
     if (loaded == nullptr) {
         ALOGE("Unable to open shared library (%s): %s",
@@ -189,8 +169,28 @@
     return loaded;
 }
 
+String8 SharedLibraryUtils::getRandomString(size_t len) {  // Returns len random chars from [A-Za-z0-9].
+    char buf[len + 1];  // len characters plus '\0'; runtime-sized local array (compiler extension in C++)
+    for (size_t i = 0; i < len; i++) {
+        uint32_t r = arc4random() & 0xffff;  // keep only the low 16 bits of the random word
+        r %= 62;  // fold into 0..61 (26 + 26 + 10 symbols); 65536 % 62 != 0, so slightly biased
+        if (r < 26) {
+            // 0..25 map to 'a'..'z'
+            buf[i] = 'a' + r;
+        } else if (r < 52) {
+            // 26..51 map to 'A'..'Z'
+            buf[i] = 'A' + (r - 26);
+        } else {
+            // 52..61 map to '0'..'9'
+            buf[i] = '0' + (r - 52);
+        }
+    }
+    buf[len] = '\0';  // terminate the buffer before handing it to String8
+    return String8(buf);
+}
+
 void* SharedLibraryUtils::loadSOHelper(const char *origName, const char *cacheDir,
-                                       const char *resName) {
+                                       const char *resName, bool *alreadyLoaded) {
     // Keep track of which .so libraries have been loaded. Once a library is
     // in the set (per-process granularity), we must instead make a copy of
     // the original shared object (randomly named .so file) and load that one
@@ -208,6 +208,9 @@
 
     // Common path is that we have not loaded this Script/library before.
     if (LoadedLibraries.find(origName) == LoadedLibraries.end()) {
+        if (alreadyLoaded != nullptr) {
+            *alreadyLoaded = false;
+        }
         loaded = dlopen(origName, RTLD_NOW | RTLD_LOCAL);
         if (loaded) {
             LoadedLibraries.insert(origName);
@@ -215,6 +218,10 @@
         return loaded;
     }
 
+    if (alreadyLoaded != nullptr) {
+        *alreadyLoaded = true;
+    }
+
     std::string newName(cacheDir);
 
     // Append RS_CACHE_DIR only if it is not found in cacheDir
@@ -234,7 +241,7 @@
     newName.append("librs.");
     newName.append(resName);
     newName.append("#");
-    newName.append(getRandomString(6));  // 62^6 potential filename variants.
+    newName.append(getRandomString(6).string());  // 62^6 potential filename variants.
     newName.append(".so");
 
     int r = copyFile(newName.c_str(), origName);
diff --git a/cpu_ref/rsCpuExecutable.h b/cpu_ref/rsCpuExecutable.h
index 0464dac..6880970 100644
--- a/cpu_ref/rsCpuExecutable.h
+++ b/cpu_ref/rsCpuExecutable.h
@@ -43,14 +43,18 @@
     // For 64bit RS Support Lib, the shared lib path cannot be constructed from
     // cacheDir, so nativeLibDir is needed to load shared libs.
     static void* loadSharedLibrary(const char *cacheDir, const char *resName,
-                                   const char *nativeLibDir = nullptr);
+                                   const char *nativeLibDir = nullptr,
+                                   bool *alreadyLoaded = nullptr);
+
+    // Create a string of len random characters drawn from [A-Za-z0-9].
+    static String8 getRandomString(size_t len);
 
 private:
     // Attempt to load the shared library from origName, but then fall back to
     // creating a copy of the shared library if necessary (to ensure instancing).
     // This function returns the dlopen()-ed handle if successful.
     static void *loadSOHelper(const char *origName, const char *cacheDir,
-                              const char *resName);
+                              const char *resName, bool* alreadyLoaded = nullptr);
 
     static const char* LD_EXE_PATH;
     static const char* RS_CACHE_DIR;
diff --git a/cpu_ref/rsCpuIntrinsicBLAS.cpp b/cpu_ref/rsCpuIntrinsicBLAS.cpp
index 3034454..a7705b4 100644
--- a/cpu_ref/rsCpuIntrinsicBLAS.cpp
+++ b/cpu_ref/rsCpuIntrinsicBLAS.cpp
@@ -47,10 +47,10 @@
     uint8_t c_offset = 0;
 
     static void kernelBNNM(size_t m, size_t n, size_t k,
-                           const uint8_t* a, uint32_t a_offset, size_t lda,
-                           const uint8_t* b, uint32_t b_offset, size_t ldb,
-                           uint8_t* c, uint32_t c_offset, size_t ldc,
-                           uint32_t c_mult_int);
+                           const uint8_t* a, uint8_t a_offset, size_t lda,
+                           const uint8_t* b, uint8_t b_offset, size_t ldb,
+                           uint8_t* c, int32_t c_offset, size_t ldc,
+                           int32_t c_mult_int);
 
 
 
@@ -653,10 +653,10 @@
 }
 
 void RsdCpuScriptIntrinsicBLAS::kernelBNNM(size_t m, size_t n, size_t k,
-                                           const uint8_t* a, uint32_t a_offset, size_t lda,
-                                           const uint8_t* b, uint32_t b_offset, size_t ldb,
-                                           uint8_t* c, uint32_t c_offset, size_t ldc,
-                                           uint32_t c_mult_int) {
+                                           const uint8_t* a, uint8_t a_offset, size_t lda,
+                                           const uint8_t* b, uint8_t b_offset, size_t ldb,
+                                           uint8_t* c, int32_t c_offset, size_t ldc,
+                                           int32_t c_mult_int) {
     // Calculations are done in 1.10.21 fixed-point format for the final output,
     // just before there's a shift down to drop the fractional parts. The output
     // values are gated to 0 to 255 to fit in a byte, but the 10-bit format
diff --git a/cpu_ref/rsCpuIntrinsicBlend.cpp b/cpu_ref/rsCpuIntrinsicBlend.cpp
index d546bbd..ca40c5e 100644
--- a/cpu_ref/rsCpuIntrinsicBlend.cpp
+++ b/cpu_ref/rsCpuIntrinsicBlend.cpp
@@ -121,7 +121,9 @@
     uint32_t x2 = xend;
 
 #if defined(ARCH_ARM_USE_INTRINSICS) && !defined(ARCH_ARM64_USE_INTRINSICS)
-    if (gArchUseSIMD) {
+    // Bug: 22047392 - Skip optimized version for BLEND_DST_ATOP until this
+    // has been fixed.
+    if (gArchUseSIMD && info->slot != BLEND_DST_ATOP) {
         if (rsdIntrinsicBlend_K(out, in, info->slot, x1, x2) >= 0)
             return;
     }
@@ -271,6 +273,9 @@
         }
         break;
     case BLEND_DST_ATOP:
+    // Bug: 22047392 - We need to make sure that "out->w = in->w;" is applied
+    // in all accelerated versions before re-enabling optimizations.
+    #if false  // Bug: 22047392
     #if defined(ARCH_X86_HAVE_SSSE3)
         if (gArchUseSIMD) {
             if ((x1 + 8) < x2) {
@@ -282,11 +287,13 @@
             }
         }
      #endif
+     #endif  // false for Bug: 22047392
         for (;x1 < x2; x1++, out++, in++) {
             short4 in_s = convert_short4(*in);
             short4 out_s = convert_short4(*out);
             out_s.xyz = ((out_s.xyz * in_s.w) +
               (in_s.xyz * ((short3)255 - (short3)out_s.w))) >> (short3)8;
+            out_s.w = in_s.w;
             *out = convert_uchar4(out_s);
         }
         break;
diff --git a/cpu_ref/rsCpuScript.cpp b/cpu_ref/rsCpuScript.cpp
index b95d8f7..1909e13 100644
--- a/cpu_ref/rsCpuScript.cpp
+++ b/cpu_ref/rsCpuScript.cpp
@@ -862,10 +862,8 @@
 }
 
 RsdCpuScriptImpl::~RsdCpuScriptImpl() {
-    if (mScriptExec != nullptr) {
-        delete mScriptExec;
-    }
-    if (mBoundAllocs) delete[] mBoundAllocs;
+    delete mScriptExec;
+    delete[] mBoundAllocs;
     if (mScriptSO) {
         dlclose(mScriptSO);
     }
diff --git a/cpu_ref/rsCpuScriptGroup2.cpp b/cpu_ref/rsCpuScriptGroup2.cpp
index 8923c65..bf01403 100644
--- a/cpu_ref/rsCpuScriptGroup2.cpp
+++ b/cpu_ref/rsCpuScriptGroup2.cpp
@@ -156,6 +156,7 @@
     mExecutable(nullptr), mScriptObj(nullptr) {
     rsAssert(!mGroup->mClosures.empty());
 
+    mCpuRefImpl->lockMutex();
     Batch* batch = new Batch(this, "Batch0");
     int i = 0;
     for (Closure* closure: mGroup->mClosures) {
@@ -192,6 +193,7 @@
         }
     }
 #endif  // RS_COMPATIBILITY_LIB
+    mCpuRefImpl->unlockMutex();
 }
 
 void Batch::resolveFuncPtr(void* sharedObj) {
@@ -279,6 +281,9 @@
     }
     args->push_back("-output_path");
     args->push_back(outputDir);
+
+    // The output filename has to be the last argument, in case we need to pop
+    // it and replace it with a different name.
     args->push_back("-o");
     args->push_back(outputFileName);
 }
@@ -396,15 +401,41 @@
     // Try to load a shared lib from code cache matching filename and checksum
     //===--------------------------------------------------------------------===//
 
-    mScriptObj = SharedLibraryUtils::loadSharedLibrary(cacheDir, resName);
+    bool alreadyLoaded = false;
+    std::string cloneName;
+
+    mScriptObj = SharedLibraryUtils::loadSharedLibrary(cacheDir, resName, nullptr,
+                                                       &alreadyLoaded);
     if (mScriptObj != nullptr) {
+        // A shared library named resName is found in code cache directory
+        // cacheDir, and loaded with the handle stored in mScriptObj.
+
         mExecutable = ScriptExecutable::createFromSharedObject(
             getCpuRefImpl()->getContext(), mScriptObj, checksum);
+
         if (mExecutable != nullptr) {
+            // The loaded shared library in mScriptObj has a matching checksum.
+            // An executable object has been created.
             return;
-        } else {
-            ALOGE("Failed to create an executable object from so file");
         }
+
+        ALOGV("Failed to create an executable object from so file due to "
+              "mismatching checksum");
+
+        if (alreadyLoaded) {
+            // The shared object found in code cache has already been loaded.
+            // A different file name is needed for the new shared library, to
+            // avoid corrupting the currently loaded instance.
+
+            cloneName.append(resName);
+            cloneName.append("#");
+            cloneName.append(SharedLibraryUtils::getRandomString(6).string());
+
+            // The last element in arguments is the output filename.
+            arguments.pop_back();
+            arguments.push_back(cloneName.c_str());
+        }
+
         dlclose(mScriptObj);
         mScriptObj = nullptr;
     }
@@ -443,6 +474,16 @@
         return;
     }
 
+    if (alreadyLoaded) {
+        // Delete the temporary, random-named file that we created to avoid
+        // interfering with an already loaded shared library.
+        string cloneFilePath(cacheDir);
+        cloneFilePath.append("/");
+        cloneFilePath.append(cloneName.c_str());
+        cloneFilePath.append(".so");
+        unlink(cloneFilePath.c_str());
+    }
+
     mExecutable = ScriptExecutable::createFromSharedObject(
         getCpuRefImpl()->getContext(),
         mScriptObj);
diff --git a/rsDefines.h b/rsDefines.h
index 839520f..6da672f 100644
--- a/rsDefines.h
+++ b/rsDefines.h
@@ -458,10 +458,10 @@
     int incY;
     int KL;
     int KU;
-    uint32_t a_offset;
-    uint32_t b_offset;
-    uint32_t c_offset;
-    uint32_t c_mult_int;
+    uint8_t a_offset;
+    uint8_t b_offset;
+    int32_t c_offset;
+    int32_t c_mult_int;
 } RsBlasCall;
 
 enum RsGlobalProperty {
diff --git a/rsFileA3D.cpp b/rsFileA3D.cpp
index a81d0f9..a7b1e27 100644
--- a/rsFileA3D.cpp
+++ b/rsFileA3D.cpp
@@ -54,7 +54,7 @@
         delete mWriteStream;
     }
     if (mReadStream) {
-        delete mWriteStream;
+        delete mReadStream;
     }
     if (mAlloc) {
         free(mAlloc);