Snap for 9227789 from 6b9ff13286194c7a2b38d624eeee38dc35987dc8 to udc-release

Change-Id: I9776d6d4ec1ba0907abcca920f2d89878ab5e15f
diff --git a/CMakeLists.txt b/CMakeLists.txt
index b7c86ba..6a25d5b 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -113,6 +113,17 @@
     set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /D__SSE__")
 endif()
 
+# Set COMPILE_FLAGS to `flags` on ${MODULE_NAME}_SOURCES if using gcc or clang.
+# `flags` is quoted so an empty value or one containing ';' stays one argument.
+macro(set_gnulike_module_compile_flags flags)
+    if(CMAKE_COMPILER_IS_GNUCC OR "${CMAKE_CXX_COMPILER_ID}" MATCHES "(Apple)?Clang")
+        set_source_files_properties(
+            ${${MODULE_NAME}_SOURCES}
+            PROPERTIES COMPILE_FLAGS "${flags}"
+        )
+    endif()
+endmacro()
+
 if(MSVC)
     # Don't warn when using standard non-secure functions.
     add_compile_definitions(_CRT_SECURE_NO_WARNINGS)
diff --git a/METADATA b/METADATA
index 3f60212..235bc5a 100644
--- a/METADATA
+++ b/METADATA
@@ -1,13 +1,19 @@
-name: "OpenCL-CTS"
-description:
-    "OpenCL Conformance Tests"
+# This project was upgraded with external_updater.
+# Usage: tools/external_updater/updater.sh update OpenCL-CTS
+# For more info, check https://cs.android.com/android/platform/superproject/+/master:tools/external_updater/README.md
 
+name: "OpenCL-CTS"
+description: "OpenCL Conformance Tests"
 third_party {
   url {
     type: GIT
     value: "https://github.com/KhronosGroup/OpenCL-CTS.git"
   }
-  version: "a87e686757f9fda5377baf73a32bb3c791eae70c"
-  last_upgrade_date { year: 2022 month: 9 day: 18 }
+  version: "90a5183ec499d5b4701f58f6134dd424d82c4dca"
   license_type: NOTICE
+  last_upgrade_date {
+    year: 2022
+    month: 10
+    day: 26
+  }
 }
diff --git a/test_common/harness/conversions.cpp b/test_common/harness/conversions.cpp
index c773126..d52a2ac 100644
--- a/test_common/harness/conversions.cpp
+++ b/test_common/harness/conversions.cpp
@@ -14,6 +14,7 @@
 // limitations under the License.
 //
 #include "conversions.h"
+#include <cinttypes>
 #include <limits.h>
 #include <time.h>
 #include <assert.h>
@@ -50,10 +51,10 @@
         case kInt: sprintf(string, "%d", *((cl_int *)data)); return;
         case kUInt:
         case kUnsignedInt: sprintf(string, "%u", *((cl_uint *)data)); return;
-        case kLong: sprintf(string, "%lld", *((cl_long *)data)); return;
+        case kLong: sprintf(string, "%" PRId64 "", *((cl_long *)data)); return;
         case kULong:
         case kUnsignedLong:
-            sprintf(string, "%llu", *((cl_ulong *)data));
+            sprintf(string, "%" PRIu64 "", *((cl_ulong *)data));
             return;
         case kFloat: sprintf(string, "%f", *((cl_float *)data)); return;
         case kHalf: sprintf(string, "half"); return;
diff --git a/test_common/harness/imageHelpers.cpp b/test_common/harness/imageHelpers.cpp
index 3dbdffa..f1694e8 100644
--- a/test_common/harness/imageHelpers.cpp
+++ b/test_common/harness/imageHelpers.cpp
@@ -23,6 +23,7 @@
 #include <malloc.h>
 #endif
 #include <algorithm>
+#include <cinttypes>
 #include <iterator>
 #if !defined(_WIN32)
 #include <cmath>
@@ -421,7 +422,7 @@
               (int)thirdDim, (int)imageInfo->rowPitch,
               (int)imageInfo->rowPitch
                   - (int)imageInfo->width * (int)pixel_size);
-    log_error("Failed at column: %ld   ", where);
+    log_error("Failed at column: %zu   ", where);
 
     switch (pixel_size)
     {
@@ -454,7 +455,7 @@
                 ((cl_ushort *)destPixel)[1], ((cl_ushort *)destPixel)[2]);
             break;
         case 8:
-            log_error("*0x%16.16llx vs. 0x%16.16llx\n",
+            log_error("*0x%16.16" PRIx64 " vs. 0x%16.16" PRIx64 "\n",
                       ((cl_ulong *)sourcePixel)[0], ((cl_ulong *)destPixel)[0]);
             break;
         case 12:
@@ -473,7 +474,7 @@
                       ((cl_uint *)destPixel)[2], ((cl_uint *)destPixel)[3]);
             break;
         default:
-            log_error("Don't know how to print pixel size of %ld\n",
+            log_error("Don't know how to print pixel size of %zu\n",
                       pixel_size);
             break;
     }
diff --git a/test_common/harness/mt19937.h b/test_common/harness/mt19937.h
index 98eec84..447ca25 100644
--- a/test_common/harness/mt19937.h
+++ b/test_common/harness/mt19937.h
@@ -94,23 +94,42 @@
 bool genrand_bool(MTdata /*data*/);
 
 #include <cassert>
+#include <utility>
 
-struct MTdataHolder
-{
-    MTdataHolder(cl_uint seed)
+class MTdataHolder {
+public:
+    MTdataHolder() = default;
+    explicit MTdataHolder(cl_uint seed)
     {
         m_mtdata = init_genrand(seed);
         assert(m_mtdata != nullptr);
     }
 
-    MTdataHolder(MTdata mtdata): m_mtdata(mtdata) {}
+    // Forbid copy: the destructor frees m_mtdata, so it must have one owner.
+    MTdataHolder(const MTdataHolder&) = delete;
+    MTdataHolder& operator=(const MTdataHolder&) = delete;
 
-    ~MTdataHolder() { free_mtdata(m_mtdata); }
+    // Support move semantics by swapping handles; neither operation throws.
+    MTdataHolder(MTdataHolder&& h) noexcept { std::swap(m_mtdata, h.m_mtdata); }
+    MTdataHolder& operator=(MTdataHolder&& h) noexcept
+    {
+        std::swap(m_mtdata, h.m_mtdata);
+        return *this;
+    }
 
-    operator MTdata() const { return m_mtdata; }
+    ~MTdataHolder()
+    {
+        if (m_mtdata) free_mtdata(m_mtdata);
+    }
+
+    operator MTdata() const
+    {
+        assert(m_mtdata && "Object wasn't initialised");
+        return m_mtdata;
+    }
 
 private:
-    MTdata m_mtdata;
+    MTdata m_mtdata = nullptr;
 };
 
 #endif // #ifdef __cplusplus
diff --git a/test_common/harness/propertyHelpers.cpp b/test_common/harness/propertyHelpers.cpp
index e368f9b..6a10c07 100644
--- a/test_common/harness/propertyHelpers.cpp
+++ b/test_common/harness/propertyHelpers.cpp
@@ -19,6 +19,7 @@
 #include <assert.h>
 
 #include <algorithm>
+#include <cinttypes>
 #include <vector>
 
 static bool findProperty(const std::vector<cl_properties>& props,
@@ -97,16 +98,16 @@
 
             if (!found)
             {
-                log_error("ERROR: expected property 0x%llx not found!\n",
+                log_error("ERROR: expected property 0x%" PRIx64 " not found!\n",
                           check_prop);
                 return TEST_FAIL;
             }
             else if (check_value != queried_value)
             {
-                log_error(
-                    "ERROR: mis-matched value for property 0x%llx: wanted "
-                    "0x%llx, got 0x%llx\n",
-                    check_prop, check_value, queried_value);
+                log_error("ERROR: mis-matched value for property 0x%" PRIx64
+                          ": wanted "
+                          "0x%" PRIx64 ", got 0x%" PRIx64 "\n",
+                          check_prop, check_value, queried_value);
                 return TEST_FAIL;
             }
         }
diff --git a/test_common/harness/testHarness.cpp b/test_common/harness/testHarness.cpp
index b386391..a309f53 100644
--- a/test_common/harness/testHarness.cpp
+++ b/test_common/harness/testHarness.cpp
@@ -60,6 +60,54 @@
 
 #define DEFAULT_NUM_ELEMENTS 0x4000
 
+// Writes the selected tests' results as JSON to the file named by the
+// CL_CONFORMANCE_RESULTS_FILENAME environment variable. Returns EXIT_SUCCESS
+// if the variable is unset or the file was saved, EXIT_FAILURE otherwise.
+static int saveResultsToJson(const char *suiteName, test_definition testList[],
+                             unsigned char selectedTestList[],
+                             test_status resultTestList[], int testNum)
+{
+    char *fileName = getenv("CL_CONFORMANCE_RESULTS_FILENAME");
+    if (fileName == nullptr) return EXIT_SUCCESS;
+
+    FILE *file = fopen(fileName, "w");
+    if (NULL == file)
+    {
+        log_error("ERROR: Failed to open '%s' for writing results.\n",
+                  fileName);
+        return EXIT_FAILURE;
+    }
+
+    const char *result_map[] = { "pass", "fail", "skip" };
+    const int result_count = (int)(sizeof(result_map) / sizeof(result_map[0]));
+    const char *separator = "";
+
+    fprintf(file, "{\n");
+    fprintf(file, "\t\"cmd\": \"%s\",\n", suiteName);
+    fprintf(file, "\t\"results\": {\n");
+
+    for (int i = 0; i < testNum; ++i)
+    {
+        if (!selectedTestList[i]) continue;
+        // TEST_SKIPPED_ITSELF has no table entry of its own; report it as a
+        // skip, and any other out-of-table status as a failure, rather than
+        // reading outside result_map.
+        int status = (int)resultTestList[i];
+        if (resultTestList[i] == TEST_SKIPPED_ITSELF) status = (int)TEST_SKIP;
+        if (status < 0 || status >= result_count) status = (int)TEST_FAIL;
+        fprintf(file, "%s\t\t\"%s\": \"%s\"", separator, testList[i].name,
+                result_map[status]);
+        separator = ",\n";
+    }
+    fprintf(file, "\n\t}\n}\n");
+
+    // EXIT_FAILURE is not guaranteed to be 1, so it must not index a table.
+    const bool saved = (fclose(file) == 0);
+    log_info("Saving results to %s: %s!\n", fileName,
+             saved ? "success" : "failure");
+    return saved ? EXIT_SUCCESS : EXIT_FAILURE;
+}
+
 int runTestHarness(int argc, const char *argv[], int testNum,
                    test_definition testList[], int forceNoContextCreation,
                    cl_command_queue_properties queueProps)
@@ -68,19 +116,28 @@
                                    forceNoContextCreation, queueProps, NULL);
 }
 
-int skip_init_info(int count)
+int suite_did_not_pass_init(const char *suiteName, test_status status,
+                            int testNum, test_definition testList[])
 {
-    log_info("Test skipped while initialization\n");
-    log_info("SKIPPED %d of %d tests.\n", count, count);
-    return EXIT_SUCCESS;
+    std::vector<unsigned char> selectedTestList(testNum, 1);
+    std::vector<test_status> resultTestList(testNum, status);
+    // Every test is marked selected and given `status`, then saved as JSON.
+    int ret = saveResultsToJson(suiteName, testList, selectedTestList.data(),
+                                resultTestList.data(), testNum);
+    // Log the suite-wide outcome: all testNum tests share the same status.
+    log_info("Test %s while initialization\n",
+             status == TEST_SKIP ? "skipped" : "failed");
+    log_info("%s %d of %d tests.\n", status == TEST_SKIP ? "SKIPPED" : "FAILED",
+             testNum, testNum);
+    // A failure to save the results takes precedence over the init status.
+    if (ret != EXIT_SUCCESS)
+    {
+        return ret;
+    }
+    // A skipped suite exits successfully; any other status is a failure.
+    return status == TEST_SKIP ? EXIT_SUCCESS : EXIT_FAILURE;
 }
 
-int fail_init_info(int count)
-{
-    log_info("Test failed while initialization\n");
-    log_info("FAILED %d of %d tests.\n", count, count);
-    return EXIT_FAILURE;
-}
 void version_expected_info(const char *test_name, const char *api_name,
                            const char *expected_version,
                            const char *device_version)
@@ -470,6 +527,7 @@
         log_error("Invalid device address bit size returned by device.\n");
         return EXIT_FAILURE;
     }
+    const char *suiteName = argv[0];
     if (gCompilationMode == kSpir_v)
     {
         test_status spirv_readiness = check_spirv_compilation_readiness(device);
@@ -478,9 +536,15 @@
             switch (spirv_readiness)
             {
                 case TEST_PASS: break;
-                case TEST_FAIL: return fail_init_info(testNum);
-                case TEST_SKIP: return skip_init_info(testNum);
-                case TEST_SKIPPED_ITSELF: return skip_init_info(testNum);
+                case TEST_FAIL:
+                    return suite_did_not_pass_init(suiteName, TEST_FAIL,
+                                                   testNum, testList);
+                case TEST_SKIP:
+                    return suite_did_not_pass_init(suiteName, TEST_SKIP,
+                                                   testNum, testList);
+                case TEST_SKIPPED_ITSELF:
+                    return suite_did_not_pass_init(suiteName, TEST_SKIP,
+                                                   testNum, testList);
             }
         }
     }
@@ -492,9 +556,15 @@
         switch (status)
         {
             case TEST_PASS: break;
-            case TEST_FAIL: return fail_init_info(testNum);
-            case TEST_SKIP: return skip_init_info(testNum);
-            case TEST_SKIPPED_ITSELF: return skip_init_info(testNum);
+            case TEST_FAIL:
+                return suite_did_not_pass_init(suiteName, TEST_FAIL, testNum,
+                                               testList);
+            case TEST_SKIP:
+                return suite_did_not_pass_init(suiteName, TEST_SKIP, testNum,
+                                               testList);
+            case TEST_SKIPPED_ITSELF:
+                return suite_did_not_pass_init(suiteName, TEST_SKIP, testNum,
+                                               testList);
         }
     }
 
@@ -574,49 +644,6 @@
     return EXIT_SUCCESS;
 }
 
-static int saveResultsToJson(const char *fileName, const char *suiteName,
-                             test_definition testList[],
-                             unsigned char selectedTestList[],
-                             test_status resultTestList[], int testNum)
-{
-    FILE *file = fopen(fileName, "w");
-    if (NULL == file)
-    {
-        log_error("ERROR: Failed to open '%s' for writing results.\n",
-                  fileName);
-        return EXIT_FAILURE;
-    }
-
-    const char *save_map[] = { "success", "failure" };
-    const char *result_map[] = { "pass", "fail", "skip" };
-    const char *linebreak[] = { "", ",\n" };
-    int add_linebreak = 0;
-
-    fprintf(file, "{\n");
-    fprintf(file, "\t\"cmd\": \"%s\",\n", suiteName);
-    fprintf(file, "\t\"results\": {\n");
-
-    for (int i = 0; i < testNum; ++i)
-    {
-        if (selectedTestList[i])
-        {
-            fprintf(file, "%s\t\t\"%s\": \"%s\"", linebreak[add_linebreak],
-                    testList[i].name, result_map[(int)resultTestList[i]]);
-            add_linebreak = 1;
-        }
-    }
-    fprintf(file, "\n");
-
-    fprintf(file, "\t}\n");
-    fprintf(file, "}\n");
-
-    int ret = fclose(file) ? 1 : 0;
-
-    log_info("Saving results to %s: %s!\n", fileName, save_map[ret]);
-
-    return ret;
-}
-
 static void print_results(int failed, int count, const char *name)
 {
     if (count < failed)
@@ -658,7 +685,6 @@
     int ret = EXIT_SUCCESS;
 
     unsigned char *selectedTestList = (unsigned char *)calloc(testNum, 1);
-    test_status *resultTestList = NULL;
 
     if (argc == 1)
     {
@@ -697,24 +723,19 @@
 
     if (ret == EXIT_SUCCESS)
     {
-        resultTestList =
-            (test_status *)calloc(testNum, sizeof(*resultTestList));
+        std::vector<test_status> resultTestList(testNum, TEST_PASS);
 
-        callTestFunctions(testList, selectedTestList, resultTestList, testNum,
-                          device, forceNoContextCreation, num_elements,
+        callTestFunctions(testList, selectedTestList, resultTestList.data(),
+                          testNum, device, forceNoContextCreation, num_elements,
                           queueProps);
 
         print_results(gFailCount, gTestCount, "sub-test");
         print_results(gTestsFailed, gTestsFailed + gTestsPassed, "test");
 
-        char *filename = getenv("CL_CONFORMANCE_RESULTS_FILENAME");
-        if (filename != NULL)
-        {
-            ret = saveResultsToJson(filename, argv[0], testList,
-                                    selectedTestList, resultTestList, testNum);
-        }
+        ret = saveResultsToJson(argv[0], testList, selectedTestList,
+                                resultTestList.data(), testNum);
 
-        if (std::any_of(resultTestList, resultTestList + testNum,
+        if (std::any_of(resultTestList.begin(), resultTestList.end(),
                         [](test_status result) {
                             switch (result)
                             {
@@ -730,7 +751,6 @@
     }
 
     free(selectedTestList);
-    free(resultTestList);
 
     return ret;
 }
@@ -1178,7 +1198,7 @@
 
 void PrintArch(void)
 {
-    vlog("sizeof( void*) = %ld\n", sizeof(void *));
+    vlog("sizeof( void*) = %zu\n", sizeof(void *));
 #if defined(__ppc__)
     vlog("ARCH:\tppc\n");
 #elif defined(__ppc64__)
diff --git a/test_conformance/SVM/test_cross_buffer_pointers.cpp b/test_conformance/SVM/test_cross_buffer_pointers.cpp
index c1caebb..2baa7ad 100644
--- a/test_conformance/SVM/test_cross_buffer_pointers.cpp
+++ b/test_conformance/SVM/test_cross_buffer_pointers.cpp
@@ -162,7 +162,8 @@
     test_error(error, "clCreateBuffer failed.");
 
     // this buffer holds the index into the nodes buffer that is used for node allocation
-    clMemWrapper allocator = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_int), NULL, &error);
+    clMemWrapper allocator = clCreateBuffer(context, CL_MEM_READ_WRITE,
+                                            sizeof(size_t), NULL, &error);
     test_error(error, "clCreateBuffer failed.");
 
     // this buffer holds the count of correct nodes which is computed by the verify kernel.
diff --git a/test_conformance/SVM/test_shared_sub_buffers.cpp b/test_conformance/SVM/test_shared_sub_buffers.cpp
index a79484c..2532886 100644
--- a/test_conformance/SVM/test_shared_sub_buffers.cpp
+++ b/test_conformance/SVM/test_shared_sub_buffers.cpp
@@ -182,7 +182,8 @@
 
 
     // this buffer holds the index into the nodes buffer that is used for node allocation
-    clMemWrapper allocator = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_int), NULL, &error);
+    clMemWrapper allocator = clCreateBuffer(context, CL_MEM_READ_WRITE,
+                                            sizeof(size_t), NULL, &error);
     test_error(error, "clCreateBuffer failed.");
 
     // this buffer holds the count of correct nodes which is computed by the verify kernel.
diff --git a/test_conformance/atomics/main.cpp b/test_conformance/atomics/main.cpp
index afdea37..987d6bf 100644
--- a/test_conformance/atomics/main.cpp
+++ b/test_conformance/atomics/main.cpp
@@ -1,6 +1,6 @@
 //
 // Copyright (c) 2017 The Khronos Group Inc.
-// 
+//
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -24,6 +24,7 @@
 #include <unistd.h>
 #endif
 
+// clang-format off
 test_definition test_list[] = {
     ADD_TEST( atomic_add ),
     ADD_TEST( atomic_sub ),
@@ -40,11 +41,11 @@
     ADD_TEST( atomic_add_index ),
     ADD_TEST( atomic_add_index_bin ),
 };
+// clang-format on
 
-const int test_num = ARRAY_SIZE( test_list );
+const int test_num = ARRAY_SIZE(test_list);
 
 int main(int argc, const char *argv[])
 {
     return runTestHarness(argc, argv, test_num, test_list, false, 0);
 }
-
diff --git a/test_conformance/atomics/procs.h b/test_conformance/atomics/procs.h
index bf053f2..fa85aad 100644
--- a/test_conformance/atomics/procs.h
+++ b/test_conformance/atomics/procs.h
@@ -1,6 +1,6 @@
 //
 // Copyright (c) 2017 The Khronos Group Inc.
-// 
+//
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -18,22 +18,35 @@
 #include "harness/threadTesting.h"
 #include "harness/typeWrappers.h"
 
-extern int      create_program_and_kernel(const char *source, const char *kernel_name, cl_program *program_ret, cl_kernel *kernel_ret);
+extern int create_program_and_kernel(const char *source,
+                                     const char *kernel_name,
+                                     cl_program *program_ret,
+                                     cl_kernel *kernel_ret);
 
-extern int        test_atomic_add(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
-extern int        test_atomic_sub(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
-extern int        test_atomic_xchg(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
-extern int        test_atomic_min(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
-extern int        test_atomic_max(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
-extern int        test_atomic_inc(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
-extern int        test_atomic_dec(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
-extern int        test_atomic_cmpxchg(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
-extern int        test_atomic_and(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
-extern int        test_atomic_or(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
-extern int        test_atomic_xor(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+extern int test_atomic_add(cl_device_id deviceID, cl_context context,
+                           cl_command_queue queue, int num_elements);
+extern int test_atomic_sub(cl_device_id deviceID, cl_context context,
+                           cl_command_queue queue, int num_elements);
+extern int test_atomic_xchg(cl_device_id deviceID, cl_context context,
+                            cl_command_queue queue, int num_elements);
+extern int test_atomic_min(cl_device_id deviceID, cl_context context,
+                           cl_command_queue queue, int num_elements);
+extern int test_atomic_max(cl_device_id deviceID, cl_context context,
+                           cl_command_queue queue, int num_elements);
+extern int test_atomic_inc(cl_device_id deviceID, cl_context context,
+                           cl_command_queue queue, int num_elements);
+extern int test_atomic_dec(cl_device_id deviceID, cl_context context,
+                           cl_command_queue queue, int num_elements);
+extern int test_atomic_cmpxchg(cl_device_id deviceID, cl_context context,
+                               cl_command_queue queue, int num_elements);
+extern int test_atomic_and(cl_device_id deviceID, cl_context context,
+                           cl_command_queue queue, int num_elements);
+extern int test_atomic_or(cl_device_id deviceID, cl_context context,
+                          cl_command_queue queue, int num_elements);
+extern int test_atomic_xor(cl_device_id deviceID, cl_context context,
+                           cl_command_queue queue, int num_elements);
 
-extern int        test_atomic_add_index(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
-extern int        test_atomic_add_index_bin(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
-
-
-
+extern int test_atomic_add_index(cl_device_id deviceID, cl_context context,
+                                 cl_command_queue queue, int num_elements);
+extern int test_atomic_add_index_bin(cl_device_id deviceID, cl_context context,
+                                     cl_command_queue queue, int num_elements);
diff --git a/test_conformance/atomics/testBase.h b/test_conformance/atomics/testBase.h
index ba67d14..22bce1d 100644
--- a/test_conformance/atomics/testBase.h
+++ b/test_conformance/atomics/testBase.h
@@ -1,6 +1,6 @@
 //
 // Copyright (c) 2017 The Khronos Group Inc.
-// 
+//
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -26,6 +26,3 @@
 #include "procs.h"
 
 #endif // _testBase_h
-
-
-
diff --git a/test_conformance/atomics/test_atomics.cpp b/test_conformance/atomics/test_atomics.cpp
index c0c0136..caa4b78 100644
--- a/test_conformance/atomics/test_atomics.cpp
+++ b/test_conformance/atomics/test_atomics.cpp
@@ -1,6 +1,6 @@
 //
 // Copyright (c) 2017 The Khronos Group Inc.
-// 
+//
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -19,10 +19,12 @@
 #include <unistd.h>
 #endif
 
+#include <cinttypes>
+
 #define INT_TEST_VALUE 402258822
 #define LONG_TEST_VALUE 515154531254381446LL
 
-
+// clang-format off
 const char *atomic_global_pattern[] = {
     "__kernel void test_atomic_fn(volatile __global %s *destMemory, __global %s *oldValues)\n"
     "{\n"
@@ -36,19 +38,20 @@
     "__kernel void test_atomic_fn(__global %s *finalDest, __global %s *oldValues, volatile __local %s *destMemory, int numDestItems )\n"
     "{\n"
     "    int  tid = get_global_id(0);\n"
-    "     int  dstItemIdx;\n"
+    "    int  dstItemIdx;\n"
     "\n"
     "    // Everybody does the following line(s), but it all has the same result. We still need to ensure we sync before the atomic op, though\n"
-    "     for( dstItemIdx = 0; dstItemIdx < numDestItems; dstItemIdx++ )\n"
+    "    for( dstItemIdx = 0; dstItemIdx < numDestItems; dstItemIdx++ )\n"
     "        destMemory[ dstItemIdx ] = finalDest[ dstItemIdx ];\n"
     "    barrier( CLK_LOCAL_MEM_FENCE );\n"
     "\n"
     ,
     "    barrier( CLK_LOCAL_MEM_FENCE );\n"
     "    // Finally, write out the last value. Again, we're synced, so everyone will be writing the same value\n"
-    "     for( dstItemIdx = 0; dstItemIdx < numDestItems; dstItemIdx++ )\n"
+    "    for( dstItemIdx = 0; dstItemIdx < numDestItems; dstItemIdx++ )\n"
     "        finalDest[ dstItemIdx ] = destMemory[ dstItemIdx ];\n"
     "}\n" };
+// clang-format on
 
 
 #define TEST_COUNT 128 * 1024
@@ -56,41 +59,48 @@
 
 struct TestFns
 {
-    cl_int    mIntStartValue;
-    cl_long    mLongStartValue;
+    cl_int mIntStartValue;
+    cl_long mLongStartValue;
 
-    size_t    (*NumResultsFn)( size_t threadSize, ExplicitType dataType );
+    size_t (*NumResultsFn)(size_t threadSize, ExplicitType dataType);
 
     // Integer versions
-    cl_int    (*ExpectedValueIntFn)( size_t size, cl_int *startRefValues, size_t whichDestValue );
-    void    (*GenerateRefsIntFn)( size_t size, cl_int *startRefValues, MTdata d );
-    bool    (*VerifyRefsIntFn)( size_t size, cl_int *refValues, cl_int finalValue );
+    cl_int (*ExpectedValueIntFn)(size_t size, cl_int *startRefValues,
+                                 size_t whichDestValue);
+    void (*GenerateRefsIntFn)(size_t size, cl_int *startRefValues, MTdata d);
+    bool (*VerifyRefsIntFn)(size_t size, cl_int *refValues, cl_int finalValue);
 
     // Long versions
-    cl_long    (*ExpectedValueLongFn)( size_t size, cl_long *startRefValues, size_t whichDestValue );
-    void    (*GenerateRefsLongFn)( size_t size, cl_long *startRefValues, MTdata d );
-    bool    (*VerifyRefsLongFn)( size_t size, cl_long *refValues, cl_long finalValue );
+    cl_long (*ExpectedValueLongFn)(size_t size, cl_long *startRefValues,
+                                   size_t whichDestValue);
+    void (*GenerateRefsLongFn)(size_t size, cl_long *startRefValues, MTdata d);
+    bool (*VerifyRefsLongFn)(size_t size, cl_long *refValues,
+                             cl_long finalValue);
 
     // Float versions
-    cl_float    (*ExpectedValueFloatFn)( size_t size, cl_float *startRefValues, size_t whichDestValue );
-    void        (*GenerateRefsFloatFn)( size_t size, cl_float *startRefValues, MTdata d );
-    bool        (*VerifyRefsFloatFn)( size_t size, cl_float *refValues, cl_float finalValue );
+    cl_float (*ExpectedValueFloatFn)(size_t size, cl_float *startRefValues,
+                                     size_t whichDestValue);
+    void (*GenerateRefsFloatFn)(size_t size, cl_float *startRefValues,
+                                MTdata d);
+    bool (*VerifyRefsFloatFn)(size_t size, cl_float *refValues,
+                              cl_float finalValue);
 };
 
-bool check_atomic_support( cl_device_id device, bool extended, bool isLocal, ExplicitType dataType )
+bool check_atomic_support(cl_device_id device, bool extended, bool isLocal,
+                          ExplicitType dataType)
 {
+    // clang-format off
     const char *extensionNames[8] = {
         "cl_khr_global_int32_base_atomics", "cl_khr_global_int32_extended_atomics",
         "cl_khr_local_int32_base_atomics",  "cl_khr_local_int32_extended_atomics",
         "cl_khr_int64_base_atomics",        "cl_khr_int64_extended_atomics",
         "cl_khr_int64_base_atomics",        "cl_khr_int64_extended_atomics"       // this line intended to be the same as the last one
     };
+    // clang-format on
 
     size_t index = 0;
-    if( extended )
-        index += 1;
-    if( isLocal )
-        index += 2;
+    if (extended) index += 1;
+    if (isLocal) index += 2;
 
     Version version = get_device_cl_version(device);
 
@@ -98,26 +108,28 @@
     {
         case kInt:
         case kUInt:
-            if( version >= Version(1,1) )
-                return 1;
+            if (version >= Version(1, 1)) return 1;
             break;
         case kLong:
-        case kULong:
-            index += 4;
-            break;
-        case kFloat:  // this has to stay separate since the float atomics arent in the 1.0 extensions
-            return version >= Version(1,1);
+        case kULong: index += 4; break;
+        case kFloat: // this has to stay separate since the float atomics arent
+                     // in the 1.0 extensions
+            return version >= Version(1, 1);
         default:
-            log_error( "ERROR:  Unsupported data type (%d) in check_atomic_support\n", dataType );
+            log_error(
+                "ERROR:  Unsupported data type (%d) in check_atomic_support\n",
+                dataType);
             return 0;
     }
 
-    return is_extension_available( device, extensionNames[index] );
+    return is_extension_available(device, extensionNames[index]);
 }
 
-int test_atomic_function(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, const char *programCore,
-                         TestFns testFns,
-                         bool extended, bool isLocal, ExplicitType dataType, bool matchGroupSize )
+int test_atomic_function(cl_device_id deviceID, cl_context context,
+                         cl_command_queue queue, int num_elements,
+                         const char *programCore, TestFns testFns,
+                         bool extended, bool isLocal, ExplicitType dataType,
+                         bool matchGroupSize)
 {
     clProgramWrapper program;
     clKernelWrapper kernel;
@@ -127,55 +139,65 @@
     void *refValues, *startRefValues;
     size_t threadSize, groupSize;
     const char *programLines[4];
-    char pragma[ 512 ];
-    char programHeader[ 512 ];
+    char pragma[512];
+    char programHeader[512];
     MTdata d;
-    size_t typeSize = get_explicit_type_size( dataType );
+    size_t typeSize = get_explicit_type_size(dataType);
 
 
     // Verify we can run first
-    bool isUnsigned = ( dataType == kULong ) || ( dataType == kUInt );
-    if( !check_atomic_support( deviceID, extended, isLocal, dataType ) )
+    bool isUnsigned = (dataType == kULong) || (dataType == kUInt);
+    if (!check_atomic_support(deviceID, extended, isLocal, dataType))
     {
-        // Only print for the signed (unsigned comes right after, and if signed isn't supported, unsigned isn't either)
-        if( dataType == kFloat )
-            log_info( "\t%s float not supported\n", isLocal ? "Local" : "Global" );
-        else if( !isUnsigned )
-            log_info( "\t%s %sint%d not supported\n", isLocal ? "Local" : "Global", isUnsigned ? "u" : "", (int)typeSize * 8 );
+        // Only print for the signed (unsigned comes right after, and if signed
+        // isn't supported, unsigned isn't either)
+        if (dataType == kFloat)
+            log_info("\t%s float not supported\n",
+                     isLocal ? "Local" : "Global");
+        else if (!isUnsigned)
+            log_info("\t%s %sint%d not supported\n",
+                     isLocal ? "Local" : "Global", isUnsigned ? "u" : "",
+                     (int)typeSize * 8);
         // Since we don't support the operation, they implicitly pass
         return 0;
     }
     else
     {
-        if( dataType == kFloat )
-            log_info( "\t%s float%s...", isLocal ? "local" : "global", isLocal ? " " : "" );
+        if (dataType == kFloat)
+            log_info("\t%s float%s...", isLocal ? "local" : "global",
+                     isLocal ? " " : "");
         else
-            log_info( "\t%s %sint%d%s%s...", isLocal ? "local" : "global", isUnsigned ? "u" : "",
-                     (int)typeSize * 8, isUnsigned ? "" : " ", isLocal ? " " : "" );
+            log_info("\t%s %sint%d%s%s...", isLocal ? "local" : "global",
+                     isUnsigned ? "u" : "", (int)typeSize * 8,
+                     isUnsigned ? "" : " ", isLocal ? " " : "");
     }
 
     //// Set up the kernel code
 
     // Create the pragma line for this kernel
-    bool isLong = ( dataType == kLong || dataType == kULong );
-    sprintf( pragma, "#pragma OPENCL EXTENSION cl_khr%s_int%s_%s_atomics : enable\n",
-            isLong ? "" : (isLocal ? "_local" : "_global"), isLong ? "64" : "32",
-            extended ? "extended" : "base" );
+    bool isLong = (dataType == kLong || dataType == kULong);
+    sprintf(pragma,
+            "#pragma OPENCL EXTENSION cl_khr%s_int%s_%s_atomics : enable\n",
+            isLong ? "" : (isLocal ? "_local" : "_global"),
+            isLong ? "64" : "32", extended ? "extended" : "base");
 
     // Now create the program header
-    const char *typeName = get_explicit_type_name( dataType );
-    if( isLocal )
-        sprintf( programHeader, atomic_local_pattern[ 0 ], typeName, typeName, typeName );
+    const char *typeName = get_explicit_type_name(dataType);
+    if (isLocal)
+        sprintf(programHeader, atomic_local_pattern[0], typeName, typeName,
+                typeName);
     else
-        sprintf( programHeader, atomic_global_pattern[ 0 ], typeName, typeName );
+        sprintf(programHeader, atomic_global_pattern[0], typeName, typeName);
 
     // Set up our entire program now
-    programLines[ 0 ] = pragma;
-    programLines[ 1 ] = programHeader;
-    programLines[ 2 ] = programCore;
-    programLines[ 3 ] = ( isLocal ) ? atomic_local_pattern[ 1 ] : atomic_global_pattern[ 1 ];
+    programLines[0] = pragma;
+    programLines[1] = programHeader;
+    programLines[2] = programCore;
+    programLines[3] =
+        (isLocal) ? atomic_local_pattern[1] : atomic_global_pattern[1];
 
-    if( create_single_kernel_helper( context, &program, &kernel, 4, programLines, "test_atomic_fn" ) )
+    if (create_single_kernel_helper(context, &program, &kernel, 4, programLines,
+                                    "test_atomic_fn"))
     {
         return -1;
     }
@@ -183,29 +205,37 @@
     //// Set up to actually run
     threadSize = num_elements;
 
-    error = get_max_common_work_group_size( context, kernel, threadSize, &groupSize );
-    test_error( error, "Unable to get thread group max size" );
+    error =
+        get_max_common_work_group_size(context, kernel, threadSize, &groupSize);
+    test_error(error, "Unable to get thread group max size");
 
-    if( matchGroupSize )
+    if (matchGroupSize)
         // HACK because xchg and cmpxchg apparently are limited by hardware
         threadSize = groupSize;
 
-    if( isLocal )
+    if (isLocal)
     {
-        size_t maxSizes[3] = {0, 0, 0};
-        error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_WORK_ITEM_SIZES, 3*sizeof(size_t), maxSizes, 0);
-        test_error( error, "Unable to obtain max work item sizes for the device" );
+        size_t maxSizes[3] = { 0, 0, 0 };
+        error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_WORK_ITEM_SIZES,
+                                3 * sizeof(size_t), maxSizes, 0);
+        test_error(error,
+                   "Unable to obtain max work item sizes for the device");
 
         size_t workSize;
-        error = clGetKernelWorkGroupInfo( kernel, deviceID, CL_KERNEL_WORK_GROUP_SIZE, sizeof( workSize ), &workSize, NULL );
-        test_error( error, "Unable to obtain max work group size for device and kernel combo" );
+        error = clGetKernelWorkGroupInfo(kernel, deviceID,
+                                         CL_KERNEL_WORK_GROUP_SIZE,
+                                         sizeof(workSize), &workSize, NULL);
+        test_error(
+            error,
+            "Unable to obtain max work group size for device and kernel combo");
 
         // Limit workSize to avoid extremely large local buffer size and slow
         // run.
         if (workSize > 65536) workSize = 65536;
 
-        // "workSize" is limited to that of the first dimension as only a 1DRange is executed.
-        if( maxSizes[0] < workSize )
+        // "workSize" is limited to that of the first dimension as only a
+        // 1DRange is executed.
+        if (maxSizes[0] < workSize)
         {
             workSize = maxSizes[0];
         }
@@ -214,38 +244,43 @@
     }
 
 
-    log_info( "\t(thread count %d, group size %d)\n", (int)threadSize, (int)groupSize );
+    log_info("\t(thread count %d, group size %d)\n", (int)threadSize,
+             (int)groupSize);
 
-    refValues = (cl_int *)malloc( typeSize * threadSize );
+    refValues = (cl_int *)malloc(typeSize * threadSize);
 
-    if( testFns.GenerateRefsIntFn != NULL )
+    if (testFns.GenerateRefsIntFn != NULL)
     {
         // We have a ref generator provided
-        d = init_genrand( gRandomSeed );
-        startRefValues = malloc( typeSize * threadSize );
-        if( typeSize == 4 )
-            testFns.GenerateRefsIntFn( threadSize, (cl_int *)startRefValues, d );
+        d = init_genrand(gRandomSeed);
+        startRefValues = malloc(typeSize * threadSize);
+        if (typeSize == 4)
+            testFns.GenerateRefsIntFn(threadSize, (cl_int *)startRefValues, d);
         else
-            testFns.GenerateRefsLongFn( threadSize, (cl_long *)startRefValues, d );
+            testFns.GenerateRefsLongFn(threadSize, (cl_long *)startRefValues,
+                                       d);
         free_mtdata(d);
         d = NULL;
     }
     else
         startRefValues = NULL;
 
-    // If we're given a num_results function, we need to determine how many result objects we need. If
-    // we don't have it, we assume it's just 1
-    size_t numDestItems = ( testFns.NumResultsFn != NULL ) ? testFns.NumResultsFn( threadSize, dataType ) : 1;
+    // If we're given a num_results function, we need to determine how many
+    // result objects we need. If we don't have it, we assume it's just 1
+    size_t numDestItems = (testFns.NumResultsFn != NULL)
+        ? testFns.NumResultsFn(threadSize, dataType)
+        : 1;
 
-    char * destItems = new char[ typeSize * numDestItems ];
-    if( destItems == NULL )
+    char *destItems = new char[typeSize * numDestItems];
+    if (destItems == NULL)
     {
-        log_error( "ERROR: Unable to allocate memory!\n" );
+        log_error("ERROR: Unable to allocate memory!\n");
         return -1;
     }
-    void * startValue = ( typeSize == 4 ) ? (void *)&testFns.mIntStartValue : (void *)&testFns.mLongStartValue;
-    for( size_t i = 0; i < numDestItems; i++ )
-        memcpy( destItems + i * typeSize, startValue, typeSize );
+    void *startValue = (typeSize == 4) ? (void *)&testFns.mIntStartValue
+                                       : (void *)&testFns.mLongStartValue;
+    for (size_t i = 0; i < numDestItems; i++)
+        memcpy(destItems + i * typeSize, startValue, typeSize);
 
     streams[0] = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
                                 typeSize * numDestItems, destItems, NULL);
@@ -265,82 +300,97 @@
     }
 
     /* Set the arguments */
-    error = clSetKernelArg( kernel, 0, sizeof( streams[0] ), &streams[0] );
-    test_error( error, "Unable to set indexed kernel arguments" );
-    error = clSetKernelArg( kernel, 1, sizeof( streams[1] ), &streams[1] );
-    test_error( error, "Unable to set indexed kernel arguments" );
+    error = clSetKernelArg(kernel, 0, sizeof(streams[0]), &streams[0]);
+    test_error(error, "Unable to set indexed kernel arguments");
+    error = clSetKernelArg(kernel, 1, sizeof(streams[1]), &streams[1]);
+    test_error(error, "Unable to set indexed kernel arguments");
 
-    if( isLocal )
+    if (isLocal)
     {
-        error = clSetKernelArg( kernel, 2, typeSize * numDestItems, NULL );
-        test_error( error, "Unable to set indexed local kernel argument" );
+        error = clSetKernelArg(kernel, 2, typeSize * numDestItems, NULL);
+        test_error(error, "Unable to set indexed local kernel argument");
 
         cl_int numDestItemsInt = (cl_int)numDestItems;
-        error = clSetKernelArg( kernel, 3, sizeof( cl_int ), &numDestItemsInt );
-        test_error( error, "Unable to set indexed kernel argument" );
+        error = clSetKernelArg(kernel, 3, sizeof(cl_int), &numDestItemsInt);
+        test_error(error, "Unable to set indexed kernel argument");
     }
 
     /* Run the kernel */
     threads[0] = threadSize;
-    error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, &groupSize, 0, NULL, NULL );
-    test_error( error, "Unable to execute test kernel" );
+    error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, threads, &groupSize,
+                                   0, NULL, NULL);
+    test_error(error, "Unable to execute test kernel");
 
-    error = clEnqueueReadBuffer( queue, streams[0], true, 0, typeSize * numDestItems, destItems, 0, NULL, NULL );
-    test_error( error, "Unable to read result value!" );
+    error =
+        clEnqueueReadBuffer(queue, streams[0], true, 0, typeSize * numDestItems,
+                            destItems, 0, NULL, NULL);
+    test_error(error, "Unable to read result value!");
 
-    error = clEnqueueReadBuffer( queue, streams[1], true, 0, typeSize * threadSize, refValues, 0, NULL, NULL );
-    test_error( error, "Unable to read reference values!" );
+    error =
+        clEnqueueReadBuffer(queue, streams[1], true, 0, typeSize * threadSize,
+                            refValues, 0, NULL, NULL);
+    test_error(error, "Unable to read reference values!");
 
-    // If we have an expectedFn, then we need to generate a final value to compare against. If we don't
-    // have one, it's because we're comparing ref values only
-    if( testFns.ExpectedValueIntFn != NULL )
+    // If we have an expectedFn, then we need to generate a final value to
+    // compare against. If we don't have one, it's because we're comparing ref
+    // values only
+    if (testFns.ExpectedValueIntFn != NULL)
     {
-        for( size_t i = 0; i < numDestItems; i++ )
+        for (size_t i = 0; i < numDestItems; i++)
         {
-            char expected[ 8 ];
+            char expected[8];
             cl_int intVal;
             cl_long longVal;
-            if( typeSize == 4 )
+            if (typeSize == 4)
             {
                 // Int version
-                intVal = testFns.ExpectedValueIntFn( threadSize, (cl_int *)startRefValues, i );
-                memcpy( expected, &intVal, sizeof( intVal ) );
+                intVal = testFns.ExpectedValueIntFn(
+                    threadSize, (cl_int *)startRefValues, i);
+                memcpy(expected, &intVal, sizeof(intVal));
             }
             else
             {
                 // Long version
-                longVal = testFns.ExpectedValueLongFn( threadSize, (cl_long *)startRefValues, i );
-                memcpy( expected, &longVal, sizeof( longVal ) );
+                longVal = testFns.ExpectedValueLongFn(
+                    threadSize, (cl_long *)startRefValues, i);
+                memcpy(expected, &longVal, sizeof(longVal));
             }
 
-            if( memcmp( expected, destItems + i * typeSize, typeSize ) != 0 )
+            if (memcmp(expected, destItems + i * typeSize, typeSize) != 0)
             {
-                if( typeSize == 4 )
+                if (typeSize == 4)
                 {
-                    cl_int *outValue = (cl_int *)( destItems + i * typeSize );
-                    log_error( "ERROR: Result %ld from kernel does not validate! (should be %d, was %d)\n", i, intVal, *outValue );
+                    cl_int *outValue = (cl_int *)(destItems + i * typeSize);
+                    log_error("ERROR: Result %zu from kernel does not "
+                              "validate! (should be %d, was %d)\n",
+                              i, intVal, *outValue);
                     cl_int *startRefs = (cl_int *)startRefValues;
                     cl_int *refs = (cl_int *)refValues;
-                    for( i = 0; i < threadSize; i++ )
+                    for (i = 0; i < threadSize; i++)
                     {
-                        if( startRefs != NULL )
-                            log_info( " --- %ld - %d --- %d\n", i, startRefs[i], refs[i] );
+                        if (startRefs != NULL)
+                            log_info(" --- %zu - %d --- %d\n", i, startRefs[i],
+                                     refs[i]);
                         else
-                            log_info( " --- %ld --- %d\n", i, refs[i] );
+                            log_info(" --- %zu --- %d\n", i, refs[i]);
                     }
                 }
                 else
                 {
-                    cl_long *outValue = (cl_long *)( destItems + i * typeSize );
-                    log_error( "ERROR: Result %ld from kernel does not validate! (should be %lld, was %lld)\n", i, longVal, *outValue );
+                    cl_long *outValue = (cl_long *)(destItems + i * typeSize);
+                    log_error("ERROR: Result %zu from kernel does not "
+                              "validate! (should be %" PRId64 ", was %" PRId64
+                              ")\n",
+                              i, longVal, *outValue);
                     cl_long *startRefs = (cl_long *)startRefValues;
                     cl_long *refs = (cl_long *)refValues;
-                    for( i = 0; i < threadSize; i++ )
+                    for (i = 0; i < threadSize; i++)
                     {
-                        if( startRefs != NULL )
-                            log_info( " --- %ld - %lld --- %lld\n", i, startRefs[i], refs[i] );
+                        if (startRefs != NULL)
+                            log_info(" --- %zu - %" PRId64 " --- %" PRId64 "\n",
+                                     i, startRefs[i], refs[i]);
                         else
-                            log_info( " --- %ld --- %lld\n", i, refs[i] );
+                            log_info(" --- %zu --- %" PRId64 "\n", i, refs[i]);
                     }
                 }
                 return -1;
@@ -348,104 +398,141 @@
         }
     }
 
-    if( testFns.VerifyRefsIntFn != NULL )
+    if (testFns.VerifyRefsIntFn != NULL)
     {
         /* Use the verify function to also check the results */
-        if( dataType == kFloat )
+        if (dataType == kFloat)
         {
             cl_float *outValue = (cl_float *)destItems;
-            if( !testFns.VerifyRefsFloatFn( threadSize, (cl_float *)refValues, *outValue ) != 0 )
+            if (!testFns.VerifyRefsFloatFn(threadSize, (cl_float *)refValues,
+                                           *outValue)
+                != 0)
             {
-                log_error( "ERROR: Reference values did not validate!\n" );
+                log_error("ERROR: Reference values did not validate!\n");
                 return -1;
             }
         }
-        else if( typeSize == 4 )
+        else if (typeSize == 4)
         {
             cl_int *outValue = (cl_int *)destItems;
-            if( !testFns.VerifyRefsIntFn( threadSize, (cl_int *)refValues, *outValue ) != 0 )
+            if (!testFns.VerifyRefsIntFn(threadSize, (cl_int *)refValues,
+                                         *outValue)
+                != 0)
             {
-                log_error( "ERROR: Reference values did not validate!\n" );
+                log_error("ERROR: Reference values did not validate!\n");
                 return -1;
             }
         }
         else
         {
             cl_long *outValue = (cl_long *)destItems;
-            if( !testFns.VerifyRefsLongFn( threadSize, (cl_long *)refValues, *outValue ) != 0 )
+            if (!testFns.VerifyRefsLongFn(threadSize, (cl_long *)refValues,
+                                          *outValue)
+                != 0)
             {
-                log_error( "ERROR: Reference values did not validate!\n" );
+                log_error("ERROR: Reference values did not validate!\n");
                 return -1;
             }
         }
     }
-    else if( testFns.ExpectedValueIntFn == NULL )
+    else if (testFns.ExpectedValueIntFn == NULL)
     {
-        log_error( "ERROR: Test doesn't check total or refs; no values are verified!\n" );
+        log_error("ERROR: Test doesn't check total or refs; no values are "
+                  "verified!\n");
         return -1;
     }
 
 
     /* Re-write the starting value */
-    for( size_t i = 0; i < numDestItems; i++ )
-        memcpy( destItems + i * typeSize, startValue, typeSize );
-    error = clEnqueueWriteBuffer( queue, streams[0], true, 0, typeSize * numDestItems, destItems, 0, NULL, NULL );
-    test_error( error, "Unable to write starting values!" );
+    for (size_t i = 0; i < numDestItems; i++)
+        memcpy(destItems + i * typeSize, startValue, typeSize);
+    error =
+        clEnqueueWriteBuffer(queue, streams[0], true, 0,
+                             typeSize * numDestItems, destItems, 0, NULL, NULL);
+    test_error(error, "Unable to write starting values!");
 
-    /* Run the kernel once for a single thread, so we can verify that the returned value is the original one */
+    /* Run the kernel once for a single thread, so we can verify that the
+     * returned value is the original one */
     threads[0] = 1;
-    error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, threads, 0, NULL, NULL );
-    test_error( error, "Unable to execute test kernel" );
+    error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, threads, threads, 0,
+                                   NULL, NULL);
+    test_error(error, "Unable to execute test kernel");
 
-    error = clEnqueueReadBuffer( queue, streams[1], true, 0, typeSize, refValues, 0, NULL, NULL );
-    test_error( error, "Unable to read reference values!" );
+    error = clEnqueueReadBuffer(queue, streams[1], true, 0, typeSize, refValues,
+                                0, NULL, NULL);
+    test_error(error, "Unable to read reference values!");
 
-    if( memcmp( refValues, destItems, typeSize ) != 0 )
+    if (memcmp(refValues, destItems, typeSize) != 0)
     {
-        if( typeSize == 4 )
+        if (typeSize == 4)
         {
             cl_int *s = (cl_int *)destItems;
             cl_int *r = (cl_int *)refValues;
-            log_error( "ERROR: atomic function operated correctly but did NOT return correct 'old' value "
-                      " (should have been %d, returned %d)!\n", *s, *r );
+            log_error("ERROR: atomic function operated correctly but did NOT "
+                      "return correct 'old' value "
+                      " (should have been %d, returned %d)!\n",
+                      *s, *r);
         }
         else
         {
             cl_long *s = (cl_long *)destItems;
             cl_long *r = (cl_long *)refValues;
-            log_error( "ERROR: atomic function operated correctly but did NOT return correct 'old' value "
-                      " (should have been %lld, returned %lld)!\n", *s, *r );
+            log_error("ERROR: atomic function operated correctly but did NOT "
+                      "return correct 'old' value "
+                      " (should have been %" PRId64 ", returned %" PRId64
+                      ")!\n",
+                      *s, *r);
         }
         return -1;
     }
 
-    delete [] destItems;
-    free( refValues );
-    if( startRefValues != NULL )
-        free( startRefValues );
+    delete[] destItems;
+    free(refValues);
+    if (startRefValues != NULL) free(startRefValues);
 
     return 0;
 }
 
-int test_atomic_function_set(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, const char *programCore,
-                             TestFns testFns,
-                             bool extended, bool matchGroupSize, bool usingAtomicPrefix )
+int test_atomic_function_set(cl_device_id deviceID, cl_context context,
+                             cl_command_queue queue, int num_elements,
+                             const char *programCore, TestFns testFns,
+                             bool extended, bool matchGroupSize,
+                             bool usingAtomicPrefix)
 {
-    log_info("    Testing %s functions...\n", usingAtomicPrefix ? "atomic_" : "atom_");
+    log_info("    Testing %s functions...\n",
+             usingAtomicPrefix ? "atomic_" : "atom_");
 
     int errors = 0;
-    errors |= test_atomic_function( deviceID, context, queue, num_elements, programCore, testFns, extended, false, kInt, matchGroupSize );
-    errors |= test_atomic_function( deviceID, context, queue, num_elements, programCore, testFns, extended, false, kUInt, matchGroupSize );
-    errors |= test_atomic_function( deviceID, context, queue, num_elements, programCore, testFns, extended, true, kInt, matchGroupSize );
-    errors |= test_atomic_function( deviceID, context, queue, num_elements, programCore, testFns, extended, true, kUInt, matchGroupSize );
+    errors |= test_atomic_function(deviceID, context, queue, num_elements,
+                                   programCore, testFns, extended, false, kInt,
+                                   matchGroupSize);
+    errors |= test_atomic_function(deviceID, context, queue, num_elements,
+                                   programCore, testFns, extended, false, kUInt,
+                                   matchGroupSize);
+    errors |= test_atomic_function(deviceID, context, queue, num_elements,
+                                   programCore, testFns, extended, true, kInt,
+                                   matchGroupSize);
+    errors |= test_atomic_function(deviceID, context, queue, num_elements,
+                                   programCore, testFns, extended, true, kUInt,
+                                   matchGroupSize);
 
-    // Only the 32 bit atomic functions use the "atomic" prefix in 1.1, the 64 bit functions still use the "atom" prefix.
-    // The argument usingAtomicPrefix is set to true if programCore was generated with the "atomic" prefix.
-    if (!usingAtomicPrefix) {
-      errors |= test_atomic_function( deviceID, context, queue, num_elements, programCore, testFns, extended, false, kLong, matchGroupSize );
-      errors |= test_atomic_function( deviceID, context, queue, num_elements, programCore, testFns, extended, false, kULong, matchGroupSize );
-      errors |= test_atomic_function( deviceID, context, queue, num_elements, programCore, testFns, extended, true, kLong, matchGroupSize );
-      errors |= test_atomic_function( deviceID, context, queue, num_elements, programCore, testFns, extended, true, kULong, matchGroupSize );
+    // Only the 32 bit atomic functions use the "atomic" prefix in 1.1, the 64
+    // bit functions still use the "atom" prefix. The argument usingAtomicPrefix
+    // is set to true if programCore was generated with the "atomic" prefix.
+    if (!usingAtomicPrefix)
+    {
+        errors |= test_atomic_function(deviceID, context, queue, num_elements,
+                                       programCore, testFns, extended, false,
+                                       kLong, matchGroupSize);
+        errors |= test_atomic_function(deviceID, context, queue, num_elements,
+                                       programCore, testFns, extended, false,
+                                       kULong, matchGroupSize);
+        errors |= test_atomic_function(deviceID, context, queue, num_elements,
+                                       programCore, testFns, extended, true,
+                                       kLong, matchGroupSize);
+        errors |= test_atomic_function(deviceID, context, queue, num_elements,
+                                       programCore, testFns, extended, true,
+                                       kULong, matchGroupSize);
     }
 
     return errors;
@@ -454,265 +541,346 @@
 #pragma mark ---- add
 
 const char atom_add_core[] =
-"    oldValues[tid] = atom_add( &destMemory[0], tid + 3 );\n"
-"    atom_add( &destMemory[0], tid + 3 );\n"
-"   atom_add( &destMemory[0], tid + 3 );\n"
-"   atom_add( &destMemory[0], tid + 3 );\n";
+    "    oldValues[tid] = atom_add( &destMemory[0], tid + 3 );\n"
+    "    atom_add( &destMemory[0], tid + 3 );\n"
+    "    atom_add( &destMemory[0], tid + 3 );\n"
+    "    atom_add( &destMemory[0], tid + 3 );\n";
 
 const char atomic_add_core[] =
-"    oldValues[tid] = atomic_add( &destMemory[0], tid + 3 );\n"
-"    atomic_add( &destMemory[0], tid + 3 );\n"
-"   atomic_add( &destMemory[0], tid + 3 );\n"
-"   atomic_add( &destMemory[0], tid + 3 );\n";
+    "    oldValues[tid] = atomic_add( &destMemory[0], tid + 3 );\n"
+    "    atomic_add( &destMemory[0], tid + 3 );\n"
+    "    atomic_add( &destMemory[0], tid + 3 );\n"
+    "    atomic_add( &destMemory[0], tid + 3 );\n";
 
-cl_int test_atomic_add_result_int( size_t size, cl_int *startRefValues, size_t whichDestValue )
+cl_int test_atomic_add_result_int(size_t size, cl_int *startRefValues,
+                                  size_t whichDestValue)
 {
     cl_int total = 0;
-    for( size_t i = 0; i < size; i++ )
-        total += ( (cl_int)i + 3 ) * 4;
+    for (size_t i = 0; i < size; i++) total += ((cl_int)i + 3) * 4;
     return total;
 }
 
-cl_long test_atomic_add_result_long( size_t size, cl_long *startRefValues, size_t whichDestValue )
+cl_long test_atomic_add_result_long(size_t size, cl_long *startRefValues,
+                                    size_t whichDestValue)
 {
     cl_long total = 0;
-    for( size_t i = 0; i < size; i++ )
-        total += ( ( i + 3 ) * 4 );
+    for (size_t i = 0; i < size; i++) total += ((i + 3) * 4);
     return total;
 }
 
-int test_atomic_add(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+int test_atomic_add(cl_device_id deviceID, cl_context context,
+                    cl_command_queue queue, int num_elements)
 {
-    TestFns set = { 0, 0LL, NULL, test_atomic_add_result_int, NULL, NULL, test_atomic_add_result_long, NULL, NULL };
+    TestFns set = { 0,
+                    0LL,
+                    NULL,
+                    test_atomic_add_result_int,
+                    NULL,
+                    NULL,
+                    test_atomic_add_result_long,
+                    NULL,
+                    NULL };
 
-    if( test_atomic_function_set( deviceID, context, queue, num_elements, atom_add_core, set, false, /*matchGroupSize*/ false, /*usingAtomicPrefix*/ false ) != 0 )
+    if (test_atomic_function_set(
+            deviceID, context, queue, num_elements, atom_add_core, set, false,
+            /*matchGroupSize*/ false, /*usingAtomicPrefix*/ false)
+        != 0)
         return -1;
-    if( test_atomic_function_set( deviceID, context, queue, num_elements, atomic_add_core, set, false, /*matchGroupSize*/ false, /*usingAtomicPrefix*/ true ) != 0 )
-      return -1;
+    if (test_atomic_function_set(
+            deviceID, context, queue, num_elements, atomic_add_core, set, false,
+            /*matchGroupSize*/ false, /*usingAtomicPrefix*/ true)
+        != 0)
+        return -1;
     return 0;
 }
 
 #pragma mark ---- sub
 
-const char atom_sub_core[] = "    oldValues[tid] = atom_sub( &destMemory[0], tid + 3 );\n";
+const char atom_sub_core[] =
+    "    oldValues[tid] = atom_sub( &destMemory[0], tid + 3 );\n";
 
-const char atomic_sub_core[] = "    oldValues[tid] = atomic_sub( &destMemory[0], tid + 3 );\n";
+const char atomic_sub_core[] =
+    "    oldValues[tid] = atomic_sub( &destMemory[0], tid + 3 );\n";
 
-cl_int test_atomic_sub_result_int( size_t size, cl_int *startRefValues, size_t whichDestValue )
+cl_int test_atomic_sub_result_int(size_t size, cl_int *startRefValues,
+                                  size_t whichDestValue)
 {
     cl_int total = INT_TEST_VALUE;
-    for( size_t i = 0; i < size; i++ )
-        total -= (cl_int)i + 3;
+    for (size_t i = 0; i < size; i++) total -= (cl_int)i + 3;
     return total;
 }
 
-cl_long test_atomic_sub_result_long( size_t size, cl_long *startRefValues, size_t whichDestValue )
+cl_long test_atomic_sub_result_long(size_t size, cl_long *startRefValues,
+                                    size_t whichDestValue)
 {
     cl_long total = LONG_TEST_VALUE;
-    for( size_t i = 0; i < size; i++ )
-        total -= i + 3;
+    for (size_t i = 0; i < size; i++) total -= i + 3;
     return total;
 }
 
-int test_atomic_sub(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+int test_atomic_sub(cl_device_id deviceID, cl_context context,
+                    cl_command_queue queue, int num_elements)
 {
-    TestFns set = { INT_TEST_VALUE, LONG_TEST_VALUE, NULL, test_atomic_sub_result_int, NULL, NULL, test_atomic_sub_result_long, NULL, NULL };
+    TestFns set = { INT_TEST_VALUE,
+                    LONG_TEST_VALUE,
+                    NULL,
+                    test_atomic_sub_result_int,
+                    NULL,
+                    NULL,
+                    test_atomic_sub_result_long,
+                    NULL,
+                    NULL };
 
-    if( test_atomic_function_set( deviceID, context, queue, num_elements, atom_sub_core, set, false, /*matchGroupSize*/ false, /*usingAtomicPrefix*/ false  ) != 0 )
+    if (test_atomic_function_set(
+            deviceID, context, queue, num_elements, atom_sub_core, set, false,
+            /*matchGroupSize*/ false, /*usingAtomicPrefix*/ false)
+        != 0)
         return -1;
-    if( test_atomic_function_set( deviceID, context, queue, num_elements, atomic_sub_core, set, false, /*matchGroupSize*/ false, /*usingAtomicPrefix*/ true  ) != 0 )
+    if (test_atomic_function_set(
+            deviceID, context, queue, num_elements, atomic_sub_core, set, false,
+            /*matchGroupSize*/ false, /*usingAtomicPrefix*/ true)
+        != 0)
         return -1;
     return 0;
 }
 
 #pragma mark ---- xchg
 
-const char atom_xchg_core[] = "    oldValues[tid] = atom_xchg( &destMemory[0], tid );\n";
+const char atom_xchg_core[] =
+    "    oldValues[tid] = atom_xchg( &destMemory[0], tid );\n";
 
-const char atomic_xchg_core[] = "    oldValues[tid] = atomic_xchg( &destMemory[0], tid );\n";
-const char atomic_xchg_float_core[] = "    oldValues[tid] = atomic_xchg( &destMemory[0], tid );\n";
+const char atomic_xchg_core[] =
+    "    oldValues[tid] = atomic_xchg( &destMemory[0], tid );\n";
+const char atomic_xchg_float_core[] =
+    "    oldValues[tid] = atomic_xchg( &destMemory[0], tid );\n";
 
-bool test_atomic_xchg_verify_int( size_t size, cl_int *refValues, cl_int finalValue )
+bool test_atomic_xchg_verify_int(size_t size, cl_int *refValues,
+                                 cl_int finalValue)
 {
-    /* For xchg, each value from 0 to size - 1 should have an entry in the ref array, and ONLY one entry */
+    /* For xchg, each value from 0 to size - 1 should have an entry in the ref
+     * array, and ONLY one entry */
     char *valids;
     size_t i;
     char originalValidCount = 0;
 
-    valids = (char *)malloc( sizeof( char ) * size );
-    memset( valids, 0, sizeof( char ) * size );
+    valids = (char *)malloc(sizeof(char) * size);
+    memset(valids, 0, sizeof(char) * size);
 
-    for( i = 0; i < size; i++ )
+    for (i = 0; i < size; i++)
     {
-        if( refValues[ i ] == INT_TEST_VALUE )
+        if (refValues[i] == INT_TEST_VALUE)
         {
             // Special initial value
             originalValidCount++;
             continue;
         }
-        if( refValues[ i ] < 0 || (size_t)refValues[ i ] >= size )
+        if (refValues[i] < 0 || (size_t)refValues[i] >= size)
         {
-            log_error( "ERROR: Reference value %ld outside of valid range! (%d)\n", i, refValues[ i ] );
+            log_error(
+                "ERROR: Reference value %zu outside of valid range! (%d)\n", i,
+                refValues[i]);
             return false;
         }
-        valids[ refValues[ i ] ] ++;
+        valids[refValues[i]]++;
     }
 
-    /* Note: ONE entry will have zero count. It'll be the last one that executed, because that value should be
-     the final value outputted */
-    if( valids[ finalValue ] > 0 )
+    /* Note: ONE entry will have zero count. It'll be the last one that
+     executed, because that value should be the final value outputted */
+    if (valids[finalValue] > 0)
     {
-        log_error( "ERROR: Final value %d was also in ref list!\n", finalValue );
+        log_error("ERROR: Final value %d was also in ref list!\n", finalValue);
         return false;
     }
     else
-        valids[ finalValue ] = 1;    // So the following loop will be okay
+        valids[finalValue] = 1; // So the following loop will be okay
 
     /* Now check that every entry has one and only one count */
-    if( originalValidCount != 1 )
+    if (originalValidCount != 1)
     {
-        log_error( "ERROR: Starting reference value %d did not occur once-and-only-once (occurred %d)\n", 65191, originalValidCount );
+        log_error("ERROR: Starting reference value %d did not occur "
+                  "once-and-only-once (occurred %d)\n",
+                  (cl_int)INT_TEST_VALUE, originalValidCount);
         return false;
     }
-    for( i = 0; i < size; i++ )
+    for (i = 0; i < size; i++)
     {
-        if( valids[ i ] != 1 )
+        if (valids[i] != 1)
         {
-            log_error( "ERROR: Reference value %ld did not occur once-and-only-once (occurred %d)\n", i, valids[ i ] );
-            for( size_t j = 0; j < size; j++ )
-                log_info( "%d: %d\n", (int)j, (int)valids[ j ] );
+            log_error("ERROR: Reference value %zu did not occur "
+                      "once-and-only-once (occurred %d)\n",
+                      i, valids[i]);
+            for (size_t j = 0; j < size; j++)
+                log_info("%d: %d\n", (int)j, (int)valids[j]);
             return false;
         }
     }
 
-    free( valids );
+    free(valids);
     return true;
 }
 
-bool test_atomic_xchg_verify_long( size_t size, cl_long *refValues, cl_long finalValue )
+bool test_atomic_xchg_verify_long(size_t size, cl_long *refValues,
+                                  cl_long finalValue)
 {
-    /* For xchg, each value from 0 to size - 1 should have an entry in the ref array, and ONLY one entry */
+    /* For xchg, each value from 0 to size - 1 should have an entry in the ref
+     * array, and ONLY one entry */
     char *valids;
     size_t i;
     char originalValidCount = 0;
 
-    valids = (char *)malloc( sizeof( char ) * size );
-    memset( valids, 0, sizeof( char ) * size );
+    valids = (char *)malloc(sizeof(char) * size);
+    memset(valids, 0, sizeof(char) * size);
 
-    for( i = 0; i < size; i++ )
+    for (i = 0; i < size; i++)
     {
-        if( refValues[ i ] == LONG_TEST_VALUE )
+        if (refValues[i] == LONG_TEST_VALUE)
         {
             // Special initial value
             originalValidCount++;
             continue;
         }
-        if( refValues[ i ] < 0 || (size_t)refValues[ i ] >= size )
+        if (refValues[i] < 0 || (size_t)refValues[i] >= size)
         {
-            log_error( "ERROR: Reference value %ld outside of valid range! (%lld)\n", i, refValues[ i ] );
+            log_error(
+                "ERROR: Reference value %zu outside of valid range! (%" PRId64
+                ")\n",
+                i, refValues[i]);
             return false;
         }
-        valids[ refValues[ i ] ] ++;
+        valids[refValues[i]]++;
     }
 
-    /* Note: ONE entry will have zero count. It'll be the last one that executed, because that value should be
-     the final value outputted */
-    if( valids[ finalValue ] > 0 )
+    /* Note: ONE entry will have zero count. It'll be the last one that
+     executed, because that value should be the final value outputted */
+    if (valids[finalValue] > 0)
     {
-        log_error( "ERROR: Final value %lld was also in ref list!\n", finalValue );
+        log_error("ERROR: Final value %" PRId64 " was also in ref list!\n",
+                  finalValue);
         return false;
     }
     else
-        valids[ finalValue ] = 1;    // So the following loop will be okay
+        valids[finalValue] = 1; // So the following loop will be okay
 
     /* Now check that every entry has one and only one count */
-    if( originalValidCount != 1 )
+    if (originalValidCount != 1)
     {
-        log_error( "ERROR: Starting reference value %d did not occur once-and-only-once (occurred %d)\n", 65191, originalValidCount );
+        log_error("ERROR: Starting reference value %" PRId64 " did not occur "
+                  "once-and-only-once (occurred %d)\n",
+                  (cl_long)LONG_TEST_VALUE, originalValidCount);
         return false;
     }
-    for( i = 0; i < size; i++ )
+    for (i = 0; i < size; i++)
     {
-        if( valids[ i ] != 1 )
+        if (valids[i] != 1)
         {
-            log_error( "ERROR: Reference value %ld did not occur once-and-only-once (occurred %d)\n", i, valids[ i ] );
-            for( size_t j = 0; j < size; j++ )
-                log_info( "%d: %d\n", (int)j, (int)valids[ j ] );
+            log_error("ERROR: Reference value %zu did not occur "
+                      "once-and-only-once (occurred %d)\n",
+                      i, valids[i]);
+            for (size_t j = 0; j < size; j++)
+                log_info("%d: %d\n", (int)j, (int)valids[j]);
             return false;
         }
     }
 
-    free( valids );
+    free(valids);
     return true;
 }
 
-bool test_atomic_xchg_verify_float( size_t size, cl_float *refValues, cl_float finalValue )
+bool test_atomic_xchg_verify_float(size_t size, cl_float *refValues,
+                                   cl_float finalValue)
 {
-    /* For xchg, each value from 0 to size - 1 should have an entry in the ref array, and ONLY one entry */
+    /* For xchg, each value from 0 to size - 1 should have an entry in the ref
+     * array, and ONLY one entry */
     char *valids;
     size_t i;
     char originalValidCount = 0;
 
-    valids = (char *)malloc( sizeof( char ) * size );
-    memset( valids, 0, sizeof( char ) * size );
+    valids = (char *)malloc(sizeof(char) * size);
+    memset(valids, 0, sizeof(char) * size);
 
-    for( i = 0; i < size; i++ )
+    for (i = 0; i < size; i++)
     {
-        cl_int *intRefValue = (cl_int *)( &refValues[ i ] );
-        if( *intRefValue == INT_TEST_VALUE )
+        cl_int *intRefValue = (cl_int *)(&refValues[i]);
+        if (*intRefValue == INT_TEST_VALUE)
         {
             // Special initial value
             originalValidCount++;
             continue;
         }
-        if( refValues[ i ] < 0 || (size_t)refValues[ i ] >= size )
+        if (refValues[i] < 0 || (size_t)refValues[i] >= size)
         {
-            log_error( "ERROR: Reference value %ld outside of valid range! (%a)\n", i, refValues[ i ] );
+            log_error(
+                "ERROR: Reference value %zu outside of valid range! (%a)\n", i,
+                refValues[i]);
             return false;
         }
-        valids[ (int)refValues[ i ] ] ++;
+        valids[(int)refValues[i]]++;
     }
 
-    /* Note: ONE entry will have zero count. It'll be the last one that executed, because that value should be
-     the final value outputted */
-    if( valids[ (int)finalValue ] > 0 )
+    /* Note: ONE entry will have zero count. It'll be the last one that
+     executed, because that value should be the final value outputted */
+    if (valids[(int)finalValue] > 0)
     {
-        log_error( "ERROR: Final value %a was also in ref list!\n", finalValue );
+        log_error("ERROR: Final value %a was also in ref list!\n", finalValue);
         return false;
     }
     else
-        valids[ (int)finalValue ] = 1;    // So the following loop will be okay
+        valids[(int)finalValue] = 1; // So the following loop will be okay
 
     /* Now check that every entry has one and only one count */
-    if( originalValidCount != 1 )
+    if (originalValidCount != 1)
     {
-        log_error( "ERROR: Starting reference value %d did not occur once-and-only-once (occurred %d)\n", 65191, originalValidCount );
+        log_error("ERROR: Starting reference value %d did not occur "
+                  "once-and-only-once (occurred %d)\n",
+                  (cl_int)INT_TEST_VALUE, originalValidCount);
         return false;
     }
-    for( i = 0; i < size; i++ )
+    for (i = 0; i < size; i++)
     {
-        if( valids[ i ] != 1 )
+        if (valids[i] != 1)
         {
-            log_error( "ERROR: Reference value %ld did not occur once-and-only-once (occurred %d)\n", i, valids[ i ] );
-            for( size_t j = 0; j < size; j++ )
-                log_info( "%d: %d\n", (int)j, (int)valids[ j ] );
+            log_error("ERROR: Reference value %zu did not occur "
+                      "once-and-only-once (occurred %d)\n",
+                      i, valids[i]);
+            for (size_t j = 0; j < size; j++)
+                log_info("%d: %d\n", (int)j, (int)valids[j]);
             return false;
         }
     }
 
-    free( valids );
+    free(valids);
     return true;
 }
 
-int test_atomic_xchg(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+int test_atomic_xchg(cl_device_id deviceID, cl_context context,
+                     cl_command_queue queue, int num_elements)
 {
-    TestFns set = { INT_TEST_VALUE, LONG_TEST_VALUE, NULL, NULL, NULL, test_atomic_xchg_verify_int, NULL, NULL, test_atomic_xchg_verify_long, NULL, NULL, test_atomic_xchg_verify_float };
+    TestFns set = { INT_TEST_VALUE,
+                    LONG_TEST_VALUE,
+                    NULL,
+                    NULL,
+                    NULL,
+                    test_atomic_xchg_verify_int,
+                    NULL,
+                    NULL,
+                    test_atomic_xchg_verify_long,
+                    NULL,
+                    NULL,
+                    test_atomic_xchg_verify_float };
 
-    int errors = test_atomic_function_set( deviceID, context, queue, num_elements, atom_xchg_core, set, false, true, /*usingAtomicPrefix*/ false  );
-    errors |= test_atomic_function_set( deviceID, context, queue, num_elements, atomic_xchg_core, set, false, true, /*usingAtomicPrefix*/ true  );
+    int errors = test_atomic_function_set(
+        deviceID, context, queue, num_elements, atom_xchg_core, set, false,
+        true, /*usingAtomicPrefix*/ false);
+    errors |= test_atomic_function_set(deviceID, context, queue, num_elements,
+                                       atomic_xchg_core, set, false, true,
+                                       /*usingAtomicPrefix*/ true);
 
-    errors |= test_atomic_function( deviceID, context, queue, num_elements, atomic_xchg_float_core, set, false, false, kFloat, true );
-    errors |= test_atomic_function( deviceID, context, queue, num_elements, atomic_xchg_float_core, set, false, true, kFloat, true );
+    errors |= test_atomic_function(deviceID, context, queue, num_elements,
+                                   atomic_xchg_float_core, set, false, false,
+                                   kFloat, true);
+    errors |= test_atomic_function(deviceID, context, queue, num_elements,
+                                   atomic_xchg_float_core, set, false, true,
+                                   kFloat, true);
 
     return errors;
 }
@@ -720,51 +888,71 @@
 
 #pragma mark ---- min
 
-const char atom_min_core[] = "    oldValues[tid] = atom_min( &destMemory[0], oldValues[tid] );\n";
+const char atom_min_core[] =
+    "    oldValues[tid] = atom_min( &destMemory[0], oldValues[tid] );\n";
 
-const char atomic_min_core[] = "    oldValues[tid] = atomic_min( &destMemory[0], oldValues[tid] );\n";
+const char atomic_min_core[] =
+    "    oldValues[tid] = atomic_min( &destMemory[0], oldValues[tid] );\n";
 
-cl_int test_atomic_min_result_int( size_t size, cl_int *startRefValues, size_t whichDestValue )
+cl_int test_atomic_min_result_int(size_t size, cl_int *startRefValues,
+                                  size_t whichDestValue)
 {
     cl_int total = 0x7fffffffL;
-    for( size_t i = 0; i < size; i++ )
+    for (size_t i = 0; i < size; i++)
     {
-        if( startRefValues[ i ] < total )
-            total = startRefValues[ i ];
+        if (startRefValues[i] < total) total = startRefValues[i];
     }
     return total;
 }
 
-void test_atomic_min_gen_int( size_t size, cl_int *startRefValues, MTdata d )
+void test_atomic_min_gen_int(size_t size, cl_int *startRefValues, MTdata d)
 {
-    for( size_t i = 0; i < size; i++ )
-        startRefValues[i] = (cl_int)( genrand_int32(d) % 0x3fffffff ) + 0x3fffffff;
+    for (size_t i = 0; i < size; i++)
+        startRefValues[i] =
+            (cl_int)(genrand_int32(d) % 0x3fffffff) + 0x3fffffff;
 }
 
-cl_long test_atomic_min_result_long( size_t size, cl_long *startRefValues, size_t whichDestValue )
+cl_long test_atomic_min_result_long(size_t size, cl_long *startRefValues,
+                                    size_t whichDestValue)
 {
     cl_long total = 0x7fffffffffffffffLL;
-    for( size_t i = 0; i < size; i++ )
+    for (size_t i = 0; i < size; i++)
     {
-        if( startRefValues[ i ] < total )
-            total = startRefValues[ i ];
+        if (startRefValues[i] < total) total = startRefValues[i];
     }
     return total;
 }
 
-void test_atomic_min_gen_long( size_t size, cl_long *startRefValues, MTdata d )
+void test_atomic_min_gen_long(size_t size, cl_long *startRefValues, MTdata d)
 {
-    for( size_t i = 0; i < size; i++ )
-        startRefValues[i] = (cl_long)( genrand_int32(d) | ( ( (cl_long)genrand_int32(d) & 0x7fffffffL ) << 16 ) );
+    for (size_t i = 0; i < size; i++)
+        startRefValues[i] =
+            (cl_long)(genrand_int32(d)
+                      | (((cl_long)genrand_int32(d) & 0x7fffffffL) << 16));
 }
 
-int test_atomic_min(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+int test_atomic_min(cl_device_id deviceID, cl_context context,
+                    cl_command_queue queue, int num_elements)
 {
-    TestFns set = { 0x7fffffffL, 0x7fffffffffffffffLL, NULL, test_atomic_min_result_int, test_atomic_min_gen_int, NULL, test_atomic_min_result_long, test_atomic_min_gen_long, NULL };
+    TestFns set = { 0x7fffffffL,
+                    0x7fffffffffffffffLL,
+                    NULL,
+                    test_atomic_min_result_int,
+                    test_atomic_min_gen_int,
+                    NULL,
+                    test_atomic_min_result_long,
+                    test_atomic_min_gen_long,
+                    NULL };
 
-    if( test_atomic_function_set( deviceID, context, queue, num_elements, atom_min_core, set, true, /*matchGroupSize*/ false, /*usingAtomicPrefix*/ false  ) != 0 )
+    if (test_atomic_function_set(
+            deviceID, context, queue, num_elements, atom_min_core, set, true,
+            /*matchGroupSize*/ false, /*usingAtomicPrefix*/ false)
+        != 0)
         return -1;
-    if( test_atomic_function_set( deviceID, context, queue, num_elements, atomic_min_core, set, true, /*matchGroupSize*/ false, /*usingAtomicPrefix*/ true  ) != 0 )
+    if (test_atomic_function_set(
+            deviceID, context, queue, num_elements, atomic_min_core, set, true,
+            /*matchGroupSize*/ false, /*usingAtomicPrefix*/ true)
+        != 0)
         return -1;
     return 0;
 }
@@ -772,79 +960,118 @@
 
 #pragma mark ---- max
 
-const char atom_max_core[] = "    oldValues[tid] = atom_max( &destMemory[0], oldValues[tid] );\n";
+const char atom_max_core[] =
+    "    oldValues[tid] = atom_max( &destMemory[0], oldValues[tid] );\n";
 
-const char atomic_max_core[] = "    oldValues[tid] = atomic_max( &destMemory[0], oldValues[tid] );\n";
+const char atomic_max_core[] =
+    "    oldValues[tid] = atomic_max( &destMemory[0], oldValues[tid] );\n";
 
-cl_int test_atomic_max_result_int( size_t size, cl_int *startRefValues, size_t whichDestValue )
+cl_int test_atomic_max_result_int(size_t size, cl_int *startRefValues,
+                                  size_t whichDestValue)
 {
     cl_int total = 0;
-    for( size_t i = 0; i < size; i++ )
+    for (size_t i = 0; i < size; i++)
     {
-        if( startRefValues[ i ] > total )
-            total = startRefValues[ i ];
+        if (startRefValues[i] > total) total = startRefValues[i];
     }
     return total;
 }
 
-void test_atomic_max_gen_int( size_t size, cl_int *startRefValues, MTdata d )
+void test_atomic_max_gen_int(size_t size, cl_int *startRefValues, MTdata d)
 {
-    for( size_t i = 0; i < size; i++ )
-        startRefValues[i] = (cl_int)( genrand_int32(d) % 0x3fffffff ) + 0x3fffffff;
+    for (size_t i = 0; i < size; i++)
+        startRefValues[i] =
+            (cl_int)(genrand_int32(d) % 0x3fffffff) + 0x3fffffff;
 }
 
-cl_long test_atomic_max_result_long( size_t size, cl_long *startRefValues, size_t whichDestValue )
+cl_long test_atomic_max_result_long(size_t size, cl_long *startRefValues,
+                                    size_t whichDestValue)
 {
     cl_long total = 0;
-    for( size_t i = 0; i < size; i++ )
+    for (size_t i = 0; i < size; i++)
     {
-        if( startRefValues[ i ] > total )
-            total = startRefValues[ i ];
+        if (startRefValues[i] > total) total = startRefValues[i];
     }
     return total;
 }
 
-void test_atomic_max_gen_long( size_t size, cl_long *startRefValues, MTdata d )
+void test_atomic_max_gen_long(size_t size, cl_long *startRefValues, MTdata d)
 {
-    for( size_t i = 0; i < size; i++ )
-        startRefValues[i] = (cl_long)( genrand_int32(d) | ( ( (cl_long)genrand_int32(d) & 0x7fffffffL ) << 16 ) );
+    for (size_t i = 0; i < size; i++)
+        startRefValues[i] =
+            (cl_long)(genrand_int32(d)
+                      | (((cl_long)genrand_int32(d) & 0x7fffffffL) << 16));
 }
 
-int test_atomic_max(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+int test_atomic_max(cl_device_id deviceID, cl_context context,
+                    cl_command_queue queue, int num_elements)
 {
-    TestFns set = { 0, 0, NULL, test_atomic_max_result_int, test_atomic_max_gen_int, NULL, test_atomic_max_result_long, test_atomic_max_gen_long, NULL };
+    TestFns set = { 0,
+                    0,
+                    NULL,
+                    test_atomic_max_result_int,
+                    test_atomic_max_gen_int,
+                    NULL,
+                    test_atomic_max_result_long,
+                    test_atomic_max_gen_long,
+                    NULL };
 
-    if( test_atomic_function_set( deviceID, context, queue, num_elements, atom_max_core, set, true, /*matchGroupSize*/ false, /*usingAtomicPrefix*/ false  ) != 0 )
+    if (test_atomic_function_set(
+            deviceID, context, queue, num_elements, atom_max_core, set, true,
+            /*matchGroupSize*/ false, /*usingAtomicPrefix*/ false)
+        != 0)
         return -1;
-    if( test_atomic_function_set( deviceID, context, queue, num_elements, atomic_max_core, set, true, /*matchGroupSize*/ false, /*usingAtomicPrefix*/ true  ) != 0 )
-      return -1;
+    if (test_atomic_function_set(
+            deviceID, context, queue, num_elements, atomic_max_core, set, true,
+            /*matchGroupSize*/ false, /*usingAtomicPrefix*/ true)
+        != 0)
+        return -1;
     return 0;
 }
 
 
 #pragma mark ---- inc
 
-const char atom_inc_core[] = "    oldValues[tid] = atom_inc( &destMemory[0] );\n";
+const char atom_inc_core[] =
+    "    oldValues[tid] = atom_inc( &destMemory[0] );\n";
 
-const char atomic_inc_core[] = "    oldValues[tid] = atomic_inc( &destMemory[0] );\n";
+const char atomic_inc_core[] =
+    "    oldValues[tid] = atomic_inc( &destMemory[0] );\n";
 
-cl_int test_atomic_inc_result_int( size_t size, cl_int *startRefValues, size_t whichDestValue )
+cl_int test_atomic_inc_result_int(size_t size, cl_int *startRefValues,
+                                  size_t whichDestValue)
 {
     return INT_TEST_VALUE + (cl_int)size;
 }
 
-cl_long test_atomic_inc_result_long( size_t size, cl_long *startRefValues, size_t whichDestValue )
+cl_long test_atomic_inc_result_long(size_t size, cl_long *startRefValues,
+                                    size_t whichDestValue)
 {
     return LONG_TEST_VALUE + size;
 }
 
-int test_atomic_inc(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+int test_atomic_inc(cl_device_id deviceID, cl_context context,
+                    cl_command_queue queue, int num_elements)
 {
-    TestFns set = { INT_TEST_VALUE, LONG_TEST_VALUE, NULL, test_atomic_inc_result_int, NULL, NULL, test_atomic_inc_result_long, NULL, NULL };
+    TestFns set = { INT_TEST_VALUE,
+                    LONG_TEST_VALUE,
+                    NULL,
+                    test_atomic_inc_result_int,
+                    NULL,
+                    NULL,
+                    test_atomic_inc_result_long,
+                    NULL,
+                    NULL };
 
-    if( test_atomic_function_set( deviceID, context, queue, num_elements, atom_inc_core, set, false, /*matchGroupSize*/ false, /*usingAtomicPrefix*/ false  ) != 0 )
+    if (test_atomic_function_set(
+            deviceID, context, queue, num_elements, atom_inc_core, set, false,
+            /*matchGroupSize*/ false, /*usingAtomicPrefix*/ false)
+        != 0)
         return -1;
-    if( test_atomic_function_set( deviceID, context, queue, num_elements, atomic_inc_core, set, false, /*matchGroupSize*/ false, /*usingAtomicPrefix*/ true  ) != 0 )
+    if (test_atomic_function_set(
+            deviceID, context, queue, num_elements, atomic_inc_core, set, false,
+            /*matchGroupSize*/ false, /*usingAtomicPrefix*/ true)
+        != 0)
         return -1;
     return 0;
 }
@@ -852,27 +1079,46 @@
 
 #pragma mark ---- dec
 
-const char atom_dec_core[] = "    oldValues[tid] = atom_dec( &destMemory[0] );\n";
+const char atom_dec_core[] =
+    "    oldValues[tid] = atom_dec( &destMemory[0] );\n";
 
-const char atomic_dec_core[] = "    oldValues[tid] = atomic_dec( &destMemory[0] );\n";
+const char atomic_dec_core[] =
+    "    oldValues[tid] = atomic_dec( &destMemory[0] );\n";
 
-cl_int test_atomic_dec_result_int( size_t size, cl_int *startRefValues, size_t whichDestValue )
+cl_int test_atomic_dec_result_int(size_t size, cl_int *startRefValues,
+                                  size_t whichDestValue)
 {
     return INT_TEST_VALUE - (cl_int)size;
 }
 
-cl_long test_atomic_dec_result_long( size_t size, cl_long *startRefValues, size_t whichDestValue )
+cl_long test_atomic_dec_result_long(size_t size, cl_long *startRefValues,
+                                    size_t whichDestValue)
 {
     return LONG_TEST_VALUE - size;
 }
 
-int test_atomic_dec(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+int test_atomic_dec(cl_device_id deviceID, cl_context context,
+                    cl_command_queue queue, int num_elements)
 {
-    TestFns set = { INT_TEST_VALUE, LONG_TEST_VALUE, NULL, test_atomic_dec_result_int, NULL, NULL, test_atomic_dec_result_long, NULL, NULL };
+    TestFns set = { INT_TEST_VALUE,
+                    LONG_TEST_VALUE,
+                    NULL,
+                    test_atomic_dec_result_int,
+                    NULL,
+                    NULL,
+                    test_atomic_dec_result_long,
+                    NULL,
+                    NULL };
 
-    if( test_atomic_function_set( deviceID, context, queue, num_elements, atom_dec_core, set, false, /*matchGroupSize*/ false, /*usingAtomicPrefix*/ false  ) != 0 )
+    if (test_atomic_function_set(
+            deviceID, context, queue, num_elements, atom_dec_core, set, false,
+            /*matchGroupSize*/ false, /*usingAtomicPrefix*/ false)
+        != 0)
         return -1;
-    if( test_atomic_function_set( deviceID, context, queue, num_elements, atomic_dec_core, set, false, /*matchGroupSize*/ false, /*usingAtomicPrefix*/ true  ) != 0 )
+    if (test_atomic_function_set(
+            deviceID, context, queue, num_elements, atomic_dec_core, set, false,
+            /*matchGroupSize*/ false, /*usingAtomicPrefix*/ true)
+        != 0)
         return -1;
     return 0;
 }
@@ -881,129 +1127,159 @@
 #pragma mark ---- cmpxchg
 
 /* We test cmpxchg by implementing (the long way) atom_add */
+// clang-format off
 const char atom_cmpxchg_core[] =
-"    int oldValue, origValue, newValue;\n"
-"    do { \n"
-"        origValue = destMemory[0];\n"
-"        newValue = origValue + tid + 2;\n"
-"        oldValue = atom_cmpxchg( &destMemory[0], origValue, newValue );\n"
-"    } while( oldValue != origValue );\n"
-"    oldValues[tid] = oldValue;\n"
-;
+    "    int oldValue, origValue, newValue;\n"
+    "    do { \n"
+    "        origValue = destMemory[0];\n"
+    "        newValue = origValue + tid + 2;\n"
+    "        oldValue = atom_cmpxchg( &destMemory[0], origValue, newValue );\n"
+    "    } while( oldValue != origValue );\n"
+    "    oldValues[tid] = oldValue;\n";
 
 const char atom_cmpxchg64_core[] =
-"    long oldValue, origValue, newValue;\n"
-"    do { \n"
-"        origValue = destMemory[0];\n"
-"        newValue = origValue + tid + 2;\n"
-"        oldValue = atom_cmpxchg( &destMemory[0], origValue, newValue );\n"
-"    } while( oldValue != origValue );\n"
-"    oldValues[tid] = oldValue;\n"
-;
+    "    long oldValue, origValue, newValue;\n"
+    "    do { \n"
+    "        origValue = destMemory[0];\n"
+    "        newValue = origValue + tid + 2;\n"
+    "        oldValue = atom_cmpxchg( &destMemory[0], origValue, newValue );\n"
+    "    } while( oldValue != origValue );\n"
+    "    oldValues[tid] = oldValue;\n";
 
 const char atomic_cmpxchg_core[] =
-"    int oldValue, origValue, newValue;\n"
-"    do { \n"
-"        origValue = destMemory[0];\n"
-"        newValue = origValue + tid + 2;\n"
-"        oldValue = atomic_cmpxchg( &destMemory[0], origValue, newValue );\n"
-"    } while( oldValue != origValue );\n"
-"    oldValues[tid] = oldValue;\n"
-;
+    "    int oldValue, origValue, newValue;\n"
+    "    do { \n"
+    "        origValue = destMemory[0];\n"
+    "        newValue = origValue + tid + 2;\n"
+    "        oldValue = atomic_cmpxchg( &destMemory[0], origValue, newValue );\n"
+    "    } while( oldValue != origValue );\n"
+    "    oldValues[tid] = oldValue;\n";
+// clang-format on
 
-cl_int test_atomic_cmpxchg_result_int( size_t size, cl_int *startRefValues, size_t whichDestValue )
+cl_int test_atomic_cmpxchg_result_int(size_t size, cl_int *startRefValues,
+                                      size_t whichDestValue)
 {
     cl_int total = INT_TEST_VALUE;
-    for( size_t i = 0; i < size; i++ )
-        total += (cl_int)i + 2;
+    for (size_t i = 0; i < size; i++) total += (cl_int)i + 2;
     return total;
 }
 
-cl_long test_atomic_cmpxchg_result_long( size_t size, cl_long *startRefValues, size_t whichDestValue )
+cl_long test_atomic_cmpxchg_result_long(size_t size, cl_long *startRefValues,
+                                        size_t whichDestValue)
 {
     cl_long total = LONG_TEST_VALUE;
-    for( size_t i = 0; i < size; i++ )
-        total += i + 2;
+    for (size_t i = 0; i < size; i++) total += i + 2;
     return total;
 }
 
-int test_atomic_cmpxchg(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+int test_atomic_cmpxchg(cl_device_id deviceID, cl_context context,
+                        cl_command_queue queue, int num_elements)
 {
-    TestFns set = { INT_TEST_VALUE, LONG_TEST_VALUE, NULL, test_atomic_cmpxchg_result_int, NULL, NULL, test_atomic_cmpxchg_result_long, NULL, NULL };
+    TestFns set = { INT_TEST_VALUE,
+                    LONG_TEST_VALUE,
+                    NULL,
+                    test_atomic_cmpxchg_result_int,
+                    NULL,
+                    NULL,
+                    test_atomic_cmpxchg_result_long,
+                    NULL,
+                    NULL };
 
     int errors = 0;
 
     log_info("    Testing atom_ functions...\n");
-    errors |= test_atomic_function( deviceID, context, queue, num_elements, atom_cmpxchg_core, set, false, false, kInt, true );
-    errors |= test_atomic_function( deviceID, context, queue, num_elements, atom_cmpxchg_core, set, false, false, kUInt, true );
-    errors |= test_atomic_function( deviceID, context, queue, num_elements, atom_cmpxchg_core, set, false, true, kInt, true );
-    errors |= test_atomic_function( deviceID, context, queue, num_elements, atom_cmpxchg_core, set, false, true, kUInt, true );
+    errors |=
+        test_atomic_function(deviceID, context, queue, num_elements,
+                             atom_cmpxchg_core, set, false, false, kInt, true);
+    errors |=
+        test_atomic_function(deviceID, context, queue, num_elements,
+                             atom_cmpxchg_core, set, false, false, kUInt, true);
+    errors |=
+        test_atomic_function(deviceID, context, queue, num_elements,
+                             atom_cmpxchg_core, set, false, true, kInt, true);
+    errors |=
+        test_atomic_function(deviceID, context, queue, num_elements,
+                             atom_cmpxchg_core, set, false, true, kUInt, true);
 
-    errors |= test_atomic_function( deviceID, context, queue, num_elements, atom_cmpxchg64_core, set, false, false, kLong, true );
-    errors |= test_atomic_function( deviceID, context, queue, num_elements, atom_cmpxchg64_core, set, false, false, kULong, true );
-    errors |= test_atomic_function( deviceID, context, queue, num_elements, atom_cmpxchg64_core, set, false, true, kLong, true );
-    errors |= test_atomic_function( deviceID, context, queue, num_elements, atom_cmpxchg64_core, set, false, true, kULong, true );
+    errors |= test_atomic_function(deviceID, context, queue, num_elements,
+                                   atom_cmpxchg64_core, set, false, false,
+                                   kLong, true);
+    errors |= test_atomic_function(deviceID, context, queue, num_elements,
+                                   atom_cmpxchg64_core, set, false, false,
+                                   kULong, true);
+    errors |= test_atomic_function(deviceID, context, queue, num_elements,
+                                   atom_cmpxchg64_core, set, false, true, kLong,
+                                   true);
+    errors |= test_atomic_function(deviceID, context, queue, num_elements,
+                                   atom_cmpxchg64_core, set, false, true,
+                                   kULong, true);
 
     log_info("    Testing atomic_ functions...\n");
-    errors |= test_atomic_function( deviceID, context, queue, num_elements, atomic_cmpxchg_core, set, false, false, kInt, true );
-    errors |= test_atomic_function( deviceID, context, queue, num_elements, atomic_cmpxchg_core, set, false, false, kUInt, true );
-    errors |= test_atomic_function( deviceID, context, queue, num_elements, atomic_cmpxchg_core, set, false, true, kInt, true );
-    errors |= test_atomic_function( deviceID, context, queue, num_elements, atomic_cmpxchg_core, set, false, true, kUInt, true );
+    errors |= test_atomic_function(deviceID, context, queue, num_elements,
+                                   atomic_cmpxchg_core, set, false, false, kInt,
+                                   true);
+    errors |= test_atomic_function(deviceID, context, queue, num_elements,
+                                   atomic_cmpxchg_core, set, false, false,
+                                   kUInt, true);
+    errors |=
+        test_atomic_function(deviceID, context, queue, num_elements,
+                             atomic_cmpxchg_core, set, false, true, kInt, true);
+    errors |= test_atomic_function(deviceID, context, queue, num_elements,
+                                   atomic_cmpxchg_core, set, false, true, kUInt,
+                                   true);
 
-    if( errors )
-        return -1;
+    if (errors) return -1;
 
     return 0;
 }
 
 #pragma mark -------- Bitwise functions
 
-size_t test_bitwise_num_results( size_t threadCount, ExplicitType dataType )
+size_t test_bitwise_num_results(size_t threadCount, ExplicitType dataType)
 {
-    size_t numBits = get_explicit_type_size( dataType ) * 8;
+    size_t numBits = get_explicit_type_size(dataType) * 8;
 
-    return ( threadCount + numBits - 1 ) / numBits;
+    return (threadCount + numBits - 1) / numBits;
 }
 
 #pragma mark ---- and
 
+// clang-format off
 const char atom_and_core[] =
-"    size_t numBits = sizeof( destMemory[0] ) * 8;\n"
-"    int  whichResult = tid / numBits;\n"
-"    int  bitIndex = tid - ( whichResult * numBits );\n"
-"\n"
-"    oldValues[tid] = atom_and( &destMemory[whichResult], ~( 1L << bitIndex ) );\n"
-;
+    "    size_t numBits = sizeof( destMemory[0] ) * 8;\n"
+    "    int  whichResult = tid / numBits;\n"
+    "    int  bitIndex = tid - ( whichResult * numBits );\n"
+    "\n"
+    "    oldValues[tid] = atom_and( &destMemory[whichResult], ~( 1L << bitIndex ) );\n";
 
 const char atomic_and_core[] =
-"    size_t numBits = sizeof( destMemory[0] ) * 8;\n"
-"    int  whichResult = tid / numBits;\n"
-"    int  bitIndex = tid - ( whichResult * numBits );\n"
-"\n"
-"    oldValues[tid] = atomic_and( &destMemory[whichResult], ~( 1L << bitIndex ) );\n"
-;
+    "    size_t numBits = sizeof( destMemory[0] ) * 8;\n"
+    "    int  whichResult = tid / numBits;\n"
+    "    int  bitIndex = tid - ( whichResult * numBits );\n"
+    "\n"
+    "    oldValues[tid] = atomic_and( &destMemory[whichResult], ~( 1L << bitIndex ) );\n";
+// clang-format on
 
 
-cl_int test_atomic_and_result_int( size_t size, cl_int *startRefValues, size_t whichResult )
+cl_int test_atomic_and_result_int(size_t size, cl_int *startRefValues,
+                                  size_t whichResult)
 {
-    size_t numThreads = ( (size_t)size + 31 ) / 32;
-    if( whichResult < numThreads - 1 )
-        return 0;
+    size_t numThreads = ((size_t)size + 31) / 32;
+    if (whichResult < numThreads - 1) return 0;
 
     // Last item doesn't get and'ed on every bit, so we have to mask away
     size_t numBits = (size_t)size - whichResult * 32;
     cl_int bits = (cl_int)0xffffffffL;
-    for( size_t i = 0; i < numBits; i++ )
-        bits &= ~( 1 << i );
+    for (size_t i = 0; i < numBits; i++) bits &= ~(cl_int)(1u << i); // unsigned: no shift into sign bit
 
     return bits;
 }
 
-cl_long test_atomic_and_result_long( size_t size, cl_long *startRefValues, size_t whichResult )
+cl_long test_atomic_and_result_long(size_t size, cl_long *startRefValues,
+                                    size_t whichResult)
 {
-    size_t numThreads = ( (size_t)size + 63 ) / 64;
-    if( whichResult < numThreads - 1 )
-        return 0;
+    size_t numThreads = ((size_t)size + 63) / 64;
+    if (whichResult < numThreads - 1) return 0;
 
     // Last item doesn't get and'ed on every bit, so we have to mask away
     size_t numBits = (size_t)size - whichResult * 64;
@@ -1013,14 +1289,28 @@
     return bits;
 }
 
-int test_atomic_and(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+int test_atomic_and(cl_device_id deviceID, cl_context context,
+                    cl_command_queue queue, int num_elements)
 {
-    TestFns set = { 0xffffffff, 0xffffffffffffffffLL, test_bitwise_num_results,
-        test_atomic_and_result_int, NULL, NULL, test_atomic_and_result_long, NULL, NULL };
+    TestFns set = { 0xffffffff,
+                    0xffffffffffffffffLL,
+                    test_bitwise_num_results,
+                    test_atomic_and_result_int,
+                    NULL,
+                    NULL,
+                    test_atomic_and_result_long,
+                    NULL,
+                    NULL };
 
-    if( test_atomic_function_set( deviceID, context, queue, num_elements, atom_and_core, set, true, /*matchGroupSize*/ false, /*usingAtomicPrefix*/ false  ) != 0 )
+    if (test_atomic_function_set(
+            deviceID, context, queue, num_elements, atom_and_core, set, true,
+            /*matchGroupSize*/ false, /*usingAtomicPrefix*/ false)
+        != 0)
         return -1;
-    if( test_atomic_function_set( deviceID, context, queue, num_elements, atomic_and_core, set, true, /*matchGroupSize*/ false, /*usingAtomicPrefix*/ true  ) != 0 )
+    if (test_atomic_function_set(
+            deviceID, context, queue, num_elements, atomic_and_core, set, true,
+            /*matchGroupSize*/ false, /*usingAtomicPrefix*/ true)
+        != 0)
         return -1;
     return 0;
 }
@@ -1028,59 +1318,68 @@
 
 #pragma mark ---- or
 
+// clang-format off
 const char atom_or_core[] =
-"    size_t numBits = sizeof( destMemory[0] ) * 8;\n"
-"    int  whichResult = tid / numBits;\n"
-"    int  bitIndex = tid - ( whichResult * numBits );\n"
-"\n"
-"    oldValues[tid] = atom_or( &destMemory[whichResult], ( 1L << bitIndex ) );\n"
-;
+    "    size_t numBits = sizeof( destMemory[0] ) * 8;\n"
+    "    int  whichResult = tid / numBits;\n"
+    "    int  bitIndex = tid - ( whichResult * numBits );\n"
+    "\n"
+    "    oldValues[tid] = atom_or( &destMemory[whichResult], ( 1L << bitIndex ) );\n";
 
 const char atomic_or_core[] =
-"    size_t numBits = sizeof( destMemory[0] ) * 8;\n"
-"    int  whichResult = tid / numBits;\n"
-"    int  bitIndex = tid - ( whichResult * numBits );\n"
-"\n"
-"    oldValues[tid] = atomic_or( &destMemory[whichResult], ( 1L << bitIndex ) );\n"
-;
+    "    size_t numBits = sizeof( destMemory[0] ) * 8;\n"
+    "    int  whichResult = tid / numBits;\n"
+    "    int  bitIndex = tid - ( whichResult * numBits );\n"
+    "\n"
+    "    oldValues[tid] = atomic_or( &destMemory[whichResult], ( 1L << bitIndex ) );\n";
+// clang-format on
 
-cl_int test_atomic_or_result_int( size_t size, cl_int *startRefValues, size_t whichResult )
+cl_int test_atomic_or_result_int(size_t size, cl_int *startRefValues,
+                                 size_t whichResult)
 {
-    size_t numThreads = ( (size_t)size + 31 ) / 32;
-    if( whichResult < numThreads - 1 )
-        return 0xffffffff;
+    size_t numThreads = ((size_t)size + 31) / 32;
+    if (whichResult < numThreads - 1) return 0xffffffff;
 
     // Last item doesn't get and'ed on every bit, so we have to mask away
     size_t numBits = (size_t)size - whichResult * 32;
     cl_int bits = 0;
-    for( size_t i = 0; i < numBits; i++ )
-        bits |= ( 1 << i );
+    for (size_t i = 0; i < numBits; i++) bits |= (cl_int)(1u << i); // unsigned: no shift into sign bit
 
     return bits;
 }
 
-cl_long test_atomic_or_result_long( size_t size, cl_long *startRefValues, size_t whichResult )
+cl_long test_atomic_or_result_long(size_t size, cl_long *startRefValues,
+                                   size_t whichResult)
 {
-    size_t numThreads = ( (size_t)size + 63 ) / 64;
-    if( whichResult < numThreads - 1 )
-        return 0x0ffffffffffffffffLL;
+    size_t numThreads = ((size_t)size + 63) / 64;
+    if (whichResult < numThreads - 1) return 0x0ffffffffffffffffLL;
 
     // Last item doesn't get and'ed on every bit, so we have to mask away
     size_t numBits = (size_t)size - whichResult * 64;
     cl_long bits = 0;
-    for( size_t i = 0; i < numBits; i++ )
-        bits |= ( 1LL << i );
+    for (size_t i = 0; i < numBits; i++) bits |= (cl_long)(1ULL << i); // unsigned: no shift into sign bit
 
     return bits;
 }
 
-int test_atomic_or(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+int test_atomic_or(cl_device_id deviceID, cl_context context,
+                   cl_command_queue queue, int num_elements)
 {
-    TestFns set = { 0, 0LL, test_bitwise_num_results, test_atomic_or_result_int, NULL, NULL, test_atomic_or_result_long, NULL, NULL };
+    TestFns set = {
+        0,    0LL,  test_bitwise_num_results,   test_atomic_or_result_int,
+        NULL, NULL, test_atomic_or_result_long, NULL,
+        NULL
+    };
 
-    if( test_atomic_function_set( deviceID, context, queue, num_elements, atom_or_core, set, true, /*matchGroupSize*/ false, /*usingAtomicPrefix*/ false  ) != 0 )
+    if (test_atomic_function_set(
+            deviceID, context, queue, num_elements, atom_or_core, set, true,
+            /*matchGroupSize*/ false, /*usingAtomicPrefix*/ false)
+        != 0)
         return -1;
-    if( test_atomic_function_set( deviceID, context, queue, num_elements, atomic_or_core, set, true, /*matchGroupSize*/ false, /*usingAtomicPrefix*/ true  ) != 0 )
+    if (test_atomic_function_set(
+            deviceID, context, queue, num_elements, atomic_or_core, set, true,
+            /*matchGroupSize*/ false, /*usingAtomicPrefix*/ true)
+        != 0)
         return -1;
     return 0;
 }
@@ -1100,33 +1399,44 @@
     "\n"
     "    oldValues[tid] = atomic_xor( &destMemory[0], 1L << bitIndex );\n";
 
-cl_int test_atomic_xor_result_int( size_t size, cl_int *startRefValues, size_t whichResult )
+cl_int test_atomic_xor_result_int(size_t size, cl_int *startRefValues,
+                                  size_t whichResult)
 {
     cl_int total = 0x2f08ab41;
-    for( size_t i = 0; i < size; i++ )
-        total ^= ( 1 << ( i & 31 ) );
+    for (size_t i = 0; i < size; i++) total ^= (cl_int)(1u << (i & 31)); // unsigned: no shift into sign bit
     return total;
 }
 
-cl_long test_atomic_xor_result_long( size_t size, cl_long *startRefValues, size_t whichResult )
+cl_long test_atomic_xor_result_long(size_t size, cl_long *startRefValues,
+                                    size_t whichResult)
 {
     cl_long total = 0x2f08ab418ba0541LL;
-    for( size_t i = 0; i < size; i++ )
-        total ^= ( 1LL << ( i & 63 ) );
+    for (size_t i = 0; i < size; i++) total ^= (cl_long)(1ULL << (i & 63)); // unsigned: no shift into sign bit
     return total;
 }
 
-int test_atomic_xor(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+int test_atomic_xor(cl_device_id deviceID, cl_context context,
+                    cl_command_queue queue, int num_elements)
 {
-    TestFns set = { 0x2f08ab41, 0x2f08ab418ba0541LL, NULL, test_atomic_xor_result_int, NULL, NULL, test_atomic_xor_result_long, NULL, NULL };
+    TestFns set = { 0x2f08ab41,
+                    0x2f08ab418ba0541LL,
+                    NULL,
+                    test_atomic_xor_result_int,
+                    NULL,
+                    NULL,
+                    test_atomic_xor_result_long,
+                    NULL,
+                    NULL };
 
-    if( test_atomic_function_set( deviceID, context, queue, num_elements, atom_xor_core, set, true, /*matchGroupSize*/ false, /*usingAtomicPrefix*/ false  ) != 0 )
+    if (test_atomic_function_set(
+            deviceID, context, queue, num_elements, atom_xor_core, set, true,
+            /*matchGroupSize*/ false, /*usingAtomicPrefix*/ false)
+        != 0)
         return -1;
-    if( test_atomic_function_set( deviceID, context, queue, num_elements, atomic_xor_core, set, true, /*matchGroupSize*/ false, /*usingAtomicPrefix*/ true  ) != 0 )
+    if (test_atomic_function_set(
+            deviceID, context, queue, num_elements, atomic_xor_core, set, true,
+            /*matchGroupSize*/ false, /*usingAtomicPrefix*/ true)
+        != 0)
         return -1;
     return 0;
 }
-
-
-
-
diff --git a/test_conformance/atomics/test_indexed_cases.cpp b/test_conformance/atomics/test_indexed_cases.cpp
index b85e3d2..2bba3e2 100644
--- a/test_conformance/atomics/test_indexed_cases.cpp
+++ b/test_conformance/atomics/test_indexed_cases.cpp
@@ -1,6 +1,6 @@
 //
 // Copyright (c) 2017 The Khronos Group Inc.
-// 
+//
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -16,48 +16,55 @@
 #include "testBase.h"
 #include "harness/conversions.h"
 
-const char * atomic_index_source =
-"#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable\n"
-"// Counter keeps track of which index in counts we are using.\n"
-"// We get that value, increment it, and then set that index in counts to our thread ID.\n"
-"// At the end of this we should have all thread IDs in some random location in counts\n"
-"// exactly once. If atom_add failed then we will write over various thread IDs and we\n"
-"// will be missing some.\n"
-"\n"
-"__kernel void add_index_test(__global int *counter, __global int *counts) {\n"
-"    int tid = get_global_id(0);\n"
-"    \n"
-"    int counter_to_use = atom_add(counter, 1);\n"
-"    counts[counter_to_use] = tid;\n"
-"}";
+// clang-format off
+const char *atomic_index_source =
+    "#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable\n"
+    "// Counter keeps track of which index in counts we are using.\n"
+    "// We get that value, increment it, and then set that index in counts to our thread ID.\n"
+    "// At the end of this we should have all thread IDs in some random location in counts\n"
+    "// exactly once. If atom_add failed then we will write over various thread IDs and we\n"
+    "// will be missing some.\n"
+    "\n"
+    "__kernel void add_index_test(__global int *counter, __global int *counts) {\n"
+    "    int tid = get_global_id(0);\n"
+    "    \n"
+    "    int counter_to_use = atom_add(counter, 1);\n"
+    "    counts[counter_to_use] = tid;\n"
+    "}";
+// clang-format on
 
-int test_atomic_add_index(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+int test_atomic_add_index(cl_device_id deviceID, cl_context context,
+                          cl_command_queue queue, int num_elements)
 {
     clProgramWrapper program;
     clKernelWrapper kernel;
     clMemWrapper counter, counters;
     size_t numGlobalThreads, numLocalThreads;
-    int fail = 0, succeed = 0, err;
+    int fail = 0, err;
 
-  /* Check if atomics are supported. */
-  if (!is_extension_available(deviceID, "cl_khr_global_int32_base_atomics")) {
-    log_info("Base atomics not supported (cl_khr_global_int32_base_atomics). Skipping test.\n");
-    return 0;
-  }
+    /* Check if atomics are supported. */
+    if (!is_extension_available(deviceID, "cl_khr_global_int32_base_atomics"))
+    {
+        log_info("Base atomics not supported "
+                 "(cl_khr_global_int32_base_atomics). Skipping test.\n");
+        return 0;
+    }
 
     //===== add_index test
     // The index test replicates what particles does.
-    // It uses one memory location to keep track of the current index and then each thread
-    // does an atomic add to it to get its new location. The threads then write to their
-    // assigned location. At the end we check to make sure that each thread's ID shows up
-    // exactly once in the output.
+    // It uses one memory location to keep track of the current index and then
+    // each thread does an atomic add to it to get its new location. The threads
+    // then write to their assigned location. At the end we check to make sure
+    // that each thread's ID shows up exactly once in the output.
 
     numGlobalThreads = 2048;
 
-    if( create_single_kernel_helper( context, &program, &kernel, 1, &atomic_index_source, "add_index_test" ) )
+    if (create_single_kernel_helper(context, &program, &kernel, 1,
+                                    &atomic_index_source, "add_index_test"))
         return -1;
 
-    if( get_max_common_work_group_size( context, kernel, numGlobalThreads, &numLocalThreads ) )
+    if (get_max_common_work_group_size(context, kernel, numGlobalThreads,
+                                       &numLocalThreads))
         return -1;
 
     log_info("Execute global_threads:%d local_threads:%d\n",
@@ -72,103 +79,148 @@
                               sizeof(cl_int) * numGlobalThreads, NULL, NULL);
 
     // Reset all those locations to -1 to indciate they have not been used.
-    cl_int *values = (cl_int*) malloc(sizeof(cl_int)*numGlobalThreads);
-    if (values == NULL) {
-        log_error("add_index_test FAILED to allocate memory for initial values.\n");
-        fail = 1; succeed = -1;
-    } else {
+    cl_int *values = (cl_int *)malloc(sizeof(cl_int) * numGlobalThreads);
+    if (values == NULL)
+    {
+        log_error(
+            "add_index_test FAILED to allocate memory for initial values.\n");
+        fail = 1;
+    }
+    else
+    {
         memset(values, -1, numLocalThreads);
-        unsigned int i=0;
-        for (i=0; i<numGlobalThreads; i++)
-            values[i] = -1;
-        int init=0;
-        err = clEnqueueWriteBuffer(queue, counters, true, 0, numGlobalThreads*sizeof(cl_int), values, 0, NULL, NULL);
-        err |= clEnqueueWriteBuffer(queue, counter, true, 0,1*sizeof(cl_int), &init, 0, NULL, NULL);
-        if (err) {
-            log_error("add_index_test FAILED to write initial values to arrays: %d\n", err);
-            fail=1; succeed=-1;
-        } else {
+        unsigned int i = 0;
+        for (i = 0; i < numGlobalThreads; i++) values[i] = -1;
+        int init = 0;
+        err = clEnqueueWriteBuffer(queue, counters, true, 0,
+                                   numGlobalThreads * sizeof(cl_int), values, 0,
+                                   NULL, NULL);
+        err |= clEnqueueWriteBuffer(queue, counter, true, 0, 1 * sizeof(cl_int),
+                                    &init, 0, NULL, NULL);
+        if (err)
+        {
+            log_error(
+                "add_index_test FAILED to write initial values to arrays: %d\n",
+                err);
+            fail = 1;
+        }
+        else
+        {
             err = clSetKernelArg(kernel, 0, sizeof(counter), &counter);
             err |= clSetKernelArg(kernel, 1, sizeof(counters), &counters);
-            if (err) {
-                log_error("add_index_test FAILED to set kernel arguments: %d\n", err);
-                fail=1; succeed=-1;
-            } else {
-                err = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, &numGlobalThreads, &numLocalThreads, 0, NULL, NULL );
-                if (err) {
-                    log_error("add_index_test FAILED to execute kernel: %d\n", err);
-                    fail=1; succeed=-1;
-                } else {
-                    err = clEnqueueReadBuffer( queue, counters, true, 0, sizeof(cl_int)*numGlobalThreads, values, 0, NULL, NULL );
-                    if (err) {
-                        log_error("add_index_test FAILED to read back results: %d\n", err);
-                        fail = 1; succeed=-1;
-                    } else {
+            if (err)
+            {
+                log_error("add_index_test FAILED to set kernel arguments: %d\n",
+                          err);
+                fail = 1;
+            }
+            else
+            {
+                err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL,
+                                             &numGlobalThreads,
+                                             &numLocalThreads, 0, NULL, NULL);
+                if (err)
+                {
+                    log_error("add_index_test FAILED to execute kernel: %d\n",
+                              err);
+                    fail = 1;
+                }
+                else
+                {
+                    err = clEnqueueReadBuffer(queue, counters, true, 0,
+                                              sizeof(cl_int) * numGlobalThreads,
+                                              values, 0, NULL, NULL);
+                    if (err)
+                    {
+                        log_error(
+                            "add_index_test FAILED to read back results: %d\n",
+                            err);
+                        fail = 1;
+                    }
+                    else
+                    {
                         unsigned int looking_for, index;
-                        for (looking_for=0; looking_for<numGlobalThreads; looking_for++) {
-                            int instances_found=0;
-                            for (index=0; index<numGlobalThreads; index++) {
-                                if (values[index]==(int)looking_for)
+                        for (looking_for = 0; looking_for < numGlobalThreads;
+                             looking_for++)
+                        {
+                            int instances_found = 0;
+                            for (index = 0; index < numGlobalThreads; index++)
+                            {
+                                if (values[index] == (int)looking_for)
                                     instances_found++;
                             }
-                            if (instances_found != 1) {
-                                log_error("add_index_test FAILED: wrong number of instances (%d!=1) for counter %d.\n", instances_found, looking_for);
-                                fail = 1; succeed=-1;
+                            if (instances_found != 1)
+                            {
+                                log_error(
+                                    "add_index_test FAILED: wrong number of "
+                                    "instances (%d!=1) for counter %d.\n",
+                                    instances_found, looking_for);
+                                fail = 1;
                             }
                         }
                     }
                 }
             }
         }
-        if (!fail) {
-            log_info("add_index_test passed. Each thread used exactly one index.\n");
+        if (!fail)
+        {
+            log_info(
+                "add_index_test passed. Each thread used exactly one index.\n");
         }
         free(values);
     }
     return fail;
 }
 
+// clang-format off
 const char *add_index_bin_kernel[] = {
-"#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable\n"
-"// This test assigns a bunch of values to bins and then tries to put them in the bins in parallel\n"
-"// using an atomic add to keep track of the current location to write into in each bin.\n"
-"// This is the same as the memory update for the particles demo.\n"
-"\n"
-"__kernel void add_index_bin_test(__global int *bin_counters, __global int *bins, __global int *bin_assignments, int max_counts_per_bin) {\n"
-"    int tid = get_global_id(0);\n"
-"\n"
-"    int location = bin_assignments[tid];\n"
-"    int counter = atom_add(&bin_counters[location], 1);\n"
-"    bins[location*max_counts_per_bin + counter] = tid;\n"
-"}" };
+    "#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable\n"
+    "// This test assigns a bunch of values to bins and then tries to put them in the bins in parallel\n"
+    "// using an atomic add to keep track of the current location to write into in each bin.\n"
+    "// This is the same as the memory update for the particles demo.\n"
+    "\n"
+    "__kernel void add_index_bin_test(__global int *bin_counters, __global int *bins, __global int *bin_assignments, int max_counts_per_bin) {\n"
+    "    int tid = get_global_id(0);\n"
+    "\n"
+    "    int location = bin_assignments[tid];\n"
+    "    int counter = atom_add(&bin_counters[location], 1);\n"
+    "    bins[location*max_counts_per_bin + counter] = tid;\n"
+    "}" };
+// clang-format on
 
-// This test assigns a bunch of values to bins and then tries to put them in the bins in parallel
-// using an atomic add to keep track of the current location to write into in each bin.
-// This is the same as the memory update for the particles demo.
-int add_index_bin_test(size_t *global_threads, cl_command_queue queue, cl_context context, MTdata d)
+// This test assigns a bunch of values to bins and then tries to put them in the
+// bins in parallel using an atomic add to keep track of the current location to
+// write into in each bin. This is the same as the memory update for the
+// particles demo.
+int add_index_bin_test(size_t *global_threads, cl_command_queue queue,
+                       cl_context context, MTdata d)
 {
     int number_of_items = (int)global_threads[0];
     size_t local_threads[1];
     int divisor = 12;
-    int number_of_bins = number_of_items/divisor;
-    int max_counts_per_bin = divisor*2;
+    int number_of_bins = number_of_items / divisor;
+    int max_counts_per_bin = divisor * 2;
 
     int fail = 0;
-    int succeed = 0;
     int err;
 
     clProgramWrapper program;
     clKernelWrapper kernel;
 
-    //  log_info("add_index_bin_test: %d items, into %d bins, with a max of %d items per bin (bins is %d long).\n",
-    //           number_of_items, number_of_bins, max_counts_per_bin, number_of_bins*max_counts_per_bin);
+    //  log_info("add_index_bin_test: %d items, into %d bins, with a max of "
+    //           "%d items per bin (bins is %d long).\n",
+    //           number_of_items, number_of_bins, max_counts_per_bin,
+    //           number_of_bins*max_counts_per_bin);
 
     //===== add_index_bin test
     // The index test replicates what particles does.
-    err = create_single_kernel_helper(context, &program, &kernel, 1, add_index_bin_kernel, "add_index_bin_test" );
-    test_error( err, "Unable to create testing kernel" );
+    err =
+        create_single_kernel_helper(context, &program, &kernel, 1,
+                                    add_index_bin_kernel, "add_index_bin_test");
+    test_error(err, "Unable to create testing kernel");
 
-    if( get_max_common_work_group_size( context, kernel, global_threads[0], &local_threads[0] ) )
+    if (get_max_common_work_group_size(context, kernel, global_threads[0],
+                                       &local_threads[0]))
         return -1;
 
     log_info("Execute global_threads:%d local_threads:%d\n",
@@ -185,152 +237,228 @@
         clCreateBuffer(context, CL_MEM_READ_ONLY,
                        sizeof(cl_int) * number_of_items, NULL, NULL);
 
-    if (bin_counters == NULL) {
+    if (bin_counters == NULL)
+    {
         log_error("add_index_bin_test FAILED to allocate bin_counters.\n");
         return -1;
     }
-    if (bins == NULL) {
+    if (bins == NULL)
+    {
         log_error("add_index_bin_test FAILED to allocate bins.\n");
         return -1;
     }
-    if (bin_assignments == NULL) {
+    if (bin_assignments == NULL)
+    {
         log_error("add_index_bin_test FAILED to allocate bin_assignments.\n");
         return -1;
     }
 
     // Initialize our storage
-    cl_int *l_bin_counts = (cl_int*)malloc(sizeof(cl_int)*number_of_bins);
-    if (!l_bin_counts) {
-        log_error("add_index_bin_test FAILED to allocate initial values for bin_counters.\n");
+    cl_int *l_bin_counts = (cl_int *)malloc(sizeof(cl_int) * number_of_bins);
+    if (!l_bin_counts)
+    {
+        log_error("add_index_bin_test FAILED to allocate initial values for "
+                  "bin_counters.\n");
         return -1;
     }
     int i;
-    for (i=0; i<number_of_bins; i++)
-        l_bin_counts[i] = 0;
-    err = clEnqueueWriteBuffer(queue, bin_counters, true, 0, sizeof(cl_int)*number_of_bins, l_bin_counts, 0, NULL, NULL);
-    if (err) {
-        log_error("add_index_bin_test FAILED to set initial values for bin_counters: %d\n", err);
+    for (i = 0; i < number_of_bins; i++) l_bin_counts[i] = 0;
+    err = clEnqueueWriteBuffer(queue, bin_counters, true, 0,
+                               sizeof(cl_int) * number_of_bins, l_bin_counts, 0,
+                               NULL, NULL);
+    if (err)
+    {
+        log_error("add_index_bin_test FAILED to set initial values for "
+                  "bin_counters: %d\n",
+                  err);
         return -1;
     }
 
-    cl_int *values = (cl_int*)malloc(sizeof(cl_int)*number_of_bins*max_counts_per_bin);
-    if (!values) {
-        log_error("add_index_bin_test FAILED to allocate initial values for bins.\n");
+    cl_int *values =
+        (cl_int *)malloc(sizeof(cl_int) * number_of_bins * max_counts_per_bin);
+    if (!values)
+    {
+        log_error(
+            "add_index_bin_test FAILED to allocate initial values for bins.\n");
         return -1;
     }
-    for (i=0; i<number_of_bins*max_counts_per_bin; i++)
-        values[i] = -1;
-    err = clEnqueueWriteBuffer(queue, bins, true, 0, sizeof(cl_int)*number_of_bins*max_counts_per_bin, values, 0, NULL, NULL);
-    if (err) {
-        log_error("add_index_bin_test FAILED to set initial values for bins: %d\n", err);
+    for (i = 0; i < number_of_bins * max_counts_per_bin; i++) values[i] = -1;
+    err = clEnqueueWriteBuffer(queue, bins, true, 0,
+                               sizeof(cl_int) * number_of_bins
+                                   * max_counts_per_bin,
+                               values, 0, NULL, NULL);
+    if (err)
+    {
+        log_error(
+            "add_index_bin_test FAILED to set initial values for bins: %d\n",
+            err);
         return -1;
     }
     free(values);
 
-    cl_int *l_bin_assignments = (cl_int*)malloc(sizeof(cl_int)*number_of_items);
-    if (!l_bin_assignments) {
-        log_error("add_index_bin_test FAILED to allocate initial values for l_bin_assignments.\n");
+    cl_int *l_bin_assignments =
+        (cl_int *)malloc(sizeof(cl_int) * number_of_items);
+    if (!l_bin_assignments)
+    {
+        log_error("add_index_bin_test FAILED to allocate initial values for "
+                  "l_bin_assignments.\n");
         return -1;
     }
-    for (i=0; i<number_of_items; i++) {
-        int bin = random_in_range(0, number_of_bins-1, d);
-        while (l_bin_counts[bin] >= max_counts_per_bin) {
-            bin = random_in_range(0, number_of_bins-1, d);
+    for (i = 0; i < number_of_items; i++)
+    {
+        int bin = random_in_range(0, number_of_bins - 1, d);
+        while (l_bin_counts[bin] >= max_counts_per_bin)
+        {
+            bin = random_in_range(0, number_of_bins - 1, d);
         }
         if (bin >= number_of_bins)
-            log_error("add_index_bin_test internal error generating bin assignments: bin %d >= number_of_bins %d.\n", bin, number_of_bins);
-        if (l_bin_counts[bin]+1 > max_counts_per_bin)
-            log_error("add_index_bin_test internal error generating bin assignments: bin %d has more entries (%d) than max_counts_per_bin (%d).\n", bin, l_bin_counts[bin], max_counts_per_bin);
+            log_error("add_index_bin_test internal error generating bin "
+                      "assignments: bin %d >= number_of_bins %d.\n",
+                      bin, number_of_bins);
+        if (l_bin_counts[bin] + 1 > max_counts_per_bin)
+            log_error(
+                "add_index_bin_test internal error generating bin assignments: "
+                "bin %d has more entries (%d) than max_counts_per_bin (%d).\n",
+                bin, l_bin_counts[bin], max_counts_per_bin);
         l_bin_counts[bin]++;
         l_bin_assignments[i] = bin;
-        //     log_info("item %d assigned to bin %d (%d items)\n", i, bin, l_bin_counts[bin]);
+        //     log_info("item %d assigned to bin %d (%d items)\n", i, bin,
+        //     l_bin_counts[bin]);
     }
-    err = clEnqueueWriteBuffer(queue, bin_assignments, true, 0, sizeof(cl_int)*number_of_items, l_bin_assignments, 0, NULL, NULL);
-    if (err) {
-        log_error("add_index_bin_test FAILED to set initial values for bin_assignments: %d\n", err);
+    err = clEnqueueWriteBuffer(queue, bin_assignments, true, 0,
+                               sizeof(cl_int) * number_of_items,
+                               l_bin_assignments, 0, NULL, NULL);
+    if (err)
+    {
+        log_error("add_index_bin_test FAILED to set initial values for "
+                  "bin_assignments: %d\n",
+                  err);
         return -1;
     }
     // Setup the kernel
     err = clSetKernelArg(kernel, 0, sizeof(bin_counters), &bin_counters);
     err |= clSetKernelArg(kernel, 1, sizeof(bins), &bins);
     err |= clSetKernelArg(kernel, 2, sizeof(bin_assignments), &bin_assignments);
-    err |= clSetKernelArg(kernel, 3, sizeof(max_counts_per_bin), &max_counts_per_bin);
-    if (err) {
-        log_error("add_index_bin_test FAILED to set kernel arguments: %d\n", err);
-        fail=1; succeed=-1;
+    err |= clSetKernelArg(kernel, 3, sizeof(max_counts_per_bin),
+                          &max_counts_per_bin);
+    if (err)
+    {
+        log_error("add_index_bin_test FAILED to set kernel arguments: %d\n",
+                  err);
+        fail = 1;
         return -1;
     }
 
-    err = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, global_threads, local_threads, 0, NULL, NULL );
-    if (err) {
+    err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, global_threads,
+                                 local_threads, 0, NULL, NULL);
+    if (err)
+    {
         log_error("add_index_bin_test FAILED to execute kernel: %d\n", err);
-        fail=1; succeed=-1;
+        fail = 1;
     }
 
-    cl_int *final_bin_assignments = (cl_int*)malloc(sizeof(cl_int)*number_of_bins*max_counts_per_bin);
-    if (!final_bin_assignments) {
-        log_error("add_index_bin_test FAILED to allocate initial values for final_bin_assignments.\n");
+    cl_int *final_bin_assignments =
+        (cl_int *)malloc(sizeof(cl_int) * number_of_bins * max_counts_per_bin);
+    if (!final_bin_assignments)
+    {
+        log_error("add_index_bin_test FAILED to allocate initial values for "
+                  "final_bin_assignments.\n");
         return -1;
     }
-    err = clEnqueueReadBuffer( queue, bins, true, 0, sizeof(cl_int)*number_of_bins*max_counts_per_bin, final_bin_assignments, 0, NULL, NULL );
-    if (err) {
+    err = clEnqueueReadBuffer(queue, bins, true, 0,
+                              sizeof(cl_int) * number_of_bins
+                                  * max_counts_per_bin,
+                              final_bin_assignments, 0, NULL, NULL);
+    if (err)
+    {
         log_error("add_index_bin_test FAILED to read back bins: %d\n", err);
-        fail = 1; succeed=-1;
+        fail = 1;
     }
 
-    cl_int *final_bin_counts = (cl_int*)malloc(sizeof(cl_int)*number_of_bins);
-    if (!final_bin_counts) {
-        log_error("add_index_bin_test FAILED to allocate initial values for final_bin_counts.\n");
+    cl_int *final_bin_counts =
+        (cl_int *)malloc(sizeof(cl_int) * number_of_bins);
+    if (!final_bin_counts)
+    {
+        log_error("add_index_bin_test FAILED to allocate initial values for "
+                  "final_bin_counts.\n");
         return -1;
     }
-    err = clEnqueueReadBuffer( queue, bin_counters, true, 0, sizeof(cl_int)*number_of_bins, final_bin_counts, 0, NULL, NULL );
-    if (err) {
-        log_error("add_index_bin_test FAILED to read back bin_counters: %d\n", err);
-        fail = 1; succeed=-1;
+    err = clEnqueueReadBuffer(queue, bin_counters, true, 0,
+                              sizeof(cl_int) * number_of_bins, final_bin_counts,
+                              0, NULL, NULL);
+    if (err)
+    {
+        log_error("add_index_bin_test FAILED to read back bin_counters: %d\n",
+                  err);
+        fail = 1;
     }
 
     // Verification.
-    int errors=0;
+    int errors = 0;
     int current_bin;
     int search;
     //  Print out all the contents of the bins.
     //  for (current_bin=0; current_bin<number_of_bins; current_bin++)
     //        for (search=0; search<max_counts_per_bin; search++)
-    //      log_info("[bin %d, entry %d] = %d\n", current_bin, search, final_bin_assignments[current_bin*max_counts_per_bin+search]);
+    //      log_info("[bin %d, entry %d] = %d\n", current_bin, search,
+    //      final_bin_assignments[current_bin*max_counts_per_bin+search]);
 
     // First verify that there are the correct number in each bin.
-    for (current_bin=0; current_bin<number_of_bins; current_bin++) {
+    for (current_bin = 0; current_bin < number_of_bins; current_bin++)
+    {
         int expected_number = l_bin_counts[current_bin];
         int actual_number = final_bin_counts[current_bin];
-        if (expected_number != actual_number) {
-            log_error("add_index_bin_test FAILED: bin %d reported %d entries when %d were expected.\n", current_bin, actual_number, expected_number);
+        if (expected_number != actual_number)
+        {
+            log_error("add_index_bin_test FAILED: bin %d reported %d entries "
+                      "when %d were expected.\n",
+                      current_bin, actual_number, expected_number);
             errors++;
         }
-        for (search=0; search<expected_number; search++) {
-            if (final_bin_assignments[current_bin*max_counts_per_bin+search] == -1) {
-                log_error("add_index_bin_test FAILED: bin %d had no entry at position %d when it should have had %d entries.\n", current_bin, search, expected_number);
+        for (search = 0; search < expected_number; search++)
+        {
+            if (final_bin_assignments[current_bin * max_counts_per_bin + search]
+                == -1)
+            {
+                log_error("add_index_bin_test FAILED: bin %d had no entry at "
+                          "position %d when it should have had %d entries.\n",
+                          current_bin, search, expected_number);
                 errors++;
             }
         }
-        for (search=expected_number; search<max_counts_per_bin; search++) {
-            if (final_bin_assignments[current_bin*max_counts_per_bin+search] != -1) {
-                log_error("add_index_bin_test FAILED: bin %d had an extra entry at position %d when it should have had only %d entries.\n", current_bin, search, expected_number);
+        for (search = expected_number; search < max_counts_per_bin; search++)
+        {
+            if (final_bin_assignments[current_bin * max_counts_per_bin + search]
+                != -1)
+            {
+                log_error(
+                    "add_index_bin_test FAILED: bin %d had an extra entry at "
+                    "position %d when it should have had only %d entries.\n",
+                    current_bin, search, expected_number);
                 errors++;
             }
         }
     }
     // Now verify that the correct ones are in each bin
     int index;
-    for (index=0; index<number_of_items; index++) {
+    for (index = 0; index < number_of_items; index++)
+    {
         int expected_bin = l_bin_assignments[index];
         int found_it = 0;
-        for (search=0; search<l_bin_counts[expected_bin]; search++) {
-            if (final_bin_assignments[expected_bin*max_counts_per_bin+search] == index) {
+        for (search = 0; search < l_bin_counts[expected_bin]; search++)
+        {
+            if (final_bin_assignments[expected_bin * max_counts_per_bin
+                                      + search]
+                == index)
+            {
                 found_it = 1;
             }
         }
-        if (found_it == 0) {
-            log_error("add_index_bin_test FAILED: did not find item %d in bin %d.\n", index, expected_bin);
+        if (found_it == 0)
+        {
+            log_error(
+                "add_index_bin_test FAILED: did not find item %d in bin %d.\n",
+                index, expected_bin);
             errors++;
         }
     }
@@ -341,41 +469,49 @@
     clReleaseMemObject(bin_counters);
     clReleaseMemObject(bins);
     clReleaseMemObject(bin_assignments);
-    if (errors == 0) {
-        log_info("add_index_bin_test passed. Each item was put in the correct bin in parallel.\n");
+    if (errors == 0)
+    {
+        log_info("add_index_bin_test passed. Each item was put in the correct "
+                 "bin in parallel.\n");
         return 0;
-    } else {
+    }
+    else
+    {
         log_error("add_index_bin_test FAILED: %d errors.\n", errors);
         return -1;
     }
 }
 
-int test_atomic_add_index_bin(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+int test_atomic_add_index_bin(cl_device_id deviceID, cl_context context,
+                              cl_command_queue queue, int num_elements)
 {
     //===== add_index_bin test
     size_t numGlobalThreads = 2048;
-    int iteration=0;
+    int iteration = 0;
     int err, failed = 0;
-    MTdata d = init_genrand( gRandomSeed );
+    MTdata d = init_genrand(gRandomSeed);
 
-  /* Check if atomics are supported. */
-  if (!is_extension_available(deviceID, "cl_khr_global_int32_base_atomics")) {
-    log_info("Base atomics not supported (cl_khr_global_int32_base_atomics). Skipping test.\n");
-    free_mtdata( d );
-    return 0;
-  }
+    /* Check if atomics are supported. */
+    if (!is_extension_available(deviceID, "cl_khr_global_int32_base_atomics"))
+    {
+        log_info("Base atomics not supported "
+                 "(cl_khr_global_int32_base_atomics). Skipping test.\n");
+        free_mtdata(d);
+        return 0;
+    }
 
-    for(iteration=0; iteration<10; iteration++) {
-        log_info("add_index_bin_test with %d elements:\n", (int)numGlobalThreads);
-        err = add_index_bin_test(&numGlobalThreads,  queue,  context, d);
-        if (err) {
+    for (iteration = 0; iteration < 10; iteration++)
+    {
+        log_info("add_index_bin_test with %d elements:\n",
+                 (int)numGlobalThreads);
+        err = add_index_bin_test(&numGlobalThreads, queue, context, d);
+        if (err)
+        {
             failed++;
             break;
         }
-        numGlobalThreads*=2;
+        numGlobalThreads *= 2;
     }
-    free_mtdata( d );
+    free_mtdata(d);
     return failed;
 }
-
-
diff --git a/test_conformance/basic/test_async_copy2D.cpp b/test_conformance/basic/test_async_copy2D.cpp
index 54633a3..bf3f155 100644
--- a/test_conformance/basic/test_async_copy2D.cpp
+++ b/test_conformance/basic/test_async_copy2D.cpp
@@ -53,7 +53,7 @@
 
   for (int i = 0; i < lineCopiesPerWorkItem; i++) {
     for (int j = 0; j < numElementsPerLine; j++) {
-      const local_index = (get_local_id(0) * lineCopiesPerWorkItem + i) * dstStride + j;
+      const int local_index = (get_local_id(0) * lineCopiesPerWorkItem + i) * dstStride + j;
       const int global_index = (get_global_id(0) * lineCopiesPerWorkItem + i) * dstStride + j;
       dst[global_index] = localBuffer[local_index];
     }
diff --git a/test_conformance/basic/test_enqueued_local_size.cpp b/test_conformance/basic/test_enqueued_local_size.cpp
index 91fe143..ea95df6 100644
--- a/test_conformance/basic/test_enqueued_local_size.cpp
+++ b/test_conformance/basic/test_enqueued_local_size.cpp
@@ -1,6 +1,6 @@
 //
 // Copyright (c) 2017 The Khronos Group Inc.
-// 
+//
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -26,32 +26,33 @@
 
 #include "procs.h"
 
-static const char *enqueued_local_size_2d_code =
-"__kernel void test_enqueued_local_size_2d(global int *dst)\n"
-"{\n"
-"    if ((get_global_id(0) == 0) && (get_global_id(1) == 0))\n"
-"    {\n"
-"        dst[0] = (int)get_enqueued_local_size(0)\n;"
-"        dst[1] = (int)get_enqueued_local_size(1)\n;"
-"    }\n"
-"}\n";
+static const char *enqueued_local_size_2d_code = R"(
+__kernel void test_enqueued_local_size_2d(global int *dst)
+{
+    if ((get_global_id(0) == 0) && (get_global_id(1) == 0))
+    {
+        dst[0] = (int)get_enqueued_local_size(0);
+        dst[1] = (int)get_enqueued_local_size(1);
+    }
+}
+)";
 
-static const char *enqueued_local_size_1d_code =
-"__kernel void test_enqueued_local_size_1d(global int *dst)\n"
-"{\n"
-"    int  tid_x = get_global_id(0);\n"
-"    if (get_global_id(0) == 0)\n"
-"    {\n"
-"        dst[tid_x] = (int)get_enqueued_local_size(0)\n;"
-"    }\n"
-"}\n";
+static const char *enqueued_local_size_1d_code = R"(
+__kernel void test_enqueued_local_size_1d(global int *dst)
+{
+    int  tid_x = get_global_id(0);
+    if (get_global_id(0) == 0)
+    {
+        dst[tid_x] = (int)get_enqueued_local_size(0);
+    }
+}
+)";
 
 
-static int
-verify_enqueued_local_size(int *result, size_t *expected, int n)
+static int verify_enqueued_local_size(int *result, size_t *expected, int n)
 {
     int i;
-    for (i=0; i<n; i++)
+    for (i = 0; i < n; i++)
     {
         if (result[i] != (int)expected[i])
         {
@@ -64,14 +65,14 @@
 }
 
 
-int
-test_enqueued_local_size(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
+int test_enqueued_local_size(cl_device_id device, cl_context context,
+                             cl_command_queue queue, int num_elements)
 {
-    cl_mem streams;
-    cl_program program[2];
-    cl_kernel kernel[2];
+    clMemWrapper stream;
+    clProgramWrapper program[2];
+    clKernelWrapper kernel[2];
 
-    int *output_ptr;
+    cl_int output_ptr[2];
     size_t globalsize[2];
     size_t localsize[2];
     int err;
@@ -97,34 +98,33 @@
         }
     }
 
-    output_ptr   = (int*)malloc(2 * sizeof(int));
-
-    streams =
-        clCreateBuffer(context, CL_MEM_READ_WRITE, 2 * sizeof(int), NULL, &err);
-    test_error( err, "clCreateBuffer failed.");
+    stream = clCreateBuffer(context, CL_MEM_READ_WRITE, 2 * sizeof(cl_int),
+                            nullptr, &err);
+    test_error(err, "clCreateBuffer failed.");
 
     std::string cl_std = "-cl-std=CL";
     cl_std += (get_device_cl_version(device) == Version(3, 0)) ? "3.0" : "2.0";
     err = create_single_kernel_helper_with_build_options(
         context, &program[0], &kernel[0], 1, &enqueued_local_size_1d_code,
         "test_enqueued_local_size_1d", cl_std.c_str());
-    test_error( err, "create_single_kernel_helper failed");
+    test_error(err, "create_single_kernel_helper failed");
     err = create_single_kernel_helper_with_build_options(
         context, &program[1], &kernel[1], 1, &enqueued_local_size_2d_code,
         "test_enqueued_local_size_2d", cl_std.c_str());
-    test_error( err, "create_single_kernel_helper failed");
+    test_error(err, "create_single_kernel_helper failed");
 
-    err  = clSetKernelArg(kernel[0], 0, sizeof streams, &streams);
-    test_error( err, "clSetKernelArgs failed.");
-    err  = clSetKernelArg(kernel[1], 0, sizeof streams, &streams);
-    test_error( err, "clSetKernelArgs failed.");
+    err = clSetKernelArg(kernel[0], 0, sizeof stream, &stream);
+    test_error(err, "clSetKernelArgs failed.");
+    err = clSetKernelArg(kernel[1], 0, sizeof stream, &stream);
+    test_error(err, "clSetKernelArgs failed.");
 
-    globalsize[0] = (size_t)num_elements;
-    globalsize[1] = (size_t)num_elements;
+    globalsize[0] = static_cast<size_t>(num_elements);
+    globalsize[1] = static_cast<size_t>(num_elements);
 
     size_t max_wgs;
-    err = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(max_wgs), &max_wgs, NULL);
-    test_error( err, "clGetDeviceInfo failed.");
+    err = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_GROUP_SIZE,
+                          sizeof(max_wgs), &max_wgs, nullptr);
+    test_error(err, "clGetDeviceInfo failed.");
 
     localsize[0] = std::min<size_t>(16, max_wgs);
     localsize[1] = std::min<size_t>(11, max_wgs / localsize[0]);
@@ -143,35 +143,31 @@
         }
     }
 
-    err = clEnqueueNDRangeKernel(queue, kernel[1], 2, NULL, globalsize, localsize, 0, NULL, NULL);
-    test_error( err, "clEnqueueNDRangeKernel failed.");
+    err = clEnqueueNDRangeKernel(queue, kernel[1], 2, nullptr, globalsize,
+                                 localsize, 0, nullptr, nullptr);
+    test_error(err, "clEnqueueNDRangeKernel failed.");
 
-    err = clEnqueueReadBuffer(queue, streams, CL_TRUE, 0, 2*sizeof(int), output_ptr, 0, NULL, NULL);
-    test_error( err, "clEnqueueReadBuffer failed.");
+    err = clEnqueueReadBuffer(queue, stream, CL_BLOCKING, 0, 2 * sizeof(cl_int),
+                              output_ptr, 0, nullptr, nullptr);
+    test_error(err, "clEnqueueReadBuffer failed.");
 
     err = verify_enqueued_local_size(output_ptr, localsize, 2);
 
-    globalsize[0] = (size_t)num_elements;
+    globalsize[0] = static_cast<size_t>(num_elements);
     localsize[0] = 9;
     if (use_uniform_work_groups && (globalsize[0] % localsize[0]))
     {
         globalsize[0] += (localsize[0] - (globalsize[0] % localsize[0]));
     }
-    err = clEnqueueNDRangeKernel(queue, kernel[1], 1, NULL, globalsize, localsize, 0, NULL, NULL);
-    test_error( err, "clEnqueueNDRangeKernel failed.");
+    err = clEnqueueNDRangeKernel(queue, kernel[1], 1, nullptr, globalsize,
+                                 localsize, 0, nullptr, nullptr);
+    test_error(err, "clEnqueueNDRangeKernel failed.");
 
-    err = clEnqueueReadBuffer(queue, streams, CL_TRUE, 0, 2*sizeof(int), output_ptr, 0, NULL, NULL);
-    test_error( err, "clEnqueueReadBuffer failed.");
+    err = clEnqueueReadBuffer(queue, stream, CL_BLOCKING, 0, 2 * sizeof(cl_int),
+                              output_ptr, 0, nullptr, nullptr);
+    test_error(err, "clEnqueueReadBuffer failed.");
 
     err = verify_enqueued_local_size(output_ptr, localsize, 1);
 
-    // cleanup
-    clReleaseMemObject(streams);
-    clReleaseKernel(kernel[0]);
-    clReleaseKernel(kernel[1]);
-    clReleaseProgram(program[0]);
-    clReleaseProgram(program[1]);
-    free(output_ptr);
-
     return err;
 }
diff --git a/test_conformance/basic/test_progvar.cpp b/test_conformance/basic/test_progvar.cpp
index 9c872be..e202d27 100644
--- a/test_conformance/basic/test_progvar.cpp
+++ b/test_conformance/basic/test_progvar.cpp
@@ -15,12 +15,13 @@
 //
 #include "harness/compat.h"
 
-// Bug: Missing in spec: atomic_intptr_t is always supported if device is 32-bits.
+// Bug: Missing in spec: atomic_intptr_t is always supported if device is
+// 32-bits.
 // Bug: Missing in spec: CL_DEVICE_GLOBAL_VARIABLE_PREFERRED_TOTAL_SIZE
 
 #define FLUSH fflush(stdout)
 
-#define MAX_STR 16*1024
+#define MAX_STR (16 * 1024)
 
 #define ALIGNMENT 128
 
@@ -66,7 +67,11 @@
 static size_t l_max_global_id0 = 0;
 static cl_bool l_linker_available = false;
 
-#define check_error(errCode,msg,...) ((errCode != CL_SUCCESS) ? (log_error("ERROR: " msg "! (%s:%d)\n", ## __VA_ARGS__, __FILE__, __LINE__), 1) : 0)
+#define check_error(errCode, msg, ...)                                         \
+    ((errCode != CL_SUCCESS) ? (log_error("ERROR: " msg "! (%s:%d)\n",         \
+                                          ##__VA_ARGS__, __FILE__, __LINE__),  \
+                                1)                                             \
+                             : 0)
 
 ////////////////////
 // Info about types we can use for program scope variables.
@@ -75,110 +80,135 @@
 class TypeInfo {
 
 public:
-    TypeInfo() :
-        name(""),
-        m_buf_elem_type(""),
-        m_is_vecbase(false),
-        m_is_atomic(false),
-        m_is_like_size_t(false),
-        m_is_bool(false),
-        m_elem_type(0), m_num_elem(0),
-        m_size(0),
-        m_value_size(0)
-        {}
-    TypeInfo(const char* name_arg) :
-        name(name_arg),
-        m_buf_elem_type(name_arg),
-        m_is_vecbase(false),
-        m_is_atomic(false),
-        m_is_like_size_t(false),
-        m_is_bool(false),
-        m_elem_type(0), m_num_elem(0),
-        m_size(0),
-        m_value_size(0)
-        { }
+    TypeInfo()
+        : name(""), m_buf_elem_type(""), m_is_vecbase(false),
+          m_is_atomic(false), m_is_like_size_t(false), m_is_bool(false),
+          m_elem_type(0), m_num_elem(0), m_size(0), m_value_size(0)
+    {}
+    TypeInfo(const char* name_arg)
+        : name(name_arg), m_buf_elem_type(name_arg), m_is_vecbase(false),
+          m_is_atomic(false), m_is_like_size_t(false), m_is_bool(false),
+          m_elem_type(0), m_num_elem(0), m_size(0), m_value_size(0)
+    {}
 
     // Vectors
-    TypeInfo( TypeInfo* elem_type, int num_elem ) :
-        m_is_vecbase(false),
-        m_is_atomic(false),
-        m_is_like_size_t(false),
-        m_is_bool(false),
-        m_elem_type(elem_type),
-        m_num_elem(num_elem)
+    TypeInfo(TypeInfo* elem_type, int num_elem)
+        : m_is_vecbase(false), m_is_atomic(false), m_is_like_size_t(false),
+          m_is_bool(false), m_elem_type(elem_type), m_num_elem(num_elem)
     {
-        char the_name[10]; // long enough for longest vector type name "double16"
-        snprintf(the_name,sizeof(the_name),"%s%d",elem_type->get_name_c_str(),m_num_elem);
+        // Long enough for the longest vector type name, "double16".
+        char the_name[10];
+        snprintf(the_name, sizeof(the_name), "%s%d",
+                 elem_type->get_name_c_str(), m_num_elem);
         this->name = std::string(the_name);
         this->m_buf_elem_type = std::string(the_name);
         this->m_value_size = num_elem * elem_type->get_size();
-        if ( m_num_elem == 3 ) {
+        if (m_num_elem == 3)
+        {
             this->m_size = 4 * elem_type->get_size();
-        } else {
+        }
+        else
+        {
             this->m_size = num_elem * elem_type->get_size();
         }
     }
     const std::string& get_name(void) const { return name; }
     const char* get_name_c_str(void) const { return name.c_str(); }
-    TypeInfo& set_vecbase(void) { this->m_is_vecbase = true; return *this; }
-    TypeInfo& set_atomic(void) { this->m_is_atomic = true; return *this; }
-    TypeInfo& set_like_size_t(void) {
+    TypeInfo& set_vecbase(void)
+    {
+        this->m_is_vecbase = true;
+        return *this;
+    }
+    TypeInfo& set_atomic(void)
+    {
+        this->m_is_atomic = true;
+        return *this;
+    }
+    TypeInfo& set_like_size_t(void)
+    {
         this->m_is_like_size_t = true;
-        this->set_size( l_64bit_device ? 8 : 4 );
+        this->set_size(l_64bit_device ? 8 : 4);
         this->m_buf_elem_type = l_64bit_device ? "ulong" : "uint";
         return *this;
     }
-    TypeInfo& set_bool(void) { this->m_is_bool = true; return *this; }
-    TypeInfo& set_size(size_t n) { this->m_value_size = this->m_size = n; return *this; }
-    TypeInfo& set_buf_elem_type( const char* name ) { this->m_buf_elem_type = std::string(name); return *this; }
+    TypeInfo& set_bool(void)
+    {
+        this->m_is_bool = true;
+        return *this;
+    }
+    TypeInfo& set_size(size_t n)
+    {
+        this->m_value_size = this->m_size = n;
+        return *this;
+    }
+    TypeInfo& set_buf_elem_type(const char* name)
+    {
+        this->m_buf_elem_type = std::string(name);
+        return *this;
+    }
 
     const TypeInfo* elem_type(void) const { return m_elem_type; }
     int num_elem(void) const { return m_num_elem; }
 
-    bool is_vecbase(void) const {return m_is_vecbase;}
-    bool is_atomic(void) const {return m_is_atomic;}
-    bool is_atomic_64bit(void) const {return m_is_atomic && m_size == 8;}
-    bool is_like_size_t(void) const {return m_is_like_size_t;}
-    bool is_bool(void) const {return m_is_bool;}
-    size_t get_size(void) const {return m_size;}
-    size_t get_value_size(void) const {return m_value_size;}
+    bool is_vecbase(void) const { return m_is_vecbase; }
+    bool is_atomic(void) const { return m_is_atomic; }
+    bool is_atomic_64bit(void) const { return m_is_atomic && m_size == 8; }
+    bool is_like_size_t(void) const { return m_is_like_size_t; }
+    bool is_bool(void) const { return m_is_bool; }
+    size_t get_size(void) const { return m_size; }
+    size_t get_value_size(void) const { return m_value_size; }
 
     // When passing values of this type to a kernel, what buffer type
     // should be used?
-    const char* get_buf_elem_type(void) const { return m_buf_elem_type.c_str(); }
+    const char* get_buf_elem_type(void) const
+    {
+        return m_buf_elem_type.c_str();
+    }
 
-    std::string as_string(const cl_uchar* value_ptr) const {
+    std::string as_string(const cl_uchar* value_ptr) const
+    {
         // This method would be shorter if I had a real handle to element
         // vector type.
-        if ( this->is_bool() ) {
-            std::string result( name );
+        if (this->is_bool())
+        {
+            std::string result(name);
             result += "<";
             result += (*value_ptr ? "true" : "false");
             result += ", ";
             char buf[10];
-            sprintf(buf,"%02x",*value_ptr);
+            sprintf(buf, "%02x", *value_ptr);
             result += buf;
             result += ">";
             return result;
-        } else if ( this->num_elem() ) {
-            std::string result( name );
+        }
+        else if (this->num_elem())
+        {
+            std::string result(name);
             result += "<";
-            for ( unsigned ielem = 0 ; ielem < this->num_elem() ; ielem++ ) {
+            for (int ielem = 0; ielem < this->num_elem(); ielem++)
+            {
                 char buf[MAX_STR];
-                if ( ielem ) result += ", ";
-                for ( unsigned ibyte = 0; ibyte < this->m_elem_type->get_size() ; ibyte++ ) {
-                    sprintf(buf + 2*ibyte,"%02x", value_ptr[ ielem * this->m_elem_type->get_size() + ibyte ] );
+                if (ielem) result += ", ";
+                for (unsigned ibyte = 0; ibyte < this->m_elem_type->get_size();
+                     ibyte++)
+                {
+                    sprintf(buf + 2 * ibyte, "%02x",
+                            value_ptr[ielem * this->m_elem_type->get_size()
+                                      + ibyte]);
                 }
                 result += buf;
             }
             result += ">";
             return result;
-        } else {
-            std::string result( name );
+        }
+        else
+        {
+            std::string result(name);
             result += "<";
             char buf[MAX_STR];
-            for ( unsigned ibyte = 0; ibyte < this->get_size() ; ibyte++ ) {
-                sprintf(buf + 2*ibyte,"%02x", value_ptr[ ibyte ] );
+            for (unsigned ibyte = 0; ibyte < this->get_size(); ibyte++)
+            {
+                sprintf(buf + 2 * ibyte, "%02x", value_ptr[ibyte]);
             }
             result += buf;
             result += ">";
@@ -189,51 +219,71 @@
     // Initialize the given buffer to a constant value initialized as if it
     // were from the INIT_VAR macro below.
     // Only needs to support values 0 and 1.
-    void init( cl_uchar* buf, cl_uchar val) const {
-        if ( this->num_elem() ) {
-            for ( unsigned ielem = 0 ; ielem < this->num_elem() ; ielem++ ) {
+    void init(cl_uchar* buf, cl_uchar val) const
+    {
+        if (this->num_elem())
+        {
+            for (unsigned ielem = 0; ielem < this->num_elem(); ielem++)
+            {
                 // Delegate!
-                this->init_elem( buf + ielem * this->get_value_size()/this->num_elem(), val );
+                this->init_elem(
+                    buf + ielem * this->get_value_size() / this->num_elem(),
+                    val);
             }
-        } else {
-            init_elem( buf, val );
+        }
+        else
+        {
+            init_elem(buf, val);
         }
     }
 
 private:
-    void init_elem( cl_uchar* buf, cl_uchar val ) const {
-        size_t elem_size = this->num_elem() ? this->get_value_size()/this->num_elem() : this->get_size();
-        memset(buf,0,elem_size);
-        if ( val ) {
-            if ( strstr( name.c_str(), "float" ) ) {
+    void init_elem(cl_uchar* buf, cl_uchar val) const
+    {
+        size_t elem_size = this->num_elem()
+            ? this->get_value_size() / this->num_elem()
+            : this->get_size();
+        memset(buf, 0, elem_size);
+        if (val)
+        {
+            if (strstr(name.c_str(), "float"))
+            {
                 *(float*)buf = (float)val;
                 return;
             }
-            if ( strstr( name.c_str(), "double" ) ) {
+            if (strstr(name.c_str(), "double"))
+            {
                 *(double*)buf = (double)val;
                 return;
             }
-            if ( this->is_bool() ) { *buf = (bool)val; return; }
+            if (this->is_bool())
+            {
+                *buf = (bool)val;
+                return;
+            }
 
             // Write a single character value to the correct spot,
             // depending on host endianness.
-            if ( l_host_is_big_endian ) *(buf + elem_size-1) = (cl_uchar)val;
-            else *buf = (cl_uchar)val;
+            if (l_host_is_big_endian)
+                *(buf + elem_size - 1) = (cl_uchar)val;
+            else
+                *buf = (cl_uchar)val;
         }
     }
-public:
 
-    void dump(FILE* fp) const {
-        fprintf(fp,"Type %s : <%d,%d,%s> ", name.c_str(),
-                (int)m_size,
-                (int)m_value_size,
-                m_buf_elem_type.c_str() );
-        if ( this->m_elem_type ) fprintf(fp, " vec(%s,%d)", this->m_elem_type->get_name_c_str(), this->num_elem() );
-        if ( this->m_is_vecbase ) fprintf(fp, " vecbase");
-        if ( this->m_is_bool ) fprintf(fp, " bool");
-        if ( this->m_is_like_size_t ) fprintf(fp, " like-size_t");
-        if ( this->m_is_atomic ) fprintf(fp, " atomic");
-        fprintf(fp,"\n");
+public:
+    void dump(FILE* fp) const
+    {
+        fprintf(fp, "Type %s : <%d,%d,%s> ", name.c_str(), (int)m_size,
+                (int)m_value_size, m_buf_elem_type.c_str());
+        if (this->m_elem_type)
+            fprintf(fp, " vec(%s,%d)", this->m_elem_type->get_name_c_str(),
+                    this->num_elem());
+        if (this->m_is_vecbase) fprintf(fp, " vecbase");
+        if (this->m_is_bool) fprintf(fp, " bool");
+        if (this->m_is_like_size_t) fprintf(fp, " like-size_t");
+        if (this->m_is_atomic) fprintf(fp, " atomic");
+        fprintf(fp, "\n");
         fflush(fp);
     }
 
@@ -246,7 +296,8 @@
     bool m_is_like_size_t;
     bool m_is_bool;
     size_t m_size; // Number of bytes of storage occupied by this type.
-    size_t m_value_size; // Number of bytes of value significant for this type. Differs for vec3.
+    size_t m_value_size; // Number of bytes of value significant for this type.
+                         // Differs for vec3.
 
     // When passing values of this type to a kernel, what buffer type
     // should be used?
@@ -256,46 +307,65 @@
 };
 
 
-#define NUM_SCALAR_TYPES (8+2) // signed and unsigned integral types, float and double
-#define NUM_VECTOR_SIZES (5)   // 2,3,4,8,16
-#define NUM_PLAIN_TYPES \
-      5 /*boolean and size_t family */  \
-    + NUM_SCALAR_TYPES \
-    + NUM_SCALAR_TYPES*NUM_VECTOR_SIZES \
-    + 10 /* atomic types */
+#define NUM_SCALAR_TYPES                                                       \
+    (8 + 2) // signed and unsigned integral types, float and double
+#define NUM_VECTOR_SIZES (5) // 2,3,4,8,16
+#define NUM_PLAIN_TYPES                                                        \
+    5 /*boolean and size_t family */                                           \
+        + NUM_SCALAR_TYPES + NUM_SCALAR_TYPES* NUM_VECTOR_SIZES                \
+        + 10 /* atomic types */
 
 // Need room for plain, array, pointer, struct
-#define MAX_TYPES (4*NUM_PLAIN_TYPES)
+#define MAX_TYPES (4 * NUM_PLAIN_TYPES)
 
 static TypeInfo type_info[MAX_TYPES];
 static int num_type_info = 0; // Number of valid entries in type_info[]
 
 
-
-
 // A helper class to form kernel source arguments for clCreateProgramWithSource.
 class StringTable {
 public:
-    StringTable() : m_c_strs(NULL), m_lengths(NULL), m_frozen(false), m_strings() {}
+    StringTable(): m_c_strs(NULL), m_lengths(NULL), m_frozen(false), m_strings()
+    {}
     ~StringTable() { release_frozen(); }
 
-    void add(std::string s) { release_frozen(); m_strings.push_back(s); }
+    void add(std::string s)
+    {
+        release_frozen();
+        m_strings.push_back(s);
+    }
 
-    const size_t num_str() { freeze(); return m_strings.size(); }
-    const char** strs() { freeze(); return m_c_strs; }
-    const size_t* lengths() { freeze(); return m_lengths; }
+    const size_t num_str()
+    {
+        freeze();
+        return m_strings.size();
+    }
+    const char** strs()
+    {
+        freeze();
+        return m_c_strs;
+    }
+    const size_t* lengths()
+    {
+        freeze();
+        return m_lengths;
+    }
 
 private:
-    void freeze(void) {
-        if ( !m_frozen ) {
+    void freeze(void)
+    {
+        if (!m_frozen)
+        {
             release_frozen();
 
-            m_c_strs = (const char**) malloc(sizeof(const char*) * m_strings.size());
-            m_lengths = (size_t*) malloc(sizeof(size_t) * m_strings.size());
-            assert( m_c_strs );
-            assert( m_lengths );
+            m_c_strs =
+                (const char**)malloc(sizeof(const char*) * m_strings.size());
+            m_lengths = (size_t*)malloc(sizeof(size_t) * m_strings.size());
+            assert(m_c_strs);
+            assert(m_lengths);
 
-            for ( size_t i = 0; i < m_strings.size() ; i++ ) {
+            for (size_t i = 0; i < m_strings.size(); i++)
+            {
                 m_c_strs[i] = m_strings[i].c_str();
                 m_lengths[i] = strlen(m_c_strs[i]);
             }
@@ -303,9 +373,18 @@
             m_frozen = true;
         }
     }
-    void release_frozen(void) {
-        if ( m_c_strs ) { free(m_c_strs); m_c_strs = 0; }
-        if ( m_lengths ) { free(m_lengths); m_lengths = 0; }
+    void release_frozen(void)
+    {
+        if (m_c_strs)
+        {
+            free(m_c_strs);
+            m_c_strs = 0;
+        }
+        if (m_lengths)
+        {
+            free(m_lengths);
+            m_lengths = 0;
+        }
         m_frozen = false;
     }
 
@@ -325,11 +404,15 @@
 static const char* l_get_cles_int64_pragma(void);
 static int l_build_type_table(cl_device_id device);
 
-static int l_get_device_info(cl_device_id device, size_t* max_size_ret, size_t* pref_size_ret);
+static int l_get_device_info(cl_device_id device, size_t* max_size_ret,
+                             size_t* pref_size_ret);
 
-static void l_set_randomly( cl_uchar* buf, size_t buf_size, RandomSeed& rand_state );
-static int l_compare( const cl_uchar* expected, const cl_uchar* received, unsigned num_values, const TypeInfo&ti );
-static int l_copy( cl_uchar* dest, unsigned dest_idx, const cl_uchar* src, unsigned src_idx, const TypeInfo&ti );
+static void l_set_randomly(cl_uchar* buf, size_t buf_size,
+                           RandomSeed& rand_state);
+static int l_compare(const cl_uchar* expected, const cl_uchar* received,
+                     unsigned num_values, const TypeInfo& ti);
+static int l_copy(cl_uchar* dest, unsigned dest_idx, const cl_uchar* src,
+                  unsigned src_idx, const TypeInfo& ti);
 
 static std::string conversion_functions(const TypeInfo& ti);
 static std::string global_decls(const TypeInfo& ti, bool with_init);
@@ -337,90 +420,123 @@
 static std::string writer_function(const TypeInfo& ti);
 static std::string reader_function(const TypeInfo& ti);
 
-static int l_write_read( cl_device_id device, cl_context context, cl_command_queue queue );
-static int l_write_read_for_type( cl_device_id device, cl_context context, cl_command_queue queue, const TypeInfo& ti, RandomSeed& rand_state );
+static int l_write_read(cl_device_id device, cl_context context,
+                        cl_command_queue queue);
+static int l_write_read_for_type(cl_device_id device, cl_context context,
+                                 cl_command_queue queue, const TypeInfo& ti,
+                                 RandomSeed& rand_state);
 
-static int l_init_write_read( cl_device_id device, cl_context context, cl_command_queue queue );
-static int l_init_write_read_for_type( cl_device_id device, cl_context context, cl_command_queue queue, const TypeInfo& ti, RandomSeed& rand_state );
+static int l_init_write_read(cl_device_id device, cl_context context,
+                             cl_command_queue queue);
+static int l_init_write_read_for_type(cl_device_id device, cl_context context,
+                                      cl_command_queue queue,
+                                      const TypeInfo& ti,
+                                      RandomSeed& rand_state);
 
-static int l_capacity( cl_device_id device, cl_context context, cl_command_queue queue, size_t max_size );
-static int l_user_type( cl_device_id device, cl_context context, cl_command_queue queue, size_t max_size, bool separate_compilation );
-
+static int l_capacity(cl_device_id device, cl_context context,
+                      cl_command_queue queue, size_t max_size);
+static int l_user_type(cl_device_id device, cl_context context,
+                       cl_command_queue queue, size_t max_size,
+                       bool separate_compilation);
 
 
 ////////////////////
 // File scope function definitions
 
-static cl_int print_build_log(cl_program program, cl_uint num_devices, cl_device_id *device_list, cl_uint count, const char **strings, const size_t *lengths, const char* options)
+static cl_int print_build_log(cl_program program, cl_uint num_devices,
+                              cl_device_id* device_list, cl_uint count,
+                              const char** strings, const size_t* lengths,
+                              const char* options)
 {
     cl_uint i;
     cl_int error;
     BufferOwningPtr<cl_device_id> devices;
 
-    if(num_devices == 0 || device_list == NULL)
+    if (num_devices == 0 || device_list == NULL)
     {
-        error = clGetProgramInfo(program, CL_PROGRAM_NUM_DEVICES, sizeof(num_devices), &num_devices, NULL);
+        error = clGetProgramInfo(program, CL_PROGRAM_NUM_DEVICES,
+                                 sizeof(num_devices), &num_devices, NULL);
         test_error(error, "clGetProgramInfo CL_PROGRAM_NUM_DEVICES failed");
 
-        device_list = (cl_device_id*)malloc(sizeof(cl_device_id)*num_devices);
+        device_list = (cl_device_id*)malloc(sizeof(cl_device_id) * num_devices);
         devices.reset(device_list);
 
         memset(device_list, 0, sizeof(cl_device_id) * num_devices);
 
-        error = clGetProgramInfo(program, CL_PROGRAM_DEVICES, sizeof(cl_device_id) * num_devices, device_list, NULL);
+        error = clGetProgramInfo(program, CL_PROGRAM_DEVICES,
+                                 sizeof(cl_device_id) * num_devices,
+                                 device_list, NULL);
         test_error(error, "clGetProgramInfo CL_PROGRAM_DEVICES failed");
     }
 
     cl_uint z;
     bool sourcePrinted = false;
 
-    for(z = 0; z < num_devices; z++)
+    for (z = 0; z < num_devices; z++)
     {
         char deviceName[4096] = "";
-        error = clGetDeviceInfo(device_list[z], CL_DEVICE_NAME, sizeof(deviceName), deviceName, NULL);
-        check_error(error, "Device \"%d\" failed to return a name. clGetDeviceInfo CL_DEVICE_NAME failed", z);
+        error = clGetDeviceInfo(device_list[z], CL_DEVICE_NAME,
+                                sizeof(deviceName), deviceName, NULL);
+        check_error(error,
+                    "Device \"%d\" failed to return a name. clGetDeviceInfo "
+                    "CL_DEVICE_NAME failed",
+                    z);
 
         cl_build_status buildStatus;
-        error = clGetProgramBuildInfo(program, device_list[z], CL_PROGRAM_BUILD_STATUS, sizeof(buildStatus), &buildStatus, NULL);
-        check_error(error, "clGetProgramBuildInfo CL_PROGRAM_BUILD_STATUS failed");
+        error = clGetProgramBuildInfo(program, device_list[z],
+                                      CL_PROGRAM_BUILD_STATUS,
+                                      sizeof(buildStatus), &buildStatus, NULL);
+        check_error(error,
+                    "clGetProgramBuildInfo CL_PROGRAM_BUILD_STATUS failed");
 
-        if(buildStatus != CL_BUILD_SUCCESS)
+        if (buildStatus != CL_BUILD_SUCCESS)
         {
-            if(!sourcePrinted)
+            if (!sourcePrinted)
             {
                 log_error("Build options: %s\n", options);
-                if(count && strings)
+                if (count && strings)
                 {
                     log_error("Original source is: ------------\n");
-                    for(i = 0; i < count; i++) log_error("%s", strings[i]);
+                    for (i = 0; i < count; i++) log_error("%s", strings[i]);
                 }
                 sourcePrinted = true;
             }
 
             char statusString[64] = "";
             if (buildStatus == (cl_build_status)CL_BUILD_SUCCESS)
-              sprintf(statusString, "CL_BUILD_SUCCESS");
+                sprintf(statusString, "CL_BUILD_SUCCESS");
             else if (buildStatus == (cl_build_status)CL_BUILD_NONE)
-              sprintf(statusString, "CL_BUILD_NONE");
+                sprintf(statusString, "CL_BUILD_NONE");
             else if (buildStatus == (cl_build_status)CL_BUILD_ERROR)
-              sprintf(statusString, "CL_BUILD_ERROR");
+                sprintf(statusString, "CL_BUILD_ERROR");
             else if (buildStatus == (cl_build_status)CL_BUILD_IN_PROGRESS)
-              sprintf(statusString, "CL_BUILD_IN_PROGRESS");
+                sprintf(statusString, "CL_BUILD_IN_PROGRESS");
             else
-              sprintf(statusString, "UNKNOWN (%d)", buildStatus);
+                sprintf(statusString, "UNKNOWN (%d)", buildStatus);
 
-            log_error("Build not successful for device \"%s\", status: %s\n", deviceName, statusString);
+            log_error("Build not successful for device \"%s\", status: %s\n",
+                      deviceName, statusString);
 
             size_t paramSize = 0;
-            error = clGetProgramBuildInfo(program, device_list[z], CL_PROGRAM_BUILD_LOG, 0, NULL, &paramSize);
-            if(check_error(error, "clGetProgramBuildInfo CL_PROGRAM_BUILD_LOG failed")) break;
+            error = clGetProgramBuildInfo(program, device_list[z],
+                                          CL_PROGRAM_BUILD_LOG, 0, NULL,
+                                          &paramSize);
+            if (check_error(
+                    error, "clGetProgramBuildInfo CL_PROGRAM_BUILD_LOG failed"))
+                break;
 
             std::string log;
-            log.resize(paramSize/sizeof(char));
+            log.resize(paramSize / sizeof(char));
 
-            error = clGetProgramBuildInfo(program, device_list[z], CL_PROGRAM_BUILD_LOG, paramSize, &log[0], NULL);
-            if(check_error(error, "Device %d (%s) failed to return a build log", z, deviceName)) break;
-            if(log[0] == 0) log_error("clGetProgramBuildInfo returned an empty log.\n");
+            error = clGetProgramBuildInfo(program, device_list[z],
+                                          CL_PROGRAM_BUILD_LOG, paramSize,
+                                          &log[0], NULL);
+            if (check_error(error,
+                            "Device %d (%s) failed to return a build log", z,
+                            deviceName))
+                break;
+            if (log[0] == 0)
+                log_error("clGetProgramBuildInfo returned an empty log.\n");
             else
             {
                 log_error("Build log:\n", deviceName);
@@ -433,25 +549,29 @@
 
 static void l_load_abilities(cl_device_id device)
 {
-    l_has_half       = is_extension_available(device,"cl_khr_fp16");
-    l_has_double     = is_extension_available(device,"cl_khr_fp64");
-    l_has_cles_int64 = is_extension_available(device,"cles_khr_int64");
+    l_has_half = is_extension_available(device, "cl_khr_fp16");
+    l_has_double = is_extension_available(device, "cl_khr_fp64");
+    l_has_cles_int64 = is_extension_available(device, "cles_khr_int64");
 
-    l_has_int64_atomics
-    =  is_extension_available(device,"cl_khr_int64_base_atomics")
-    && is_extension_available(device,"cl_khr_int64_extended_atomics");
+    l_has_int64_atomics =
+        is_extension_available(device, "cl_khr_int64_base_atomics")
+        && is_extension_available(device, "cl_khr_int64_extended_atomics");
 
     {
         int status = CL_SUCCESS;
         cl_uint addr_bits = 32;
-        status = clGetDeviceInfo(device,CL_DEVICE_ADDRESS_BITS,sizeof(addr_bits),&addr_bits,0);
-        l_64bit_device = ( status == CL_SUCCESS && addr_bits == 64 );
+        status = clGetDeviceInfo(device, CL_DEVICE_ADDRESS_BITS,
+                                 sizeof(addr_bits), &addr_bits, 0);
+        l_64bit_device = (status == CL_SUCCESS && addr_bits == 64);
     }
 
     // 32-bit devices always have intptr atomics.
     l_has_intptr_atomics = !l_64bit_device || l_has_int64_atomics;
 
-    union { char c[4]; int i; } probe;
+    union {
+        char c[4];
+        int i;
+    } probe;
     probe.i = 1;
     l_host_is_big_endian = !probe.c[0];
 
@@ -459,33 +579,40 @@
     {
         int status = CL_SUCCESS;
         cl_uint max_dim = 0;
-        status = clGetDeviceInfo(device,CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS,sizeof(max_dim),&max_dim,0);
-        assert( status == CL_SUCCESS );
-        assert( max_dim > 0 );
+        status = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS,
+                                 sizeof(max_dim), &max_dim, 0);
+        assert(status == CL_SUCCESS);
+        assert(max_dim > 0);
         size_t max_id[3];
         max_id[0] = 0;
-    status = clGetDeviceInfo(device,CL_DEVICE_MAX_WORK_ITEM_SIZES,max_dim*sizeof(size_t),&max_id[0],0);
-        assert( status == CL_SUCCESS );
+        status = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES,
+                                 max_dim * sizeof(size_t), &max_id[0], 0);
+        assert(status == CL_SUCCESS);
         l_max_global_id0 = max_id[0];
     }
 
     { // Is separate compilation supported?
         int status = CL_SUCCESS;
         l_linker_available = false;
-        status = clGetDeviceInfo(device,CL_DEVICE_LINKER_AVAILABLE,sizeof(l_linker_available),&l_linker_available,0);
-        assert( status == CL_SUCCESS );
+        status =
+            clGetDeviceInfo(device, CL_DEVICE_LINKER_AVAILABLE,
+                            sizeof(l_linker_available), &l_linker_available, 0);
+        assert(status == CL_SUCCESS);
     }
 }
 
 
 static const char* l_get_fp64_pragma(void)
 {
-    return l_has_double ? "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n" : "";
+    return l_has_double ? "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
+                        : "";
 }
 
 static const char* l_get_cles_int64_pragma(void)
 {
-    return l_has_cles_int64 ? "#pragma OPENCL EXTENSION cles_khr_int64 : enable\n" : "";
+    return l_has_cles_int64
+        ? "#pragma OPENCL EXTENSION cles_khr_int64 : enable\n"
+        : "";
 }
 
 static const char* l_get_int64_atomic_pragma(void)
@@ -500,89 +627,83 @@
     size_t iscalar = 0;
     size_t ivecsize = 0;
     int vecsizes[] = { 2, 3, 4, 8, 16 };
-    const char* vecbase[] = {
-        "uchar", "char",
-        "ushort", "short",
-        "uint", "int",
-        "ulong", "long",
-        "float",
-        "double"
-    };
-    int vecbase_size[] = {
-        1, 1,
-        2, 2,
-        4, 4,
-        8, 8,
-        4,
-        8
-    };
-    const char* like_size_t[] = {
-        "intptr_t",
-        "uintptr_t",
-        "size_t",
-        "ptrdiff_t"
-    };
+    const char* vecbase[] = { "uchar", "char",  "ushort", "short", "uint",
+                              "int",   "ulong", "long",   "float", "double" };
+    int vecbase_size[] = { 1, 1, 2, 2, 4, 4, 8, 8, 4, 8 };
+    const char* like_size_t[] = { "intptr_t", "uintptr_t", "size_t",
+                                  "ptrdiff_t" };
     const char* atomics[] = {
-        "atomic_int", "atomic_uint",
-        "atomic_long", "atomic_ulong",
-        "atomic_float",
-        "atomic_double",
+        "atomic_int",   "atomic_uint",  "atomic_long",
+        "atomic_ulong", "atomic_float", "atomic_double",
     };
-    int atomics_size[] = {
-        4, 4,
-        8, 8,
-        4,
-        8
-    };
-    const char* intptr_atomics[] = {
-        "atomic_intptr_t",
-        "atomic_uintptr_t",
-        "atomic_size_t",
-        "atomic_ptrdiff_t"
-    };
+    int atomics_size[] = { 4, 4, 8, 8, 4, 8 };
+    const char* intptr_atomics[] = { "atomic_intptr_t", "atomic_uintptr_t",
+                                     "atomic_size_t", "atomic_ptrdiff_t" };
 
     l_load_abilities(device);
     num_type_info = 0;
 
     // Boolean.
-    type_info[ num_type_info++ ] = TypeInfo( "bool" ).set_bool().set_size(1).set_buf_elem_type("uchar");
+    type_info[num_type_info++] =
+        TypeInfo("bool").set_bool().set_size(1).set_buf_elem_type("uchar");
 
     // Vector types, and the related scalar element types.
-    for ( iscalar=0; iscalar < sizeof(vecbase)/sizeof(vecbase[0]) ; ++iscalar ) {
-        if ( !gHasLong && strstr(vecbase[iscalar],"long") ) continue;
-        if ( !l_has_double && strstr(vecbase[iscalar],"double") ) continue;
+    for (iscalar = 0; iscalar < sizeof(vecbase) / sizeof(vecbase[0]); ++iscalar)
+    {
+        if (!gHasLong && strstr(vecbase[iscalar], "long")) continue;
+        if (!l_has_double && strstr(vecbase[iscalar], "double")) continue;
 
         // Scalar
         TypeInfo* elem_type = type_info + num_type_info++;
-        *elem_type = TypeInfo( vecbase[iscalar] ).set_vecbase().set_size( vecbase_size[iscalar] );
+        *elem_type = TypeInfo(vecbase[iscalar])
+                         .set_vecbase()
+                         .set_size(vecbase_size[iscalar]);
 
         // Vector
-        for ( ivecsize=0; ivecsize < sizeof(vecsizes)/sizeof(vecsizes[0]) ; ivecsize++ ) {
-            type_info[ num_type_info++ ] = TypeInfo( elem_type, vecsizes[ivecsize] );
+        for (ivecsize = 0; ivecsize < sizeof(vecsizes) / sizeof(vecsizes[0]);
+             ivecsize++)
+        {
+            type_info[num_type_info++] =
+                TypeInfo(elem_type, vecsizes[ivecsize]);
         }
     }
 
     // Size_t-like types
-    for ( iscalar=0; iscalar < sizeof(like_size_t)/sizeof(like_size_t[0]) ; ++iscalar ) {
-        type_info[ num_type_info++ ] = TypeInfo( like_size_t[iscalar] ).set_like_size_t();
+    for (iscalar = 0; iscalar < sizeof(like_size_t) / sizeof(like_size_t[0]);
+         ++iscalar)
+    {
+        type_info[num_type_info++] =
+            TypeInfo(like_size_t[iscalar]).set_like_size_t();
     }
 
     // Atomic types.
-    for ( iscalar=0; iscalar < sizeof(atomics)/sizeof(atomics[0]) ; ++iscalar ) {
-        if ( !l_has_int64_atomics && strstr(atomics[iscalar],"long") ) continue;
-        if ( !(l_has_int64_atomics && l_has_double) && strstr(atomics[iscalar],"double") ) continue;
+    for (iscalar = 0; iscalar < sizeof(atomics) / sizeof(atomics[0]); ++iscalar)
+    {
+        if (!l_has_int64_atomics && strstr(atomics[iscalar], "long")) continue;
+        if (!(l_has_int64_atomics && l_has_double)
+            && strstr(atomics[iscalar], "double"))
+            continue;
 
         // The +7 is used to skip over the "atomic_" prefix.
         const char* buf_type = atomics[iscalar] + 7;
-        type_info[ num_type_info++ ] = TypeInfo( atomics[iscalar] ).set_atomic().set_size( atomics_size[iscalar] ).set_buf_elem_type( buf_type );
+        type_info[num_type_info++] = TypeInfo(atomics[iscalar])
+                                         .set_atomic()
+                                         .set_size(atomics_size[iscalar])
+                                         .set_buf_elem_type(buf_type);
     }
-    if ( l_has_intptr_atomics ) {
-        for ( iscalar=0; iscalar < sizeof(intptr_atomics)/sizeof(intptr_atomics[0]) ; ++iscalar ) {
-            type_info[ num_type_info++ ] = TypeInfo( intptr_atomics[iscalar] ).set_atomic().set_like_size_t();
+    if (l_has_intptr_atomics)
+    {
+        for (iscalar = 0;
+             iscalar < sizeof(intptr_atomics) / sizeof(intptr_atomics[0]);
+             ++iscalar)
+        {
+            type_info[num_type_info++] = TypeInfo(intptr_atomics[iscalar])
+                                             .set_atomic()
+                                             .set_like_size_t();
         }
     }
 
-    assert( num_type_info <= MAX_TYPES ); // or increase MAX_TYPES
+    assert(num_type_info <= MAX_TYPES); // or increase MAX_TYPES
 
 #if 0
     for ( size_t i = 0 ; i < num_type_info ; i++ ) {
@@ -594,7 +715,7 @@
     return status;
 }
 
-static const TypeInfo& l_find_type( const char* name )
+static const TypeInfo& l_find_type(const char* name)
 {
     auto itr =
         std::find_if(type_info, type_info + num_type_info,
@@ -604,36 +725,54 @@
 }
 
 
+// Populate return parameters for max program variable size, preferred program
+// variable size.
 
-// Populate return parameters for max program variable size, preferred program variable size.
-
-static int l_get_device_info(cl_device_id device, size_t* max_size_ret, size_t* pref_size_ret)
+static int l_get_device_info(cl_device_id device, size_t* max_size_ret,
+                             size_t* pref_size_ret)
 {
     int err = CL_SUCCESS;
     size_t return_size = 0;
 
-    err = clGetDeviceInfo(device, CL_DEVICE_MAX_GLOBAL_VARIABLE_SIZE, sizeof(*max_size_ret), max_size_ret, &return_size);
-    if ( err != CL_SUCCESS ) {
-        log_error("Error: Failed to get device info for CL_DEVICE_MAX_GLOBAL_VARIABLE_SIZE\n");
+    err = clGetDeviceInfo(device, CL_DEVICE_MAX_GLOBAL_VARIABLE_SIZE,
+                          sizeof(*max_size_ret), max_size_ret, &return_size);
+    if (err != CL_SUCCESS)
+    {
+        log_error("Error: Failed to get device info for "
+                  "CL_DEVICE_MAX_GLOBAL_VARIABLE_SIZE\n");
         return err;
     }
-    if ( return_size != sizeof(size_t) ) {
-        log_error("Error: Invalid size %d returned for CL_DEVICE_MAX_GLOBAL_VARIABLE_SIZE\n", (int)return_size );
+    if (return_size != sizeof(size_t))
+    {
+        log_error("Error: Invalid size %d returned for "
+                  "CL_DEVICE_MAX_GLOBAL_VARIABLE_SIZE\n",
+                  (int)return_size);
         return 1;
     }
-    if ( return_size != sizeof(size_t) ) {
-        log_error("Error: Invalid size %d returned for CL_DEVICE_MAX_GLOBAL_VARIABLE_SIZE\n", (int)return_size );
+    if (return_size != sizeof(size_t))
+    {
+        log_error("Error: Invalid size %d returned for "
+                  "CL_DEVICE_MAX_GLOBAL_VARIABLE_SIZE\n",
+                  (int)return_size);
         return 1;
     }
 
     return_size = 0;
-    err = clGetDeviceInfo(device, CL_DEVICE_GLOBAL_VARIABLE_PREFERRED_TOTAL_SIZE, sizeof(*pref_size_ret), pref_size_ret, &return_size);
-    if ( err != CL_SUCCESS ) {
-        log_error("Error: Failed to get device info for CL_DEVICE_GLOBAL_VARIABLE_PREFERRED_TOTAL_SIZE: %d\n",err);
+    err =
+        clGetDeviceInfo(device, CL_DEVICE_GLOBAL_VARIABLE_PREFERRED_TOTAL_SIZE,
+                        sizeof(*pref_size_ret), pref_size_ret, &return_size);
+    if (err != CL_SUCCESS)
+    {
+        log_error("Error: Failed to get device info for "
+                  "CL_DEVICE_GLOBAL_VARIABLE_PREFERRED_TOTAL_SIZE: %d\n",
+                  err);
         return err;
     }
-    if ( return_size != sizeof(size_t) ) {
-        log_error("Error: Invalid size %d returned for CL_DEVICE_GLOBAL_VARIABLE_PREFERRED_TOTAL_SIZE\n", (int)return_size );
+    if (return_size != sizeof(size_t))
+    {
+        log_error("Error: Invalid size %d returned for "
+                  "CL_DEVICE_GLOBAL_VARIABLE_PREFERRED_TOTAL_SIZE\n",
+                  (int)return_size);
         return 1;
     }
 
@@ -641,11 +780,13 @@
 }
 
 
-static void l_set_randomly( cl_uchar* buf, size_t buf_size, RandomSeed& rand_state )
+static void l_set_randomly(cl_uchar* buf, size_t buf_size,
+                           RandomSeed& rand_state)
 {
-    assert( 0 == (buf_size % sizeof(cl_uint) ) );
-    for ( size_t i = 0; i < buf_size ; i += sizeof(cl_uint) ) {
-        *( (cl_uint*)(buf + i) ) = genrand_int32( rand_state );
+    assert(0 == (buf_size % sizeof(cl_uint)));
+    for (size_t i = 0; i < buf_size; i += sizeof(cl_uint))
+    {
+        *((cl_uint*)(buf + i)) = genrand_int32(rand_state);
     }
 #if 0
     for ( size_t i = 0; i < buf_size ; i++ ) {
@@ -657,20 +798,23 @@
 
 // Return num_value values of the given type.
 // Returns CL_SUCCESS if they compared as equal.
-static int l_compare( const char* test_name, const cl_uchar* expected, const cl_uchar* received, size_t num_values, const TypeInfo&ti )
+static int l_compare(const char* test_name, const cl_uchar* expected,
+                     const cl_uchar* received, size_t num_values,
+                     const TypeInfo& ti)
 {
     // Compare only the valid returned bytes.
-    for ( unsigned value_idx = 0; value_idx < num_values; value_idx++ ) {
+    for (unsigned value_idx = 0; value_idx < num_values; value_idx++)
+    {
         const cl_uchar* expv = expected + value_idx * ti.get_size();
         const cl_uchar* gotv = received + value_idx * ti.get_size();
-        if ( memcmp( expv, gotv, ti.get_value_size() ) ) {
-            std::string exp_str = ti.as_string( expv );
-            std::string got_str = ti.as_string( gotv );
-            log_error("Error: %s test for type %s, at index %d: Expected %s got %s\n",
-                    test_name,
-                    ti.get_name_c_str(), value_idx,
-                    exp_str.c_str(),
-                    got_str.c_str() );
+        if (memcmp(expv, gotv, ti.get_value_size()))
+        {
+            std::string exp_str = ti.as_string(expv);
+            std::string got_str = ti.as_string(gotv);
+            log_error(
+                "Error: %s test for type %s, at index %d: Expected %s got %s\n",
+                test_name, ti.get_name_c_str(), value_idx, exp_str.c_str(),
+                got_str.c_str());
             return 1;
         }
     }
@@ -678,11 +822,12 @@
 }
 
 // Copy a target value from src[idx] to dest[idx]
-static int l_copy( cl_uchar* dest, unsigned dest_idx, const cl_uchar* src, unsigned src_idx, const TypeInfo&ti )
+static int l_copy(cl_uchar* dest, unsigned dest_idx, const cl_uchar* src,
+                  unsigned src_idx, const TypeInfo& ti)
 {
-    cl_uchar* raw_dest      = dest + dest_idx * ti.get_size();
-    const cl_uchar* raw_src =  src +  src_idx * ti.get_size();
-    memcpy( raw_dest, raw_src,  ti.get_value_size() );
+    cl_uchar* raw_dest = dest + dest_idx * ti.get_size();
+    const cl_uchar* raw_src = src + src_idx * ti.get_size();
+    memcpy(raw_dest, raw_src, ti.get_value_size());
 
     return 0;
 }
@@ -694,59 +839,70 @@
     static char buf[MAX_STR];
     int num_printed = 0;
     // The atomic types just use the base type.
-    if ( ti.is_atomic() || 0 == strcmp( ti.get_buf_elem_type(), ti.get_name_c_str() ) ) {
+    if (ti.is_atomic()
+        || 0 == strcmp(ti.get_buf_elem_type(), ti.get_name_c_str()))
+    {
         // The type is represented in a buffer by itself.
-        num_printed = snprintf(buf,MAX_STR,
-                "%s from_buf(%s a) { return a; }\n"
-                "%s to_buf(%s a) { return a; }\n",
-                ti.get_buf_elem_type(), ti.get_buf_elem_type(),
-                ti.get_buf_elem_type(), ti.get_buf_elem_type() );
-    } else {
+        num_printed = snprintf(buf, MAX_STR,
+                               "%s from_buf(%s a) { return a; }\n"
+                               "%s to_buf(%s a) { return a; }\n",
+                               ti.get_buf_elem_type(), ti.get_buf_elem_type(),
+                               ti.get_buf_elem_type(), ti.get_buf_elem_type());
+    }
+    else
+    {
         // Just use C-style cast.
-        num_printed = snprintf(buf,MAX_STR,
-                "%s from_buf(%s a) { return (%s)a; }\n"
-                "%s to_buf(%s a) { return (%s)a; }\n",
-                ti.get_name_c_str(), ti.get_buf_elem_type(), ti.get_name_c_str(),
-                ti.get_buf_elem_type(), ti.get_name_c_str(), ti.get_buf_elem_type() );
+        num_printed = snprintf(buf, MAX_STR,
+                               "%s from_buf(%s a) { return (%s)a; }\n"
+                               "%s to_buf(%s a) { return (%s)a; }\n",
+                               ti.get_name_c_str(), ti.get_buf_elem_type(),
+                               ti.get_name_c_str(), ti.get_buf_elem_type(),
+                               ti.get_name_c_str(), ti.get_buf_elem_type());
     }
     // Add initializations.
-    if ( ti.is_atomic() ) {
-        num_printed += snprintf( buf + num_printed, MAX_STR-num_printed,
-                "#define INIT_VAR(a) ATOMIC_VAR_INIT(a)\n" );
-    } else {
-        // This cast works even if the target type is a vector type.
-        num_printed += snprintf( buf + num_printed, MAX_STR-num_printed,
-                "#define INIT_VAR(a) ((%s)(a))\n", ti.get_name_c_str());
+    if (ti.is_atomic())
+    {
+        num_printed += snprintf(buf + num_printed, MAX_STR - num_printed,
+                                "#define INIT_VAR(a) ATOMIC_VAR_INIT(a)\n");
     }
-    assert( num_printed < MAX_STR ); // or increase MAX_STR
+    else
+    {
+        // This cast works even if the target type is a vector type.
+        num_printed +=
+            snprintf(buf + num_printed, MAX_STR - num_printed,
+                     "#define INIT_VAR(a) ((%s)(a))\n", ti.get_name_c_str());
+    }
+    assert(num_printed < MAX_STR); // or increase MAX_STR
     result = buf;
     return result;
 }
 
-static std::string global_decls(const TypeInfo& ti, bool with_init )
+static std::string global_decls(const TypeInfo& ti, bool with_init)
 {
     const char* tn = ti.get_name_c_str();
     const char* vol = (ti.is_atomic() ? " volatile " : " ");
     static char decls[MAX_STR];
     int num_printed = 0;
-    if ( with_init ) {
-        const char *decls_template_with_init =
+    if (with_init)
+    {
+        const char* decls_template_with_init =
             "%s %s var = INIT_VAR(0);\n"
             "global %s %s g_var = INIT_VAR(1);\n"
             "%s %s a_var[2] = { INIT_VAR(1), INIT_VAR(1) };\n"
             "volatile global %s %s* p_var = &a_var[1];\n\n";
-        num_printed = snprintf(decls,sizeof(decls),decls_template_with_init,
-                vol,tn,vol,tn,vol,tn,vol,tn);
-    } else {
-        const char *decls_template_no_init =
-            "%s %s var;\n"
-            "global %s %s g_var;\n"
-            "%s %s a_var[2];\n"
-            "global %s %s* p_var;\n\n";
-        num_printed = snprintf(decls,sizeof(decls),decls_template_no_init,
-             vol,tn,vol,tn,vol,tn,vol,tn);
+        num_printed = snprintf(decls, sizeof(decls), decls_template_with_init,
+                               vol, tn, vol, tn, vol, tn, vol, tn);
     }
-    assert( num_printed < sizeof(decls) );
+    else
+    {
+        const char* decls_template_no_init = "%s %s var;\n"
+                                             "global %s %s g_var;\n"
+                                             "%s %s a_var[2];\n"
+                                             "global %s %s* p_var;\n\n";
+        num_printed = snprintf(decls, sizeof(decls), decls_template_no_init,
+                               vol, tn, vol, tn, vol, tn, vol, tn);
+    }
+    assert(num_printed < sizeof(decls));
     return std::string(decls);
 }
 
@@ -761,18 +917,26 @@
 
     // all() should only be used on vector inputs. For scalar comparison, the
     // result of the equality operator can be used as a bool value.
-    const bool is_scalar = ti.num_elem() == 0; // 0 is used to represent scalar types, not 1.
+    const bool is_scalar =
+        ti.num_elem() == 0; // 0 is used to represent scalar types, not 1.
     const std::string is_equality_true = is_scalar ? "" : "all";
 
     std::string code = "kernel void global_check(global int* out) {\n";
     code += "  const " + type_name + " zero = ((" + type_name + ")0);\n";
     code += "  bool status = true;\n";
-    if (ti.is_atomic()) {
-        code += "  status &= " + is_equality_true + "(atomic_load(&var) == zero);\n";
-        code += "  status &= " + is_equality_true + "(atomic_load(&g_var) == zero);\n";
-        code += "  status &= " + is_equality_true + "(atomic_load(&a_var[0]) == zero);\n";
-        code += "  status &= " + is_equality_true + "(atomic_load(&a_var[1]) == zero);\n";
-    } else {
+    if (ti.is_atomic())
+    {
+        code += "  status &= " + is_equality_true
+            + "(atomic_load(&var) == zero);\n";
+        code += "  status &= " + is_equality_true
+            + "(atomic_load(&g_var) == zero);\n";
+        code += "  status &= " + is_equality_true
+            + "(atomic_load(&a_var[0]) == zero);\n";
+        code += "  status &= " + is_equality_true
+            + "(atomic_load(&a_var[1]) == zero);\n";
+    }
+    else
+    {
         code += "  status &= " + is_equality_true + "(var == zero);\n";
         code += "  status &= " + is_equality_true + "(g_var == zero);\n";
         code += "  status &= " + is_equality_true + "(a_var[0] == zero);\n";
@@ -792,7 +956,8 @@
 {
     static char writer_src[MAX_STR];
     int num_printed = 0;
-    if ( !ti.is_atomic() ) {
+    if (!ti.is_atomic())
+    {
         const char* writer_template_normal =
             "kernel void writer( global %s* src, uint idx ) {\n"
             "  var = from_buf(src[0]);\n"
@@ -801,8 +966,11 @@
             "  a_var[1] = from_buf(src[3]);\n"
             "  p_var = a_var + idx;\n"
             "}\n\n";
-        num_printed = snprintf(writer_src,sizeof(writer_src),writer_template_normal,ti.get_buf_elem_type());
-    } else {
+        num_printed = snprintf(writer_src, sizeof(writer_src),
+                               writer_template_normal, ti.get_buf_elem_type());
+    }
+    else
+    {
         const char* writer_template_atomic =
             "kernel void writer( global %s* src, uint idx ) {\n"
             "  atomic_store( &var, from_buf(src[0]) );\n"
@@ -811,9 +979,10 @@
             "  atomic_store( &a_var[1], from_buf(src[3]) );\n"
             "  p_var = a_var + idx;\n"
             "}\n\n";
-        num_printed = snprintf(writer_src,sizeof(writer_src),writer_template_atomic,ti.get_buf_elem_type());
+        num_printed = snprintf(writer_src, sizeof(writer_src),
+                               writer_template_atomic, ti.get_buf_elem_type());
     }
-    assert( num_printed < sizeof(writer_src) );
+    assert(num_printed < sizeof(writer_src));
     std::string result = writer_src;
     return result;
 }
@@ -826,7 +995,8 @@
 {
     static char reader_src[MAX_STR];
     int num_printed = 0;
-    if ( !ti.is_atomic() ) {
+    if (!ti.is_atomic())
+    {
         const char* reader_template_normal =
             "kernel void reader( global %s* dest, %s ptr_write_val ) {\n"
             "  *p_var = from_buf(ptr_write_val);\n"
@@ -835,8 +1005,12 @@
             "  dest[2] = to_buf(a_var[0]);\n"
             "  dest[3] = to_buf(a_var[1]);\n"
             "}\n\n";
-        num_printed = snprintf(reader_src,sizeof(reader_src),reader_template_normal,ti.get_buf_elem_type(),ti.get_buf_elem_type());
-    } else {
+        num_printed =
+            snprintf(reader_src, sizeof(reader_src), reader_template_normal,
+                     ti.get_buf_elem_type(), ti.get_buf_elem_type());
+    }
+    else
+    {
         const char* reader_template_atomic =
             "kernel void reader( global %s* dest, %s ptr_write_val ) {\n"
             "  atomic_store( p_var, from_buf(ptr_write_val) );\n"
@@ -845,40 +1019,53 @@
             "  dest[2] = to_buf( atomic_load( &a_var[0] ) );\n"
             "  dest[3] = to_buf( atomic_load( &a_var[1] ) );\n"
             "}\n\n";
-        num_printed = snprintf(reader_src,sizeof(reader_src),reader_template_atomic,ti.get_buf_elem_type(),ti.get_buf_elem_type());
+        num_printed =
+            snprintf(reader_src, sizeof(reader_src), reader_template_atomic,
+                     ti.get_buf_elem_type(), ti.get_buf_elem_type());
     }
-    assert( num_printed < sizeof(reader_src) );
+    assert(num_printed < sizeof(reader_src));
     std::string result = reader_src;
     return result;
 }
 
 // Check that all globals where appropriately default-initialized.
-static int check_global_initialization(cl_context context, cl_program program, cl_command_queue queue)
+static int check_global_initialization(cl_context context, cl_program program,
+                                       cl_command_queue queue)
 {
     int status = CL_SUCCESS;
 
     // Create a buffer on device to store a unique integer.
     cl_int is_init_valid = 0;
-    clMemWrapper buffer(clCreateBuffer(context, CL_MEM_WRITE_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(is_init_valid), &is_init_valid, &status));
+    clMemWrapper buffer(
+        clCreateBuffer(context, CL_MEM_WRITE_ONLY | CL_MEM_COPY_HOST_PTR,
+                       sizeof(is_init_valid), &is_init_valid, &status));
     test_error_ret(status, "Failed to allocate buffer", status);
 
     // Create, setup and invoke kernel.
-    clKernelWrapper global_check(clCreateKernel(program, "global_check", &status));
+    clKernelWrapper global_check(
+        clCreateKernel(program, "global_check", &status));
     test_error_ret(status, "Failed to create global_check kernel", status);
     status = clSetKernelArg(global_check, 0, sizeof(cl_mem), &buffer);
-    test_error_ret(status, "Failed to set up argument for the global_check kernel", status);
+    test_error_ret(status,
+                   "Failed to set up argument for the global_check kernel",
+                   status);
     const cl_uint work_dim = 1;
     const size_t global_work_offset[] = { 0 };
     const size_t global_work_size[] = { 1 };
-    status = clEnqueueNDRangeKernel(queue, global_check, work_dim, global_work_offset, global_work_size, nullptr, 0, nullptr, nullptr);
+    status = clEnqueueNDRangeKernel(queue, global_check, work_dim,
+                                    global_work_offset, global_work_size,
+                                    nullptr, 0, nullptr, nullptr);
     test_error_ret(status, "Failed to run global_check kernel", status);
     status = clFinish(queue);
     test_error_ret(status, "clFinish() failed", status);
 
     // Read back the memory buffer from the device.
-    status = clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, sizeof(is_init_valid), &is_init_valid, 0, nullptr, nullptr);
+    status =
+        clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, sizeof(is_init_valid),
+                            &is_init_valid, 0, nullptr, nullptr);
     test_error_ret(status, "Failed to read buffer from device", status);
-    if (is_init_valid == 0) {
+    if (is_init_valid == 0)
+    {
         log_error("Unexpected default values were detected");
         return 1;
     }
@@ -887,58 +1074,75 @@
 }
 
 // Check write-then-read.
-static int l_write_read( cl_device_id device, cl_context context, cl_command_queue queue )
+static int l_write_read(cl_device_id device, cl_context context,
+                        cl_command_queue queue)
 {
     int status = CL_SUCCESS;
     int itype;
 
-    RandomSeed rand_state( gRandomSeed );
+    RandomSeed rand_state(gRandomSeed);
 
-    for ( itype = 0; itype < num_type_info ; itype++ ) {
-        status = status | l_write_read_for_type(device,context,queue,type_info[itype], rand_state );
+    for (itype = 0; itype < num_type_info; itype++)
+    {
+        status = status
+            | l_write_read_for_type(device, context, queue, type_info[itype],
+                                    rand_state);
         FLUSH;
     }
 
     return status;
 }
 
-static int l_write_read_for_type( cl_device_id device, cl_context context, cl_command_queue queue, const TypeInfo& ti, RandomSeed& rand_state )
+static int l_write_read_for_type(cl_device_id device, cl_context context,
+                                 cl_command_queue queue, const TypeInfo& ti,
+                                 RandomSeed& rand_state)
 {
     int err = CL_SUCCESS;
-    std::string type_name( ti.get_name() );
+    std::string type_name(ti.get_name());
     const char* tn = type_name.c_str();
-    log_info("  %s ",tn);
+    log_info("  %s ", tn);
 
     StringTable ksrc;
-    ksrc.add( l_get_fp64_pragma() );
-    ksrc.add( l_get_cles_int64_pragma() );
-    if (ti.is_atomic_64bit())
-      ksrc.add( l_get_int64_atomic_pragma() );
-    ksrc.add( conversion_functions(ti) );
-    ksrc.add( global_decls(ti,false) );
-    ksrc.add( global_check_function(ti) );
-    ksrc.add( writer_function(ti) );
-    ksrc.add( reader_function(ti) );
+    ksrc.add(l_get_fp64_pragma());
+    ksrc.add(l_get_cles_int64_pragma());
+    if (ti.is_atomic_64bit()) ksrc.add(l_get_int64_atomic_pragma());
+    ksrc.add(conversion_functions(ti));
+    ksrc.add(global_decls(ti, false));
+    ksrc.add(global_check_function(ti));
+    ksrc.add(writer_function(ti));
+    ksrc.add(reader_function(ti));
 
     int status = CL_SUCCESS;
     clProgramWrapper program;
     clKernelWrapper writer;
 
-    status = create_single_kernel_helper_with_build_options(context, &program, &writer, ksrc.num_str(), ksrc.strs(), "writer", OPTIONS);
-    test_error_ret(status,"Failed to create program for read-after-write test",status);
+    status = create_single_kernel_helper_with_build_options(
+        context, &program, &writer, ksrc.num_str(), ksrc.strs(), "writer",
+        OPTIONS);
+    test_error_ret(status, "Failed to create program for read-after-write test",
+                   status);
 
-    clKernelWrapper reader( clCreateKernel( program, "reader", &status ) );
-    test_error_ret(status,"Failed to create reader kernel for read-after-write test",status);
+    clKernelWrapper reader(clCreateKernel(program, "reader", &status));
+    test_error_ret(status,
+                   "Failed to create reader kernel for read-after-write test",
+                   status);
 
     // Check size query.
     size_t used_bytes = 0;
-    status = clGetProgramBuildInfo( program, device, CL_PROGRAM_BUILD_GLOBAL_VARIABLE_TOTAL_SIZE, sizeof(used_bytes), &used_bytes, 0 );
-    test_error_ret(status,"Failed to query global variable total size",status);
-    size_t expected_used_bytes =
-        (NUM_TESTED_VALUES-1)*ti.get_size() // Two regular variables and an array of 2 elements.
-        + ( l_64bit_device ? 8 : 4 ); // The pointer
-    if ( used_bytes < expected_used_bytes ) {
-        log_error("Error program query for global variable total size query failed: Expected at least %llu but got %llu\n", (unsigned long long)expected_used_bytes, (unsigned long long)used_bytes );
+    status = clGetProgramBuildInfo(program, device,
+                                   CL_PROGRAM_BUILD_GLOBAL_VARIABLE_TOTAL_SIZE,
+                                   sizeof(used_bytes), &used_bytes, 0);
+    test_error_ret(status, "Failed to query global variable total size",
+                   status);
+    size_t expected_used_bytes = (NUM_TESTED_VALUES - 1)
+            * ti.get_size() // Two regular variables and an array of 2 elements.
+        + (l_64bit_device ? 8 : 4); // The pointer
+    if (used_bytes < expected_used_bytes)
+    {
+        log_error("Error program query for global variable total size query "
+                  "failed: Expected at least %llu but got %llu\n",
+                  (unsigned long long)expected_used_bytes,
+                  (unsigned long long)used_bytes);
         err |= 1;
     }
 
@@ -951,90 +1155,131 @@
     cl_uchar* write_data = (cl_uchar*)align_malloc(write_data_size, ALIGNMENT);
     cl_uchar* read_data = (cl_uchar*)align_malloc(read_data_size, ALIGNMENT);
 
-    clMemWrapper write_mem( clCreateBuffer( context, CL_MEM_USE_HOST_PTR, write_data_size, write_data, &status ) );
-    test_error_ret(status,"Failed to allocate write buffer",status);
-    clMemWrapper read_mem( clCreateBuffer( context, CL_MEM_USE_HOST_PTR, read_data_size, read_data, &status ) );
-    test_error_ret(status,"Failed to allocate read buffer",status);
+    clMemWrapper write_mem(clCreateBuffer(
+        context, CL_MEM_USE_HOST_PTR, write_data_size, write_data, &status));
+    test_error_ret(status, "Failed to allocate write buffer", status);
+    clMemWrapper read_mem(clCreateBuffer(context, CL_MEM_USE_HOST_PTR,
+                                         read_data_size, read_data, &status));
+    test_error_ret(status, "Failed to allocate read buffer", status);
 
-    status = clSetKernelArg(writer,0,sizeof(cl_mem),&write_mem); test_error_ret(status,"set arg",status);
-    status = clSetKernelArg(reader,0,sizeof(cl_mem),&read_mem); test_error_ret(status,"set arg",status);
+    status = clSetKernelArg(writer, 0, sizeof(cl_mem), &write_mem);
+    test_error_ret(status, "set arg", status);
+    status = clSetKernelArg(reader, 0, sizeof(cl_mem), &read_mem);
+    test_error_ret(status, "set arg", status);
 
     // Boolean random data needs to be massaged a bit more.
-    const int num_rounds = ti.is_bool() ? (1 << NUM_TESTED_VALUES ) : NUM_ROUNDS;
+    const int num_rounds = ti.is_bool() ? (1 << NUM_TESTED_VALUES) : NUM_ROUNDS;
     unsigned bool_iter = 0;
 
-    for ( int iround = 0; iround < num_rounds ; iround++ ) {
-        for ( cl_uint iptr_idx = 0; iptr_idx < 2 ; iptr_idx++ ) { // Index into array, to write via pointer
+    for (int iround = 0; iround < num_rounds; iround++)
+    {
+        for (cl_uint iptr_idx = 0; iptr_idx < 2; iptr_idx++)
+        { // Index into array, to write via pointer
             // Generate new random data to push through.
-            // Generate 5 * 128 bytes all the time, even though the test for many types use less than all that.
+            // Generate 5 * 128 bytes all the time, even though the test for
+            // many types use less than all that.
 
-            cl_uchar *write_ptr = (cl_uchar *)clEnqueueMapBuffer(queue, write_mem, CL_TRUE, CL_MAP_WRITE, 0, write_data_size, 0, 0, 0, 0);
+            cl_uchar* write_ptr = (cl_uchar*)clEnqueueMapBuffer(
+                queue, write_mem, CL_TRUE, CL_MAP_WRITE, 0, write_data_size, 0,
+                0, 0, 0);
 
-            if ( ti.is_bool() ) {
+            if (ti.is_bool())
+            {
                 // For boolean, random data cast to bool isn't very random.
                 // So use the bottom bit of bool_value_iter to get true
                 // diversity.
-                for ( unsigned value_idx = 0; value_idx < NUM_TESTED_VALUES ; value_idx++ ) {
-                    write_data[value_idx] = (1<<value_idx) & bool_iter;
-                    //printf(" %s", (write_data[value_idx] ? "true" : "false" ));
+                for (unsigned value_idx = 0; value_idx < NUM_TESTED_VALUES;
+                     value_idx++)
+                {
+                    write_data[value_idx] = (1 << value_idx) & bool_iter;
+                    // printf(" %s", (write_data[value_idx] ? "true" : "false"
+                    // ));
                 }
                 bool_iter++;
-            } else {
-                l_set_randomly( write_data, write_data_size, rand_state );
             }
-            status = clSetKernelArg(writer,1,sizeof(cl_uint),&iptr_idx); test_error_ret(status,"set arg",status);
+            else
+            {
+                l_set_randomly(write_data, write_data_size, rand_state);
+            }
+            status = clSetKernelArg(writer, 1, sizeof(cl_uint), &iptr_idx);
+            test_error_ret(status, "set arg", status);
 
             // The value to write via the pointer should be taken from the
             // 5th typed slot of the write_data.
-            status = clSetKernelArg(reader,1,ti.get_size(),write_data + (NUM_TESTED_VALUES-1)*ti.get_size()); test_error_ret(status,"set arg",status);
+            status = clSetKernelArg(
+                reader, 1, ti.get_size(),
+                write_data + (NUM_TESTED_VALUES - 1) * ti.get_size());
+            test_error_ret(status, "set arg", status);
 
             // Determine the expected values.
             cl_uchar expected[read_data_size];
-            memset( expected, -1, sizeof(expected) );
-            l_copy( expected, 0, write_data, 0, ti );
-            l_copy( expected, 1, write_data, 1, ti );
-            l_copy( expected, 2, write_data, 2, ti );
-            l_copy( expected, 3, write_data, 3, ti );
-            // But we need to take into account the value from the pointer write.
-            // The 2 represents where the "a" array values begin in our read-back.
-            l_copy( expected, 2 + iptr_idx, write_data, 4, ti );
+            memset(expected, -1, sizeof(expected));
+            l_copy(expected, 0, write_data, 0, ti);
+            l_copy(expected, 1, write_data, 1, ti);
+            l_copy(expected, 2, write_data, 2, ti);
+            l_copy(expected, 3, write_data, 3, ti);
+            // But we need to take into account the value from the pointer
+            // write. The 2 represents where the "a" array values begin in our
+            // read-back.
+            l_copy(expected, 2 + iptr_idx, write_data, 4, ti);
 
             clEnqueueUnmapMemObject(queue, write_mem, write_ptr, 0, 0, 0);
 
-            if ( ti.is_bool() ) {
+            if (ti.is_bool())
+            {
                 // Collapse down to one bit.
-                for ( unsigned i = 0; i <  NUM_TESTED_VALUES-1 ; i++ ) expected[i] = (bool)expected[i];
+                for (unsigned i = 0; i < NUM_TESTED_VALUES - 1; i++)
+                    expected[i] = (bool)expected[i];
             }
 
-            cl_uchar *read_ptr = (cl_uchar *)clEnqueueMapBuffer(queue, read_mem, CL_TRUE, CL_MAP_READ, 0, read_data_size, 0, 0, 0, 0);
+            cl_uchar* read_ptr = (cl_uchar*)clEnqueueMapBuffer(
+                queue, read_mem, CL_TRUE, CL_MAP_READ, 0, read_data_size, 0, 0,
+                0, 0);
             memset(read_data, -1, read_data_size);
             clEnqueueUnmapMemObject(queue, read_mem, read_ptr, 0, 0, 0);
 
             // Now run the kernel
             const size_t one = 1;
-            status = clEnqueueNDRangeKernel(queue,writer,1,0,&one,0,0,0,0); test_error_ret(status,"enqueue writer",status);
-            status = clEnqueueNDRangeKernel(queue,reader,1,0,&one,0,0,0,0); test_error_ret(status,"enqueue reader",status);
-            status = clFinish(queue); test_error_ret(status,"finish",status);
+            status =
+                clEnqueueNDRangeKernel(queue, writer, 1, 0, &one, 0, 0, 0, 0);
+            test_error_ret(status, "enqueue writer", status);
+            status =
+                clEnqueueNDRangeKernel(queue, reader, 1, 0, &one, 0, 0, 0, 0);
+            test_error_ret(status, "enqueue reader", status);
+            status = clFinish(queue);
+            test_error_ret(status, "finish", status);
 
-            read_ptr = (cl_uchar *)clEnqueueMapBuffer(queue, read_mem, CL_TRUE, CL_MAP_READ, 0, read_data_size, 0, 0, 0, 0);
+            read_ptr = (cl_uchar*)clEnqueueMapBuffer(
+                queue, read_mem, CL_TRUE, CL_MAP_READ, 0, read_data_size, 0, 0,
+                0, 0);
 
-            if ( ti.is_bool() ) {
+            if (ti.is_bool())
+            {
                 // Collapse down to one bit.
-                for ( unsigned i = 0; i <  NUM_TESTED_VALUES-1 ; i++ ) read_data[i] = (bool)read_data[i];
+                for (unsigned i = 0; i < NUM_TESTED_VALUES - 1; i++)
+                    read_data[i] = (bool)read_data[i];
             }
 
             // Compare only the valid returned bytes.
-            int compare_result = l_compare( "read-after-write", expected, read_data, NUM_TESTED_VALUES-1, ti );
-            // log_info("Compared %d values each of size %llu. Result %d\n", NUM_TESTED_VALUES-1, (unsigned long long)ti.get_value_size(), compare_result );
+            int compare_result =
+                l_compare("read-after-write", expected, read_data,
+                          NUM_TESTED_VALUES - 1, ti);
+            // log_info("Compared %d values each of size %llu. Result %d\n",
+            // NUM_TESTED_VALUES-1, (unsigned long long)ti.get_value_size(),
+            // compare_result );
             err |= compare_result;
 
             clEnqueueUnmapMemObject(queue, read_mem, read_ptr, 0, 0, 0);
 
-            if ( err ) break;
+            if (err) break;
         }
     }
 
-    if ( CL_SUCCESS == err ) { log_info("OK\n"); FLUSH; }
+    if (CL_SUCCESS == err)
+    {
+        log_info("OK\n");
+        FLUSH;
+    }
     align_free(write_data);
     align_free(read_data);
     return err;
@@ -1042,74 +1287,97 @@
 
 
 // Check initialization, then, read, then write, then read.
-static int l_init_write_read( cl_device_id device, cl_context context, cl_command_queue queue )
+static int l_init_write_read(cl_device_id device, cl_context context,
+                             cl_command_queue queue)
 {
     int status = CL_SUCCESS;
     int itype;
 
-    RandomSeed rand_state( gRandomSeed );
+    RandomSeed rand_state(gRandomSeed);
 
-    for ( itype = 0; itype < num_type_info ; itype++ ) {
-        status = status | l_init_write_read_for_type(device,context,queue,type_info[itype], rand_state );
+    for (itype = 0; itype < num_type_info; itype++)
+    {
+        status = status
+            | l_init_write_read_for_type(device, context, queue,
+                                         type_info[itype], rand_state);
     }
     return status;
 }
-static int l_init_write_read_for_type( cl_device_id device, cl_context context, cl_command_queue queue, const TypeInfo& ti, RandomSeed& rand_state )
+static int l_init_write_read_for_type(cl_device_id device, cl_context context,
+                                      cl_command_queue queue,
+                                      const TypeInfo& ti,
+                                      RandomSeed& rand_state)
 {
     int err = CL_SUCCESS;
-    std::string type_name( ti.get_name() );
+    std::string type_name(ti.get_name());
     const char* tn = type_name.c_str();
-    log_info("  %s ",tn);
+    log_info("  %s ", tn);
 
     StringTable ksrc;
-    ksrc.add( l_get_fp64_pragma() );
-    ksrc.add( l_get_cles_int64_pragma() );
-    if (ti.is_atomic_64bit())
-      ksrc.add( l_get_int64_atomic_pragma() );
-    ksrc.add( conversion_functions(ti) );
-    ksrc.add( global_decls(ti,true) );
-    ksrc.add( writer_function(ti) );
-    ksrc.add( reader_function(ti) );
+    ksrc.add(l_get_fp64_pragma());
+    ksrc.add(l_get_cles_int64_pragma());
+    if (ti.is_atomic_64bit()) ksrc.add(l_get_int64_atomic_pragma());
+    ksrc.add(conversion_functions(ti));
+    ksrc.add(global_decls(ti, true));
+    ksrc.add(writer_function(ti));
+    ksrc.add(reader_function(ti));
 
     int status = CL_SUCCESS;
     clProgramWrapper program;
     clKernelWrapper writer;
 
-    status = create_single_kernel_helper_with_build_options(context, &program, &writer, ksrc.num_str(), ksrc.strs(), "writer", OPTIONS);
-    test_error_ret(status,"Failed to create program for init-read-after-write test",status);
+    status = create_single_kernel_helper_with_build_options(
+        context, &program, &writer, ksrc.num_str(), ksrc.strs(), "writer",
+        OPTIONS);
+    test_error_ret(status,
+                   "Failed to create program for init-read-after-write test",
+                   status);
 
-    clKernelWrapper reader( clCreateKernel( program, "reader", &status ) );
-    test_error_ret(status,"Failed to create reader kernel for init-read-after-write test",status);
+    clKernelWrapper reader(clCreateKernel(program, "reader", &status));
+    test_error_ret(
+        status, "Failed to create reader kernel for init-read-after-write test",
+        status);
 
     // Check size query.
     size_t used_bytes = 0;
-    status = clGetProgramBuildInfo( program, device, CL_PROGRAM_BUILD_GLOBAL_VARIABLE_TOTAL_SIZE, sizeof(used_bytes), &used_bytes, 0 );
-    test_error_ret(status,"Failed to query global variable total size",status);
-    size_t expected_used_bytes =
-        (NUM_TESTED_VALUES-1)*ti.get_size() // Two regular variables and an array of 2 elements.
-        + ( l_64bit_device ? 8 : 4 ); // The pointer
-    if ( used_bytes < expected_used_bytes ) {
-        log_error("Error: program query for global variable total size query failed: Expected at least %llu but got %llu\n", (unsigned long long)expected_used_bytes, (unsigned long long)used_bytes );
+    status = clGetProgramBuildInfo(program, device,
+                                   CL_PROGRAM_BUILD_GLOBAL_VARIABLE_TOTAL_SIZE,
+                                   sizeof(used_bytes), &used_bytes, 0);
+    test_error_ret(status, "Failed to query global variable total size",
+                   status);
+    size_t expected_used_bytes = (NUM_TESTED_VALUES - 1)
+            * ti.get_size() // Two regular variables and an array of 2 elements.
+        + (l_64bit_device ? 8 : 4); // The pointer
+    if (used_bytes < expected_used_bytes)
+    {
+        log_error("Error: program query for global variable total size query "
+                  "failed: Expected at least %llu but got %llu\n",
+                  (unsigned long long)expected_used_bytes,
+                  (unsigned long long)used_bytes);
         err |= 1;
     }
 
     // We need to create 5 random values of the given type,
     // and read 4 of them back.
     const size_t write_data_size = NUM_TESTED_VALUES * sizeof(cl_ulong16);
-    const size_t read_data_size = (NUM_TESTED_VALUES-1) * sizeof(cl_ulong16);
+    const size_t read_data_size = (NUM_TESTED_VALUES - 1) * sizeof(cl_ulong16);
 
     cl_uchar* write_data = (cl_uchar*)align_malloc(write_data_size, ALIGNMENT);
     cl_uchar* read_data = (cl_uchar*)align_malloc(read_data_size, ALIGNMENT);
-    clMemWrapper write_mem( clCreateBuffer( context, CL_MEM_USE_HOST_PTR, write_data_size, write_data, &status ) );
-    test_error_ret(status,"Failed to allocate write buffer",status);
-    clMemWrapper read_mem( clCreateBuffer( context, CL_MEM_USE_HOST_PTR, read_data_size, read_data, &status ) );
-    test_error_ret(status,"Failed to allocate read buffer",status);
+    clMemWrapper write_mem(clCreateBuffer(
+        context, CL_MEM_USE_HOST_PTR, write_data_size, write_data, &status));
+    test_error_ret(status, "Failed to allocate write buffer", status);
+    clMemWrapper read_mem(clCreateBuffer(context, CL_MEM_USE_HOST_PTR,
+                                         read_data_size, read_data, &status));
+    test_error_ret(status, "Failed to allocate read buffer", status);
 
-    status = clSetKernelArg(writer,0,sizeof(cl_mem),&write_mem); test_error_ret(status,"set arg",status);
-    status = clSetKernelArg(reader,0,sizeof(cl_mem),&read_mem); test_error_ret(status,"set arg",status);
+    status = clSetKernelArg(writer, 0, sizeof(cl_mem), &write_mem);
+    test_error_ret(status, "set arg", status);
+    status = clSetKernelArg(reader, 0, sizeof(cl_mem), &read_mem);
+    test_error_ret(status, "set arg", status);
 
     // Boolean random data needs to be massaged a bit more.
-    const int num_rounds = ti.is_bool() ? (1 << NUM_TESTED_VALUES ) : NUM_ROUNDS;
+    const int num_rounds = ti.is_bool() ? (1 << NUM_TESTED_VALUES) : NUM_ROUNDS;
     unsigned bool_iter = 0;
 
     // We need to count iterations.  We do something *different on the
@@ -1117,107 +1385,152 @@
     // values.
     unsigned iteration = 0;
 
-    for ( int iround = 0; iround < num_rounds ; iround++ ) {
-        for ( cl_uint iptr_idx = 0; iptr_idx < 2 ; iptr_idx++ ) { // Index into array, to write via pointer
+    for (int iround = 0; iround < num_rounds; iround++)
+    {
+        for (cl_uint iptr_idx = 0; iptr_idx < 2; iptr_idx++)
+        { // Index into array, to write via pointer
             // Generate new random data to push through.
-            // Generate 5 * 128 bytes all the time, even though the test for many types use less than all that.
+            // Generate 5 * 128 bytes all the time, even though the tests for
+            // many types use less than all that.
 
-            cl_uchar *write_ptr = (cl_uchar *)clEnqueueMapBuffer(queue, write_mem, CL_TRUE, CL_MAP_WRITE, 0, write_data_size, 0, 0, 0, 0);
+            cl_uchar* write_ptr = (cl_uchar*)clEnqueueMapBuffer(
+                queue, write_mem, CL_TRUE, CL_MAP_WRITE, 0, write_data_size, 0,
+                0, 0, 0);
 
-            if ( ti.is_bool() ) {
+            if (ti.is_bool())
+            {
                 // For boolean, random data cast to bool isn't very random.
                 // So use the bottom bit of bool_value_iter to get true
                 // diversity.
-                for ( unsigned value_idx = 0; value_idx < NUM_TESTED_VALUES ; value_idx++ ) {
-                    write_data[value_idx] = (1<<value_idx) & bool_iter;
-                    //printf(" %s", (write_data[value_idx] ? "true" : "false" ));
+                for (unsigned value_idx = 0; value_idx < NUM_TESTED_VALUES;
+                     value_idx++)
+                {
+                    write_data[value_idx] = (1 << value_idx) & bool_iter;
+                    // printf(" %s", (write_data[value_idx] ? "true" : "false"
+                    // ));
                 }
                 bool_iter++;
-            } else {
-                l_set_randomly( write_data, write_data_size, rand_state );
             }
-            status = clSetKernelArg(writer,1,sizeof(cl_uint),&iptr_idx); test_error_ret(status,"set arg",status);
+            else
+            {
+                l_set_randomly(write_data, write_data_size, rand_state);
+            }
+            status = clSetKernelArg(writer, 1, sizeof(cl_uint), &iptr_idx);
+            test_error_ret(status, "set arg", status);
 
-            if ( !iteration ) {
+            if (!iteration)
+            {
                 // On first iteration, the value we write via the last arg
                 // to the "reader" function is 0.
                 // It's way easier to code the test this way.
-                ti.init( write_data + (NUM_TESTED_VALUES-1)*ti.get_size(), 0 );
+                ti.init(write_data + (NUM_TESTED_VALUES - 1) * ti.get_size(),
+                        0);
             }
 
             // The value to write via the pointer should be taken from the
             // 5th typed slot of the write_data.
-            status = clSetKernelArg(reader,1,ti.get_size(),write_data + (NUM_TESTED_VALUES-1)*ti.get_size()); test_error_ret(status,"set arg",status);
+            status = clSetKernelArg(
+                reader, 1, ti.get_size(),
+                write_data + (NUM_TESTED_VALUES - 1) * ti.get_size());
+            test_error_ret(status, "set arg", status);
 
             // Determine the expected values.
             cl_uchar expected[read_data_size];
-            memset( expected, -1, sizeof(expected) );
-            if ( iteration ) {
-                l_copy( expected, 0, write_data, 0, ti );
-                l_copy( expected, 1, write_data, 1, ti );
-                l_copy( expected, 2, write_data, 2, ti );
-                l_copy( expected, 3, write_data, 3, ti );
-                // But we need to take into account the value from the pointer write.
-                // The 2 represents where the "a" array values begin in our read-back.
-                // But we need to take into account the value from the pointer write.
-                l_copy( expected, 2 + iptr_idx, write_data, 4, ti );
-            } else {
+            memset(expected, -1, sizeof(expected));
+            if (iteration)
+            {
+                l_copy(expected, 0, write_data, 0, ti);
+                l_copy(expected, 1, write_data, 1, ti);
+                l_copy(expected, 2, write_data, 2, ti);
+                l_copy(expected, 3, write_data, 3, ti);
+                // But we need to take into account the value from the pointer
+                // write. The 2 represents where the "a" array values begin in
+                // our read-back, and iptr_idx is the index into that array
+                // that was written via the pointer.
+                l_copy(expected, 2 + iptr_idx, write_data, 4, ti);
+            }
+            else
+            {
                 // On first iteration, expect these initialized values!
                 // See the decls_template_with_init above.
-                ti.init( expected, 0 );
-                ti.init( expected + ti.get_size(), 1 );
-                ti.init( expected + 2*ti.get_size(), 1 );
+                ti.init(expected, 0);
+                ti.init(expected + ti.get_size(), 1);
+                ti.init(expected + 2 * ti.get_size(), 1);
                 // Emulate the effect of the write via the pointer.
                 // The value is 0, not 1 (see above).
                 // The pointer is always initialized to the second element
                 // of the array. So it goes into slot 3 of the "expected" array.
-                ti.init( expected + 3*ti.get_size(), 0 );
+                ti.init(expected + 3 * ti.get_size(), 0);
             }
 
-            if ( ti.is_bool() ) {
+            if (ti.is_bool())
+            {
                 // Collapse down to one bit.
-                for ( unsigned i = 0; i <  NUM_TESTED_VALUES-1 ; i++ ) expected[i] = (bool)expected[i];
+                for (unsigned i = 0; i < NUM_TESTED_VALUES - 1; i++)
+                    expected[i] = (bool)expected[i];
             }
 
             clEnqueueUnmapMemObject(queue, write_mem, write_ptr, 0, 0, 0);
 
-            cl_uchar *read_ptr = (cl_uchar *)clEnqueueMapBuffer(queue, read_mem, CL_TRUE, CL_MAP_READ, 0, read_data_size, 0, 0, 0, 0);
-            memset( read_data, -1, read_data_size );
+            cl_uchar* read_ptr = (cl_uchar*)clEnqueueMapBuffer(
+                queue, read_mem, CL_TRUE, CL_MAP_READ, 0, read_data_size, 0, 0,
+                0, 0);
+            memset(read_data, -1, read_data_size);
             clEnqueueUnmapMemObject(queue, read_mem, read_ptr, 0, 0, 0);
 
             // Now run the kernel
             const size_t one = 1;
-            if ( iteration ) {
-                status = clEnqueueNDRangeKernel(queue,writer,1,0,&one,0,0,0,0); test_error_ret(status,"enqueue writer",status);
-            } else {
+            if (iteration)
+            {
+                status = clEnqueueNDRangeKernel(queue, writer, 1, 0, &one, 0, 0,
+                                                0, 0);
+                test_error_ret(status, "enqueue writer", status);
+            }
+            else
+            {
                 // On first iteration, we should be picking up the
                 // initialized value. So don't enqueue the writer.
             }
-            status = clEnqueueNDRangeKernel(queue,reader,1,0,&one,0,0,0,0); test_error_ret(status,"enqueue reader",status);
-            status = clFinish(queue); test_error_ret(status,"finish",status);
+            status =
+                clEnqueueNDRangeKernel(queue, reader, 1, 0, &one, 0, 0, 0, 0);
+            test_error_ret(status, "enqueue reader", status);
+            status = clFinish(queue);
+            test_error_ret(status, "finish", status);
 
-            read_ptr = (cl_uchar *)clEnqueueMapBuffer(queue, read_mem, CL_TRUE, CL_MAP_READ, 0, read_data_size, 0, 0, 0, 0);
+            read_ptr = (cl_uchar*)clEnqueueMapBuffer(
+                queue, read_mem, CL_TRUE, CL_MAP_READ, 0, read_data_size, 0, 0,
+                0, 0);
 
-            if ( ti.is_bool() ) {
+            if (ti.is_bool())
+            {
                 // Collapse down to one bit.
-                for ( unsigned i = 0; i <  NUM_TESTED_VALUES-1 ; i++ ) read_data[i] = (bool)read_data[i];
+                for (unsigned i = 0; i < NUM_TESTED_VALUES - 1; i++)
+                    read_data[i] = (bool)read_data[i];
             }
 
             // Compare only the valid returned bytes.
-            //log_info(" Round %d ptr_idx %u\n", iround, iptr_idx );
-            int compare_result = l_compare( "init-write-read", expected, read_data, NUM_TESTED_VALUES-1, ti );
-            //log_info("Compared %d values each of size %llu. Result %d\n", NUM_TESTED_VALUES-1, (unsigned long long)ti.get_value_size(), compare_result );
+            // log_info(" Round %d ptr_idx %u\n", iround, iptr_idx );
+            int compare_result =
+                l_compare("init-write-read", expected, read_data,
+                          NUM_TESTED_VALUES - 1, ti);
+            // log_info("Compared %d values each of size %llu. Result %d\n",
+            // NUM_TESTED_VALUES-1, (unsigned long long)ti.get_value_size(),
+            // compare_result );
             err |= compare_result;
 
             clEnqueueUnmapMemObject(queue, read_mem, read_ptr, 0, 0, 0);
 
-            if ( err ) break;
+            if (err) break;
 
             iteration++;
         }
     }
 
-    if ( CL_SUCCESS == err ) { log_info("OK\n"); FLUSH; }
+    if (CL_SUCCESS == err)
+    {
+        log_info("OK\n");
+        FLUSH;
+    }
     align_free(write_data);
     align_free(read_data);
 
@@ -1226,12 +1539,14 @@
 
 
 // Check that we can make at least one variable with size
-// max_size which is returned from the device info property : CL_DEVICE_MAX_GLOBAL_VARIABLE_SIZE.
-static int l_capacity( cl_device_id device, cl_context context, cl_command_queue queue, size_t max_size )
+// max_size which is returned from the device info property :
+// CL_DEVICE_MAX_GLOBAL_VARIABLE_SIZE.
+static int l_capacity(cl_device_id device, cl_context context,
+                      cl_command_queue queue, size_t max_size)
 {
     int err = CL_SUCCESS;
     // Just test one type.
-    const TypeInfo ti( l_find_type("uchar") );
+    const TypeInfo ti(l_find_type("uchar"));
     log_info(" l_capacity...");
 
     const char prog_src_template[] =
@@ -1254,84 +1569,132 @@
         "  dest[get_global_linear_id()] = var[get_global_id(0)];\n"
         "}\n\n";
     char prog_src[MAX_STR];
-    int num_printed = snprintf(prog_src,sizeof(prog_src),prog_src_template,max_size, max_size);
-    assert( num_printed < MAX_STR ); // or increase MAX_STR
+    int num_printed = snprintf(prog_src, sizeof(prog_src), prog_src_template,
+                               max_size, max_size);
+    assert(num_printed < MAX_STR); // or increase MAX_STR
     (void)num_printed;
 
     StringTable ksrc;
-    ksrc.add( prog_src );
+    ksrc.add(prog_src);
 
     int status = CL_SUCCESS;
     clProgramWrapper program;
     clKernelWrapper get_max_size;
 
-    status = create_single_kernel_helper_with_build_options(context, &program, &get_max_size, ksrc.num_str(), ksrc.strs(), "get_max_size", OPTIONS);
-    test_error_ret(status,"Failed to create program for capacity test",status);
+    status = create_single_kernel_helper_with_build_options(
+        context, &program, &get_max_size, ksrc.num_str(), ksrc.strs(),
+        "get_max_size", OPTIONS);
+    test_error_ret(status, "Failed to create program for capacity test",
+                   status);
 
     // Check size query.
     size_t used_bytes = 0;
-    status = clGetProgramBuildInfo( program, device, CL_PROGRAM_BUILD_GLOBAL_VARIABLE_TOTAL_SIZE, sizeof(used_bytes), &used_bytes, 0 );
-    test_error_ret(status,"Failed to query global variable total size",status);
-    if ( used_bytes < max_size ) {
-        log_error("Error: program query for global variable total size query failed: Expected at least %llu but got %llu\n", (unsigned long long)max_size, (unsigned long long)used_bytes );
+    status = clGetProgramBuildInfo(program, device,
+                                   CL_PROGRAM_BUILD_GLOBAL_VARIABLE_TOTAL_SIZE,
+                                   sizeof(used_bytes), &used_bytes, 0);
+    test_error_ret(status, "Failed to query global variable total size",
+                   status);
+    if (used_bytes < max_size)
+    {
+        log_error("Error: program query for global variable total size query "
+                  "failed: Expected at least %llu but got %llu\n",
+                  (unsigned long long)max_size, (unsigned long long)used_bytes);
         err |= 1;
     }
 
     // Prepare to execute
-    clKernelWrapper writer( clCreateKernel( program, "writer", &status ) );
-    test_error_ret(status,"Failed to create writer kernel for capacity test",status);
-    clKernelWrapper reader( clCreateKernel( program, "reader", &status ) );
-    test_error_ret(status,"Failed to create reader kernel for capacity test",status);
+    clKernelWrapper writer(clCreateKernel(program, "writer", &status));
+    test_error_ret(status, "Failed to create writer kernel for capacity test",
+                   status);
+    clKernelWrapper reader(clCreateKernel(program, "reader", &status));
+    test_error_ret(status, "Failed to create reader kernel for capacity test",
+                   status);
 
     cl_ulong max_size_ret = 0;
-    const size_t arr_size = 10*1024*1024;
-    cl_uchar* buffer = (cl_uchar*) align_malloc( arr_size, ALIGNMENT );
+    const size_t arr_size = 10 * 1024 * 1024;
+    cl_uchar* buffer = (cl_uchar*)align_malloc(arr_size, ALIGNMENT);
 
-    if ( !buffer ) { log_error("Failed to allocate buffer\n"); return 1; }
+    if (!buffer)
+    {
+        log_error("Failed to allocate buffer\n");
+        return 1;
+    }
 
-    clMemWrapper max_size_ret_mem( clCreateBuffer( context, CL_MEM_USE_HOST_PTR, sizeof(max_size_ret), &max_size_ret, &status ) );
-    test_error_ret(status,"Failed to allocate size query buffer",status);
-    clMemWrapper buffer_mem( clCreateBuffer( context, CL_MEM_READ_WRITE, arr_size, 0, &status ) );
-    test_error_ret(status,"Failed to allocate write buffer",status);
+    clMemWrapper max_size_ret_mem(clCreateBuffer(context, CL_MEM_USE_HOST_PTR,
+                                                 sizeof(max_size_ret),
+                                                 &max_size_ret, &status));
+    test_error_ret(status, "Failed to allocate size query buffer", status);
+    clMemWrapper buffer_mem(
+        clCreateBuffer(context, CL_MEM_READ_WRITE, arr_size, 0, &status));
+    test_error_ret(status, "Failed to allocate write buffer", status);
 
-    status = clSetKernelArg(get_max_size,0,sizeof(cl_mem),&max_size_ret_mem); test_error_ret(status,"set arg",status);
-    status = clSetKernelArg(writer,0,sizeof(cl_mem),&buffer_mem); test_error_ret(status,"set arg",status);
-    status = clSetKernelArg(reader,0,sizeof(cl_mem),&buffer_mem); test_error_ret(status,"set arg",status);
+    status = clSetKernelArg(get_max_size, 0, sizeof(cl_mem), &max_size_ret_mem);
+    test_error_ret(status, "set arg", status);
+    status = clSetKernelArg(writer, 0, sizeof(cl_mem), &buffer_mem);
+    test_error_ret(status, "set arg", status);
+    status = clSetKernelArg(reader, 0, sizeof(cl_mem), &buffer_mem);
+    test_error_ret(status, "set arg", status);
 
     // Check the macro value of CL_DEVICE_MAX_GLOBAL_VARIABLE
     const size_t one = 1;
-    status = clEnqueueNDRangeKernel(queue,get_max_size,1,0,&one,0,0,0,0); test_error_ret(status,"enqueue size query",status);
-    status = clFinish(queue); test_error_ret(status,"finish",status);
+    status =
+        clEnqueueNDRangeKernel(queue, get_max_size, 1, 0, &one, 0, 0, 0, 0);
+    test_error_ret(status, "enqueue size query", status);
+    status = clFinish(queue);
+    test_error_ret(status, "finish", status);
 
-    cl_uchar *max_size_ret_ptr = (cl_uchar *)clEnqueueMapBuffer(queue, max_size_ret_mem, CL_TRUE, CL_MAP_READ, 0, sizeof(max_size_ret), 0, 0, 0, 0);
-    if ( max_size_ret != max_size ) {
-        log_error("Error: preprocessor definition for CL_DEVICE_MAX_GLOBAL_VARIABLE_SIZE is %llu and does not match device query value %llu\n",
-                (unsigned long long) max_size_ret,
-                (unsigned long long) max_size );
+    cl_uchar* max_size_ret_ptr = (cl_uchar*)clEnqueueMapBuffer(
+        queue, max_size_ret_mem, CL_TRUE, CL_MAP_READ, 0, sizeof(max_size_ret),
+        0, 0, 0, 0);
+    if (max_size_ret != max_size)
+    {
+        log_error("Error: preprocessor definition for "
+                  "CL_DEVICE_MAX_GLOBAL_VARIABLE_SIZE is %llu and does not "
+                  "match device query value %llu\n",
+                  (unsigned long long)max_size_ret,
+                  (unsigned long long)max_size);
         err |= 1;
     }
     clEnqueueUnmapMemObject(queue, max_size_ret_mem, max_size_ret_ptr, 0, 0, 0);
 
-    RandomSeed rand_state_write( gRandomSeed );
-    for ( size_t offset = 0; offset < max_size ; offset += arr_size ) {
-        size_t curr_size = (max_size - offset) < arr_size ? (max_size - offset) : arr_size;
-        l_set_randomly( buffer, curr_size, rand_state_write );
-        status = clEnqueueWriteBuffer (queue, buffer_mem, CL_TRUE, 0, curr_size, buffer, 0, 0, 0);test_error_ret(status,"populate buffer_mem object",status);
-        status = clEnqueueNDRangeKernel(queue,writer,1,&offset,&curr_size,0,0,0,0); test_error_ret(status,"enqueue writer",status);
-    status = clFinish(queue); test_error_ret(status,"finish",status);
+    RandomSeed rand_state_write(gRandomSeed);
+    for (size_t offset = 0; offset < max_size; offset += arr_size)
+    {
+        size_t curr_size =
+            (max_size - offset) < arr_size ? (max_size - offset) : arr_size;
+        l_set_randomly(buffer, curr_size, rand_state_write);
+        status = clEnqueueWriteBuffer(queue, buffer_mem, CL_TRUE, 0, curr_size,
+                                      buffer, 0, 0, 0);
+        test_error_ret(status, "populate buffer_mem object", status);
+        status = clEnqueueNDRangeKernel(queue, writer, 1, &offset, &curr_size,
+                                        0, 0, 0, 0);
+        test_error_ret(status, "enqueue writer", status);
+        status = clFinish(queue);
+        test_error_ret(status, "finish", status);
     }
 
-    RandomSeed rand_state_read( gRandomSeed );
-    for ( size_t offset = 0; offset < max_size ; offset += arr_size ) {
-        size_t curr_size = (max_size - offset) < arr_size ? (max_size - offset) : arr_size;
-        status = clEnqueueNDRangeKernel(queue,reader,1,&offset,&curr_size,0,0,0,0); test_error_ret(status,"enqueue reader",status);
-        cl_uchar* read_mem_ptr = (cl_uchar *)clEnqueueMapBuffer(queue, buffer_mem, CL_TRUE, CL_MAP_READ, 0, curr_size, 0, 0, 0, &status);test_error_ret(status,"map read data",status);
-        l_set_randomly( buffer, curr_size, rand_state_read );
-        err |= l_compare( "capacity", buffer, read_mem_ptr, curr_size, ti );
+    RandomSeed rand_state_read(gRandomSeed);
+    for (size_t offset = 0; offset < max_size; offset += arr_size)
+    {
+        size_t curr_size =
+            (max_size - offset) < arr_size ? (max_size - offset) : arr_size;
+        status = clEnqueueNDRangeKernel(queue, reader, 1, &offset, &curr_size,
+                                        0, 0, 0, 0);
+        test_error_ret(status, "enqueue reader", status);
+        cl_uchar* read_mem_ptr = (cl_uchar*)clEnqueueMapBuffer(
+            queue, buffer_mem, CL_TRUE, CL_MAP_READ, 0, curr_size, 0, 0, 0,
+            &status);
+        test_error_ret(status, "map read data", status);
+        l_set_randomly(buffer, curr_size, rand_state_read);
+        err |= l_compare("capacity", buffer, read_mem_ptr, curr_size, ti);
         clEnqueueUnmapMemObject(queue, buffer_mem, read_mem_ptr, 0, 0, 0);
     }
 
-    if ( CL_SUCCESS == err ) { log_info("OK\n"); FLUSH; }
+    if (CL_SUCCESS == err)
+    {
+        log_info("OK\n");
+        FLUSH;
+    }
     align_free(buffer);
 
     return err;
@@ -1339,32 +1702,33 @@
 
 
 // Check operation on a user type.
-static int l_user_type( cl_device_id device, cl_context context, cl_command_queue queue, bool separate_compile )
+static int l_user_type(cl_device_id device, cl_context context,
+                       cl_command_queue queue, bool separate_compile)
 {
     int err = CL_SUCCESS;
     // Just test one type.
-    const TypeInfo ti( l_find_type("uchar") );
-    log_info(" l_user_type %s...", separate_compile ? "separate compilation" : "single source compilation" );
+    const TypeInfo ti(l_find_type("uchar"));
+    log_info(" l_user_type %s...",
+             separate_compile ? "separate compilation"
+                              : "single source compilation");
 
-    if ( separate_compile && ! l_linker_available ) {
+    if (separate_compile && !l_linker_available)
+    {
         log_info("Separate compilation is not supported. Skipping test\n");
         return err;
     }
 
     const char type_src[] =
         "typedef struct { uchar c; uint i; } my_struct_t;\n\n";
-    const char def_src[] =
-        "my_struct_t var = { 'a', 42 };\n\n";
-    const char decl_src[] =
-        "extern my_struct_t var;\n\n";
+    const char def_src[] = "my_struct_t var = { 'a', 42 };\n\n";
+    const char decl_src[] = "extern my_struct_t var;\n\n";
 
     // Don't use a host struct. We can't guarantee that the host
     // compiler has the same structure layout as the device compiler.
-    const char writer_src[] =
-        "kernel void writer( uchar c, uint i ) {\n"
-        "  var.c = c;\n"
-        "  var.i = i;\n"
-        "}\n\n";
+    const char writer_src[] = "kernel void writer( uchar c, uint i ) {\n"
+                              "  var.c = c;\n"
+                              "  var.i = i;\n"
+                              "}\n\n";
     const char reader_src[] =
         "kernel void reader( global uchar* C, global uint* I ) {\n"
         "  *C = var.c;\n"
@@ -1373,36 +1737,53 @@
 
     clProgramWrapper program;
 
-    if ( separate_compile ) {
+    if (separate_compile)
+    {
         // Separate compilation flow.
         StringTable wksrc;
-        wksrc.add( type_src );
-        wksrc.add( def_src );
-        wksrc.add( writer_src );
+        wksrc.add(type_src);
+        wksrc.add(def_src);
+        wksrc.add(writer_src);
 
         StringTable rksrc;
-        rksrc.add( type_src );
-        rksrc.add( decl_src );
-        rksrc.add( reader_src );
+        rksrc.add(type_src);
+        rksrc.add(decl_src);
+        rksrc.add(reader_src);
 
         int status = CL_SUCCESS;
-        clProgramWrapper writer_program( clCreateProgramWithSource( context, wksrc.num_str(), wksrc.strs(), wksrc.lengths(), &status ) );
-        test_error_ret(status,"Failed to create writer program for user type test",status);
+        clProgramWrapper writer_program(clCreateProgramWithSource(
+            context, wksrc.num_str(), wksrc.strs(), wksrc.lengths(), &status));
+        test_error_ret(status,
+                       "Failed to create writer program for user type test",
+                       status);
 
-        status = clCompileProgram( writer_program, 1, &device, OPTIONS, 0, 0, 0, 0, 0 );
-        if(check_error(status, "Failed to compile writer program for user type test (%s)", IGetErrorString(status)))
+        status = clCompileProgram(writer_program, 1, &device, OPTIONS, 0, 0, 0,
+                                  0, 0);
+        if (check_error(
+                status,
+                "Failed to compile writer program for user type test (%s)",
+                IGetErrorString(status)))
         {
-            print_build_log(writer_program, 1, &device, wksrc.num_str(), wksrc.strs(), wksrc.lengths(), OPTIONS);
+            print_build_log(writer_program, 1, &device, wksrc.num_str(),
+                            wksrc.strs(), wksrc.lengths(), OPTIONS);
             return status;
         }
 
-        clProgramWrapper reader_program( clCreateProgramWithSource( context, rksrc.num_str(), rksrc.strs(), rksrc.lengths(), &status ) );
-        test_error_ret(status,"Failed to create reader program for user type test",status);
+        clProgramWrapper reader_program(clCreateProgramWithSource(
+            context, rksrc.num_str(), rksrc.strs(), rksrc.lengths(), &status));
+        test_error_ret(status,
+                       "Failed to create reader program for user type test",
+                       status);
 
-        status = clCompileProgram( reader_program, 1, &device, OPTIONS, 0, 0, 0, 0, 0 );
-        if(check_error(status, "Failed to compile reader program for user type test (%s)", IGetErrorString(status)))
+        status = clCompileProgram(reader_program, 1, &device, OPTIONS, 0, 0, 0,
+                                  0, 0);
+        if (check_error(
+                status,
+                "Failed to compile reader program for user type test (%s)",
+                IGetErrorString(status)))
         {
-            print_build_log(reader_program, 1, &device, rksrc.num_str(), rksrc.strs(), rksrc.lengths(), OPTIONS);
+            print_build_log(reader_program, 1, &device, rksrc.num_str(),
+                            rksrc.strs(), rksrc.lengths(), OPTIONS);
             return status;
         }
 
@@ -1410,33 +1791,45 @@
         progs[0] = writer_program;
         progs[1] = reader_program;
 
-        program = clLinkProgram( context, 1, &device, "", 2, progs, 0, 0, &status );
-        if(check_error(status, "Failed to link program for user type test (%s)", IGetErrorString(status)))
+        program =
+            clLinkProgram(context, 1, &device, "", 2, progs, 0, 0, &status);
+        if (check_error(status,
+                        "Failed to link program for user type test (%s)",
+                        IGetErrorString(status)))
         {
             print_build_log(program, 1, &device, 0, NULL, NULL, "");
             return status;
         }
-    } else {
+    }
+    else
+    {
         // Single compilation flow.
         StringTable ksrc;
-        ksrc.add( type_src );
-        ksrc.add( def_src );
-        ksrc.add( writer_src );
-        ksrc.add( reader_src );
+        ksrc.add(type_src);
+        ksrc.add(def_src);
+        ksrc.add(writer_src);
+        ksrc.add(reader_src);
 
         int status = CL_SUCCESS;
 
-        status = create_single_kernel_helper_create_program(context, &program, ksrc.num_str(), ksrc.strs(), OPTIONS);
-        if(check_error(status, "Failed to build program for user type test (%s)", IGetErrorString(status)))
+        status = create_single_kernel_helper_create_program(
+            context, &program, ksrc.num_str(), ksrc.strs(), OPTIONS);
+        if (check_error(status,
+                        "Failed to build program for user type test (%s)",
+                        IGetErrorString(status)))
         {
-            print_build_log(program, 1, &device, ksrc.num_str(), ksrc.strs(), ksrc.lengths(), OPTIONS);
+            print_build_log(program, 1, &device, ksrc.num_str(), ksrc.strs(),
+                            ksrc.lengths(), OPTIONS);
             return status;
         }
 
         status = clBuildProgram(program, 1, &device, OPTIONS, 0, 0);
-        if(check_error(status, "Failed to compile program for user type test (%s)", IGetErrorString(status)))
+        if (check_error(status,
+                        "Failed to compile program for user type test (%s)",
+                        IGetErrorString(status)))
         {
-            print_build_log(program, 1, &device, ksrc.num_str(), ksrc.strs(), ksrc.lengths(), OPTIONS);
+            print_build_log(program, 1, &device, ksrc.num_str(), ksrc.strs(),
+                            ksrc.lengths(), OPTIONS);
             return status;
         }
     }
@@ -1444,48 +1837,71 @@
 
     // Check size query.
     size_t used_bytes = 0;
-    int status = clGetProgramBuildInfo( program, device, CL_PROGRAM_BUILD_GLOBAL_VARIABLE_TOTAL_SIZE, sizeof(used_bytes), &used_bytes, 0 );
-    test_error_ret(status,"Failed to query global variable total size",status);
+    int status = clGetProgramBuildInfo(
+        program, device, CL_PROGRAM_BUILD_GLOBAL_VARIABLE_TOTAL_SIZE,
+        sizeof(used_bytes), &used_bytes, 0);
+    test_error_ret(status, "Failed to query global variable total size",
+                   status);
     size_t expected_size = sizeof(cl_uchar) + sizeof(cl_uint);
-    if ( used_bytes < expected_size ) {
-        log_error("Error: program query for global variable total size query failed: Expected at least %llu but got %llu\n", (unsigned long long)expected_size, (unsigned long long)used_bytes );
+    if (used_bytes < expected_size)
+    {
+        log_error("Error: program query for global variable total size query "
+                  "failed: Expected at least %llu but got %llu\n",
+                  (unsigned long long)expected_size,
+                  (unsigned long long)used_bytes);
         err |= 1;
     }
 
     // Prepare to execute
-    clKernelWrapper writer( clCreateKernel( program, "writer", &status ) );
-    test_error_ret(status,"Failed to create writer kernel for user type test",status);
-    clKernelWrapper reader( clCreateKernel( program, "reader", &status ) );
-    test_error_ret(status,"Failed to create reader kernel for user type test",status);
+    clKernelWrapper writer(clCreateKernel(program, "writer", &status));
+    test_error_ret(status, "Failed to create writer kernel for user type test",
+                   status);
+    clKernelWrapper reader(clCreateKernel(program, "reader", &status));
+    test_error_ret(status, "Failed to create reader kernel for user type test",
+                   status);
 
     // Set up data.
     cl_uchar* uchar_data = (cl_uchar*)align_malloc(sizeof(cl_uchar), ALIGNMENT);
     cl_uint* uint_data = (cl_uint*)align_malloc(sizeof(cl_uint), ALIGNMENT);
 
-    clMemWrapper uchar_mem( clCreateBuffer( context, CL_MEM_USE_HOST_PTR, sizeof(cl_uchar), uchar_data, &status ) );
-    test_error_ret(status,"Failed to allocate uchar buffer",status);
-    clMemWrapper uint_mem( clCreateBuffer( context, CL_MEM_USE_HOST_PTR, sizeof(cl_uint), uint_data, &status ) );
-    test_error_ret(status,"Failed to allocate uint buffer",status);
+    clMemWrapper uchar_mem(clCreateBuffer(
+        context, CL_MEM_USE_HOST_PTR, sizeof(cl_uchar), uchar_data, &status));
+    test_error_ret(status, "Failed to allocate uchar buffer", status);
+    clMemWrapper uint_mem(clCreateBuffer(context, CL_MEM_USE_HOST_PTR,
+                                         sizeof(cl_uint), uint_data, &status));
+    test_error_ret(status, "Failed to allocate uint buffer", status);
 
-    status = clSetKernelArg(reader,0,sizeof(cl_mem),&uchar_mem); test_error_ret(status,"set arg",status);
-    status = clSetKernelArg(reader,1,sizeof(cl_mem),&uint_mem); test_error_ret(status,"set arg",status);
+    status = clSetKernelArg(reader, 0, sizeof(cl_mem), &uchar_mem);
+    test_error_ret(status, "set arg", status);
+    status = clSetKernelArg(reader, 1, sizeof(cl_mem), &uint_mem);
+    test_error_ret(status, "set arg", status);
 
     cl_uchar expected_uchar = 'a';
     cl_uint expected_uint = 42;
-    for ( unsigned iter = 0; iter < 5 ; iter++ ) { // Must go around at least twice
+    for (unsigned iter = 0; iter < 5; iter++)
+    { // Must go around at least twice
         // Read back data
         *uchar_data = -1;
         *uint_data = -1;
         const size_t one = 1;
-        status = clEnqueueNDRangeKernel(queue,reader,1,0,&one,0,0,0,0); test_error_ret(status,"enqueue reader",status);
-        status = clFinish(queue); test_error_ret(status,"finish",status);
+        status = clEnqueueNDRangeKernel(queue, reader, 1, 0, &one, 0, 0, 0, 0);
+        test_error_ret(status, "enqueue reader", status);
+        status = clFinish(queue);
+        test_error_ret(status, "finish", status);
 
-        cl_uchar *uint_data_ptr = (cl_uchar *)clEnqueueMapBuffer(queue, uint_mem, CL_TRUE, CL_MAP_READ, 0, sizeof(cl_uint), 0, 0, 0, 0);
-        cl_uchar *uchar_data_ptr = (cl_uchar *)clEnqueueMapBuffer(queue, uchar_mem, CL_TRUE, CL_MAP_READ, 0, sizeof(cl_uchar), 0, 0, 0, 0);
+        cl_uchar* uint_data_ptr =
+            (cl_uchar*)clEnqueueMapBuffer(queue, uint_mem, CL_TRUE, CL_MAP_READ,
+                                          0, sizeof(cl_uint), 0, 0, 0, 0);
+        cl_uchar* uchar_data_ptr = (cl_uchar*)clEnqueueMapBuffer(
+            queue, uchar_mem, CL_TRUE, CL_MAP_READ, 0, sizeof(cl_uchar), 0, 0,
+            0, 0);
 
-        if ( expected_uchar != *uchar_data || expected_uint != *uint_data ) {
-            log_error("FAILED: Iteration %d Got (0x%2x,%d) but expected (0x%2x,%d)\n",
-                    iter, (int)*uchar_data, *uint_data, (int)expected_uchar, expected_uint );
+        if (expected_uchar != *uchar_data || expected_uint != *uint_data)
+        {
+            log_error(
+                "FAILED: Iteration %d Got (0x%2x,%d) but expected (0x%2x,%d)\n",
+                iter, (int)*uchar_data, *uint_data, (int)expected_uchar,
+                expected_uint);
             err |= 1;
         }
 
@@ -1499,13 +1915,21 @@
         // Write the new values into persistent store.
         *uchar_data = expected_uchar;
         *uint_data = expected_uint;
-        status = clSetKernelArg(writer,0,sizeof(cl_uchar),uchar_data); test_error_ret(status,"set arg",status);
-        status = clSetKernelArg(writer,1,sizeof(cl_uint),uint_data); test_error_ret(status,"set arg",status);
-        status = clEnqueueNDRangeKernel(queue,writer,1,0,&one,0,0,0,0); test_error_ret(status,"enqueue writer",status);
-        status = clFinish(queue); test_error_ret(status,"finish",status);
+        status = clSetKernelArg(writer, 0, sizeof(cl_uchar), uchar_data);
+        test_error_ret(status, "set arg", status);
+        status = clSetKernelArg(writer, 1, sizeof(cl_uint), uint_data);
+        test_error_ret(status, "set arg", status);
+        status = clEnqueueNDRangeKernel(queue, writer, 1, 0, &one, 0, 0, 0, 0);
+        test_error_ret(status, "enqueue writer", status);
+        status = clFinish(queue);
+        test_error_ret(status, "finish", status);
     }
 
-    if ( CL_SUCCESS == err ) { log_info("OK\n"); FLUSH; }
+    if (CL_SUCCESS == err)
+    {
+        log_info("OK\n");
+        FLUSH;
+    }
     align_free(uchar_data);
     align_free(uint_data);
     return err;
@@ -1540,7 +1964,8 @@
 
 
 // Test support for variables at program scope. Miscellaneous
-int test_progvar_prog_scope_misc(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
+int test_progvar_prog_scope_misc(cl_device_id device, cl_context context,
+                                 cl_command_queue queue, int num_elements)
 {
     cl_bool skip{ CL_FALSE };
     auto error = should_skip(device, skip);
@@ -1559,19 +1984,20 @@
 
     cl_int err = CL_SUCCESS;
 
-    err = l_get_device_info( device, &max_size, &pref_size );
-    err |= l_build_type_table( device );
+    err = l_get_device_info(device, &max_size, &pref_size);
+    err |= l_build_type_table(device);
 
-    err |= l_capacity( device, context, queue, max_size );
-    err |= l_user_type( device, context, queue, false );
-    err |= l_user_type( device, context, queue, true );
+    err |= l_capacity(device, context, queue, max_size);
+    err |= l_user_type(device, context, queue, false);
+    err |= l_user_type(device, context, queue, true);
 
     return err;
 }
 
 
 // Test support for variables at program scope. Unitialized data
-int test_progvar_prog_scope_uninit(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
+int test_progvar_prog_scope_uninit(cl_device_id device, cl_context context,
+                                   cl_command_queue queue, int num_elements)
 {
     cl_bool skip{ CL_FALSE };
     auto error = should_skip(device, skip);
@@ -1591,16 +2017,17 @@
 
     cl_int err = CL_SUCCESS;
 
-    err = l_get_device_info( device, &max_size, &pref_size );
-    err |= l_build_type_table( device );
+    err = l_get_device_info(device, &max_size, &pref_size);
+    err |= l_build_type_table(device);
 
-    err |= l_write_read( device, context, queue );
+    err |= l_write_read(device, context, queue);
 
     return err;
 }
 
 // Test support for variables at program scope. Initialized data.
-int test_progvar_prog_scope_init(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
+int test_progvar_prog_scope_init(cl_device_id device, cl_context context,
+                                 cl_command_queue queue, int num_elements)
 {
     cl_bool skip{ CL_FALSE };
     auto error = should_skip(device, skip);
@@ -1619,17 +2046,18 @@
 
     cl_int err = CL_SUCCESS;
 
-    err = l_get_device_info( device, &max_size, &pref_size );
-    err |= l_build_type_table( device );
+    err = l_get_device_info(device, &max_size, &pref_size);
+    err |= l_build_type_table(device);
 
-    err |= l_init_write_read( device, context, queue );
+    err |= l_init_write_read(device, context, queue);
 
     return err;
 }
 
 
 // A simple test for support of static variables inside a kernel.
-int test_progvar_func_scope(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
+int test_progvar_func_scope(cl_device_id device, cl_context context,
+                            cl_command_queue queue, int num_elements)
 {
     cl_bool skip{ CL_FALSE };
     auto error = should_skip(device, skip);
@@ -1649,48 +2077,64 @@
     // Deliberately have two variables with the same name but in different
     // scopes.
     // Also, use a large initialized structure in both cases.
+    // clang-format off
     const char prog_src[] =
         "typedef struct { char c; int16 i; } mystruct_t;\n"
-        "kernel void test_bump( global int* value, int which ) {\n"
-        "  if ( which ) {\n"
+        "kernel void test_bump(global int* value, int which) {\n"
+        "  if (which) {\n"
         // Explicit address space.
         // Last element set to 0
-        "     static global mystruct_t persistent = {'a',(int16)(0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,0) };\n"
+        "     static global mystruct_t persistent = { 'a', (int16)(0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,0) };\n"
         "     *value = persistent.i.sf++;\n"
         "  } else {\n"
         // Implicitly global
         // Last element set to 100
-        "     static mystruct_t persistent = {'b',(int16)(0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,100) };\n"
+        "     static mystruct_t persistent = { 'b' , (int16)(0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,100) };\n"
         "     *value = persistent.i.sf++;\n"
         "  }\n"
         "}\n";
+    // clang-format on
 
     StringTable ksrc;
-    ksrc.add( prog_src );
+    ksrc.add(prog_src);
 
     int status = CL_SUCCESS;
     clProgramWrapper program;
     clKernelWrapper test_bump;
 
-    status = create_single_kernel_helper_with_build_options(context, &program, &test_bump, ksrc.num_str(), ksrc.strs(), "test_bump", OPTIONS);
-    test_error_ret(status, "Failed to create program for function static variable test", status);
+    status = create_single_kernel_helper_with_build_options(
+        context, &program, &test_bump, ksrc.num_str(), ksrc.strs(), "test_bump",
+        OPTIONS);
+    test_error_ret(status,
+                   "Failed to create program for function static variable test",
+                   status);
 
     // Check size query.
     size_t used_bytes = 0;
-    status = clGetProgramBuildInfo( program, device, CL_PROGRAM_BUILD_GLOBAL_VARIABLE_TOTAL_SIZE, sizeof(used_bytes), &used_bytes, 0 );
-    test_error_ret(status,"Failed to query global variable total size",status);
+    status = clGetProgramBuildInfo(program, device,
+                                   CL_PROGRAM_BUILD_GLOBAL_VARIABLE_TOTAL_SIZE,
+                                   sizeof(used_bytes), &used_bytes, 0);
+    test_error_ret(status, "Failed to query global variable total size",
+                   status);
     size_t expected_size = 2 * sizeof(cl_int); // Two ints.
-    if ( used_bytes < expected_size ) {
-        log_error("Error: program query for global variable total size query failed: Expected at least %llu but got %llu\n", (unsigned long long)expected_size, (unsigned long long)used_bytes );
+    if (used_bytes < expected_size)
+    {
+        log_error("Error: program query for global variable total size query "
+                  "failed: Expected at least %llu but got %llu\n",
+                  (unsigned long long)expected_size,
+                  (unsigned long long)used_bytes);
         err |= 1;
     }
 
     // Prepare the data.
     cl_int counter_value = 0;
-    clMemWrapper counter_value_mem( clCreateBuffer( context, CL_MEM_USE_HOST_PTR, sizeof(counter_value), &counter_value, &status ) );
-    test_error_ret(status,"Failed to allocate counter query buffer",status);
+    clMemWrapper counter_value_mem(clCreateBuffer(context, CL_MEM_USE_HOST_PTR,
+                                                  sizeof(counter_value),
+                                                  &counter_value, &status));
+    test_error_ret(status, "Failed to allocate counter query buffer", status);
 
-    status = clSetKernelArg(test_bump,0,sizeof(cl_mem),&counter_value_mem); test_error_ret(status,"set arg",status);
+    status = clSetKernelArg(test_bump, 0, sizeof(cl_mem), &counter_value_mem);
+    test_error_ret(status, "set arg", status);
 
     // Go a few rounds, alternating between the two counters in the kernel.
 
@@ -1700,26 +2144,41 @@
     cl_int expected_counter[2] = { 100, 0 };
 
     const size_t one = 1;
-    for ( int iround = 0; iround < 5 ; iround++ ) { // Must go at least twice around
-        for ( int iwhich = 0; iwhich < 2 ; iwhich++ ) { // Cover both counters
-            status = clSetKernelArg(test_bump,1,sizeof(iwhich),&iwhich); test_error_ret(status,"set arg",status);
-            status = clEnqueueNDRangeKernel(queue,test_bump,1,0,&one,0,0,0,0); test_error_ret(status,"enqueue test_bump",status);
-            status = clFinish(queue); test_error_ret(status,"finish",status);
+    for (int iround = 0; iround < 5; iround++)
+    { // Must go at least twice around
+        for (int iwhich = 0; iwhich < 2; iwhich++)
+        { // Cover both counters
+            status = clSetKernelArg(test_bump, 1, sizeof(iwhich), &iwhich);
+            test_error_ret(status, "set arg", status);
+            status = clEnqueueNDRangeKernel(queue, test_bump, 1, 0, &one, 0, 0,
+                                            0, 0);
+            test_error_ret(status, "enqueue test_bump", status);
+            status = clFinish(queue);
+            test_error_ret(status, "finish", status);
 
-            cl_uchar *counter_value_ptr = (cl_uchar *)clEnqueueMapBuffer(queue, counter_value_mem, CL_TRUE, CL_MAP_READ, 0, sizeof(counter_value), 0, 0, 0, 0);
+            cl_uchar* counter_value_ptr = (cl_uchar*)clEnqueueMapBuffer(
+                queue, counter_value_mem, CL_TRUE, CL_MAP_READ, 0,
+                sizeof(counter_value), 0, 0, 0, 0);
 
-            if ( counter_value != expected_counter[iwhich] ) {
-                log_error("Error: Round %d on counter %d: Expected %d but got %d\n",
-                        iround, iwhich, expected_counter[iwhich], counter_value );
+            if (counter_value != expected_counter[iwhich])
+            {
+                log_error(
+                    "Error: Round %d on counter %d: Expected %d but got %d\n",
+                    iround, iwhich, expected_counter[iwhich], counter_value);
                 err |= 1;
             }
             expected_counter[iwhich]++; // Emulate behaviour of the kernel.
 
-            clEnqueueUnmapMemObject(queue, counter_value_mem, counter_value_ptr, 0, 0, 0);
+            clEnqueueUnmapMemObject(queue, counter_value_mem, counter_value_ptr,
+                                    0, 0, 0);
         }
     }
 
-    if ( CL_SUCCESS == err ) { log_info("OK\n"); FLUSH; }
+    if (CL_SUCCESS == err)
+    {
+        log_info("OK\n");
+        FLUSH;
+    }
 
     return err;
 }
diff --git a/test_conformance/basic/test_sizeof.cpp b/test_conformance/basic/test_sizeof.cpp
index 6b1ddb5..e980ed6 100644
--- a/test_conformance/basic/test_sizeof.cpp
+++ b/test_conformance/basic/test_sizeof.cpp
@@ -35,9 +35,9 @@
         "}\n"
     };
 
-    cl_program  p;
-    cl_kernel   k;
-    cl_mem      m;
+    clProgramWrapper p;
+    clKernelWrapper k;
+    clMemWrapper m;
     cl_uint        temp;
 
 
@@ -51,42 +51,19 @@
     }
     cl_int err = create_single_kernel_helper_with_build_options(
         context, &p, &k, 4, sizeof_kernel_code, "test_sizeof", nullptr);
-    if( err )
-        return err;
+    test_error(err, "Failed to build kernel/program.");
 
     m = clCreateBuffer( context, CL_MEM_WRITE_ONLY | CL_MEM_COPY_HOST_PTR, sizeof( cl_ulong ), size, &err );
-    if( NULL == m )
-    {
-        clReleaseProgram( p );
-        clReleaseKernel( k );
-        log_error("\nclCreateBuffer FAILED\n");
-        return err;
-    }
+    test_error(err, "clCreateBuffer failed.");
 
     err = clSetKernelArg( k, 0, sizeof( cl_mem ), &m );
-    if( err )
-    {
-        clReleaseProgram( p );
-        clReleaseKernel( k );
-        clReleaseMemObject( m );
-        log_error("\nclSetKernelArg FAILED\n");
-        return err;
-    }
+    test_error(err, "clSetKernelArg failed.");
 
     err = clEnqueueTask( queue, k, 0, NULL, NULL );
-    clReleaseProgram( p );
-    clReleaseKernel( k );
-    if( err )
-    {
-        clReleaseMemObject( m );
-        log_error( "\nclEnqueueTask FAILED\n" );
-        return err;
-    }
+    test_error(err, "clEnqueueTask failed.");
 
     err = clEnqueueReadBuffer( queue, m, CL_TRUE, 0, sizeof( cl_uint ), &temp, 0, NULL, NULL );
-    clReleaseMemObject( m );
-    if( err )
-        log_error( "\nclEnqueueReadBuffer FAILED\n" );
+    test_error(err, "clEnqueueReadBuffer failed.");
 
     *size = (cl_ulong) temp;
 
diff --git a/test_conformance/compiler/test_compiler_defines_for_extensions.cpp b/test_conformance/compiler/test_compiler_defines_for_extensions.cpp
index 94657d6..b95b0f5 100644
--- a/test_conformance/compiler/test_compiler_defines_for_extensions.cpp
+++ b/test_conformance/compiler/test_compiler_defines_for_extensions.cpp
@@ -79,11 +79,13 @@
     "cl_khr_spirv_linkonce_odr",
     "cl_khr_semaphore",
     "cl_khr_external_semaphore",
-    "cl_khr_external_semaphore_opaque_fd",
+    "cl_khr_external_semaphore_win32",
     "cl_khr_external_semaphore_sync_fd",
-    "cl_khr_command_buffer",
+    "cl_khr_external_semaphore_opaque_fd",
     "cl_khr_external_memory",
+    "cl_khr_external_memory_win32",
     "cl_khr_external_memory_opaque_fd",
+    "cl_khr_command_buffer",
     "cl_khr_command_buffer_mutable_dispatch",
 };
 
diff --git a/test_conformance/contractions/contractions.cpp b/test_conformance/contractions/contractions.cpp
index dddebb4..474fd36 100644
--- a/test_conformance/contractions/contractions.cpp
+++ b/test_conformance/contractions/contractions.cpp
@@ -1,6 +1,6 @@
 //
 // Copyright (c) 2017 The Khronos Group Inc.
-// 
+//
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -434,7 +434,6 @@
             gArgCount++;
         }
     }
-    vlog( "\n\nTest binary built %s %s\n", __DATE__, __TIME__ );
 
     PrintArch();
 
diff --git a/test_conformance/conversions/test_conversions.cpp b/test_conformance/conversions/test_conversions.cpp
index 788af99..2b18b92 100644
--- a/test_conformance/conversions/test_conversions.cpp
+++ b/test_conformance/conversions/test_conversions.cpp
@@ -1,6 +1,6 @@
 //
 // Copyright (c) 2017 The Khronos Group Inc.
-// 
+//
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -343,7 +343,7 @@
 static int ParseArgs( int argc, const char **argv )
 {
     int i;
-    argList = (const char **)calloc( argc - 1, sizeof( char*) );
+    argList = (const char **)calloc(argc, sizeof(char *));
     argCount = 0;
 
     if( NULL == argList && argc > 1 )
@@ -484,8 +484,6 @@
 
     vlog( "\n" );
 
-    vlog( "Test binary built %s %s\n", __DATE__, __TIME__ );
-
     PrintArch();
 
     if( gWimpyMode )
diff --git a/test_conformance/half/main.cpp b/test_conformance/half/main.cpp
index 6600cc5..6bc7db9 100644
--- a/test_conformance/half/main.cpp
+++ b/test_conformance/half/main.cpp
@@ -1,6 +1,6 @@
 //
 // Copyright (c) 2017 The Khronos Group Inc.
-// 
+//
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -131,8 +131,7 @@
 static int ParseArgs( int argc, const char **argv )
 {
     int i;
-    argList = (const char **)calloc( argc - 1, sizeof( char*) );
-
+    argList = (const char **)calloc(argc, sizeof(char *));
     if( NULL == argList )
     {
         vlog_error( "Failed to allocate memory for argList.\n" );
@@ -222,7 +221,6 @@
       gWimpyMode = 1;
     }
 
-    vlog( "Test binary built %s %s\n", __DATE__, __TIME__ );
     PrintArch();
     if( gWimpyMode )
     {
@@ -248,4 +246,3 @@
         vlog("\t\t%s\n", test_list[i].name );
     }
 }
-
diff --git a/test_conformance/images/clCopyImage/test_copy_1D.cpp b/test_conformance/images/clCopyImage/test_copy_1D.cpp
index 2c996c7..0f6f3ce 100644
--- a/test_conformance/images/clCopyImage/test_copy_1D.cpp
+++ b/test_conformance/images/clCopyImage/test_copy_1D.cpp
@@ -113,6 +113,7 @@
 
     if (memSize > (cl_ulong)SIZE_MAX) {
         memSize = (cl_ulong)SIZE_MAX;
+        maxAllocSize = (cl_ulong)SIZE_MAX;
     }
 
     if( gTestSmallImages )
diff --git a/test_conformance/images/clCopyImage/test_copy_1D_array.cpp b/test_conformance/images/clCopyImage/test_copy_1D_array.cpp
index 0b61693..f0b610b 100644
--- a/test_conformance/images/clCopyImage/test_copy_1D_array.cpp
+++ b/test_conformance/images/clCopyImage/test_copy_1D_array.cpp
@@ -118,6 +118,7 @@
 
     if (memSize > (cl_ulong)SIZE_MAX) {
         memSize = (cl_ulong)SIZE_MAX;
+        maxAllocSize = (cl_ulong)SIZE_MAX;
     }
 
     if( gTestSmallImages )
diff --git a/test_conformance/images/clCopyImage/test_copy_2D.cpp b/test_conformance/images/clCopyImage/test_copy_2D.cpp
index 1a69a1f..448b47f 100644
--- a/test_conformance/images/clCopyImage/test_copy_2D.cpp
+++ b/test_conformance/images/clCopyImage/test_copy_2D.cpp
@@ -125,6 +125,7 @@
 
     if (memSize > (cl_ulong)SIZE_MAX) {
         memSize = (cl_ulong)SIZE_MAX;
+        maxAllocSize = (cl_ulong)SIZE_MAX;
     }
 
     if( gTestSmallImages )
diff --git a/test_conformance/images/clCopyImage/test_copy_2D_2D_array.cpp b/test_conformance/images/clCopyImage/test_copy_2D_2D_array.cpp
index eb6dd55..1819d87 100644
--- a/test_conformance/images/clCopyImage/test_copy_2D_2D_array.cpp
+++ b/test_conformance/images/clCopyImage/test_copy_2D_2D_array.cpp
@@ -224,6 +224,7 @@
 
     if (memSize > (cl_ulong)SIZE_MAX) {
       memSize = (cl_ulong)SIZE_MAX;
+      maxAllocSize = (cl_ulong)SIZE_MAX;
     }
 
     if( gTestSmallImages )
diff --git a/test_conformance/images/clCopyImage/test_copy_2D_3D.cpp b/test_conformance/images/clCopyImage/test_copy_2D_3D.cpp
index 8a56c95..4ab6b42 100644
--- a/test_conformance/images/clCopyImage/test_copy_2D_3D.cpp
+++ b/test_conformance/images/clCopyImage/test_copy_2D_3D.cpp
@@ -230,6 +230,7 @@
 
     if (memSize > (cl_ulong)SIZE_MAX) {
       memSize = (cl_ulong)SIZE_MAX;
+      maxAllocSize = (cl_ulong)SIZE_MAX;
     }
 
     if( gTestSmallImages )
diff --git a/test_conformance/images/clCopyImage/test_copy_2D_array.cpp b/test_conformance/images/clCopyImage/test_copy_2D_array.cpp
index 6327ba5..3376bf9 100644
--- a/test_conformance/images/clCopyImage/test_copy_2D_array.cpp
+++ b/test_conformance/images/clCopyImage/test_copy_2D_array.cpp
@@ -71,6 +71,7 @@
 
     if (memSize > (cl_ulong)SIZE_MAX) {
       memSize = (cl_ulong)SIZE_MAX;
+      maxAllocSize = (cl_ulong)SIZE_MAX;
     }
 
     if( gTestSmallImages )
diff --git a/test_conformance/images/clCopyImage/test_copy_3D.cpp b/test_conformance/images/clCopyImage/test_copy_3D.cpp
index da6731d..cdfdcce 100644
--- a/test_conformance/images/clCopyImage/test_copy_3D.cpp
+++ b/test_conformance/images/clCopyImage/test_copy_3D.cpp
@@ -57,6 +57,7 @@
 
     if (memSize > (cl_ulong)SIZE_MAX) {
       memSize = (cl_ulong)SIZE_MAX;
+      maxAllocSize = (cl_ulong)SIZE_MAX;
     }
 
     if( gTestSmallImages )
diff --git a/test_conformance/images/clCopyImage/test_copy_3D_2D_array.cpp b/test_conformance/images/clCopyImage/test_copy_3D_2D_array.cpp
index c098f64..1da1e47 100644
--- a/test_conformance/images/clCopyImage/test_copy_3D_2D_array.cpp
+++ b/test_conformance/images/clCopyImage/test_copy_3D_2D_array.cpp
@@ -251,6 +251,7 @@
 
     if (memSize > (cl_ulong)SIZE_MAX) {
       memSize = (cl_ulong)SIZE_MAX;
+      maxAllocSize = (cl_ulong)SIZE_MAX;
     }
 
     if( gTestSmallImages )
diff --git a/test_conformance/images/clFillImage/test_fill_1D.cpp b/test_conformance/images/clFillImage/test_fill_1D.cpp
index c3f2318..b1550bf 100644
--- a/test_conformance/images/clFillImage/test_fill_1D.cpp
+++ b/test_conformance/images/clFillImage/test_fill_1D.cpp
@@ -80,6 +80,7 @@
 
     if (memSize > (cl_ulong)SIZE_MAX) {
       memSize = (cl_ulong)SIZE_MAX;
+      maxAllocSize = (cl_ulong)SIZE_MAX;
     }
 
     if ( gTestSmallImages )
diff --git a/test_conformance/images/clFillImage/test_fill_1D_array.cpp b/test_conformance/images/clFillImage/test_fill_1D_array.cpp
index b4347a4..be32ec6 100644
--- a/test_conformance/images/clFillImage/test_fill_1D_array.cpp
+++ b/test_conformance/images/clFillImage/test_fill_1D_array.cpp
@@ -83,6 +83,7 @@
 
     if (memSize > (cl_ulong)SIZE_MAX) {
       memSize = (cl_ulong)SIZE_MAX;
+      maxAllocSize = (cl_ulong)SIZE_MAX;
     }
 
     if ( gTestSmallImages )
diff --git a/test_conformance/images/clFillImage/test_fill_2D.cpp b/test_conformance/images/clFillImage/test_fill_2D.cpp
index bb66fc2..e941abc 100644
--- a/test_conformance/images/clFillImage/test_fill_2D.cpp
+++ b/test_conformance/images/clFillImage/test_fill_2D.cpp
@@ -83,6 +83,7 @@
 
     if (memSize > (cl_ulong)SIZE_MAX) {
       memSize = (cl_ulong)SIZE_MAX;
+      maxAllocSize = (cl_ulong)SIZE_MAX;
     }
 
     if ( gTestSmallImages )
diff --git a/test_conformance/images/clFillImage/test_fill_2D_array.cpp b/test_conformance/images/clFillImage/test_fill_2D_array.cpp
index 3265aab..38196cf 100644
--- a/test_conformance/images/clFillImage/test_fill_2D_array.cpp
+++ b/test_conformance/images/clFillImage/test_fill_2D_array.cpp
@@ -87,6 +87,7 @@
 
     if (memSize > (cl_ulong)SIZE_MAX) {
       memSize = (cl_ulong)SIZE_MAX;
+      maxAllocSize = (cl_ulong)SIZE_MAX;
     }
 
     if ( gTestSmallImages )
diff --git a/test_conformance/images/clFillImage/test_fill_3D.cpp b/test_conformance/images/clFillImage/test_fill_3D.cpp
index 9db0ac7..0b8e4e5 100644
--- a/test_conformance/images/clFillImage/test_fill_3D.cpp
+++ b/test_conformance/images/clFillImage/test_fill_3D.cpp
@@ -87,6 +87,7 @@
 
     if (memSize > (cl_ulong)SIZE_MAX) {
       memSize = (cl_ulong)SIZE_MAX;
+      maxAllocSize = (cl_ulong)SIZE_MAX;
     }
 
     if ( gTestSmallImages )
diff --git a/test_conformance/images/clGetInfo/test_1D.cpp b/test_conformance/images/clGetInfo/test_1D.cpp
index 0d704b8..7e04485 100644
--- a/test_conformance/images/clGetInfo/test_1D.cpp
+++ b/test_conformance/images/clGetInfo/test_1D.cpp
@@ -46,6 +46,7 @@
 
   if (memSize > (cl_ulong)SIZE_MAX) {
     memSize = (cl_ulong)SIZE_MAX;
+    maxAllocSize = (cl_ulong)SIZE_MAX;
   }
 
     if( gTestSmallImages )
diff --git a/test_conformance/images/clGetInfo/test_1D_2D_array.cpp b/test_conformance/images/clGetInfo/test_1D_2D_array.cpp
index 447fc7c..c35bf22 100644
--- a/test_conformance/images/clGetInfo/test_1D_2D_array.cpp
+++ b/test_conformance/images/clGetInfo/test_1D_2D_array.cpp
@@ -44,6 +44,7 @@
 
   if (memSize > (cl_ulong)SIZE_MAX) {
     memSize = (cl_ulong)SIZE_MAX;
+    maxAllocSize = (cl_ulong)SIZE_MAX;
   }
 
     if( gTestSmallImages )
@@ -168,6 +169,7 @@
 
   if (memSize > (cl_ulong)SIZE_MAX) {
     memSize = (cl_ulong)SIZE_MAX;
+    maxAllocSize = (cl_ulong)SIZE_MAX;
   }
 
     if( gTestSmallImages )
diff --git a/test_conformance/images/clGetInfo/test_2D.cpp b/test_conformance/images/clGetInfo/test_2D.cpp
index 74a6012..764b186 100644
--- a/test_conformance/images/clGetInfo/test_2D.cpp
+++ b/test_conformance/images/clGetInfo/test_2D.cpp
@@ -285,6 +285,7 @@
 
   if (memSize > (cl_ulong)SIZE_MAX) {
     memSize = (cl_ulong)SIZE_MAX;
+    maxAllocSize = (cl_ulong)SIZE_MAX;
   }
 
     if( gTestSmallImages )
diff --git a/test_conformance/images/clGetInfo/test_3D.cpp b/test_conformance/images/clGetInfo/test_3D.cpp
index af5062e..e126186 100644
--- a/test_conformance/images/clGetInfo/test_3D.cpp
+++ b/test_conformance/images/clGetInfo/test_3D.cpp
@@ -47,6 +47,7 @@
 
   if (memSize > (cl_ulong)SIZE_MAX) {
     memSize = (cl_ulong)SIZE_MAX;
+    maxAllocSize = (cl_ulong)SIZE_MAX;
   }
 
     if( gTestSmallImages )
diff --git a/test_conformance/images/clReadWriteImage/test_read_1D.cpp b/test_conformance/images/clReadWriteImage/test_read_1D.cpp
index 42933c0..2d94dc8 100644
--- a/test_conformance/images/clReadWriteImage/test_read_1D.cpp
+++ b/test_conformance/images/clReadWriteImage/test_read_1D.cpp
@@ -187,6 +187,7 @@
 
   if (memSize > (cl_ulong)SIZE_MAX) {
     memSize = (cl_ulong)SIZE_MAX;
+    maxAllocSize = (cl_ulong)SIZE_MAX;
   }
 
     if( gTestSmallImages )
diff --git a/test_conformance/images/clReadWriteImage/test_read_1D_array.cpp b/test_conformance/images/clReadWriteImage/test_read_1D_array.cpp
index efd2a79..cc90204 100644
--- a/test_conformance/images/clReadWriteImage/test_read_1D_array.cpp
+++ b/test_conformance/images/clReadWriteImage/test_read_1D_array.cpp
@@ -191,6 +191,7 @@
 
     if (memSize > (cl_ulong)SIZE_MAX) {
         memSize = (cl_ulong)SIZE_MAX;
+        maxAllocSize = (cl_ulong)SIZE_MAX;
     }
 
     if( gTestSmallImages )
diff --git a/test_conformance/images/clReadWriteImage/test_read_2D.cpp b/test_conformance/images/clReadWriteImage/test_read_2D.cpp
index b7f8553..b610287 100644
--- a/test_conformance/images/clReadWriteImage/test_read_2D.cpp
+++ b/test_conformance/images/clReadWriteImage/test_read_2D.cpp
@@ -194,6 +194,7 @@
 
     if (memSize > (cl_ulong)SIZE_MAX) {
         memSize = (cl_ulong)SIZE_MAX;
+        maxAllocSize = (cl_ulong)SIZE_MAX;
     }
 
     if( gTestSmallImages )
diff --git a/test_conformance/images/clReadWriteImage/test_read_2D_array.cpp b/test_conformance/images/clReadWriteImage/test_read_2D_array.cpp
index 5889ad6..401b0e4 100644
--- a/test_conformance/images/clReadWriteImage/test_read_2D_array.cpp
+++ b/test_conformance/images/clReadWriteImage/test_read_2D_array.cpp
@@ -169,6 +169,7 @@
 
     if (memSize > (cl_ulong)SIZE_MAX) {
         memSize = (cl_ulong)SIZE_MAX;
+        maxAllocSize = (cl_ulong)SIZE_MAX;
     }
 
     if( gTestSmallImages )
diff --git a/test_conformance/images/clReadWriteImage/test_read_3D.cpp b/test_conformance/images/clReadWriteImage/test_read_3D.cpp
index 6f73f42..ced04ab 100644
--- a/test_conformance/images/clReadWriteImage/test_read_3D.cpp
+++ b/test_conformance/images/clReadWriteImage/test_read_3D.cpp
@@ -174,6 +174,7 @@
 
     if (memSize > (cl_ulong)SIZE_MAX) {
         memSize = (cl_ulong)SIZE_MAX;
+        maxAllocSize = (cl_ulong)SIZE_MAX;
     }
 
     if( gTestSmallImages )
diff --git a/test_conformance/images/kernel_image_methods/test_1D.cpp b/test_conformance/images/kernel_image_methods/test_1D.cpp
index 0059d4c..934e78b 100644
--- a/test_conformance/images/kernel_image_methods/test_1D.cpp
+++ b/test_conformance/images/kernel_image_methods/test_1D.cpp
@@ -171,6 +171,7 @@
 
   if (memSize > (cl_ulong)SIZE_MAX) {
     memSize = (cl_ulong)SIZE_MAX;
+    maxAllocSize = (cl_ulong)SIZE_MAX;
   }
 
     if( gTestSmallImages )
diff --git a/test_conformance/images/kernel_image_methods/test_1D_array.cpp b/test_conformance/images/kernel_image_methods/test_1D_array.cpp
index 797161c..a824f08 100644
--- a/test_conformance/images/kernel_image_methods/test_1D_array.cpp
+++ b/test_conformance/images/kernel_image_methods/test_1D_array.cpp
@@ -181,6 +181,7 @@
 
   if (memSize > (cl_ulong)SIZE_MAX) {
     memSize = (cl_ulong)SIZE_MAX;
+    maxAllocSize = (cl_ulong)SIZE_MAX;
   }
 
     if( gTestSmallImages )
diff --git a/test_conformance/images/kernel_image_methods/test_2D.cpp b/test_conformance/images/kernel_image_methods/test_2D.cpp
index b0d4a70..07f8d92 100644
--- a/test_conformance/images/kernel_image_methods/test_2D.cpp
+++ b/test_conformance/images/kernel_image_methods/test_2D.cpp
@@ -232,6 +232,7 @@
 
   if (memSize > (cl_ulong)SIZE_MAX) {
     memSize = (cl_ulong)SIZE_MAX;
+    maxAllocSize = (cl_ulong)SIZE_MAX;
   }
 
     if( gTestSmallImages )
diff --git a/test_conformance/images/kernel_read_write/CMakeLists.txt b/test_conformance/images/kernel_read_write/CMakeLists.txt
index 6eb5dc7..ccd678c 100644
--- a/test_conformance/images/kernel_read_write/CMakeLists.txt
+++ b/test_conformance/images/kernel_read_write/CMakeLists.txt
@@ -21,13 +21,7 @@
 
 # Make unused variables not fatal in this module; see
 # https://github.com/KhronosGroup/OpenCL-CTS/issues/1484
-if(CMAKE_COMPILER_IS_GNUCC OR "${CMAKE_CXX_COMPILER_ID}" MATCHES "(Apple)?Clang")
-  SET_SOURCE_FILES_PROPERTIES(
-    ${${MODULE_NAME}_SOURCES}
-    PROPERTIES
-    COMPILE_FLAGS "-Wno-error=unused-variable"
-  )
-endif()
+set_gnulike_module_compile_flags("-Wno-error=unused-variable")
 
 include(../../CMakeCommon.txt)
 
diff --git a/test_conformance/images/kernel_read_write/test_cl_ext_image_from_buffer.cpp b/test_conformance/images/kernel_read_write/test_cl_ext_image_from_buffer.cpp
index 1b3b04b..2ce33a1 100644
--- a/test_conformance/images/kernel_read_write/test_cl_ext_image_from_buffer.cpp
+++ b/test_conformance/images/kernel_read_write/test_cl_ext_image_from_buffer.cpp
@@ -73,6 +73,12 @@
         return TEST_SKIPPED_ITSELF;
     }
 
+    if (!is_extension_available(device, "cl_ext_image_requirements_info"))
+    { // Skip (not fail) when the extension is not available on the device.
+        printf("Extension cl_ext_image_requirements_info not available\n");
+        return TEST_SKIPPED_ITSELF;
+    }
+
     std::vector<cl_mem_object_type> imageTypes{
         CL_MEM_OBJECT_IMAGE1D,       CL_MEM_OBJECT_IMAGE2D,
         CL_MEM_OBJECT_IMAGE3D,       CL_MEM_OBJECT_IMAGE1D_BUFFER,
diff --git a/test_conformance/images/kernel_read_write/test_common.cpp b/test_conformance/images/kernel_read_write/test_common.cpp
index 62bd4ab..a22db19 100644
--- a/test_conformance/images/kernel_read_write/test_common.cpp
+++ b/test_conformance/images/kernel_read_write/test_common.cpp
@@ -34,122 +34,210 @@
     return sampler;
 }
 
-void InitFloatCoordsCommon(image_descriptor *imageInfo,
-                           image_sampler_data *imageSampler, float *xOffsets,
-                           float *yOffsets, float *zOffsets, float xfract,
-                           float yfract, float zfract, int normalized_coords,
-                           MTdata d, int lod)
+bool get_image_dimensions(image_descriptor *imageInfo, size_t &width,
+                          size_t &height, size_t &depth)
+{
+    width = imageInfo->width;
+    height = 1; // extent 1 for axes the image type does not have
+    depth = 1;
+    switch (imageInfo->type)
+    {
+        case CL_MEM_OBJECT_IMAGE1D: break;
+        case CL_MEM_OBJECT_IMAGE1D_ARRAY: height = imageInfo->arraySize; break;
+        case CL_MEM_OBJECT_IMAGE2D: height = imageInfo->height; break;
+        case CL_MEM_OBJECT_IMAGE2D_ARRAY:
+            height = imageInfo->height;
+            depth = imageInfo->arraySize;
+            break;
+        case CL_MEM_OBJECT_IMAGE3D:
+            height = imageInfo->height;
+            depth = imageInfo->depth;
+            break;
+        default:
+            log_error("ERROR: Test does not support image type\n");
+            return true; // true reports failure to the caller
+    }
+    return false; // false means width/height/depth are valid
+}
+
+static bool InitFloatCoordsCommon(image_descriptor *imageInfo,
+                                  image_sampler_data *imageSampler,
+                                  float *xOffsets, float *yOffsets,
+                                  float *zOffsets, float xfract, float yfract,
+                                  float zfract, int normalized_coords, MTdata d,
+                                  int lod)
 {
     size_t i = 0;
-    if (gDisableOffsets)
+    size_t width_loop, height_loop, depth_loop;
+    bool error =
+        get_image_dimensions(imageInfo, width_loop, height_loop, depth_loop);
+    if (!error)
     {
-        for (size_t z = 0; z < imageInfo->depth; z++)
+        if (gDisableOffsets)
         {
-            for (size_t y = 0; y < imageInfo->height; y++)
+            for (size_t z = 0; z < depth_loop; z++)
             {
-                for (size_t x = 0; x < imageInfo->width; x++, i++)
+                for (size_t y = 0; y < height_loop; y++)
                 {
-                    xOffsets[i] = (float)(xfract + (double)x);
-                    yOffsets[i] = (float)(yfract + (double)y);
-                    zOffsets[i] = (float)(zfract + (double)z);
-                }
-            }
-        }
-    }
-    else
-    {
-        for (size_t z = 0; z < imageInfo->depth; z++)
-        {
-            for (size_t y = 0; y < imageInfo->height; y++)
-            {
-                for (size_t x = 0; x < imageInfo->width; x++, i++)
-                {
-                    xOffsets[i] =
-                        (float)(xfract
-                                + (double)((int)x
-                                           + random_in_range(-10, 10, d)));
-                    yOffsets[i] =
-                        (float)(yfract
-                                + (double)((int)y
-                                           + random_in_range(-10, 10, d)));
-                    zOffsets[i] =
-                        (float)(zfract
-                                + (double)((int)z
-                                           + random_in_range(-10, 10, d)));
-                }
-            }
-        }
-    }
-
-    if (imageSampler->addressing_mode == CL_ADDRESS_NONE)
-    {
-        i = 0;
-        for (size_t z = 0; z < imageInfo->depth; z++)
-        {
-            for (size_t y = 0; y < imageInfo->height; y++)
-            {
-                for (size_t x = 0; x < imageInfo->width; x++, i++)
-                {
-                    xOffsets[i] = (float)CLAMP((double)xOffsets[i], 0.0,
-                                               (double)imageInfo->width - 1.0);
-                    yOffsets[i] = (float)CLAMP((double)yOffsets[i], 0.0,
-                                               (double)imageInfo->height - 1.0);
-                    zOffsets[i] = (float)CLAMP((double)zOffsets[i], 0.0,
-                                               (double)imageInfo->depth - 1.0);
-                }
-            }
-        }
-    }
-
-    if (normalized_coords || gTestMipmaps)
-    {
-        i = 0;
-        if (lod == 0)
-        {
-            for (size_t z = 0; z < imageInfo->depth; z++)
-            {
-                for (size_t y = 0; y < imageInfo->height; y++)
-                {
-                    for (size_t x = 0; x < imageInfo->width; x++, i++)
+                    for (size_t x = 0; x < width_loop; x++, i++)
                     {
-                        xOffsets[i] = (float)((double)xOffsets[i]
-                                              / (double)imageInfo->width);
-                        yOffsets[i] = (float)((double)yOffsets[i]
-                                              / (double)imageInfo->height);
-                        zOffsets[i] = (float)((double)zOffsets[i]
-                                              / (double)imageInfo->depth);
+                        xOffsets[i] = (float)(xfract + (double)x);
+                        yOffsets[i] = (float)(yfract + (double)y);
+                        zOffsets[i] = (float)(zfract + (double)z);
                     }
                 }
             }
         }
-        else if (gTestMipmaps)
+        else
         {
-            size_t width_lod, height_lod, depth_lod;
-
-            width_lod =
-                (imageInfo->width >> lod) ? (imageInfo->width >> lod) : 1;
-            height_lod =
-                (imageInfo->height >> lod) ? (imageInfo->height >> lod) : 1;
-            depth_lod =
-                (imageInfo->depth >> lod) ? (imageInfo->depth >> lod) : 1;
-
-            for (size_t z = 0; z < depth_lod; z++)
+            for (size_t z = 0; z < depth_loop; z++)
             {
-                for (size_t y = 0; y < height_lod; y++)
+                for (size_t y = 0; y < height_loop; y++)
                 {
-                    for (size_t x = 0; x < width_lod; x++, i++)
+                    for (size_t x = 0; x < width_loop; x++, i++)
                     {
                         xOffsets[i] =
-                            (float)((double)xOffsets[i] / (double)width_lod);
+                            (float)(xfract
+                                    + (double)((int)x
+                                               + random_in_range(-10, 10, d)));
                         yOffsets[i] =
-                            (float)((double)yOffsets[i] / (double)height_lod);
+                            (float)(yfract
+                                    + (double)((int)y
+                                               + random_in_range(-10, 10, d)));
                         zOffsets[i] =
-                            (float)((double)zOffsets[i] / (double)depth_lod);
+                            (float)(zfract
+                                    + (double)((int)z
+                                               + random_in_range(-10, 10, d)));
+                    }
+                }
+            }
+        }
+
+        if (imageSampler->addressing_mode == CL_ADDRESS_NONE)
+        {
+            i = 0;
+            for (size_t z = 0; z < depth_loop; z++)
+            {
+                for (size_t y = 0; y < height_loop; y++)
+                {
+                    for (size_t x = 0; x < width_loop; x++, i++)
+                    {
+                        xOffsets[i] = (float)CLAMP((double)xOffsets[i], 0.0,
+                                                   (double)width_loop - 1.0);
+                        yOffsets[i] = (float)CLAMP((double)yOffsets[i], 0.0,
+                                                   (double)height_loop - 1.0);
+                        zOffsets[i] = (float)CLAMP((double)zOffsets[i], 0.0,
+                                                   (double)depth_loop - 1.0);
+                    }
+                }
+            }
+        }
+
+        if (normalized_coords || gTestMipmaps)
+        {
+            i = 0;
+            if (lod == 0)
+            {
+                for (size_t z = 0; z < depth_loop; z++)
+                {
+                    for (size_t y = 0; y < height_loop; y++)
+                    {
+                        for (size_t x = 0; x < width_loop; x++, i++)
+                        {
+                            xOffsets[i] = (float)((double)xOffsets[i]
+                                                  / (double)width_loop);
+                            if (imageInfo->type != CL_MEM_OBJECT_IMAGE1D_ARRAY)
+                            {
+                                yOffsets[i] = (float)((double)yOffsets[i]
+                                                      / (double)height_loop);
+                            }
+                            if (imageInfo->type != CL_MEM_OBJECT_IMAGE2D_ARRAY)
+                            {
+                                zOffsets[i] = (float)((double)zOffsets[i]
+                                                      / (double)depth_loop);
+                            }
+                        }
+                    }
+                }
+            }
+            else if (gTestMipmaps)
+            {
+                size_t width_lod =
+                    (width_loop >> lod) ? (width_loop >> lod) : 1;
+                size_t height_lod = height_loop;
+                size_t depth_lod = depth_loop;
+                if (imageInfo->type != CL_MEM_OBJECT_IMAGE1D_ARRAY)
+                {
+                    height_lod =
+                        (height_loop >> lod) ? (height_loop >> lod) : 1;
+                }
+                if (imageInfo->type != CL_MEM_OBJECT_IMAGE2D_ARRAY)
+                {
+                    depth_lod = (depth_loop >> lod) ? (depth_loop >> lod) : 1;
+                }
+
+                for (size_t z = 0; z < depth_lod; z++)
+                {
+                    for (size_t y = 0; y < height_lod; y++)
+                    {
+                        for (size_t x = 0; x < width_lod; x++, i++)
+                        {
+                            xOffsets[i] = (float)((double)xOffsets[i]
+                                                  / (double)width_lod);
+                            if (imageInfo->type != CL_MEM_OBJECT_IMAGE1D_ARRAY)
+                            {
+                                yOffsets[i] = (float)((double)yOffsets[i]
+                                                      / (double)height_lod);
+                            }
+                            if (imageInfo->type != CL_MEM_OBJECT_IMAGE2D_ARRAY)
+                            {
+                                zOffsets[i] = (float)((double)zOffsets[i]
+                                                      / (double)depth_lod);
+                            }
+                        }
                     }
                 }
             }
         }
     }
+    return error;
+}
+
+// Creates the image described by imageInfo; only 3D is implemented so far.
+// Other types log, set *error to CL_INVALID_VALUE and return nullptr.
+cl_mem create_image_of_type(cl_context context, cl_mem_flags mem_flags,
+                            image_descriptor *imageInfo, size_t row_pitch,
+                            size_t slice_pitch, void *host_ptr, cl_int *error)
+{
+    switch (imageInfo->type)
+    {
+        case CL_MEM_OBJECT_IMAGE3D:
+            return create_image_3d(context, mem_flags, imageInfo->format,
+                                   imageInfo->width, imageInfo->height,
+                                   imageInfo->depth, row_pitch, slice_pitch,
+                                   host_ptr, error);
+        default:
+            log_error("Implementation is incomplete, only 3D images are "
+                      "supported so far\n");
+            if (error != nullptr) *error = CL_INVALID_VALUE;
+            return nullptr;
+    }
+}
+
+// Number of pixels in an image of the given extents; logs and returns 0 for
+// image types that are not handled yet (array_size is currently unused).
+static size_t get_image_num_pixels(image_descriptor *imageInfo, size_t width,
+                                   size_t height, size_t depth,
+                                   size_t array_size)
+{
+    switch (imageInfo->type)
+    {
+        case CL_MEM_OBJECT_IMAGE3D: return width * height * depth;
+        default:
+            log_error("Implementation is incomplete, only 3D images are "
+                      "supported so far\n");
+            return 0;
+    }
 }
 
 int test_read_image(cl_context context, cl_command_queue queue,
@@ -161,6 +249,17 @@
     size_t threads[3];
     static int initHalf = 0;
 
+    size_t image_size =
+        get_image_num_pixels(imageInfo, imageInfo->width, imageInfo->height,
+                             imageInfo->depth, imageInfo->arraySize);
+    test_assert_error(0 != image_size, "Invalid image size");
+    size_t width_size, height_size, depth_size;
+    if (get_image_dimensions(imageInfo, width_size, height_size, depth_size))
+    {
+        log_error("ERROR: invalid image dimensions\n");
+        return CL_INVALID_VALUE;
+    }
+
     cl_mem_flags image_read_write_flags = CL_MEM_READ_ONLY;
 
     clMemWrapper xOffsets, yOffsets, zOffsets, results;
@@ -169,14 +268,11 @@
 
     // Create offset data
     BufferOwningPtr<cl_float> xOffsetValues(
-        malloc(sizeof(cl_float) * imageInfo->width * imageInfo->height
-               * imageInfo->depth));
+        malloc(sizeof(cl_float) * image_size));
     BufferOwningPtr<cl_float> yOffsetValues(
-        malloc(sizeof(cl_float) * imageInfo->width * imageInfo->height
-               * imageInfo->depth));
+        malloc(sizeof(cl_float) * image_size));
     BufferOwningPtr<cl_float> zOffsetValues(
-        malloc(sizeof(cl_float) * imageInfo->width * imageInfo->height
-               * imageInfo->depth));
+        malloc(sizeof(cl_float) * image_size));
 
     if (imageInfo->format->image_channel_data_type == CL_HALF_FLOAT)
         if (DetectFloatToHalfRoundingMode(queue)) return 1;
@@ -207,26 +303,27 @@
         {
             generate_random_image_data(imageInfo,
                                        maxImageUseHostPtrBackingStore, d);
-            unprotImage = create_image_3d(
+            unprotImage = create_image_of_type(
                 context, image_read_write_flags | CL_MEM_USE_HOST_PTR,
-                imageInfo->format, imageInfo->width, imageInfo->height,
-                imageInfo->depth, (gEnablePitch ? imageInfo->rowPitch : 0),
+                imageInfo, (gEnablePitch ? imageInfo->rowPitch : 0),
                 (gEnablePitch ? imageInfo->slicePitch : 0),
                 maxImageUseHostPtrBackingStore, &error);
         }
         else
         {
-            error = protImage.Create(context, image_read_write_flags,
-                                     imageInfo->format, imageInfo->width,
-                                     imageInfo->height, imageInfo->depth);
+            error = protImage.Create(context, imageInfo->type,
+                                     image_read_write_flags, imageInfo->format,
+                                     imageInfo->width, imageInfo->height,
+                                     imageInfo->depth, imageInfo->arraySize);
         }
         if (error != CL_SUCCESS)
         {
-            log_error("ERROR: Unable to create 3D image of size %d x %d x %d "
+            log_error("ERROR: Unable to create image of size %d x %d x %d x %d "
                       "(pitch %d, %d ) (%s)",
                       (int)imageInfo->width, (int)imageInfo->height,
-                      (int)imageInfo->depth, (int)imageInfo->rowPitch,
-                      (int)imageInfo->slicePitch, IGetErrorString(error));
+                      (int)imageInfo->depth, (int)imageInfo->arraySize,
+                      (int)imageInfo->rowPitch, (int)imageInfo->slicePitch,
+                      IGetErrorString(error));
             return error;
         }
         if (gTestMaxImages)
@@ -238,18 +335,18 @@
     {
         // Don't use clEnqueueWriteImage; just use copy host ptr to get the data
         // in
-        unprotImage = create_image_3d(
-            context, image_read_write_flags | CL_MEM_COPY_HOST_PTR,
-            imageInfo->format, imageInfo->width, imageInfo->height,
-            imageInfo->depth, (gEnablePitch ? imageInfo->rowPitch : 0),
+        unprotImage = create_image_of_type(
+            context, image_read_write_flags | CL_MEM_COPY_HOST_PTR, imageInfo,
+            (gEnablePitch ? imageInfo->rowPitch : 0),
             (gEnablePitch ? imageInfo->slicePitch : 0), imageValues, &error);
         if (error != CL_SUCCESS)
         {
-            log_error("ERROR: Unable to create 3D image of size %d x %d x %d "
+            log_error("ERROR: Unable to create image of size %d x %d x %d x %d "
                       "(pitch %d, %d ) (%s)",
                       (int)imageInfo->width, (int)imageInfo->height,
-                      (int)imageInfo->depth, (int)imageInfo->rowPitch,
-                      (int)imageInfo->slicePitch, IGetErrorString(error));
+                      (int)imageInfo->depth, (int)imageInfo->arraySize,
+                      (int)imageInfo->rowPitch, (int)imageInfo->slicePitch,
+                      IGetErrorString(error));
             return error;
         }
         image = unprotImage;
@@ -261,19 +358,19 @@
         // specified, so we just do the same thing either way
         if (!gTestMipmaps)
         {
-            unprotImage = create_image_3d(
-                context, image_read_write_flags | gMemFlagsToUse,
-                imageInfo->format, imageInfo->width, imageInfo->height,
-                imageInfo->depth, (gEnablePitch ? imageInfo->rowPitch : 0),
+            unprotImage = create_image_of_type(
+                context, image_read_write_flags | gMemFlagsToUse, imageInfo,
+                (gEnablePitch ? imageInfo->rowPitch : 0),
                 (gEnablePitch ? imageInfo->slicePitch : 0), imageValues,
                 &error);
             if (error != CL_SUCCESS)
             {
-                log_error("ERROR: Unable to create 3D image of size %d x %d x "
-                          "%d (pitch %d, %d ) (%s)",
+                log_error("ERROR: Unable to create image of size %d x %d x "
+                          "%d x %d (pitch %d, %d ) (%s)",
                           (int)imageInfo->width, (int)imageInfo->height,
-                          (int)imageInfo->depth, (int)imageInfo->rowPitch,
-                          (int)imageInfo->slicePitch, IGetErrorString(error));
+                          (int)imageInfo->depth, (int)imageInfo->arraySize,
+                          (int)imageInfo->rowPitch, (int)imageInfo->slicePitch,
+                          IGetErrorString(error));
                 return error;
             }
             image = unprotImage;
@@ -281,10 +378,11 @@
         else
         {
             cl_image_desc image_desc = { 0 };
-            image_desc.image_type = CL_MEM_OBJECT_IMAGE3D;
+            image_desc.image_type = imageInfo->type;
             image_desc.image_width = imageInfo->width;
             image_desc.image_height = imageInfo->height;
             image_desc.image_depth = imageInfo->depth;
+            image_desc.image_array_size = imageInfo->arraySize;
             image_desc.num_mip_levels = imageInfo->num_mip_levels;
 
 
@@ -293,23 +391,24 @@
                               imageInfo->format, &image_desc, NULL, &error);
             if (error != CL_SUCCESS)
             {
-                log_error("ERROR: Unable to create %d level mipmapped 3D image "
-                          "of size %d x %d x %d (pitch %d, %d ) (%s)",
+                log_error("ERROR: Unable to create %d level mipmapped image "
+                          "of size %d x %d x %d x %d (pitch %d, %d ) (%s)",
                           (int)imageInfo->num_mip_levels, (int)imageInfo->width,
                           (int)imageInfo->height, (int)imageInfo->depth,
-                          (int)imageInfo->rowPitch, (int)imageInfo->slicePitch,
-                          IGetErrorString(error));
+                          (int)imageInfo->arraySize, (int)imageInfo->rowPitch,
+                          (int)imageInfo->slicePitch, IGetErrorString(error));
                 return error;
             }
             image = unprotImage;
         }
     }
 
+    test_assert_error(nullptr != image, "Image creation failed");
+
     if (gMemFlagsToUse != CL_MEM_COPY_HOST_PTR)
     {
         size_t origin[4] = { 0, 0, 0, 0 };
-        size_t region[3] = { imageInfo->width, imageInfo->height,
-                             imageInfo->depth };
+        size_t region[3] = { width_size, height_size, depth_size };
 
         if (gDebugTrace) log_info(" - Writing image...\n");
 
@@ -324,10 +423,10 @@
 
             if (error != CL_SUCCESS)
             {
-                log_error("ERROR: Unable to write to 3D image of size %d x %d "
-                          "x %d \n",
+                log_error("ERROR: Unable to write to image of size %d x %d "
+                          "x %d x %d\n",
                           (int)imageInfo->width, (int)imageInfo->height,
-                          (int)imageInfo->depth);
+                          (int)imageInfo->depth, (int)imageInfo->arraySize);
                 return error;
             }
         }
@@ -339,17 +438,15 @@
             {
                 origin[3] = i;
                 error = clEnqueueWriteImage(
-                    queue, image, CL_TRUE, origin, region,
-                    /*gEnablePitch ? imageInfo->rowPitch :*/ 0,
-                    /*gEnablePitch ? imageInfo->slicePitch :*/ 0,
+                    queue, image, CL_TRUE, origin, region, 0, 0,
                     ((char *)imageValues + nextLevelOffset), 0, NULL, NULL);
                 if (error != CL_SUCCESS)
                 {
-                    log_error("ERROR: Unable to write to %d level mipmapped 3D "
-                              "image of size %d x %d x %d\n",
+                    log_error("ERROR: Unable to write to %d level mipmapped "
+                              "image of size %d x %d x %d x %d\n",
                               (int)imageInfo->num_mip_levels,
                               (int)imageInfo->width, (int)imageInfo->height,
-                              (int)imageInfo->depth);
+                              (int)imageInfo->depth, (int)imageInfo->arraySize);
                     return error;
                 }
                 nextLevelOffset += region[0] * region[1] * region[2]
@@ -362,26 +459,21 @@
         }
     }
 
-    xOffsets = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
-                              sizeof(cl_float) * imageInfo->width
-                                  * imageInfo->height * imageInfo->depth,
-                              xOffsetValues, &error);
+    xOffsets =
+        clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
+                       sizeof(cl_float) * image_size, xOffsetValues, &error);
     test_error(error, "Unable to create x offset buffer");
-    yOffsets = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
-                              sizeof(cl_float) * imageInfo->width
-                                  * imageInfo->height * imageInfo->depth,
-                              yOffsetValues, &error);
+    yOffsets =
+        clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
+                       sizeof(cl_float) * image_size, yOffsetValues, &error);
     test_error(error, "Unable to create y offset buffer");
-    zOffsets = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
-                              sizeof(cl_float) * imageInfo->width
-                                  * imageInfo->height * imageInfo->depth,
-                              zOffsetValues, &error);
+    zOffsets =
+        clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
+                       sizeof(cl_float) * image_size, zOffsetValues, &error);
-    test_error(error, "Unable to create y offset buffer");
+    test_error(error, "Unable to create z offset buffer");
-    results =
-        clCreateBuffer(context, CL_MEM_READ_WRITE,
-                       get_explicit_type_size(outputType) * 4 * imageInfo->width
-                           * imageInfo->height * imageInfo->depth,
-                       NULL, &error);
+    results = clCreateBuffer(
+        context, CL_MEM_READ_WRITE,
+        get_explicit_type_size(outputType) * 4 * image_size, NULL, &error);
     test_error(error, "Unable to create result buffer");
 
     // Create sampler to use
@@ -444,16 +536,19 @@
     }
 
     int nextLevelOffset = 0;
-    size_t width_lod = imageInfo->width, height_lod = imageInfo->height,
-           depth_lod = imageInfo->depth;
+    size_t width_lod = width_size, height_lod = height_size,
+           depth_lod = depth_size;
 
     // Loop over all mipmap levels, if we are testing mipmapped images.
     for (int lod = 0; (gTestMipmaps && lod < imageInfo->num_mip_levels)
          || (!gTestMipmaps && lod < 1);
          lod++)
     {
-        size_t resultValuesSize = width_lod * height_lod * depth_lod
-            * get_explicit_type_size(outputType) * 4;
+        size_t image_lod_size = get_image_num_pixels(
+            imageInfo, width_lod, height_lod, depth_lod, imageInfo->arraySize);
+        test_assert_error(0 != image_lod_size, "Invalid image size");
+        size_t resultValuesSize =
+            image_lod_size * get_explicit_type_size(outputType) * 4;
         BufferOwningPtr<char> resultValues(malloc(resultValuesSize));
         float lod_float = (float)lod;
         if (gTestMipmaps)
@@ -469,30 +564,25 @@
             float offset = float_offsets[q % float_offset_count];
 
             // Init the coordinates
-            InitFloatCoordsCommon(imageInfo, imageSampler, xOffsetValues,
-                                  yOffsetValues, zOffsetValues,
-                                  q >= float_offset_count ? -offset : offset,
-                                  q >= float_offset_count ? offset : -offset,
-                                  q >= float_offset_count ? -offset : offset,
-                                  imageSampler->normalized_coords, d, lod);
+            error = InitFloatCoordsCommon(
+                imageInfo, imageSampler, xOffsetValues, yOffsetValues,
+                zOffsetValues, q >= float_offset_count ? -offset : offset,
+                q >= float_offset_count ? offset : -offset,
+                q >= float_offset_count ? -offset : offset,
+                imageSampler->normalized_coords, d, lod);
+            test_error(error, "Unable to initialise coordinates");
 
-            error =
-                clEnqueueWriteBuffer(queue, xOffsets, CL_TRUE, 0,
-                                     sizeof(cl_float) * imageInfo->height
-                                         * imageInfo->width * imageInfo->depth,
-                                     xOffsetValues, 0, NULL, NULL);
+            error = clEnqueueWriteBuffer(queue, xOffsets, CL_TRUE, 0,
+                                         sizeof(cl_float) * image_size,
+                                         xOffsetValues, 0, NULL, NULL);
             test_error(error, "Unable to write x offsets");
-            error =
-                clEnqueueWriteBuffer(queue, yOffsets, CL_TRUE, 0,
-                                     sizeof(cl_float) * imageInfo->height
-                                         * imageInfo->width * imageInfo->depth,
-                                     yOffsetValues, 0, NULL, NULL);
+            error = clEnqueueWriteBuffer(queue, yOffsets, CL_TRUE, 0,
+                                         sizeof(cl_float) * image_size,
+                                         yOffsetValues, 0, NULL, NULL);
             test_error(error, "Unable to write y offsets");
-            error =
-                clEnqueueWriteBuffer(queue, zOffsets, CL_TRUE, 0,
-                                     sizeof(cl_float) * imageInfo->height
-                                         * imageInfo->width * imageInfo->depth,
-                                     zOffsetValues, 0, NULL, NULL);
+            error = clEnqueueWriteBuffer(queue, zOffsets, CL_TRUE, 0,
+                                         sizeof(cl_float) * image_size,
+                                         zOffsetValues, 0, NULL, NULL);
             test_error(error, "Unable to write z offsets");
 
 
@@ -511,11 +601,10 @@
             test_error(error, "Unable to run kernel");
 
             // Get results
-            error = clEnqueueReadBuffer(queue, results, CL_TRUE, 0,
-                                        width_lod * height_lod * depth_lod
-                                            * get_explicit_type_size(outputType)
-                                            * 4,
-                                        resultValues, 0, NULL, NULL);
+            error = clEnqueueReadBuffer(
+                queue, results, CL_TRUE, 0,
+                image_lod_size * get_explicit_type_size(outputType) * 4,
+                resultValues, 0, NULL, NULL);
             test_error(error, "Unable to read results from kernel");
             if (gDebugTrace) log_info("    results read\n");
 
@@ -1540,8 +1629,14 @@
             nextLevelOffset += width_lod * height_lod * depth_lod
                 * get_pixel_size(imageInfo->format);
             width_lod = (width_lod >> 1) ? (width_lod >> 1) : 1;
-            height_lod = (height_lod >> 1) ? (height_lod >> 1) : 1;
-            depth_lod = (depth_lod >> 1) ? (depth_lod >> 1) : 1;
+            if (imageInfo->type != CL_MEM_OBJECT_IMAGE1D_ARRAY)
+            {
+                height_lod = (height_lod >> 1) ? (height_lod >> 1) : 1;
+            }
+            if (imageInfo->type != CL_MEM_OBJECT_IMAGE2D_ARRAY)
+            {
+                depth_lod = (depth_lod >> 1) ? (depth_lod >> 1) : 1;
+            }
         }
     }
 
diff --git a/test_conformance/images/kernel_read_write/test_common.h b/test_conformance/images/kernel_read_write/test_common.h
index 656c41f..fc95bee 100644
--- a/test_conformance/images/kernel_read_write/test_common.h
+++ b/test_conformance/images/kernel_read_write/test_common.h
@@ -42,12 +42,8 @@
                            bool useFloatCoords, ExplicitType outputType,
                            MTdata d);
 
-extern void InitFloatCoordsCommon(image_descriptor *imageInfo,
-                                  image_sampler_data *imageSampler,
-                                  float *xOffsets, float *yOffsets,
-                                  float *zOffsets, float xfract, float yfract,
-                                  float zfract, int normalized_coords, MTdata d,
-                                  int lod);
+extern bool get_image_dimensions(image_descriptor *imageInfo, size_t &width,
+                                 size_t &height, size_t &depth);
 
 template <class T>
 int determine_validation_error_offset(
@@ -63,8 +59,12 @@
     bool clampingErr = false, clamped = false, otherClampingBug = false;
     int clampedX, clampedY, clampedZ;
 
-    size_t imageWidth = imageInfo->width, imageHeight = imageInfo->height,
-           imageDepth = imageInfo->depth;
+    size_t imageWidth, imageHeight, imageDepth;
+    if (get_image_dimensions(imageInfo, imageWidth, imageHeight, imageDepth))
+    {
+        log_error("ERROR: invalid image dimensions\n");
+        return TEST_FAIL;
+    }
 
     clamped = get_integer_coords_offset(x, y, z, xAddressOffset, yAddressOffset,
                                         zAddressOffset, imageWidth, imageHeight,
@@ -147,82 +147,67 @@
     }
     if (!clampingErr)
     {
-        /*        if( clamped && ( (int)x + (int)xOffsetValues[ j ] < 0 ||
-         (int)y + (int)yOffsetValues[ j ] < 0 ) )
-         {
-         log_error( "NEGATIVE COORDINATE ERROR\n" );
-         return -1;
-         }
-         */
-        if (true) // gExtraValidateInfo )
+        if (printAsFloat)
         {
-            if (printAsFloat)
-            {
-                log_error("Sample %ld: coord {%f(%a),%f(%a),%f(%a)} did not "
-                          "validate!\n\tExpected (%g,%g,%g,%g),\n\t     got "
-                          "(%g,%g,%g,%g), error of %g\n",
-                          j, x, x, y, y, z, z, (float)expected[0],
-                          (float)expected[1], (float)expected[2],
-                          (float)expected[3], (float)resultPtr[0],
-                          (float)resultPtr[1], (float)resultPtr[2],
-                          (float)resultPtr[3], error);
-            }
-            else
-            {
-                log_error("Sample %ld: coord {%f(%a),%f(%a),%f(%a)} did not "
-                          "validate!\n\tExpected (%x,%x,%x,%x),\n\t     got "
-                          "(%x,%x,%x,%x)\n",
-                          j, x, x, y, y, z, z, (int)expected[0],
-                          (int)expected[1], (int)expected[2], (int)expected[3],
-                          (int)resultPtr[0], (int)resultPtr[1],
-                          (int)resultPtr[2], (int)resultPtr[3]);
-            }
-            log_error(
-                "Integer coords resolve to %d,%d,%d   with img size %d,%d,%d\n",
-                clampedX, clampedY, clampedZ, (int)imageWidth, (int)imageHeight,
-                (int)imageDepth);
+            log_error("Sample %ld: coord {%f(%a),%f(%a),%f(%a)} did not "
+                      "validate!\n\tExpected (%g,%g,%g,%g),\n\t     got "
+                      "(%g,%g,%g,%g), error of %g\n",
+                      j, x, x, y, y, z, z, (float)expected[0],
+                      (float)expected[1], (float)expected[2],
+                      (float)expected[3], (float)resultPtr[0],
+                      (float)resultPtr[1], (float)resultPtr[2],
+                      (float)resultPtr[3], error);
+        }
+        else
+        {
+            log_error("Sample %ld: coord {%f(%a),%f(%a),%f(%a)} did not "
+                      "validate!\n\tExpected (%x,%x,%x,%x),\n\t     got "
+                      "(%x,%x,%x,%x)\n",
+                      j, x, x, y, y, z, z, (int)expected[0], (int)expected[1],
+                      (int)expected[2], (int)expected[3], (int)resultPtr[0],
+                      (int)resultPtr[1], (int)resultPtr[2], (int)resultPtr[3]);
+        }
+        log_error(
+            "Integer coords resolve to %d,%d,%d   with img size %d,%d,%d\n",
+            clampedX, clampedY, clampedZ, (int)imageWidth, (int)imageHeight,
+            (int)imageDepth);
 
-            if (printAsFloat && gExtraValidateInfo)
+        if (printAsFloat && gExtraValidateInfo)
+        {
+            log_error("\nNearby values:\n");
+            for (int zOff = -1; zOff <= 1; zOff++)
             {
-                log_error("\nNearby values:\n");
-                for (int zOff = -1; zOff <= 1; zOff++)
+                for (int yOff = -1; yOff <= 1; yOff++)
                 {
-                    for (int yOff = -1; yOff <= 1; yOff++)
-                    {
-                        float top[4], real[4], bot[4];
-                        read_image_pixel_float(imagePtr, imageInfo,
-                                               clampedX - 1, clampedY + yOff,
-                                               clampedZ + zOff, top);
-                        read_image_pixel_float(imagePtr, imageInfo, clampedX,
-                                               clampedY + yOff, clampedZ + zOff,
-                                               real);
-                        read_image_pixel_float(imagePtr, imageInfo,
-                                               clampedX + 1, clampedY + yOff,
-                                               clampedZ + zOff, bot);
-                        log_error("\t(%g,%g,%g,%g)", top[0], top[1], top[2],
-                                  top[3]);
-                        log_error(" (%g,%g,%g,%g)", real[0], real[1], real[2],
-                                  real[3]);
-                        log_error(" (%g,%g,%g,%g)\n", bot[0], bot[1], bot[2],
-                                  bot[3]);
-                    }
+                    float top[4], real[4], bot[4];
+                    read_image_pixel_float(imagePtr, imageInfo, clampedX - 1,
+                                           clampedY + yOff, clampedZ + zOff,
+                                           top);
+                    read_image_pixel_float(imagePtr, imageInfo, clampedX,
+                                           clampedY + yOff, clampedZ + zOff,
+                                           real);
+                    read_image_pixel_float(imagePtr, imageInfo, clampedX + 1,
+                                           clampedY + yOff, clampedZ + zOff,
+                                           bot);
+                    log_error("\t(%g,%g,%g,%g)", top[0], top[1], top[2],
+                              top[3]);
+                    log_error(" (%g,%g,%g,%g)", real[0], real[1], real[2],
+                              real[3]);
+                    log_error(" (%g,%g,%g,%g)\n", bot[0], bot[1], bot[2],
+                              bot[3]);
                 }
             }
-            //        }
-            //        else
-            //            log_error( "\n" );
-            if (imageSampler->filter_mode != CL_FILTER_LINEAR)
-            {
-                if (found)
-                    log_error(
-                        "\tValue really found in image at %d,%d,%d (%s)\n",
-                        actualX, actualY, actualZ,
-                        (found > 1) ? "NOT unique!!" : "unique");
-                else
-                    log_error("\tValue not actually found in image\n");
-            }
-            log_error("\n");
         }
+        if (imageSampler->filter_mode != CL_FILTER_LINEAR)
+        {
+            if (found)
+                log_error("\tValue really found in image at %d,%d,%d (%s)\n",
+                          actualX, actualY, actualZ,
+                          (found > 1) ? "NOT unique!!" : "unique");
+            else
+                log_error("\tValue not actually found in image\n");
+        }
+        log_error("\n");
 
         numClamped = -1; // We force the clamped counter to never work
         if ((--numTries) == 0) return -1;
diff --git a/test_conformance/images/samplerlessReads/test_iterations.cpp b/test_conformance/images/samplerlessReads/test_iterations.cpp
index 55eaaf4..e2f89aa 100644
--- a/test_conformance/images/samplerlessReads/test_iterations.cpp
+++ b/test_conformance/images/samplerlessReads/test_iterations.cpp
@@ -215,6 +215,7 @@
 
     if (memSize > (cl_ulong)SIZE_MAX) {
       memSize = (cl_ulong)SIZE_MAX;
+      maxAllocSize = (cl_ulong)SIZE_MAX;
     }
 
     // Determine types
diff --git a/test_conformance/images/samplerlessReads/test_read_1D.cpp b/test_conformance/images/samplerlessReads/test_read_1D.cpp
index aa261b7..6ed9910 100644
--- a/test_conformance/images/samplerlessReads/test_read_1D.cpp
+++ b/test_conformance/images/samplerlessReads/test_read_1D.cpp
@@ -215,6 +215,7 @@
 
     if (memSize > (cl_ulong)SIZE_MAX) {
       memSize = (cl_ulong)SIZE_MAX;
+      maxAllocSize = (cl_ulong)SIZE_MAX;
     }
 
     // Determine types
diff --git a/test_conformance/images/samplerlessReads/test_read_1D_array.cpp b/test_conformance/images/samplerlessReads/test_read_1D_array.cpp
index fb0c263..677eb9f 100644
--- a/test_conformance/images/samplerlessReads/test_read_1D_array.cpp
+++ b/test_conformance/images/samplerlessReads/test_read_1D_array.cpp
@@ -214,6 +214,7 @@
 
     if (memSize > (cl_ulong)SIZE_MAX) {
       memSize = (cl_ulong)SIZE_MAX;
+      maxAllocSize = (cl_ulong)SIZE_MAX;
     }
 
     // Determine types
diff --git a/test_conformance/images/samplerlessReads/test_read_1D_buffer.cpp b/test_conformance/images/samplerlessReads/test_read_1D_buffer.cpp
index 7a3084d..c3a991a 100644
--- a/test_conformance/images/samplerlessReads/test_read_1D_buffer.cpp
+++ b/test_conformance/images/samplerlessReads/test_read_1D_buffer.cpp
@@ -219,6 +219,7 @@
 
     if (memSize > (cl_ulong)SIZE_MAX) {
       memSize = (cl_ulong)SIZE_MAX;
+      maxAllocSize = (cl_ulong)SIZE_MAX;
     }
 
     // note: image_buffer test uses image1D for results validation.
diff --git a/test_conformance/images/samplerlessReads/test_read_2D_array.cpp b/test_conformance/images/samplerlessReads/test_read_2D_array.cpp
index 99f2426..8273f53 100644
--- a/test_conformance/images/samplerlessReads/test_read_2D_array.cpp
+++ b/test_conformance/images/samplerlessReads/test_read_2D_array.cpp
@@ -202,6 +202,7 @@
 
     if (memSize > (cl_ulong)SIZE_MAX) {
       memSize = (cl_ulong)SIZE_MAX;
+      maxAllocSize = (cl_ulong)SIZE_MAX;
     }
 
     // Determine types
diff --git a/test_conformance/images/samplerlessReads/test_read_3D.cpp b/test_conformance/images/samplerlessReads/test_read_3D.cpp
index cf41140..0df46c8 100644
--- a/test_conformance/images/samplerlessReads/test_read_3D.cpp
+++ b/test_conformance/images/samplerlessReads/test_read_3D.cpp
@@ -206,6 +206,7 @@
 
     if (memSize > (cl_ulong)SIZE_MAX) {
       memSize = (cl_ulong)SIZE_MAX;
+      maxAllocSize = (cl_ulong)SIZE_MAX;
     }
 
     // Determine types
diff --git a/test_conformance/math_brute_force/CMakeLists.txt b/test_conformance/math_brute_force/CMakeLists.txt
index 28d2716..3281402 100644
--- a/test_conformance/math_brute_force/CMakeLists.txt
+++ b/test_conformance/math_brute_force/CMakeLists.txt
@@ -9,6 +9,7 @@
     binary_operator_float.cpp
     binary_two_results_i_double.cpp
     binary_two_results_i_float.cpp
+    common.cpp
     common.h
     function_list.cpp
     function_list.h
@@ -40,4 +41,8 @@
     utility.h
 )
 
+# math_brute_force compiles cleanly with -Wall (except for a few remaining
+# warnings), but other tests do not (yet); so enable -Wall locally.
+set_gnulike_module_compile_flags("-Wall -Wno-strict-aliasing -Wno-unknown-pragmas")
+
 include(../CMakeCommon.txt)
diff --git a/test_conformance/math_brute_force/binary_double.cpp b/test_conformance/math_brute_force/binary_double.cpp
index 1b1f7d4..f18d0b9 100644
--- a/test_conformance/math_brute_force/binary_double.cpp
+++ b/test_conformance/math_brute_force/binary_double.cpp
@@ -134,7 +134,7 @@
         maxErrorValue; // position of the max error value (param 1).  Init to 0.
     double maxErrorValue2; // position of the max error value (param 2).  Init
                            // to 0.
-    MTdata d;
+    MTdataHolder d;
 
     // Per thread command queue to improve performance
     clCommandQueueWrapper tQueue;
@@ -630,7 +630,7 @@
     {
         if (gVerboseBruteForce)
         {
-            vlog("base:%14u step:%10u scale:%10zu buf_elements:%10u ulps:%5.3f "
+            vlog("base:%14u step:%10u scale:%10u buf_elements:%10zu ulps:%5.3f "
                  "ThreadCount:%2u\n",
                  base, job->step, job->scale, buffer_elements, job->ulps,
                  job->threadCount);
@@ -691,7 +691,7 @@
         test_info.k[i].resize(test_info.threadCount, nullptr);
     }
 
-    test_info.tinfo.resize(test_info.threadCount, ThreadInfo{});
+    test_info.tinfo.resize(test_info.threadCount);
     for (cl_uint i = 0; i < test_info.threadCount; i++)
     {
         cl_buffer_region region = {
@@ -740,7 +740,7 @@
             goto exit;
         }
 
-        test_info.tinfo[i].d = init_genrand(genrand_int32(d));
+        test_info.tinfo[i].d = MTdataHolder(genrand_int32(d));
     }
 
     // Init the kernels
@@ -792,10 +792,5 @@
         }
     }
 
-    for (auto &threadInfo : test_info.tinfo)
-    {
-        free_mtdata(threadInfo.d);
-    }
-
     return error;
 }
diff --git a/test_conformance/math_brute_force/binary_float.cpp b/test_conformance/math_brute_force/binary_float.cpp
index d229a37..fe1491d 100644
--- a/test_conformance/math_brute_force/binary_float.cpp
+++ b/test_conformance/math_brute_force/binary_float.cpp
@@ -132,7 +132,7 @@
         maxErrorValue; // position of the max error value (param 1).  Init to 0.
     double maxErrorValue2; // position of the max error value (param 2).  Init
                            // to 0.
-    MTdata d;
+    MTdataHolder d;
 
     // Per thread command queue to improve performance
     clCommandQueueWrapper tQueue;
@@ -755,7 +755,7 @@
                     {
                         vlog_error(
                             "\nERROR: %s%s: %f ulp error at {%a (0x%x), %a "
-                            "(0x%x)}: *%a vs. %a (0x%8.8x) at index: %d\n",
+                            "(0x%x)}: *%a vs. %a (0x%8.8x) at index: %zu\n",
                             name, sizeNames[k], err, s[j], ((cl_uint *)s)[j],
                             s2[j], ((cl_uint *)s2)[j], r[j], test,
                             ((cl_uint *)&test)[0], j);
@@ -787,7 +787,7 @@
     {
         if (gVerboseBruteForce)
         {
-            vlog("base:%14u step:%10u scale:%10zu buf_elements:%10u ulps:%5.3f "
+            vlog("base:%14u step:%10u scale:%10u buf_elements:%10zu ulps:%5.3f "
                  "ThreadCount:%2u\n",
                  base, job->step, job->scale, buffer_elements, job->ulps,
                  job->threadCount);
@@ -848,7 +848,7 @@
         test_info.k[i].resize(test_info.threadCount, nullptr);
     }
 
-    test_info.tinfo.resize(test_info.threadCount, ThreadInfo{});
+    test_info.tinfo.resize(test_info.threadCount);
     for (cl_uint i = 0; i < test_info.threadCount; i++)
     {
         cl_buffer_region region = {
@@ -897,7 +897,7 @@
             goto exit;
         }
 
-        test_info.tinfo[i].d = init_genrand(genrand_int32(d));
+        test_info.tinfo[i].d = MTdataHolder(genrand_int32(d));
     }
 
     // Init the kernels
@@ -949,10 +949,5 @@
         }
     }
 
-    for (auto &threadInfo : test_info.tinfo)
-    {
-        free_mtdata(threadInfo.d);
-    }
-
     return error;
 }
diff --git a/test_conformance/math_brute_force/binary_i_double.cpp b/test_conformance/math_brute_force/binary_i_double.cpp
index 7baa21a..f8786e6 100644
--- a/test_conformance/math_brute_force/binary_i_double.cpp
+++ b/test_conformance/math_brute_force/binary_i_double.cpp
@@ -133,7 +133,7 @@
         maxErrorValue; // position of the max error value (param 1).  Init to 0.
     cl_int maxErrorValue2; // position of the max error value (param 2).  Init
                            // to 0.
-    MTdata d;
+    MTdataHolder d;
 
     // Per thread command queue to improve performance
     clCommandQueueWrapper tQueue;
@@ -610,7 +610,7 @@
         test_info.k[i].resize(test_info.threadCount, nullptr);
     }
 
-    test_info.tinfo.resize(test_info.threadCount, ThreadInfo{});
+    test_info.tinfo.resize(test_info.threadCount);
     for (cl_uint i = 0; i < test_info.threadCount; i++)
     {
         cl_buffer_region region = {
@@ -662,7 +662,7 @@
             goto exit;
         }
 
-        test_info.tinfo[i].d = init_genrand(genrand_int32(d));
+        test_info.tinfo[i].d = MTdataHolder(genrand_int32(d));
     }
 
     // Init the kernels
@@ -714,10 +714,5 @@
         }
     }
 
-    for (auto &threadInfo : test_info.tinfo)
-    {
-        free_mtdata(threadInfo.d);
-    }
-
     return error;
 }
diff --git a/test_conformance/math_brute_force/binary_i_float.cpp b/test_conformance/math_brute_force/binary_i_float.cpp
index 3f998e2..d855f44 100644
--- a/test_conformance/math_brute_force/binary_i_float.cpp
+++ b/test_conformance/math_brute_force/binary_i_float.cpp
@@ -131,7 +131,7 @@
         maxErrorValue; // position of the max error value (param 1).  Init to 0.
     cl_int maxErrorValue2; // position of the max error value (param 2).  Init
                            // to 0.
-    MTdata d;
+    MTdataHolder d;
 
     // Per thread command queue to improve performance
     clCommandQueueWrapper tQueue;
@@ -516,7 +516,7 @@
                 {
                     vlog_error(
                         "\nERROR: %s%s: %f ulp error at {%a (0x%8.8x), %d}: "
-                        "*%a (0x%8.8x) vs. %a (0x%8.8x) at index: %d\n",
+                        "*%a (0x%8.8x) vs. %a (0x%8.8x) at index: %zu\n",
                         name, sizeNames[k], err, s[j], ((uint32_t *)s)[j],
                         s2[j], r[j], ((uint32_t *)r)[j], test,
                         ((cl_uint *)&test)[0], j);
@@ -545,7 +545,7 @@
     {
         if (gVerboseBruteForce)
         {
-            vlog("base:%14u step:%10u scale:%10zu buf_elements:%10u ulps:%5.3f "
+            vlog("base:%14u step:%10u scale:%10u buf_elements:%10zu ulps:%5.3f "
                  "ThreadCount:%2u\n",
                  base, job->step, job->scale, buffer_elements, job->ulps,
                  job->threadCount);
@@ -603,7 +603,7 @@
         test_info.k[i].resize(test_info.threadCount, nullptr);
     }
 
-    test_info.tinfo.resize(test_info.threadCount, ThreadInfo{});
+    test_info.tinfo.resize(test_info.threadCount);
     for (cl_uint i = 0; i < test_info.threadCount; i++)
     {
         cl_buffer_region region = {
@@ -655,7 +655,7 @@
             goto exit;
         }
 
-        test_info.tinfo[i].d = init_genrand(genrand_int32(d));
+        test_info.tinfo[i].d = MTdataHolder(genrand_int32(d));
     }
 
     // Init the kernels
@@ -707,10 +707,5 @@
         }
     }
 
-    for (auto &threadInfo : test_info.tinfo)
-    {
-        free_mtdata(threadInfo.d);
-    }
-
     return error;
 }
diff --git a/test_conformance/math_brute_force/binary_operator_double.cpp b/test_conformance/math_brute_force/binary_operator_double.cpp
index 7488366..bbe5c43 100644
--- a/test_conformance/math_brute_force/binary_operator_double.cpp
+++ b/test_conformance/math_brute_force/binary_operator_double.cpp
@@ -133,7 +133,7 @@
         maxErrorValue; // position of the max error value (param 1).  Init to 0.
     double maxErrorValue2; // position of the max error value (param 2).  Init
                            // to 0.
-    MTdata d;
+    MTdataHolder d;
 
     // Per thread command queue to improve performance
     clCommandQueueWrapper tQueue;
@@ -601,7 +601,7 @@
     {
         if (gVerboseBruteForce)
         {
-            vlog("base:%14u step:%10u scale:%10zu buf_elements:%10u ulps:%5.3f "
+            vlog("base:%14u step:%10u scale:%10u buf_elements:%10zu ulps:%5.3f "
                  "ThreadCount:%2u\n",
                  base, job->step, job->scale, buffer_elements, job->ulps,
                  job->threadCount);
@@ -658,7 +658,7 @@
         test_info.k[i].resize(test_info.threadCount, nullptr);
     }
 
-    test_info.tinfo.resize(test_info.threadCount, ThreadInfo{});
+    test_info.tinfo.resize(test_info.threadCount);
     for (cl_uint i = 0; i < test_info.threadCount; i++)
     {
         cl_buffer_region region = {
@@ -707,7 +707,7 @@
             goto exit;
         }
 
-        test_info.tinfo[i].d = init_genrand(genrand_int32(d));
+        test_info.tinfo[i].d = MTdataHolder(genrand_int32(d));
     }
 
     // Init the kernels
@@ -759,10 +759,5 @@
         }
     }
 
-    for (auto &threadInfo : test_info.tinfo)
-    {
-        free_mtdata(threadInfo.d);
-    }
-
     return error;
 }
diff --git a/test_conformance/math_brute_force/binary_operator_float.cpp b/test_conformance/math_brute_force/binary_operator_float.cpp
index 56f293c..1a28d8d 100644
--- a/test_conformance/math_brute_force/binary_operator_float.cpp
+++ b/test_conformance/math_brute_force/binary_operator_float.cpp
@@ -131,7 +131,7 @@
         maxErrorValue; // position of the max error value (param 1).  Init to 0.
     double maxErrorValue2; // position of the max error value (param 2).  Init
                            // to 0.
-    MTdata d;
+    MTdataHolder d;
 
     // Per thread command queue to improve performance
     clCommandQueueWrapper tQueue;
@@ -698,7 +698,7 @@
                 if (fail)
                 {
                     vlog_error("\nERROR: %s%s: %f ulp error at {%a, %a}: *%a "
-                               "vs. %a (0x%8.8x) at index: %d\n",
+                               "vs. %a (0x%8.8x) at index: %zu\n",
                                name, sizeNames[k], err, s[j], s2[j], r[j], test,
                                ((cl_uint *)&test)[0], j);
                     error = -1;
@@ -726,7 +726,7 @@
     {
         if (gVerboseBruteForce)
         {
-            vlog("base:%14u step:%10u scale:%10zu buf_elements:%10u ulps:%5.3f "
+            vlog("base:%14u step:%10u scale:%10u buf_elements:%10zu ulps:%5.3f "
                  "ThreadCount:%2u\n",
                  base, job->step, job->scale, buffer_elements, job->ulps,
                  job->threadCount);
@@ -785,7 +785,7 @@
         test_info.k[i].resize(test_info.threadCount, nullptr);
     }
 
-    test_info.tinfo.resize(test_info.threadCount, ThreadInfo{});
+    test_info.tinfo.resize(test_info.threadCount);
     for (cl_uint i = 0; i < test_info.threadCount; i++)
     {
         cl_buffer_region region = {
@@ -834,7 +834,7 @@
             goto exit;
         }
 
-        test_info.tinfo[i].d = init_genrand(genrand_int32(d));
+        test_info.tinfo[i].d = MTdataHolder(genrand_int32(d));
     }
 
     // Init the kernels
@@ -886,10 +886,5 @@
         }
     }
 
-    for (auto &threadInfo : test_info.tinfo)
-    {
-        free_mtdata(threadInfo.d);
-    }
-
     return error;
 }
diff --git a/test_conformance/math_brute_force/binary_two_results_i_double.cpp b/test_conformance/math_brute_force/binary_two_results_i_double.cpp
index 59a5bfe..bbfd707 100644
--- a/test_conformance/math_brute_force/binary_two_results_i_double.cpp
+++ b/test_conformance/math_brute_force/binary_two_results_i_double.cpp
@@ -19,6 +19,7 @@
 #include "test_functions.h"
 #include "utility.h"
 
+#include <cinttypes>
 #include <climits>
 #include <cstring>
 
@@ -527,17 +528,20 @@
 
                 if (fail)
                 {
-                    vlog_error(
-                        "\nERROR: %sD%s: {%f, %lld} ulp error at {%.13la, "
-                        "%.13la} ({ 0x%16.16llx, 0x%16.16llx}): *{%.13la, "
-                        "%d} ({ 0x%16.16llx, 0x%8.8x}) vs. {%.13la, %d} ({ "
-                        "0x%16.16llx, 0x%8.8x})\n",
-                        f->name, sizeNames[k], err, iErr, ((double *)gIn)[j],
-                        ((double *)gIn2)[j], ((cl_ulong *)gIn)[j],
-                        ((cl_ulong *)gIn2)[j], ((double *)gOut_Ref)[j],
-                        ((int *)gOut_Ref2)[j], ((cl_ulong *)gOut_Ref)[j],
-                        ((cl_uint *)gOut_Ref2)[j], test, q2[j],
-                        ((cl_ulong *)q)[j], ((cl_uint *)q2)[j]);
+                    vlog_error("\nERROR: %sD%s: {%f, %" PRId64
+                               "} ulp error at {%.13la, "
+                               "%.13la} ({ 0x%16.16" PRIx64 ", 0x%16.16" PRIx64
+                               "}): *{%.13la, "
+                               "%d} ({ 0x%16.16" PRIx64
+                               ", 0x%8.8x}) vs. {%.13la, %d} ({ "
+                               "0x%16.16" PRIx64 ", 0x%8.8x})\n",
+                               f->name, sizeNames[k], err, iErr,
+                               ((double *)gIn)[j], ((double *)gIn2)[j],
+                               ((cl_ulong *)gIn)[j], ((cl_ulong *)gIn2)[j],
+                               ((double *)gOut_Ref)[j], ((int *)gOut_Ref2)[j],
+                               ((cl_ulong *)gOut_Ref)[j],
+                               ((cl_uint *)gOut_Ref2)[j], test, q2[j],
+                               ((cl_ulong *)q)[j], ((cl_uint *)q2)[j]);
                     error = -1;
                     goto exit;
                 }
@@ -548,8 +552,9 @@
         {
             if (gVerboseBruteForce)
             {
-                vlog("base:%14u step:%10zu  bufferSize:%10zd \n", i, step,
-                     BUFFER_SIZE);
+                vlog("base:%14" PRIu64 " step:%10" PRIu64
+                     "  bufferSize:%10d \n",
+                     i, step, BUFFER_SIZE);
             }
             else
             {
@@ -566,8 +571,8 @@
         else
             vlog("passed");
 
-        vlog("\t{%8.2f, %lld} @ {%a, %a}", maxError, maxError2, maxErrorVal,
-             maxErrorVal2);
+        vlog("\t{%8.2f, %" PRId64 "} @ {%a, %a}", maxError, maxError2,
+             maxErrorVal, maxErrorVal2);
     }
 
     vlog("\n");
diff --git a/test_conformance/math_brute_force/binary_two_results_i_float.cpp b/test_conformance/math_brute_force/binary_two_results_i_float.cpp
index 6c1dd3b..0747337 100644
--- a/test_conformance/math_brute_force/binary_two_results_i_float.cpp
+++ b/test_conformance/math_brute_force/binary_two_results_i_float.cpp
@@ -19,6 +19,7 @@
 #include "test_functions.h"
 #include "utility.h"
 
+#include <cinttypes>
 #include <climits>
 #include <cstring>
 
@@ -513,16 +514,17 @@
 
                 if (fail)
                 {
-                    vlog_error(
-                        "\nERROR: %s%s: {%f, %lld} ulp error at {%a, %a} "
-                        "({0x%8.8x, 0x%8.8x}): *{%a, %d} ({0x%8.8x, "
-                        "0x%8.8x}) vs. {%a, %d} ({0x%8.8x, 0x%8.8x})\n",
-                        f->name, sizeNames[k], err, iErr, ((float *)gIn)[j],
-                        ((float *)gIn2)[j], ((cl_uint *)gIn)[j],
-                        ((cl_uint *)gIn2)[j], ((float *)gOut_Ref)[j],
-                        ((int *)gOut_Ref2)[j], ((cl_uint *)gOut_Ref)[j],
-                        ((cl_uint *)gOut_Ref2)[j], test, q2[j],
-                        ((cl_uint *)&test)[0], ((cl_uint *)q2)[j]);
+                    vlog_error("\nERROR: %s%s: {%f, %" PRId64
+                               "} ulp error at {%a, %a} "
+                               "({0x%8.8x, 0x%8.8x}): *{%a, %d} ({0x%8.8x, "
+                               "0x%8.8x}) vs. {%a, %d} ({0x%8.8x, 0x%8.8x})\n",
+                               f->name, sizeNames[k], err, iErr,
+                               ((float *)gIn)[j], ((float *)gIn2)[j],
+                               ((cl_uint *)gIn)[j], ((cl_uint *)gIn2)[j],
+                               ((float *)gOut_Ref)[j], ((int *)gOut_Ref2)[j],
+                               ((cl_uint *)gOut_Ref)[j],
+                               ((cl_uint *)gOut_Ref2)[j], test, q2[j],
+                               ((cl_uint *)&test)[0], ((cl_uint *)q2)[j]);
                     error = -1;
                     goto exit;
                 }
@@ -533,8 +535,9 @@
         {
             if (gVerboseBruteForce)
             {
-                vlog("base:%14u step:%10zu  bufferSize:%10zd \n", i, step,
-                     BUFFER_SIZE);
+                vlog("base:%14" PRIu64 " step:%10" PRIu64
+                     "  bufferSize:%10d \n",
+                     i, step, BUFFER_SIZE);
             }
             else
             {
@@ -551,8 +554,8 @@
         else
             vlog("passed");
 
-        vlog("\t{%8.2f, %lld} @ {%a, %a}", maxError, maxError2, maxErrorVal,
-             maxErrorVal2);
+        vlog("\t{%8.2f, %" PRId64 "} @ {%a, %a}", maxError, maxError2,
+             maxErrorVal, maxErrorVal2);
     }
 
     vlog("\n");
diff --git a/test_conformance/math_brute_force/common.cpp b/test_conformance/math_brute_force/common.cpp
new file mode 100644
index 0000000..f5e9f99
--- /dev/null
+++ b/test_conformance/math_brute_force/common.cpp
@@ -0,0 +1,170 @@
+//
+// Copyright (c) 2022 The Khronos Group Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+
+#include "common.h"
+
+#include "utility.h" // for sizeNames and sizeValues.
+
+#include <sstream>
+#include <string>
+
+namespace {
+
+// OpenCL C scalar type name for `type`; nullptr if no enumerator matches.
+const char *GetTypeName(ParameterType type)
+{
+    const char *spelling = nullptr;
+    switch (type)
+    {
+        case ParameterType::Float: spelling = "float"; break;
+        case ParameterType::Double: spelling = "double"; break;
+    }
+    return spelling;
+}
+
+// Text EmitDefineUndef substitutes for `type`; nullptr if unrecognised.
+const char *GetUndefValue(ParameterType type)
+{
+    const char *literal = nullptr;
+    switch (type)
+    {
+        case ParameterType::Float:
+        case ParameterType::Double: literal = "NAN"; break;
+    }
+    return literal;
+}
+
+// #define <name> <type><size suffix> and #define <name>_SCALAR <type>.
+void EmitDefineType(std::ostringstream &kernel, const char *name,
+                    ParameterType type, int vector_size_index)
+{
+    const char *const element = GetTypeName(type);
+    kernel << "#define " << name << " " << element
+           << sizeNames[vector_size_index] << '\n';
+    kernel << "#define " << name << "_SCALAR " << element << '\n';
+}
+
+// #define <name> as the undefined-value text for `type`.
+void EmitDefineUndef(std::ostringstream &kernel, const char *name,
+                     ParameterType type)
+{
+    kernel << "#define " << name << " " << GetUndefValue(type) << '\n';
+}
+
+// Emit the pragma `type` depends on; only double needs one (cl_khr_fp64).
+void EmitEnableExtension(std::ostringstream &kernel, ParameterType type)
+{
+    if (type == ParameterType::Double)
+        kernel << "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n";
+}
+
+} // anonymous namespace
+
+// Kernel entry-point name: the fixed stem "math_kernel" followed by the
+// sizeNames entry selected by vector_size_index.
+std::string GetKernelName(int vector_size_index)
+{ return std::string("math_kernel").append(sizeNames[vector_size_index]); }
+
+std::string GetTernaryKernel(const std::string &kernel_name,
+                             const char *builtin, ParameterType retType,
+                             ParameterType type1, ParameterType type2,
+                             ParameterType type3, int vector_size_index)
+{
+    // Emit type/undef macros up front so the kernel text below stays readable.
+    std::ostringstream kernel;
+    EmitDefineType(kernel, "RETTYPE", retType, vector_size_index);
+    EmitDefineType(kernel, "TYPE1", type1, vector_size_index);
+    EmitDefineType(kernel, "TYPE2", type2, vector_size_index);
+    EmitDefineType(kernel, "TYPE3", type3, vector_size_index);
+    EmitDefineUndef(kernel, "UNDEF1", type1);
+    EmitDefineUndef(kernel, "UNDEF2", type2);
+    EmitDefineUndef(kernel, "UNDEF3", type3);
+    EmitEnableExtension(kernel, type1); // NOTE(review): only type1 is checked
+    // Chunks are streamed in order; kernel_name and builtin sit between them.
+    // clang-format off
+    const char *kernel_nonvec3[] = { R"(
+__kernel void )", kernel_name.c_str(), R"((__global RETTYPE* out,
+                          __global TYPE1* in1,
+                          __global TYPE2* in2,
+                          __global TYPE3* in3)
+{
+    size_t i = get_global_id(0);
+    out[i] = )", builtin, R"((in1[i], in2[i], in3[i]);
+}
+)" };
+
+    const char *kernel_vec3[] = { R"(
+__kernel void )", kernel_name.c_str(), R"((__global RETTYPE_SCALAR* out,
+                          __global TYPE1_SCALAR* in1,
+                          __global TYPE2_SCALAR* in2,
+                          __global TYPE3_SCALAR* in3)
+{
+    size_t i = get_global_id(0);
+
+    if (i + 1 < get_global_size(0))
+    {
+        TYPE1 a = vload3(0, in1 + 3 * i);
+        TYPE2 b = vload3(0, in2 + 3 * i);
+        TYPE3 c = vload3(0, in3 + 3 * i);
+        RETTYPE res = )", builtin, R"((a, b, c);
+        vstore3(res, 0, out + 3 * i);
+    }
+    else
+    {
+        // Figure out how many elements are left over after
+        // BUFFER_SIZE % (3 * sizeof(type)).
+        // Assume power of two buffer size.
+        size_t parity = i & 1;
+        TYPE1 a = (TYPE1)(UNDEF1, UNDEF1, UNDEF1);
+        TYPE2 b = (TYPE2)(UNDEF2, UNDEF2, UNDEF2);
+        TYPE3 c = (TYPE3)(UNDEF3, UNDEF3, UNDEF3);
+        switch (parity)
+        {
+            case 0:
+                a.y = in1[3 * i + 1];
+                b.y = in2[3 * i + 1];
+                c.y = in3[3 * i + 1];
+                // fall through
+            case 1:
+                a.x = in1[3 * i];
+                b.x = in2[3 * i];
+                c.x = in3[3 * i];
+                break;
+        }
+
+        RETTYPE res = )", builtin, R"((a, b, c);
+
+        switch (parity)
+        {
+            case 0:
+                out[3 * i + 1] = res.y;
+                // fall through
+            case 1:
+                out[3 * i] = res.x;
+                break;
+        }
+    }
+}
+)" };
+    // clang-format on
+
+    if (sizeValues[vector_size_index] != 3) // vec3 uses vload3/vstore3
+        for (const auto &chunk : kernel_nonvec3) kernel << chunk;
+    else
+        for (const auto &chunk : kernel_vec3) kernel << chunk;
+
+    return kernel.str();
+}
diff --git a/test_conformance/math_brute_force/common.h b/test_conformance/math_brute_force/common.h
index 6f17898..143814c 100644
--- a/test_conformance/math_brute_force/common.h
+++ b/test_conformance/math_brute_force/common.h
@@ -20,6 +20,7 @@
 #include "utility.h"
 
 #include <array>
+#include <string>
 #include <vector>
 
 // Array of thread-specific kernels for each vector size.
@@ -31,6 +32,22 @@
 // Array of buffers for each vector size.
 using Buffers = std::array<clMemWrapper, VECTOR_SIZE_COUNT>;
 
+// Types supported for kernel code generation.
+enum class ParameterType
+{
+    Float,
+    Double,
+};
+
+// Return kernel name suffixed with vector size.
+std::string GetKernelName(int vector_size_index);
+
+// Generate kernel code for the given builtin function/operator.
+std::string GetTernaryKernel(const std::string &kernel_name,
+                             const char *builtin, ParameterType retType,
+                             ParameterType type1, ParameterType type2,
+                             ParameterType type3, int vector_size_index);
+
 // Information to generate OpenCL kernels.
 struct BuildKernelInfo
 {
diff --git a/test_conformance/math_brute_force/i_unary_double.cpp b/test_conformance/math_brute_force/i_unary_double.cpp
index a05737d..0cbcf86 100644
--- a/test_conformance/math_brute_force/i_unary_double.cpp
+++ b/test_conformance/math_brute_force/i_unary_double.cpp
@@ -19,6 +19,7 @@
 #include "test_functions.h"
 #include "utility.h"
 
+#include <cinttypes>
 #include <cstring>
 
 namespace {
@@ -271,8 +272,9 @@
         {
             if (gVerboseBruteForce)
             {
-                vlog("base:%14u step:%10zu  bufferSize:%10zd \n", i, step,
-                     BUFFER_SIZE);
+                vlog("base:%14" PRIu64 " step:%10" PRIu64
+                     "  bufferSize:%10d \n",
+                     i, step, BUFFER_SIZE);
             }
             else
             {
diff --git a/test_conformance/math_brute_force/i_unary_float.cpp b/test_conformance/math_brute_force/i_unary_float.cpp
index 13442e6..90bb1e1 100644
--- a/test_conformance/math_brute_force/i_unary_float.cpp
+++ b/test_conformance/math_brute_force/i_unary_float.cpp
@@ -19,6 +19,7 @@
 #include "test_functions.h"
 #include "utility.h"
 
+#include <cinttypes>
 #include <cstring>
 
 namespace {
@@ -268,8 +269,9 @@
         {
             if (gVerboseBruteForce)
             {
-                vlog("base:%14u step:%10zu  bufferSize:%10zd \n", i, step,
-                     BUFFER_SIZE);
+                vlog("base:%14" PRIu64 " step:%10" PRIu64
+                     "  bufferSize:%10d \n",
+                     i, step, BUFFER_SIZE);
             }
             else
             {
diff --git a/test_conformance/math_brute_force/macro_binary_double.cpp b/test_conformance/math_brute_force/macro_binary_double.cpp
index a697a7b..412f210 100644
--- a/test_conformance/math_brute_force/macro_binary_double.cpp
+++ b/test_conformance/math_brute_force/macro_binary_double.cpp
@@ -19,6 +19,7 @@
 #include "test_functions.h"
 #include "utility.h"
 
+#include <cinttypes>
 #include <cstring>
 
 namespace {
@@ -127,7 +128,7 @@
     clMemWrapper inBuf2;
     Buffers outBuf;
 
-    MTdata d;
+    MTdataHolder d;
 
     // Per thread command queue to improve performance
     clCommandQueueWrapper tQueue;
@@ -487,8 +488,9 @@
 
             cl_ulong err = t[j] - q[j];
             if (q[j] > t[j]) err = q[j] - t[j];
-            vlog_error("\nERROR: %s: %lld ulp error at {%.13la, %.13la}: *%lld "
-                       "vs. %lld  (index: %d)\n",
+            vlog_error("\nERROR: %s: %" PRIu64
+                       " ulp error at {%.13la, %.13la}: *%" PRId64 " "
+                       "vs. %" PRId64 "  (index: %zu)\n",
                        name, err, ((double *)s)[j], ((double *)s2)[j], t[j],
                        q[j], j);
             error = -1;
@@ -535,8 +537,9 @@
 
                 cl_ulong err = -t[j] - q[j];
                 if (q[j] > -t[j]) err = q[j] + t[j];
-                vlog_error("\nERROR: %sD%s: %lld ulp error at {%.13la, "
-                           "%.13la}: *%lld vs. %lld  (index: %d)\n",
+                vlog_error("\nERROR: %sD%s: %" PRIu64 " ulp error at {%.13la, "
+                           "%.13la}: *%" PRId64 " vs. %" PRId64
+                           "  (index: %zu)\n",
                            name, sizeNames[k], err, ((double *)s)[j],
                            ((double *)s2)[j], -t[j], q[j], j);
                 error = -1;
@@ -616,7 +619,7 @@
         test_info.k[i].resize(test_info.threadCount, nullptr);
     }
 
-    test_info.tinfo.resize(test_info.threadCount, ThreadInfo{});
+    test_info.tinfo.resize(test_info.threadCount);
     for (cl_uint i = 0; i < test_info.threadCount; i++)
     {
         cl_buffer_region region = {
@@ -665,7 +668,7 @@
             goto exit;
         }
 
-        test_info.tinfo[i].d = init_genrand(genrand_int32(d));
+        test_info.tinfo[i].d = MTdataHolder(genrand_int32(d));
     }
 
     // Init the kernels
@@ -704,10 +707,5 @@
         }
     }
 
-    for (auto &threadInfo : test_info.tinfo)
-    {
-        free_mtdata(threadInfo.d);
-    }
-
     return error;
 }
diff --git a/test_conformance/math_brute_force/macro_binary_float.cpp b/test_conformance/math_brute_force/macro_binary_float.cpp
index 97e2f67..cb915fc 100644
--- a/test_conformance/math_brute_force/macro_binary_float.cpp
+++ b/test_conformance/math_brute_force/macro_binary_float.cpp
@@ -125,7 +125,7 @@
     clMemWrapper inBuf2;
     Buffers outBuf;
 
-    MTdata d;
+    MTdataHolder d;
 
     // Per thread command queue to improve performance
     clCommandQueueWrapper tQueue;
@@ -478,7 +478,7 @@
             uint32_t err = t[j] - q[j];
             if (q[j] > t[j]) err = q[j] - t[j];
             vlog_error("\nERROR: %s: %d ulp error at {%a, %a}: *0x%8.8x vs. "
-                       "0x%8.8x (index: %d)\n",
+                       "0x%8.8x (index: %zu)\n",
                        name, err, ((float *)s)[j], ((float *)s2)[j], t[j], q[j],
                        j);
             error = -1;
@@ -524,7 +524,7 @@
                 cl_uint err = -t[j] - q[j];
                 if (q[j] > -t[j]) err = q[j] + t[j];
                 vlog_error("\nERROR: %s%s: %d ulp error at {%a, %a}: *0x%8.8x "
-                           "vs. 0x%8.8x (index: %d)\n",
+                           "vs. 0x%8.8x (index: %zu)\n",
                            name, sizeNames[k], err, ((float *)s)[j],
                            ((float *)s2)[j], -t[j], q[j], j);
                 error = -1;
@@ -605,7 +605,7 @@
         test_info.k[i].resize(test_info.threadCount, nullptr);
     }
 
-    test_info.tinfo.resize(test_info.threadCount, ThreadInfo{});
+    test_info.tinfo.resize(test_info.threadCount);
     for (cl_uint i = 0; i < test_info.threadCount; i++)
     {
         cl_buffer_region region = {
@@ -654,7 +654,7 @@
             goto exit;
         }
 
-        test_info.tinfo[i].d = init_genrand(genrand_int32(d));
+        test_info.tinfo[i].d = MTdataHolder(genrand_int32(d));
     }
 
     // Init the kernels
@@ -693,10 +693,5 @@
         }
     }
 
-    for (auto &threadInfo : test_info.tinfo)
-    {
-        free_mtdata(threadInfo.d);
-    }
-
     return error;
 }
diff --git a/test_conformance/math_brute_force/macro_unary_double.cpp b/test_conformance/math_brute_force/macro_unary_double.cpp
index 5a3ad35..c2e7cdc 100644
--- a/test_conformance/math_brute_force/macro_unary_double.cpp
+++ b/test_conformance/math_brute_force/macro_unary_double.cpp
@@ -19,6 +19,7 @@
 #include "test_functions.h"
 #include "utility.h"
 
+#include <cinttypes>
 #include <cstring>
 
 namespace {
@@ -297,7 +298,8 @@
 
             cl_ulong err = t[j] - q[j];
             if (q[j] > t[j]) err = q[j] - t[j];
-            vlog_error("\nERROR: %sD: %zd ulp error at %.13la: *%zd vs. %zd\n",
+            vlog_error("\nERROR: %sD: %" PRIu64
+                       " ulp error at %.13la: *%" PRId64 " vs. %" PRId64 "\n",
                        name, err, ((double *)gIn)[j], t[j], q[j]);
             return -1;
         }
@@ -323,7 +325,8 @@
                 cl_ulong err = -t[j] - q[j];
                 if (q[j] > -t[j]) err = q[j] + t[j];
                 vlog_error(
-                    "\nERROR: %sD%s: %zd ulp error at %.13la: *%zd vs. %zd\n",
+                    "\nERROR: %sD%s: %" PRIu64 " ulp error at %.13la: *%" PRId64
+                    " vs. %" PRId64 "\n",
                     name, sizeNames[k], err, ((double *)gIn)[j], -t[j], q[j]);
                 return -1;
             }
@@ -400,7 +403,7 @@
         test_info.k[i].resize(test_info.threadCount, nullptr);
     }
 
-    test_info.tinfo.resize(test_info.threadCount, ThreadInfo{});
+    test_info.tinfo.resize(test_info.threadCount);
     for (cl_uint i = 0; i < test_info.threadCount; i++)
     {
         cl_buffer_region region = {
diff --git a/test_conformance/math_brute_force/macro_unary_float.cpp b/test_conformance/math_brute_force/macro_unary_float.cpp
index d298215..6a1b9b9 100644
--- a/test_conformance/math_brute_force/macro_unary_float.cpp
+++ b/test_conformance/math_brute_force/macro_unary_float.cpp
@@ -414,7 +414,7 @@
         test_info.k[i].resize(test_info.threadCount, nullptr);
     }
 
-    test_info.tinfo.resize(test_info.threadCount, ThreadInfo{});
+    test_info.tinfo.resize(test_info.threadCount);
     for (cl_uint i = 0; i < test_info.threadCount; i++)
     {
         cl_buffer_region region = {
diff --git a/test_conformance/math_brute_force/mad_double.cpp b/test_conformance/math_brute_force/mad_double.cpp
index 3def6a8..8d8fec5 100644
--- a/test_conformance/math_brute_force/mad_double.cpp
+++ b/test_conformance/math_brute_force/mad_double.cpp
@@ -26,94 +26,13 @@
 int BuildKernel(const char *name, int vectorSize, cl_kernel *k, cl_program *p,
                 bool relaxedMode)
 {
-    const char *c[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
-                        "__kernel void math_kernel",
-                        sizeNames[vectorSize],
-                        "( __global double",
-                        sizeNames[vectorSize],
-                        "* out, __global double",
-                        sizeNames[vectorSize],
-                        "* in1, __global double",
-                        sizeNames[vectorSize],
-                        "* in2,  __global double",
-                        sizeNames[vectorSize],
-                        "* in3 )\n"
-                        "{\n"
-                        "   size_t i = get_global_id(0);\n"
-                        "   out[i] = ",
-                        name,
-                        "( in1[i], in2[i], in3[i] );\n"
-                        "}\n" };
-
-    const char *c3[] = {
-        "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
-        "__kernel void math_kernel",
-        sizeNames[vectorSize],
-        "( __global double* out, __global double* in, __global double* in2, "
-        "__global double* in3)\n"
-        "{\n"
-        "   size_t i = get_global_id(0);\n"
-        "   if( i + 1 < get_global_size(0) )\n"
-        "   {\n"
-        "       double3 d0 = vload3( 0, in + 3 * i );\n"
-        "       double3 d1 = vload3( 0, in2 + 3 * i );\n"
-        "       double3 d2 = vload3( 0, in3 + 3 * i );\n"
-        "       d0 = ",
-        name,
-        "( d0, d1, d2 );\n"
-        "       vstore3( d0, 0, out + 3*i );\n"
-        "   }\n"
-        "   else\n"
-        "   {\n"
-        "       size_t parity = i & 1;   // Figure out how many elements are "
-        "left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two "
-        "buffer size \n"
-        "       double3 d0;\n"
-        "       double3 d1;\n"
-        "       double3 d2;\n"
-        "       switch( parity )\n"
-        "       {\n"
-        "           case 1:\n"
-        "               d0 = (double3)( in[3*i], NAN, NAN ); \n"
-        "               d1 = (double3)( in2[3*i], NAN, NAN ); \n"
-        "               d2 = (double3)( in3[3*i], NAN, NAN ); \n"
-        "               break;\n"
-        "           case 0:\n"
-        "               d0 = (double3)( in[3*i], in[3*i+1], NAN ); \n"
-        "               d1 = (double3)( in2[3*i], in2[3*i+1], NAN ); \n"
-        "               d2 = (double3)( in3[3*i], in3[3*i+1], NAN ); \n"
-        "               break;\n"
-        "       }\n"
-        "       d0 = ",
-        name,
-        "( d0, d1, d2 );\n"
-        "       switch( parity )\n"
-        "       {\n"
-        "           case 0:\n"
-        "               out[3*i+1] = d0.y; \n"
-        "               // fall through\n"
-        "           case 1:\n"
-        "               out[3*i] = d0.x; \n"
-        "               break;\n"
-        "       }\n"
-        "   }\n"
-        "}\n"
-    };
-
-    const char **kern = c;
-    size_t kernSize = sizeof(c) / sizeof(c[0]);
-
-    if (sizeValues[vectorSize] == 3)
-    {
-        kern = c3;
-        kernSize = sizeof(c3) / sizeof(c3[0]);
-    }
-
-    char testName[32];
-    snprintf(testName, sizeof(testName) - 1, "math_kernel%s",
-             sizeNames[vectorSize]);
-
-    return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode);
+    auto kernel_name = GetKernelName(vectorSize);
+    auto source = GetTernaryKernel(kernel_name, name, ParameterType::Double,
+                                   ParameterType::Double, ParameterType::Double,
+                                   ParameterType::Double, vectorSize);
+    std::array<const char *, 1> sources{ source.c_str() };
+    return MakeKernel(sources.data(), sources.size(), kernel_name.c_str(), k, p,
+                      relaxedMode);
 }
 
 struct BuildKernelInfo2
diff --git a/test_conformance/math_brute_force/mad_float.cpp b/test_conformance/math_brute_force/mad_float.cpp
index 498f25e..04ac5aa 100644
--- a/test_conformance/math_brute_force/mad_float.cpp
+++ b/test_conformance/math_brute_force/mad_float.cpp
@@ -26,92 +26,13 @@
 int BuildKernel(const char *name, int vectorSize, cl_kernel *k, cl_program *p,
                 bool relaxedMode)
 {
-    const char *c[] = { "__kernel void math_kernel",
-                        sizeNames[vectorSize],
-                        "( __global float",
-                        sizeNames[vectorSize],
-                        "* out, __global float",
-                        sizeNames[vectorSize],
-                        "* in1, __global float",
-                        sizeNames[vectorSize],
-                        "* in2,  __global float",
-                        sizeNames[vectorSize],
-                        "* in3 )\n"
-                        "{\n"
-                        "   size_t i = get_global_id(0);\n"
-                        "   out[i] = ",
-                        name,
-                        "( in1[i], in2[i], in3[i] );\n"
-                        "}\n" };
-
-    const char *c3[] = {
-        "__kernel void math_kernel",
-        sizeNames[vectorSize],
-        "( __global float* out, __global float* in, __global float* in2, "
-        "__global float* in3)\n"
-        "{\n"
-        "   size_t i = get_global_id(0);\n"
-        "   if( i + 1 < get_global_size(0) )\n"
-        "   {\n"
-        "       float3 f0 = vload3( 0, in + 3 * i );\n"
-        "       float3 f1 = vload3( 0, in2 + 3 * i );\n"
-        "       float3 f2 = vload3( 0, in3 + 3 * i );\n"
-        "       f0 = ",
-        name,
-        "( f0, f1, f2 );\n"
-        "       vstore3( f0, 0, out + 3*i );\n"
-        "   }\n"
-        "   else\n"
-        "   {\n"
-        "       size_t parity = i & 1;   // Figure out how many elements are "
-        "left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two "
-        "buffer size \n"
-        "       float3 f0;\n"
-        "       float3 f1;\n"
-        "       float3 f2;\n"
-        "       switch( parity )\n"
-        "       {\n"
-        "           case 1:\n"
-        "               f0 = (float3)( in[3*i], NAN, NAN ); \n"
-        "               f1 = (float3)( in2[3*i], NAN, NAN ); \n"
-        "               f2 = (float3)( in3[3*i], NAN, NAN ); \n"
-        "               break;\n"
-        "           case 0:\n"
-        "               f0 = (float3)( in[3*i], in[3*i+1], NAN ); \n"
-        "               f1 = (float3)( in2[3*i], in2[3*i+1], NAN ); \n"
-        "               f2 = (float3)( in3[3*i], in3[3*i+1], NAN ); \n"
-        "               break;\n"
-        "       }\n"
-        "       f0 = ",
-        name,
-        "( f0, f1, f2 );\n"
-        "       switch( parity )\n"
-        "       {\n"
-        "           case 0:\n"
-        "               out[3*i+1] = f0.y; \n"
-        "               // fall through\n"
-        "           case 1:\n"
-        "               out[3*i] = f0.x; \n"
-        "               break;\n"
-        "       }\n"
-        "   }\n"
-        "}\n"
-    };
-
-    const char **kern = c;
-    size_t kernSize = sizeof(c) / sizeof(c[0]);
-
-    if (sizeValues[vectorSize] == 3)
-    {
-        kern = c3;
-        kernSize = sizeof(c3) / sizeof(c3[0]);
-    }
-
-    char testName[32];
-    snprintf(testName, sizeof(testName) - 1, "math_kernel%s",
-             sizeNames[vectorSize]);
-
-    return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode);
+    auto kernel_name = GetKernelName(vectorSize);
+    auto source = GetTernaryKernel(kernel_name, name, ParameterType::Float,
+                                   ParameterType::Float, ParameterType::Float,
+                                   ParameterType::Float, vectorSize);
+    std::array<const char *, 1> sources{ source.c_str() };
+    return MakeKernel(sources.data(), sources.size(), kernel_name.c_str(), k, p,
+                      relaxedMode);
 }
 
 struct BuildKernelInfo2
diff --git a/test_conformance/math_brute_force/main.cpp b/test_conformance/math_brute_force/main.cpp
index ee3fcbd..64491bd 100644
--- a/test_conformance/math_brute_force/main.cpp
+++ b/test_conformance/math_brute_force/main.cpp
@@ -98,7 +98,7 @@
 cl_mem gInBuffer3 = NULL;
 cl_mem gOutBuffer[VECTOR_SIZE_COUNT] = { NULL, NULL, NULL, NULL, NULL, NULL };
 cl_mem gOutBuffer2[VECTOR_SIZE_COUNT] = { NULL, NULL, NULL, NULL, NULL, NULL };
-static MTdata gMTdata;
+static MTdataHolder gMTdata;
 cl_device_fp_config gFloatCapabilities = 0;
 int gWimpyReductionFactor = 32;
 int gVerboseBruteForce = 0;
@@ -132,7 +132,7 @@
             if ((gStartTestNumber != ~0u && i < gStartTestNumber)
                 || i > gEndTestNumber)
             {
-                vlog("Skipping function #%d\n", i);
+                vlog("Skipping function #%zu\n", i);
                 return 0;
             }
 
@@ -326,7 +326,7 @@
     vlog("\n-------------------------------------------------------------------"
          "----------------------------------------\n");
 
-    gMTdata = init_genrand(gRandomSeed);
+    gMTdata = MTdataHolder(gRandomSeed);
 
     FPU_mode_type oldMode;
     DisableFTZ(&oldMode);
@@ -336,8 +336,6 @@
 
     RestoreFPState(&oldMode);
 
-    free_mtdata(gMTdata);
-
     if (gQueue)
     {
         int error_code = clFinish(gQueue);
@@ -504,8 +502,6 @@
         gWimpyMode = 1;
     }
 
-    vlog("\nTest binary built %s %s\n", __DATE__, __TIME__);
-
     PrintArch();
 
     if (gWimpyMode)
diff --git a/test_conformance/math_brute_force/ternary_double.cpp b/test_conformance/math_brute_force/ternary_double.cpp
index 94fbe26..b5f1ab0 100644
--- a/test_conformance/math_brute_force/ternary_double.cpp
+++ b/test_conformance/math_brute_force/ternary_double.cpp
@@ -19,6 +19,7 @@
 #include "test_functions.h"
 #include "utility.h"
 
+#include <cinttypes>
 #include <cstring>
 
 #define CORRECTLY_ROUNDED 0
@@ -29,94 +30,13 @@
 int BuildKernel(const char *name, int vectorSize, cl_kernel *k, cl_program *p,
                 bool relaxedMode)
 {
-    const char *c[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
-                        "__kernel void math_kernel",
-                        sizeNames[vectorSize],
-                        "( __global double",
-                        sizeNames[vectorSize],
-                        "* out, __global double",
-                        sizeNames[vectorSize],
-                        "* in1, __global double",
-                        sizeNames[vectorSize],
-                        "* in2,  __global double",
-                        sizeNames[vectorSize],
-                        "* in3 )\n"
-                        "{\n"
-                        "   size_t i = get_global_id(0);\n"
-                        "   out[i] = ",
-                        name,
-                        "( in1[i], in2[i], in3[i] );\n"
-                        "}\n" };
-
-    const char *c3[] = {
-        "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
-        "__kernel void math_kernel",
-        sizeNames[vectorSize],
-        "( __global double* out, __global double* in, __global double* in2, "
-        "__global double* in3)\n"
-        "{\n"
-        "   size_t i = get_global_id(0);\n"
-        "   if( i + 1 < get_global_size(0) )\n"
-        "   {\n"
-        "       double3 d0 = vload3( 0, in + 3 * i );\n"
-        "       double3 d1 = vload3( 0, in2 + 3 * i );\n"
-        "       double3 d2 = vload3( 0, in3 + 3 * i );\n"
-        "       d0 = ",
-        name,
-        "( d0, d1, d2 );\n"
-        "       vstore3( d0, 0, out + 3*i );\n"
-        "   }\n"
-        "   else\n"
-        "   {\n"
-        "       size_t parity = i & 1;   // Figure out how many elements are "
-        "left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two "
-        "buffer size \n"
-        "       double3 d0;\n"
-        "       double3 d1;\n"
-        "       double3 d2;\n"
-        "       switch( parity )\n"
-        "       {\n"
-        "           case 1:\n"
-        "               d0 = (double3)( in[3*i], NAN, NAN ); \n"
-        "               d1 = (double3)( in2[3*i], NAN, NAN ); \n"
-        "               d2 = (double3)( in3[3*i], NAN, NAN ); \n"
-        "               break;\n"
-        "           case 0:\n"
-        "               d0 = (double3)( in[3*i], in[3*i+1], NAN ); \n"
-        "               d1 = (double3)( in2[3*i], in2[3*i+1], NAN ); \n"
-        "               d2 = (double3)( in3[3*i], in3[3*i+1], NAN ); \n"
-        "               break;\n"
-        "       }\n"
-        "       d0 = ",
-        name,
-        "( d0, d1, d2 );\n"
-        "       switch( parity )\n"
-        "       {\n"
-        "           case 0:\n"
-        "               out[3*i+1] = d0.y; \n"
-        "               // fall through\n"
-        "           case 1:\n"
-        "               out[3*i] = d0.x; \n"
-        "               break;\n"
-        "       }\n"
-        "   }\n"
-        "}\n"
-    };
-
-    const char **kern = c;
-    size_t kernSize = sizeof(c) / sizeof(c[0]);
-
-    if (sizeValues[vectorSize] == 3)
-    {
-        kern = c3;
-        kernSize = sizeof(c3) / sizeof(c3[0]);
-    }
-
-    char testName[32];
-    snprintf(testName, sizeof(testName) - 1, "math_kernel%s",
-             sizeNames[vectorSize]);
-
-    return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode);
+    auto kernel_name = GetKernelName(vectorSize);
+    auto source = GetTernaryKernel(kernel_name, name, ParameterType::Double,
+                                   ParameterType::Double, ParameterType::Double,
+                                   ParameterType::Double, vectorSize);
+    std::array<const char *, 1> sources{ source.c_str() };
+    return MakeKernel(sources.data(), sources.size(), kernel_name.c_str(), k, p,
+                      relaxedMode);
 }
 
 struct BuildKernelInfo2
@@ -708,8 +628,9 @@
         {
             if (gVerboseBruteForce)
             {
-                vlog("base:%14u step:%10zu  bufferSize:%10zd \n", i, step,
-                     BUFFER_SIZE);
+                vlog("base:%14" PRIu64 " step:%10" PRIu64
+                     "  bufferSize:%10d \n",
+                     i, step, BUFFER_SIZE);
             }
             else
             {
diff --git a/test_conformance/math_brute_force/ternary_float.cpp b/test_conformance/math_brute_force/ternary_float.cpp
index 762c57d..cf36184 100644
--- a/test_conformance/math_brute_force/ternary_float.cpp
+++ b/test_conformance/math_brute_force/ternary_float.cpp
@@ -19,6 +19,7 @@
 #include "test_functions.h"
 #include "utility.h"
 
+#include <cinttypes>
 #include <cstring>
 
 #define CORRECTLY_ROUNDED 0
@@ -29,92 +30,13 @@
 int BuildKernel(const char *name, int vectorSize, cl_kernel *k, cl_program *p,
                 bool relaxedMode)
 {
-    const char *c[] = { "__kernel void math_kernel",
-                        sizeNames[vectorSize],
-                        "( __global float",
-                        sizeNames[vectorSize],
-                        "* out, __global float",
-                        sizeNames[vectorSize],
-                        "* in1, __global float",
-                        sizeNames[vectorSize],
-                        "* in2,  __global float",
-                        sizeNames[vectorSize],
-                        "* in3 )\n"
-                        "{\n"
-                        "   size_t i = get_global_id(0);\n"
-                        "   out[i] = ",
-                        name,
-                        "( in1[i], in2[i], in3[i] );\n"
-                        "}\n" };
-
-    const char *c3[] = {
-        "__kernel void math_kernel",
-        sizeNames[vectorSize],
-        "( __global float* out, __global float* in, __global float* in2, "
-        "__global float* in3)\n"
-        "{\n"
-        "   size_t i = get_global_id(0);\n"
-        "   if( i + 1 < get_global_size(0) )\n"
-        "   {\n"
-        "       float3 f0 = vload3( 0, in + 3 * i );\n"
-        "       float3 f1 = vload3( 0, in2 + 3 * i );\n"
-        "       float3 f2 = vload3( 0, in3 + 3 * i );\n"
-        "       f0 = ",
-        name,
-        "( f0, f1, f2 );\n"
-        "       vstore3( f0, 0, out + 3*i );\n"
-        "   }\n"
-        "   else\n"
-        "   {\n"
-        "       size_t parity = i & 1;   // Figure out how many elements are "
-        "left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two "
-        "buffer size \n"
-        "       float3 f0;\n"
-        "       float3 f1;\n"
-        "       float3 f2;\n"
-        "       switch( parity )\n"
-        "       {\n"
-        "           case 1:\n"
-        "               f0 = (float3)( in[3*i], NAN, NAN ); \n"
-        "               f1 = (float3)( in2[3*i], NAN, NAN ); \n"
-        "               f2 = (float3)( in3[3*i], NAN, NAN ); \n"
-        "               break;\n"
-        "           case 0:\n"
-        "               f0 = (float3)( in[3*i], in[3*i+1], NAN ); \n"
-        "               f1 = (float3)( in2[3*i], in2[3*i+1], NAN ); \n"
-        "               f2 = (float3)( in3[3*i], in3[3*i+1], NAN ); \n"
-        "               break;\n"
-        "       }\n"
-        "       f0 = ",
-        name,
-        "( f0, f1, f2 );\n"
-        "       switch( parity )\n"
-        "       {\n"
-        "           case 0:\n"
-        "               out[3*i+1] = f0.y; \n"
-        "               // fall through\n"
-        "           case 1:\n"
-        "               out[3*i] = f0.x; \n"
-        "               break;\n"
-        "       }\n"
-        "   }\n"
-        "}\n"
-    };
-
-    const char **kern = c;
-    size_t kernSize = sizeof(c) / sizeof(c[0]);
-
-    if (sizeValues[vectorSize] == 3)
-    {
-        kern = c3;
-        kernSize = sizeof(c3) / sizeof(c3[0]);
-    }
-
-    char testName[32];
-    snprintf(testName, sizeof(testName) - 1, "math_kernel%s",
-             sizeNames[vectorSize]);
-
-    return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode);
+    auto kernel_name = GetKernelName(vectorSize);
+    auto source = GetTernaryKernel(kernel_name, name, ParameterType::Float,
+                                   ParameterType::Float, ParameterType::Float,
+                                   ParameterType::Float, vectorSize);
+    std::array<const char *, 1> sources{ source.c_str() };
+    return MakeKernel(sources.data(), sources.size(), kernel_name.c_str(), k, p,
+                      relaxedMode);
 }
 
 struct BuildKernelInfo2
@@ -843,8 +765,8 @@
         {
             if (gVerboseBruteForce)
             {
-                vlog("base:%14u step:%10u bufferSize:%10zd \n", i, step,
-                     BUFFER_SIZE);
+                vlog("base:%14" PRIu64 " step:%10" PRIu64 " bufferSize:%10d \n",
+                     i, step, BUFFER_SIZE);
             }
             else
             {
diff --git a/test_conformance/math_brute_force/unary_double.cpp b/test_conformance/math_brute_force/unary_double.cpp
index 7dfc12b..177cfe5 100644
--- a/test_conformance/math_brute_force/unary_double.cpp
+++ b/test_conformance/math_brute_force/unary_double.cpp
@@ -19,6 +19,7 @@
 #include "test_functions.h"
 #include "utility.h"
 
+#include <cinttypes>
 #include <cstring>
 
 namespace {
@@ -345,7 +346,7 @@
                 if (fail)
                 {
                     vlog_error("\nERROR: %s%s: %f ulp error at %.13la "
-                               "(0x%16.16llx): *%.13la vs. %.13la\n",
+                               "(0x%16.16" PRIx64 "): *%.13la vs. %.13la\n",
                                job->f->name, sizeNames[k], err,
                                ((cl_double *)gIn)[j], ((cl_ulong *)gIn)[j],
                                ((cl_double *)gOut_Ref)[j], test);
@@ -427,7 +428,7 @@
         test_info.k[i].resize(test_info.threadCount, nullptr);
     }
 
-    test_info.tinfo.resize(test_info.threadCount, ThreadInfo{});
+    test_info.tinfo.resize(test_info.threadCount);
     for (cl_uint i = 0; i < test_info.threadCount; i++)
     {
         cl_buffer_region region = {
diff --git a/test_conformance/math_brute_force/unary_float.cpp b/test_conformance/math_brute_force/unary_float.cpp
index 6a5c353..4c1f1a1 100644
--- a/test_conformance/math_brute_force/unary_float.cpp
+++ b/test_conformance/math_brute_force/unary_float.cpp
@@ -580,7 +580,7 @@
         test_info.k[i].resize(test_info.threadCount, nullptr);
     }
 
-    test_info.tinfo.resize(test_info.threadCount, ThreadInfo{});
+    test_info.tinfo.resize(test_info.threadCount);
     for (cl_uint i = 0; i < test_info.threadCount; i++)
     {
         cl_buffer_region region = {
diff --git a/test_conformance/math_brute_force/unary_two_results_double.cpp b/test_conformance/math_brute_force/unary_two_results_double.cpp
index 858b2c3..6d7c61d 100644
--- a/test_conformance/math_brute_force/unary_two_results_double.cpp
+++ b/test_conformance/math_brute_force/unary_two_results_double.cpp
@@ -19,6 +19,7 @@
 #include "test_functions.h"
 #include "utility.h"
 
+#include <cinttypes>
 #include <cstring>
 
 namespace {
@@ -414,8 +415,9 @@
         {
             if (gVerboseBruteForce)
             {
-                vlog("base:%14u step:%10zu  bufferSize:%10zd \n", i, step,
-                     BUFFER_SIZE);
+                vlog("base:%14" PRIu64 " step:%10" PRIu64
+                     "  bufferSize:%10d \n",
+                     i, step, BUFFER_SIZE);
             }
             else
             {
diff --git a/test_conformance/math_brute_force/unary_two_results_float.cpp b/test_conformance/math_brute_force/unary_two_results_float.cpp
index 85e5d01..42e858c 100644
--- a/test_conformance/math_brute_force/unary_two_results_float.cpp
+++ b/test_conformance/math_brute_force/unary_two_results_float.cpp
@@ -19,6 +19,7 @@
 #include "test_functions.h"
 #include "utility.h"
 
+#include <cinttypes>
 #include <cstring>
 
 namespace {
@@ -546,8 +547,9 @@
         {
             if (gVerboseBruteForce)
             {
-                vlog("base:%14u step:%10zu  bufferSize:%10zd \n", i, step,
-                     BUFFER_SIZE);
+                vlog("base:%14" PRIu64 " step:%10" PRIu64
+                     "  bufferSize:%10d \n",
+                     i, step, BUFFER_SIZE);
             }
             else
             {
diff --git a/test_conformance/math_brute_force/unary_two_results_i_double.cpp b/test_conformance/math_brute_force/unary_two_results_i_double.cpp
index 4cfbca9..8b75194 100644
--- a/test_conformance/math_brute_force/unary_two_results_i_double.cpp
+++ b/test_conformance/math_brute_force/unary_two_results_i_double.cpp
@@ -19,6 +19,7 @@
 #include "test_functions.h"
 #include "utility.h"
 
+#include <cinttypes>
 #include <climits>
 #include <cstring>
 
@@ -386,8 +387,9 @@
         {
             if (gVerboseBruteForce)
             {
-                vlog("base:%14u step:%10zu  bufferSize:%10zd \n", i, step,
-                     BUFFER_SIZE);
+                vlog("base:%14" PRIu64 " step:%10" PRIu64
+                     "  bufferSize:%10d \n",
+                     i, step, BUFFER_SIZE);
             }
             else
             {
@@ -404,8 +406,8 @@
         else
             vlog("passed");
 
-        vlog("\t{%8.2f, %lld} @ {%a, %a}", maxError, maxError2, maxErrorVal,
-             maxErrorVal2);
+        vlog("\t{%8.2f, %" PRId64 "} @ {%a, %a}", maxError, maxError2,
+             maxErrorVal, maxErrorVal2);
     }
 
     vlog("\n");
diff --git a/test_conformance/math_brute_force/unary_two_results_i_float.cpp b/test_conformance/math_brute_force/unary_two_results_i_float.cpp
index e324ad0..54843a2 100644
--- a/test_conformance/math_brute_force/unary_two_results_i_float.cpp
+++ b/test_conformance/math_brute_force/unary_two_results_i_float.cpp
@@ -19,6 +19,7 @@
 #include "test_functions.h"
 #include "utility.h"
 
+#include <cinttypes>
 #include <climits>
 #include <cstring>
 
@@ -384,8 +385,9 @@
         {
             if (gVerboseBruteForce)
             {
-                vlog("base:%14u step:%10zu  bufferSize:%10zd \n", i, step,
-                     BUFFER_SIZE);
+                vlog("base:%14" PRIu64 " step:%10" PRIu64
+                     "  bufferSize:%10d \n",
+                     i, step, BUFFER_SIZE);
             }
             else
             {
@@ -402,8 +404,8 @@
         else
             vlog("passed");
 
-        vlog("\t{%8.2f, %lld} @ {%a, %a}", maxError, maxError2, maxErrorVal,
-             maxErrorVal2);
+        vlog("\t{%8.2f, %" PRId64 "} @ {%a, %a}", maxError, maxError2,
+             maxErrorVal, maxErrorVal2);
     }
 
     vlog("\n");
diff --git a/test_conformance/math_brute_force/unary_u_double.cpp b/test_conformance/math_brute_force/unary_u_double.cpp
index a0c6b79..9b60904 100644
--- a/test_conformance/math_brute_force/unary_u_double.cpp
+++ b/test_conformance/math_brute_force/unary_u_double.cpp
@@ -19,6 +19,7 @@
 #include "test_functions.h"
 #include "utility.h"
 
+#include <cinttypes>
 #include <cstring>
 
 namespace {
@@ -267,11 +268,11 @@
                     }
                     if (fail)
                     {
-                        vlog_error("\n%s%sD: %f ulp error at 0x%16.16llx: "
-                                   "*%.13la vs. %.13la\n",
-                                   f->name, sizeNames[k], err,
-                                   ((uint64_t *)gIn)[j],
-                                   ((double *)gOut_Ref)[j], test);
+                        vlog_error(
+                            "\n%s%sD: %f ulp error at 0x%16.16" PRIx64 ": "
+                            "*%.13la vs. %.13la\n",
+                            f->name, sizeNames[k], err, ((uint64_t *)gIn)[j],
+                            ((double *)gOut_Ref)[j], test);
                         error = -1;
                         goto exit;
                     }
@@ -283,8 +284,9 @@
         {
             if (gVerboseBruteForce)
             {
-                vlog("base:%14u step:%10zu  bufferSize:%10zd \n", i, step,
-                     BUFFER_SIZE);
+                vlog("base:%14" PRIu64 " step:%10" PRIu64
+                     "  bufferSize:%10d \n",
+                     i, step, BUFFER_SIZE);
             }
             else
             {
diff --git a/test_conformance/math_brute_force/unary_u_float.cpp b/test_conformance/math_brute_force/unary_u_float.cpp
index ccfbc3b..b67a9bd 100644
--- a/test_conformance/math_brute_force/unary_u_float.cpp
+++ b/test_conformance/math_brute_force/unary_u_float.cpp
@@ -19,6 +19,7 @@
 #include "test_functions.h"
 #include "utility.h"
 
+#include <cinttypes>
 #include <cstring>
 
 namespace {
@@ -285,8 +286,9 @@
         {
             if (gVerboseBruteForce)
             {
-                vlog("base:%14u step:%10zu  bufferSize:%10zd \n", i, step,
-                     BUFFER_SIZE);
+                vlog("base:%14" PRIu64 " step:%10" PRIu64
+                     "  bufferSize:%10d \n",
+                     i, step, BUFFER_SIZE);
             }
             else
             {
diff --git a/test_conformance/pipes/test_pipe_limits.cpp b/test_conformance/pipes/test_pipe_limits.cpp
index 7e97925..e1048f5 100644
--- a/test_conformance/pipes/test_pipe_limits.cpp
+++ b/test_conformance/pipes/test_pipe_limits.cpp
@@ -69,7 +69,7 @@
                   }
           }
           )";
-          // clang-format om
+        // clang-format on
     }
     stream << R"(
     }
diff --git a/test_conformance/pipes/test_pipe_read_write.cpp b/test_conformance/pipes/test_pipe_read_write.cpp
index a502e03..425c7ae 100644
--- a/test_conformance/pipes/test_pipe_read_write.cpp
+++ b/test_conformance/pipes/test_pipe_read_write.cpp
@@ -414,9 +414,9 @@
 static int verify_readwrite_double(void *ptr1, void *ptr2, int n)
 {
     int                i;
-    long long int    sum_input = 0, sum_output = 0;
-    long long int    *inptr = (long long int *)ptr1;
-    long long int    *outptr = (long long int *)ptr2;
+    cl_long sum_input = 0, sum_output = 0;
+    cl_long *inptr = (cl_long *)ptr1;
+    cl_long *outptr = (cl_long *)ptr2;
 
     for(i = 0; i < n; i++)
     {
@@ -1075,7 +1075,8 @@
 
     if(!is_extension_available(deviceID, "cl_khr_fp16"))
     {
-        log_info("cl_khr_fp16 is not supported on this platoform. Skipping test.\n");
+        log_info(
+            "cl_khr_fp16 is not supported on this platform. Skipping test.\n");
         return CL_SUCCESS;
     }
     ptrSizes[0] = sizeof(cl_float) / 2;
@@ -1245,7 +1246,7 @@
 
     size_t  min_alignment = get_min_alignment(context);
 
-    foo = verify_readwrite_long;
+    foo = verify_readwrite_double;
 
     ptrSizes[0] = sizeof(cl_double);
     ptrSizes[1] = ptrSizes[0] << 1;
@@ -1256,7 +1257,8 @@
     //skip devices that don't support double
     if(!is_extension_available(deviceID, "cl_khr_fp64"))
     {
-        log_info("cl_khr_fp64 is not supported on this platoform. Skipping test.\n");
+        log_info(
+            "cl_khr_fp64 is not supported on this platform. Skipping test.\n");
         return CL_SUCCESS;
     }
 
@@ -1403,7 +1405,8 @@
 
     if(!is_extension_available(deviceID, "cl_khr_subgroups"))
     {
-        log_info("cl_khr_subgroups is not supported on this platoform. Skipping test.\n");
+        log_info("cl_khr_subgroups is not supported on this platform. Skipping "
+                 "test.\n");
         return CL_SUCCESS;
     }
     return test_pipe_readwrite_int(deviceID, context, queue, num_elements);
@@ -1417,7 +1420,8 @@
 
     if(!is_extension_available(deviceID, "cl_khr_subgroups"))
     {
-        log_info("cl_khr_subgroups is not supported on this platoform. Skipping test.\n");
+        log_info("cl_khr_subgroups is not supported on this platform. Skipping "
+                 "test.\n");
         return CL_SUCCESS;
     }
     return test_pipe_readwrite_uint(deviceID, context, queue, num_elements);
@@ -1431,7 +1435,8 @@
 
     if(!is_extension_available(deviceID, "cl_khr_subgroups"))
     {
-        log_info("cl_khr_subgroups is not supported on this platoform. Skipping test.\n");
+        log_info("cl_khr_subgroups is not supported on this platform. Skipping "
+                 "test.\n");
         return CL_SUCCESS;
     }
     return test_pipe_readwrite_short(deviceID, context, queue, num_elements);
@@ -1445,7 +1450,8 @@
 
     if(!is_extension_available(deviceID, "cl_khr_subgroups"))
     {
-        log_info("cl_khr_subgroups is not supported on this platoform. Skipping test.\n");
+        log_info("cl_khr_subgroups is not supported on this platform. Skipping "
+                 "test.\n");
         return CL_SUCCESS;
     }
     return test_pipe_readwrite_ushort(deviceID, context, queue, num_elements);
@@ -1459,7 +1465,8 @@
 
     if(!is_extension_available(deviceID, "cl_khr_subgroups"))
     {
-        log_info("cl_khr_subgroups is not supported on this platoform. Skipping test.\n");
+        log_info("cl_khr_subgroups is not supported on this platform. Skipping "
+                 "test.\n");
         return CL_SUCCESS;
     }
     return test_pipe_readwrite_char(deviceID, context, queue, num_elements);
@@ -1473,7 +1480,8 @@
 
     if(!is_extension_available(deviceID, "cl_khr_subgroups"))
     {
-        log_info("cl_khr_subgroups is not supported on this platoform. Skipping test.\n");
+        log_info("cl_khr_subgroups is not supported on this platform. Skipping "
+                 "test.\n");
         return CL_SUCCESS;
     }
     return test_pipe_readwrite_uchar(deviceID, context, queue, num_elements);
@@ -1488,7 +1496,8 @@
 
     if(!is_extension_available(deviceID, "cl_khr_subgroups"))
     {
-        log_info("cl_khr_subgroups is not supported on this platoform. Skipping test.\n");
+        log_info("cl_khr_subgroups is not supported on this platform. Skipping "
+                 "test.\n");
         return CL_SUCCESS;
     }
     return test_pipe_readwrite_float(deviceID, context, queue, num_elements);
@@ -1502,7 +1511,8 @@
 
     if(!is_extension_available(deviceID, "cl_khr_subgroups"))
     {
-        log_info("cl_khr_subgroups is not supported on this platoform. Skipping test.\n");
+        log_info("cl_khr_subgroups is not supported on this platform. Skipping "
+                 "test.\n");
         return CL_SUCCESS;
     }
     return test_pipe_readwrite_half(deviceID, context, queue, num_elements);
@@ -1516,7 +1526,8 @@
 
     if(!is_extension_available(deviceID, "cl_khr_subgroups"))
     {
-        log_info("cl_khr_subgroups is not supported on this platoform. Skipping test.\n");
+        log_info("cl_khr_subgroups is not supported on this platform. Skipping "
+                 "test.\n");
         return CL_SUCCESS;
     }
     return test_pipe_readwrite_long(deviceID, context, queue, num_elements);
@@ -1530,7 +1541,8 @@
 
     if(!is_extension_available(deviceID, "cl_khr_subgroups"))
     {
-        log_info("cl_khr_subgroups is not supported on this platoform. Skipping test.\n");
+        log_info("cl_khr_subgroups is not supported on this platform. Skipping "
+                 "test.\n");
         return CL_SUCCESS;
     }
     return test_pipe_readwrite_ulong(deviceID, context, queue, num_elements);
@@ -1544,7 +1556,8 @@
 
     if(!is_extension_available(deviceID, "cl_khr_subgroups"))
     {
-        log_info("cl_khr_subgroups is not supported on this platoform. Skipping test.\n");
+        log_info("cl_khr_subgroups is not supported on this platform. Skipping "
+                 "test.\n");
         return CL_SUCCESS;
     }
     return test_pipe_readwrite_double(deviceID, context, queue, num_elements);
@@ -1554,7 +1567,8 @@
 {
     if(!is_extension_available(deviceID, "cl_khr_subgroups"))
     {
-        log_info("cl_khr_subgroups is not supported on this platoform. Skipping test.\n");
+        log_info("cl_khr_subgroups is not supported on this platform. Skipping "
+                 "test.\n");
         return CL_SUCCESS;
     }
     const char *kernelNames[] = {"test_pipe_subgroup_write_struct","test_pipe_subgroup_read_struct"};
diff --git a/test_conformance/pipes/test_pipe_subgroups.cpp b/test_conformance/pipes/test_pipe_subgroups.cpp
index b3e1718..8e2f6e5 100644
--- a/test_conformance/pipes/test_pipe_subgroups.cpp
+++ b/test_conformance/pipes/test_pipe_subgroups.cpp
@@ -114,9 +114,8 @@
 
     if (!is_extension_available(deviceID, "cl_khr_subgroups"))
     {
-        log_info(
-            "cl_khr_subgroups is not supported on this platoform. Skipping "
-            "test.\n");
+        log_info("cl_khr_subgroups is not supported on this platform. Skipping "
+                 "test.\n");
         return CL_SUCCESS;
     }
 
diff --git a/test_conformance/printf/test_printf.cpp b/test_conformance/printf/test_printf.cpp
index a32ee4e..d638cd4 100644
--- a/test_conformance/printf/test_printf.cpp
+++ b/test_conformance/printf/test_printf.cpp
@@ -1,6 +1,6 @@
 //
 // Copyright (c) 2017 The Khronos Group Inc.
-// 
+//
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -1030,8 +1030,6 @@
         return TEST_SKIP;
     }
 
-    log_info( "Test binary built %s %s\n", __DATE__, __TIME__ );
-
     gFd = acquireOutputStream(&err);
     if (err != 0)
     {
diff --git a/test_conformance/run_conformance.py b/test_conformance/run_conformance.py
index bb8f86f..974491e 100755
--- a/test_conformance/run_conformance.py
+++ b/test_conformance/run_conformance.py
@@ -16,7 +16,6 @@
 import subprocess
 import time
 import tempfile
-import string
 
 DEBUG = 0
 
@@ -27,7 +26,6 @@
 #  to the screen while the tests are running.
 seconds_between_status_updates = 60 * 60 * 24 * 7  # effectively never
 
-
 # Help info
 def write_help_info():
     print("run_conformance.py test_list [CL_DEVICE_TYPE(s) to test] [partial-test-names, ...] [log=path/to/log/file/]")
@@ -66,16 +64,16 @@
         device_specific_match = re.search("^\s*(.+?)\s*,\s*(.+?)\s*,\s*(.+?)\s*$", line)
         if device_specific_match:
             if device_specific_match.group(1) in devices_to_test:
-                test_path = string.replace(device_specific_match.group(3), '/', os.sep)
-                test_name = string.replace(device_specific_match.group(2), '/', os.sep)
+                test_path = device_specific_match.group(3).replace('/', os.sep)
+                test_name = device_specific_match.group(2).replace('/', os.sep)
                 tests.append((test_name, test_path))
             else:
                 print("Skipping " + device_specific_match.group(2) + " because " + device_specific_match.group(1) + " is not in the list of devices to test.")
             continue
         match = re.search("^\s*(.+?)\s*,\s*(.+?)\s*$", line)
         if match:
-            test_path = string.replace(match.group(2), '/', os.sep)
-            test_name = string.replace(match.group(1), '/', os.sep)
+            test_path = match.group(2).replace('/', os.sep)
+            test_name = match.group(1).replace('/', os.sep)
             tests.append((test_name, test_path))
     return tests
 
@@ -243,7 +241,10 @@
             # Catch an interrupt from the user
             write_screen_log("\nFAILED: Execution interrupted.  Killing test process, but not aborting full test run.")
             os.kill(process_pid, 9)
-            answer = raw_input("Abort all tests? (y/n)")
+            if sys.version_info[0] < 3:
+                answer = raw_input("Abort all tests? (y/n)")
+            else:
+                answer = input("Abort all tests? (y/n)")
             if answer.find("y") != -1:
                 write_screen_log("\nUser chose to abort all tests.")
                 log_file.close()
diff --git a/test_conformance/select/test_select.cpp b/test_conformance/select/test_select.cpp
index 27ee5ff..972a53c 100644
--- a/test_conformance/select/test_select.cpp
+++ b/test_conformance/select/test_select.cpp
@@ -1,6 +1,6 @@
 //
 // Copyright (c) 2017 The Khronos Group Inc.
-// 
+//
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -639,7 +639,6 @@
         s_wimpy_mode = true;
     }
 
-    log_info( "Test binary built %s %s\n", __DATE__, __TIME__ );
     if (s_wimpy_mode) {
         log_info("\n");
         log_info("*** WARNING: Testing in Wimpy mode!                     ***\n");
@@ -668,4 +667,3 @@
         log_info( "\t%s\n", test_list[i].name );
     }
 }
-
diff --git a/test_conformance/subgroups/subhelpers.h b/test_conformance/subgroups/subhelpers.h
index 0944ffb..0a2c390 100644
--- a/test_conformance/subgroups/subhelpers.h
+++ b/test_conformance/subgroups/subhelpers.h
@@ -72,7 +72,7 @@
     size_t subgroup_size;
     cl_uint cluster_size;
     bs128 work_items_mask;
-    int dynsc;
+    size_t dynsc;
     bool use_core_subgroups;
     std::vector<bs128> all_work_item_masks;
     int divergence_mask_arg;
@@ -1495,7 +1495,7 @@
     {
         size_t tmp;
         cl_int error;
-        int subgroup_size, num_subgroups;
+        size_t subgroup_size, num_subgroups;
         size_t global = test_params.global_workgroup_size;
         size_t local = test_params.local_workgroup_size;
         clProgramWrapper program;
@@ -1580,7 +1580,7 @@
             return TEST_FAIL;
         }
 
-        subgroup_size = (int)tmp;
+        subgroup_size = tmp;
 
         error = clGetKernelSubGroupInfo_ptr(
             kernel, device, CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE,
@@ -1593,11 +1593,11 @@
             return TEST_FAIL;
         }
 
-        num_subgroups = (int)tmp;
+        num_subgroups = tmp;
         // Make sure the number of sub groups is what we expect
         if (num_subgroups != (local + subgroup_size - 1) / subgroup_size)
         {
-            log_error("ERROR: unexpected number of subgroups (%d) returned\n",
+            log_error("ERROR: unexpected number of subgroups (%zu) returned\n",
                       num_subgroups);
             return TEST_FAIL;
         }
@@ -1606,13 +1606,12 @@
         std::vector<Ty> odata;
         size_t input_array_size = global;
         size_t output_array_size = global;
-        int dynscl = test_params.dynsc;
+        size_t dynscl = test_params.dynsc;
 
         if (dynscl != 0)
         {
-            input_array_size =
-                (int)global / (int)local * num_subgroups * dynscl;
-            output_array_size = (int)global / (int)local * dynscl;
+            input_array_size = global / local * num_subgroups * dynscl;
+            output_array_size = global / local * dynscl;
         }
 
         idata.resize(input_array_size);
diff --git a/test_conformance/vulkan/main.cpp b/test_conformance/vulkan/main.cpp
index 6cbde5c..2eeb0c3 100644
--- a/test_conformance/vulkan/main.cpp
+++ b/test_conformance/vulkan/main.cpp
@@ -134,7 +134,6 @@
 const size_t bufsize = BUFFERSIZE;
 char buf[BUFFERSIZE];
 cl_uchar uuid[CL_UUID_SIZE_KHR];
-VulkanDevice vkDevice;
 unsigned int numCQ;
 bool multiImport;
 bool multiCtx;
@@ -220,9 +219,12 @@
     if (!checkVkSupport())
     {
         log_info("Vulkan supported GPU not found \n");
+        log_info("TEST SKIPPED \n");
         return 0;
     }
 
+    VulkanDevice vkDevice;
+
     cl_device_type requestedDeviceType = CL_DEVICE_TYPE_GPU;
     char *force_cpu = getenv("CL_DEVICE_TYPE");
     if (force_cpu != NULL)
diff --git a/test_conformance/vulkan/shaders/buffer.comp b/test_conformance/vulkan/shaders/buffer.comp
new file mode 100644
index 0000000..d8756f9
--- /dev/null
+++ b/test_conformance/vulkan/shaders/buffer.comp
@@ -0,0 +1,28 @@
+#version 450
+#extension GL_ARB_separate_shader_objects : enable
+#extension GL_EXT_shader_explicit_arithmetic_types_int8    : enable
+#extension GL_EXT_shader_explicit_arithmetic_types_int32   : enable
+
+#define MAX_BUFFERS 5
+
+layout(binding = 0) buffer Params
+{
+  uint32_t numBuffers;
+  uint32_t bufferSize;
+  uint32_t interBufferOffset;
+};
+layout(binding = 1) buffer Buffer
+{
+  uint8_t ptr[];
+} bufferPtrList[MAX_BUFFERS];
+layout(local_size_x = 512) in;
+void main() {
+    for (uint32_t bufIdx = 0; bufIdx < numBuffers; bufIdx++) {
+        uint32_t ptrIdx = gl_GlobalInvocationID.x;
+        uint32_t limit = bufferSize;
+        while (ptrIdx < limit) {
+            bufferPtrList[bufIdx].ptr[ptrIdx]++;
+            ptrIdx += (gl_NumWorkGroups.x * gl_WorkGroupSize.x);
+        }
+    }
+}
\ No newline at end of file
diff --git a/test_conformance/vulkan/shaders/buffer.spv b/test_conformance/vulkan/shaders/buffer.spv
new file mode 100644
index 0000000..685523b
--- /dev/null
+++ b/test_conformance/vulkan/shaders/buffer.spv
Binary files differ
diff --git a/test_conformance/vulkan/shaders/image2D.comp b/test_conformance/vulkan/shaders/image2D.comp
new file mode 100644
index 0000000..42fa2f7
--- /dev/null
+++ b/test_conformance/vulkan/shaders/image2D.comp
@@ -0,0 +1,31 @@
+#version 450
+#extension GL_ARB_separate_shader_objects : enable
+#extension GL_EXT_shader_explicit_arithmetic_types_int32   : enable
+
+#define MAX_2D_IMAGES               5
+#define MAX_2D_IMAGE_MIP_LEVELS     11
+#define MAX_2D_IMAGE_DESCRIPTORS    (MAX_2D_IMAGES * MAX_2D_IMAGE_MIP_LEVELS)
+
+layout(binding = 0) buffer Params
+{
+    uint32_t numImage2DDescriptors;
+};
+layout(binding = 1, rgba32f ) uniform image2D image2DList[ MAX_2D_IMAGE_DESCRIPTORS ];
+layout(local_size_x = 32, local_size_y = 32) in;
+void main() {
+    uvec3 numThreads = gl_NumWorkGroups * gl_WorkGroupSize;
+    for (uint32_t image2DIdx = 0; image2DIdx < numImage2DDescriptors; image2DIdx++) {
+        ivec2 imageDim = imageSize(image2DList[image2DIdx]);
+        uint32_t heightBy2 = imageDim.y / 2;
+        for (uint32_t row = gl_GlobalInvocationID.y; row < heightBy2; row += numThreads.y) {
+            for (uint32_t col = gl_GlobalInvocationID.x; col < imageDim.x; col += numThreads.x) {
+                ivec2 coordsA = ivec2(col, row);
+                ivec2 coordsB = ivec2(col, imageDim.y - row - 1);
+                vec4 dataA = imageLoad(image2DList[image2DIdx], coordsA);
+                vec4 dataB = imageLoad(image2DList[image2DIdx], coordsB);
+                imageStore(image2DList[image2DIdx], coordsA, dataB);
+                imageStore(image2DList[image2DIdx], coordsB, dataA);
+            }
+        }
+    }
+}
\ No newline at end of file
diff --git a/test_conformance/vulkan/shaders/image2D_r16i.spv b/test_conformance/vulkan/shaders/image2D_r16i.spv
new file mode 100644
index 0000000..00c5c28
--- /dev/null
+++ b/test_conformance/vulkan/shaders/image2D_r16i.spv
Binary files differ
diff --git a/test_conformance/vulkan/shaders/image2D_r16ui.spv b/test_conformance/vulkan/shaders/image2D_r16ui.spv
new file mode 100644
index 0000000..87514d9
--- /dev/null
+++ b/test_conformance/vulkan/shaders/image2D_r16ui.spv
Binary files differ
diff --git a/test_conformance/vulkan/shaders/image2D_r32f.spv b/test_conformance/vulkan/shaders/image2D_r32f.spv
new file mode 100644
index 0000000..e82c9c1
--- /dev/null
+++ b/test_conformance/vulkan/shaders/image2D_r32f.spv
Binary files differ
diff --git a/test_conformance/vulkan/shaders/image2D_r32i.spv b/test_conformance/vulkan/shaders/image2D_r32i.spv
new file mode 100644
index 0000000..7ea8d26
--- /dev/null
+++ b/test_conformance/vulkan/shaders/image2D_r32i.spv
Binary files differ
diff --git a/test_conformance/vulkan/shaders/image2D_r32ui.spv b/test_conformance/vulkan/shaders/image2D_r32ui.spv
new file mode 100644
index 0000000..dbcdbc5
--- /dev/null
+++ b/test_conformance/vulkan/shaders/image2D_r32ui.spv
Binary files differ
diff --git a/test_conformance/vulkan/shaders/image2D_r8i.spv b/test_conformance/vulkan/shaders/image2D_r8i.spv
new file mode 100644
index 0000000..1a64147
--- /dev/null
+++ b/test_conformance/vulkan/shaders/image2D_r8i.spv
Binary files differ
diff --git a/test_conformance/vulkan/shaders/image2D_r8ui.spv b/test_conformance/vulkan/shaders/image2D_r8ui.spv
new file mode 100644
index 0000000..a90ccf9
--- /dev/null
+++ b/test_conformance/vulkan/shaders/image2D_r8ui.spv
Binary files differ
diff --git a/test_conformance/vulkan/shaders/image2D_rg16i.spv b/test_conformance/vulkan/shaders/image2D_rg16i.spv
new file mode 100644
index 0000000..0799617
--- /dev/null
+++ b/test_conformance/vulkan/shaders/image2D_rg16i.spv
Binary files differ
diff --git a/test_conformance/vulkan/shaders/image2D_rg16ui.spv b/test_conformance/vulkan/shaders/image2D_rg16ui.spv
new file mode 100644
index 0000000..f73e096
--- /dev/null
+++ b/test_conformance/vulkan/shaders/image2D_rg16ui.spv
Binary files differ
diff --git a/test_conformance/vulkan/shaders/image2D_rg32f.spv b/test_conformance/vulkan/shaders/image2D_rg32f.spv
new file mode 100644
index 0000000..1489660
--- /dev/null
+++ b/test_conformance/vulkan/shaders/image2D_rg32f.spv
Binary files differ
diff --git a/test_conformance/vulkan/shaders/image2D_rg32i.spv b/test_conformance/vulkan/shaders/image2D_rg32i.spv
new file mode 100644
index 0000000..b7d302f
--- /dev/null
+++ b/test_conformance/vulkan/shaders/image2D_rg32i.spv
Binary files differ
diff --git a/test_conformance/vulkan/shaders/image2D_rg32ui.spv b/test_conformance/vulkan/shaders/image2D_rg32ui.spv
new file mode 100644
index 0000000..6cf2f1b
--- /dev/null
+++ b/test_conformance/vulkan/shaders/image2D_rg32ui.spv
Binary files differ
diff --git a/test_conformance/vulkan/shaders/image2D_rg8i.spv b/test_conformance/vulkan/shaders/image2D_rg8i.spv
new file mode 100644
index 0000000..a71b9bf
--- /dev/null
+++ b/test_conformance/vulkan/shaders/image2D_rg8i.spv
Binary files differ
diff --git a/test_conformance/vulkan/shaders/image2D_rg8ui.spv b/test_conformance/vulkan/shaders/image2D_rg8ui.spv
new file mode 100644
index 0000000..2aca929
--- /dev/null
+++ b/test_conformance/vulkan/shaders/image2D_rg8ui.spv
Binary files differ
diff --git a/test_conformance/vulkan/shaders/image2D_rgba16i.spv b/test_conformance/vulkan/shaders/image2D_rgba16i.spv
new file mode 100644
index 0000000..0cb95df
--- /dev/null
+++ b/test_conformance/vulkan/shaders/image2D_rgba16i.spv
Binary files differ
diff --git a/test_conformance/vulkan/shaders/image2D_rgba16ui.spv b/test_conformance/vulkan/shaders/image2D_rgba16ui.spv
new file mode 100644
index 0000000..84c3d3d
--- /dev/null
+++ b/test_conformance/vulkan/shaders/image2D_rgba16ui.spv
Binary files differ
diff --git a/test_conformance/vulkan/shaders/image2D_rgba32f.spv b/test_conformance/vulkan/shaders/image2D_rgba32f.spv
new file mode 100644
index 0000000..35136c5
--- /dev/null
+++ b/test_conformance/vulkan/shaders/image2D_rgba32f.spv
Binary files differ
diff --git a/test_conformance/vulkan/shaders/image2D_rgba32i.spv b/test_conformance/vulkan/shaders/image2D_rgba32i.spv
new file mode 100644
index 0000000..4d1ae58
--- /dev/null
+++ b/test_conformance/vulkan/shaders/image2D_rgba32i.spv
Binary files differ
diff --git a/test_conformance/vulkan/shaders/image2D_rgba32ui.spv b/test_conformance/vulkan/shaders/image2D_rgba32ui.spv
new file mode 100644
index 0000000..bed86f0
--- /dev/null
+++ b/test_conformance/vulkan/shaders/image2D_rgba32ui.spv
Binary files differ
diff --git a/test_conformance/vulkan/shaders/image2D_rgba8i.spv b/test_conformance/vulkan/shaders/image2D_rgba8i.spv
new file mode 100644
index 0000000..edf8c58
--- /dev/null
+++ b/test_conformance/vulkan/shaders/image2D_rgba8i.spv
Binary files differ
diff --git a/test_conformance/vulkan/shaders/image2D_rgba8ui.spv b/test_conformance/vulkan/shaders/image2D_rgba8ui.spv
new file mode 100644
index 0000000..bb9a770
--- /dev/null
+++ b/test_conformance/vulkan/shaders/image2D_rgba8ui.spv
Binary files differ
diff --git a/test_conformance/vulkan/test_vulkan_api_consistency.cpp b/test_conformance/vulkan/test_vulkan_api_consistency.cpp
index 2987418..f22ac31 100644
--- a/test_conformance/vulkan/test_vulkan_api_consistency.cpp
+++ b/test_conformance/vulkan/test_vulkan_api_consistency.cpp
@@ -238,7 +238,7 @@
     const VulkanMemoryTypeList& memoryTypeList = vkImage2D->getMemoryTypeList();
     uint64_t totalImageMemSize = vkImage2D->getSize();
 
-    log_info("Memory type index: %d\n", (uint32_t)memoryTypeList[0]);
+    log_info("Memory type index: %u\n", (uint32_t)memoryTypeList[0]);
     log_info("Memory type property: %d\n",
              memoryTypeList[0].getMemoryTypeProperty());
     log_info("Image size : %d\n", totalImageMemSize);
@@ -552,17 +552,17 @@
 
 
     // Pass invalid object to release call
-    errNum = clReleaseSemaphoreObjectKHRptr(NULL);
+    errNum = clReleaseSemaphoreKHRptr(NULL);
     test_failure_error(errNum, CL_INVALID_VALUE,
-                       "clReleaseSemaphoreObjectKHRptr fails with "
+                       "clReleaseSemaphoreKHRptr fails with "
                        "CL_INVALID_VALUE when NULL semaphore object is passed");
 
     // Release both semaphore objects
-    errNum = clReleaseSemaphoreObjectKHRptr(clVk2Clsemaphore);
-    test_error(errNum, "clReleaseSemaphoreObjectKHRptr failed");
+    errNum = clReleaseSemaphoreKHRptr(clVk2Clsemaphore);
+    test_error(errNum, "clReleaseSemaphoreKHRptr failed");
 
-    errNum = clReleaseSemaphoreObjectKHRptr(clCl2Vksemaphore);
-    test_error(errNum, "clReleaseSemaphoreObjectKHRptr failed");
+    errNum = clReleaseSemaphoreKHRptr(clCl2Vksemaphore);
+    test_error(errNum, "clReleaseSemaphoreKHRptr failed");
 
     return TEST_PASS;
 }
diff --git a/test_conformance/vulkan/test_vulkan_interop_buffer.cpp b/test_conformance/vulkan/test_vulkan_interop_buffer.cpp
index 7daf96d..9b0bc9d 100644
--- a/test_conformance/vulkan/test_vulkan_interop_buffer.cpp
+++ b/test_conformance/vulkan/test_vulkan_interop_buffer.cpp
@@ -39,35 +39,6 @@
 };
 }
 
-static const char *vkBufferShader =
-    "#version 450\n"
-    "#extension GL_ARB_separate_shader_objects : enable\n"
-    "#extension GL_NV_gpu_shader5 : enable\n"
-    "layout(binding = 0) buffer Params\n"
-    "{\n"
-    "    uint32_t numBuffers;\n"
-    "    uint32_t bufferSize;\n"
-    "    uint32_t interBufferOffset;\n"
-    "};\n"
-    "layout(binding = 1) buffer Buffer\n"
-    "{\n"
-    "    uint8_t ptr[];\n"
-    "} bufferPtrList[" STRING(
-        MAX_BUFFERS) "];\n"
-                     "layout(local_size_x = 512) in;\n"
-                     "void main() {\n"
-                     "    for (uint32_t bufIdx = 0; bufIdx < numBuffers;"
-                     " bufIdx++) {\n"
-                     "        uint32_t ptrIdx = gl_GlobalInvocationID.x;\n"
-                     "        uint32_t limit = bufferSize;\n"
-                     "        while (ptrIdx < limit) {\n"
-                     "            bufferPtrList[bufIdx].ptr[ptrIdx]++;\n"
-                     "            ptrIdx += (gl_NumWorkGroups.x * "
-                     "gl_WorkGroupSize.x);\n"
-                     "        }\n"
-                     "    }\n"
-                     "}\n";
-
 const char *kernel_text_numbuffer_1 = " \
 __kernel void clUpdateBuffer(int bufferSize, __global unsigned char *a) {  \n\
     int gid = get_global_id(0); \n\
@@ -149,6 +120,8 @@
 
     VulkanQueue &vkQueue = vkDevice.getQueue();
 
+    std::vector<char> vkBufferShader = readFile("buffer.spv");
+
     VulkanShaderModule vkBufferShaderModule(vkDevice, vkBufferShader);
     VulkanDescriptorSetLayoutBindingList vkDescriptorSetLayoutBindingList(
         MAX_BUFFERS + 1, VULKAN_DESCRIPTOR_TYPE_STORAGE_BUFFER);
@@ -446,6 +419,7 @@
 
     VulkanQueue &vkQueue = vkDevice.getQueue();
 
+    std::vector<char> vkBufferShader = readFile("buffer.spv");
     VulkanShaderModule vkBufferShaderModule(vkDevice, vkBufferShader);
     VulkanDescriptorSetLayoutBindingList vkDescriptorSetLayoutBindingList(
         MAX_BUFFERS + 1, VULKAN_DESCRIPTOR_TYPE_STORAGE_BUFFER);
@@ -716,6 +690,8 @@
 
     VulkanQueue &vkQueue = vkDevice.getQueue();
 
+    std::vector<char> vkBufferShader = readFile("buffer.spv");
+
     VulkanShaderModule vkBufferShaderModule(vkDevice, vkBufferShader);
     VulkanDescriptorSetLayoutBindingList vkDescriptorSetLayoutBindingList(
         MAX_BUFFERS + 1, VULKAN_DESCRIPTOR_TYPE_STORAGE_BUFFER);
@@ -1050,6 +1026,8 @@
 
     VulkanQueue &vkQueue = vkDevice.getQueue();
 
+    std::vector<char> vkBufferShader = readFile("buffer.spv");
+
     VulkanShaderModule vkBufferShaderModule(vkDevice, vkBufferShader);
     VulkanDescriptorSetLayoutBindingList vkDescriptorSetLayoutBindingList(
         MAX_BUFFERS + 1, VULKAN_DESCRIPTOR_TYPE_STORAGE_BUFFER);
diff --git a/test_conformance/vulkan/test_vulkan_interop_image.cpp b/test_conformance/vulkan/test_vulkan_interop_image.cpp
index f1d0af1..7577de0 100644
--- a/test_conformance/vulkan/test_vulkan_interop_image.cpp
+++ b/test_conformance/vulkan/test_vulkan_interop_image.cpp
@@ -25,8 +25,6 @@
 #define MAX_2D_IMAGE_ELEMENT_SIZE 16
 #define MAX_2D_IMAGE_MIP_LEVELS 11
 #define MAX_2D_IMAGE_DESCRIPTORS MAX_2D_IMAGES *MAX_2D_IMAGE_MIP_LEVELS
-#define GLSL_FORMAT_STRING "<GLSL_FORMAT>"
-#define GLSL_TYPE_PREFIX_STRING "<GLSL_TYPE_PREFIX>"
 #define NUM_THREADS_PER_GROUP_X 32
 #define NUM_THREADS_PER_GROUP_Y 32
 #define NUM_BLOCKS(size, blockSize)                                            \
@@ -54,61 +52,8 @@
 }
 static cl_uchar uuid[CL_UUID_SIZE_KHR];
 static cl_device_id deviceId = NULL;
-
-static const char *vkImage2DShader =
-    "#version 450\n"
-    "#extension GL_ARB_separate_shader_objects : enable\n"
-    "#extension GL_NV_gpu_shader5 : enable\n"
-    "layout(binding = 0) buffer Params\n"
-    "{\n"
-    "    uint32_t numImage2DDescriptors;\n"
-    "};\n"
-    "layout(binding = 1, " GLSL_FORMAT_STRING
-    ") uniform " GLSL_TYPE_PREFIX_STRING "image2D image2DList[" STRING(
-        MAX_2D_IMAGE_DESCRIPTORS) "];\n"
-                                  "layout(local_size_x = 32, local_size_y = "
-                                  "32) in;\n"
-                                  "void main() {\n"
-                                  "    uvec3 numThreads = gl_NumWorkGroups * "
-                                  "gl_WorkGroupSize;\n"
-                                  "    for (uint32_t image2DIdx = 0; "
-                                  "image2DIdx < numImage2DDescriptors; "
-                                  "image2DIdx++)"
-                                  "    {\n"
-                                  "        ivec2 imageDim = "
-                                  "imageSize(image2DList[image2DIdx]);\n"
-                                  "        uint32_t heightBy2 = imageDim.y / "
-                                  "2;\n"
-                                  "        for (uint32_t row = "
-                                  "gl_GlobalInvocationID.y; row < heightBy2; "
-                                  "row += numThreads.y)"
-                                  "        {\n"
-                                  "            for (uint32_t col = "
-                                  "gl_GlobalInvocationID.x; col < imageDim.x; "
-                                  "col += numThreads.x)"
-                                  "            {\n"
-                                  "                ivec2 coordsA = ivec2(col, "
-                                  "row);\n"
-                                  "                ivec2 coordsB = ivec2(col, "
-                                  "imageDim.y - row - 1);\n"
-                                  "                " GLSL_TYPE_PREFIX_STRING
-                                  "vec4 dataA = "
-                                  "imageLoad(image2DList[image2DIdx], "
-                                  "coordsA);\n"
-                                  "                " GLSL_TYPE_PREFIX_STRING
-                                  "vec4 dataB = "
-                                  "imageLoad(image2DList[image2DIdx], "
-                                  "coordsB);\n"
-                                  "                "
-                                  "imageStore(image2DList[image2DIdx], "
-                                  "coordsA, dataB);\n"
-                                  "                "
-                                  "imageStore(image2DList[image2DIdx], "
-                                  "coordsB, dataA);\n"
-                                  "            }\n"
-                                  "        }\n"
-                                  "    }\n"
-                                  "}\n";
+size_t max_width = MAX_2D_IMAGE_WIDTH;
+size_t max_height = MAX_2D_IMAGE_HEIGHT;
 
 const char *kernel_text_numImage_1 = " \
 __constant sampler_t smpImg = CLK_NORMALIZED_COORDS_FALSE|CLK_ADDRESS_NONE|CLK_FILTER_NEAREST;\n\
@@ -268,8 +213,8 @@
                             VULKAN_MEMORY_TYPE_PROPERTY_HOST_VISIBLE_COHERENT));
     vkParamsDeviceMemory.bindBuffer(vkParamsBuffer);
 
-    uint64_t maxImage2DSize = MAX_2D_IMAGE_WIDTH * MAX_2D_IMAGE_HEIGHT
-        * MAX_2D_IMAGE_ELEMENT_SIZE * 2;
+    uint64_t maxImage2DSize =
+        max_width * max_height * MAX_2D_IMAGE_ELEMENT_SIZE * 2;
     VulkanBuffer vkSrcBuffer(vkDevice, maxImage2DSize);
     VulkanDeviceMemory vkSrcBufferDeviceMemory(
         vkDevice, vkSrcBuffer.getSize(),
@@ -310,6 +255,12 @@
     clCl2VkExternalSemaphore = new clExternalSemaphore(
         vkCl2VkSemaphore, context, vkExternalSemaphoreHandleType, deviceId);
 
+    std::vector<VulkanDeviceMemory *> vkNonDedicatedImage2DListDeviceMemory1;
+    std::vector<VulkanDeviceMemory *> vkNonDedicatedImage2DListDeviceMemory2;
+    std::vector<clExternalMemoryImage *> nonDedicatedExternalMemory1;
+    std::vector<clExternalMemoryImage *> nonDedicatedExternalMemory2;
+    std::vector<char> vkImage2DShader;
+
     for (size_t fIdx = 0; fIdx < vkFormatList.size(); fIdx++)
     {
         VulkanFormat vkFormat = vkFormatList[fIdx];
@@ -317,15 +268,13 @@
         uint32_t elementSize = getVulkanFormatElementSize(vkFormat);
         ASSERT_LEQ(elementSize, (uint32_t)MAX_2D_IMAGE_ELEMENT_SIZE);
         log_info("elementSize= %d\n", elementSize);
-        std::map<std::string, std::string> patternToSubstituteMap;
-        patternToSubstituteMap[GLSL_FORMAT_STRING] =
-            getVulkanFormatGLSLFormat(vkFormat);
-        patternToSubstituteMap[GLSL_TYPE_PREFIX_STRING] =
-            getVulkanFormatGLSLTypePrefix(vkFormat);
 
-        VulkanShaderModule vkImage2DShaderModule(
-            vkDevice,
-            prepareVulkanShader(vkImage2DShader, patternToSubstituteMap));
+        std::string fileName = "image2D_"
+            + std::string(getVulkanFormatGLSLFormat(vkFormat)) + ".spv";
+        log_info("Load %s file", fileName.c_str());
+        vkImage2DShader = readFile(fileName);
+        VulkanShaderModule vkImage2DShaderModule(vkDevice, vkImage2DShader);
+
         VulkanComputePipeline vkComputePipeline(vkDevice, vkPipelineLayout,
                                                 vkImage2DShaderModule);
 
@@ -333,13 +282,13 @@
         {
             uint32_t width = widthList[wIdx];
             log_info("Width: %d\n", width);
-            ASSERT_LEQ(width, (uint32_t)MAX_2D_IMAGE_WIDTH);
+            if (width > max_width) continue;
             region[0] = width;
             for (size_t hIdx = 0; hIdx < ARRAY_SIZE(heightList); hIdx++)
             {
                 uint32_t height = heightList[hIdx];
                 log_info("Height: %d", height);
-                ASSERT_LEQ(height, (uint32_t)MAX_2D_IMAGE_HEIGHT);
+                if (height > max_height) continue;
                 region[1] = height;
 
                 uint32_t numMipLevels = 1;
@@ -418,14 +367,6 @@
                         const VulkanMemoryTypeList &memoryTypeList =
                             vkDummyImage2D.getMemoryTypeList();
 
-                        std::vector<VulkanDeviceMemory *>
-                            vkNonDedicatedImage2DListDeviceMemory1;
-                        std::vector<VulkanDeviceMemory *>
-                            vkNonDedicatedImage2DListDeviceMemory2;
-                        std::vector<clExternalMemoryImage *>
-                            nonDedicatedExternalMemory1;
-                        std::vector<clExternalMemoryImage *>
-                            nonDedicatedExternalMemory2;
                         for (size_t mtIdx = 0; mtIdx < memoryTypeList.size();
                              mtIdx++)
                         {
@@ -834,6 +775,8 @@
                 }
             }
         }
+
+        vkImage2DShader.clear();
     }
 CLEANUP:
     if (clVk2CLExternalSemaphore) delete clVk2CLExternalSemaphore;
@@ -866,8 +809,8 @@
                             VULKAN_MEMORY_TYPE_PROPERTY_HOST_VISIBLE_COHERENT));
     vkParamsDeviceMemory.bindBuffer(vkParamsBuffer);
 
-    uint64_t maxImage2DSize = MAX_2D_IMAGE_WIDTH * MAX_2D_IMAGE_HEIGHT
-        * MAX_2D_IMAGE_ELEMENT_SIZE * 2;
+    uint64_t maxImage2DSize =
+        max_width * max_height * MAX_2D_IMAGE_ELEMENT_SIZE * 2;
     VulkanBuffer vkSrcBuffer(vkDevice, maxImage2DSize);
     VulkanDeviceMemory vkSrcBufferDeviceMemory(
         vkDevice, vkSrcBuffer.getSize(),
@@ -908,6 +851,12 @@
     clCl2VkExternalSemaphore = new clExternalSemaphore(
         vkCl2VkSemaphore, context, vkExternalSemaphoreHandleType, deviceId);
 
+    std::vector<VulkanDeviceMemory *> vkNonDedicatedImage2DListDeviceMemory1;
+    std::vector<VulkanDeviceMemory *> vkNonDedicatedImage2DListDeviceMemory2;
+    std::vector<clExternalMemoryImage *> nonDedicatedExternalMemory1;
+    std::vector<clExternalMemoryImage *> nonDedicatedExternalMemory2;
+    std::vector<char> vkImage2DShader;
+
     for (size_t fIdx = 0; fIdx < vkFormatList.size(); fIdx++)
     {
         VulkanFormat vkFormat = vkFormatList[fIdx];
@@ -915,15 +864,13 @@
         uint32_t elementSize = getVulkanFormatElementSize(vkFormat);
         ASSERT_LEQ(elementSize, (uint32_t)MAX_2D_IMAGE_ELEMENT_SIZE);
         log_info("elementSize= %d\n", elementSize);
-        std::map<std::string, std::string> patternToSubstituteMap;
-        patternToSubstituteMap[GLSL_FORMAT_STRING] =
-            getVulkanFormatGLSLFormat(vkFormat);
-        patternToSubstituteMap[GLSL_TYPE_PREFIX_STRING] =
-            getVulkanFormatGLSLTypePrefix(vkFormat);
 
-        VulkanShaderModule vkImage2DShaderModule(
-            vkDevice,
-            prepareVulkanShader(vkImage2DShader, patternToSubstituteMap));
+        std::string fileName = "image2D_"
+            + std::string(getVulkanFormatGLSLFormat(vkFormat)) + ".spv";
+        log_info("Load %s file", fileName.c_str());
+        vkImage2DShader = readFile(fileName);
+        VulkanShaderModule vkImage2DShaderModule(vkDevice, vkImage2DShader);
+
         VulkanComputePipeline vkComputePipeline(vkDevice, vkPipelineLayout,
                                                 vkImage2DShaderModule);
 
@@ -931,13 +878,13 @@
         {
             uint32_t width = widthList[wIdx];
             log_info("Width: %d\n", width);
-            ASSERT_LEQ(width, (uint32_t)MAX_2D_IMAGE_WIDTH);
+            if (width > max_width) continue;
             region[0] = width;
             for (size_t hIdx = 0; hIdx < ARRAY_SIZE(heightList); hIdx++)
             {
                 uint32_t height = heightList[hIdx];
                 log_info("Height: %d\n", height);
-                ASSERT_LEQ(height, (uint32_t)MAX_2D_IMAGE_HEIGHT);
+                if (height > max_height) continue;
                 region[1] = height;
 
                 uint32_t numMipLevels = 1;
@@ -1016,14 +963,6 @@
                         const VulkanMemoryTypeList &memoryTypeList =
                             vkDummyImage2D.getMemoryTypeList();
 
-                        std::vector<VulkanDeviceMemory *>
-                            vkNonDedicatedImage2DListDeviceMemory1;
-                        std::vector<VulkanDeviceMemory *>
-                            vkNonDedicatedImage2DListDeviceMemory2;
-                        std::vector<clExternalMemoryImage *>
-                            nonDedicatedExternalMemory1;
-                        std::vector<clExternalMemoryImage *>
-                            nonDedicatedExternalMemory2;
                         for (size_t mtIdx = 0; mtIdx < memoryTypeList.size();
                              mtIdx++)
                         {
@@ -1368,6 +1307,7 @@
                 }
             }
         }
+        vkImage2DShader.clear();
     }
 CLEANUP:
     if (clVk2CLExternalSemaphore) delete clVk2CLExternalSemaphore;
@@ -1494,6 +1434,14 @@
         goto CLEANUP;
     }
     deviceId = devices[device_no];
+    // max_width/max_height are size_t, hence the %zu conversions below.
+    err = setMaxImageDimensions(deviceId, max_width, max_height);
+    if (CL_SUCCESS != err)
+    {
+        print_error(err, "error setting max image dimensions");
+        goto CLEANUP;
+    }
+    log_info("Set max_width to %zu and max_height to %zu\n", max_width, max_height);
     context = clCreateContextFromType(contextProperties, CL_DEVICE_TYPE_GPU,
                                       NULL, NULL, &err);
     if (CL_SUCCESS != err)
diff --git a/test_conformance/vulkan/vulkan_interop_common/opencl_vulkan_wrapper.cpp b/test_conformance/vulkan/vulkan_interop_common/opencl_vulkan_wrapper.cpp
index 136818f..9d9a660 100644
--- a/test_conformance/vulkan/vulkan_interop_common/opencl_vulkan_wrapper.cpp
+++ b/test_conformance/vulkan/vulkan_interop_common/opencl_vulkan_wrapper.cpp
@@ -23,6 +23,7 @@
 #include <stdexcept>
 
 #define ASSERT(x) assert((x))
+#define GB(x) ((unsigned long long)(x) << 30)
 
 pfnclCreateSemaphoreWithPropertiesKHR clCreateSemaphoreWithPropertiesKHRptr;
 pfnclEnqueueWaitSemaphoresKHR clEnqueueWaitSemaphoresKHRptr;
@@ -31,7 +32,7 @@
     clEnqueueAcquireExternalMemObjectsKHRptr;
 pfnclEnqueueReleaseExternalMemObjectsKHR
     clEnqueueReleaseExternalMemObjectsKHRptr;
-pfnclReleaseSemaphoreObjectKHR clReleaseSemaphoreObjectKHRptr;
+pfnclReleaseSemaphoreKHR clReleaseSemaphoreKHRptr;
 
 void init_cl_vk_ext(cl_platform_id opencl_platform)
 {
@@ -51,13 +52,13 @@
         throw std::runtime_error("Failed to get the function pointer of "
                                  "clEnqueueSignalSemaphoresKHRptr!");
     }
-    clReleaseSemaphoreObjectKHRptr = (pfnclReleaseSemaphoreObjectKHR)
-        clGetExtensionFunctionAddressForPlatform(opencl_platform,
-                                                 "clReleaseSemaphoreObjectKHR");
-    if (NULL == clReleaseSemaphoreObjectKHRptr)
+    clReleaseSemaphoreKHRptr =
+        (pfnclReleaseSemaphoreKHR)clGetExtensionFunctionAddressForPlatform(
+            opencl_platform, "clReleaseSemaphoreKHR");
+    if (NULL == clReleaseSemaphoreKHRptr)
     {
         throw std::runtime_error("Failed to get the function pointer of "
-                                 "clReleaseSemaphoreObjectKHRptr!");
+                                 "clReleaseSemaphoreKHRptr!");
     }
     clCreateSemaphoreWithPropertiesKHRptr =
         (pfnclCreateSemaphoreWithPropertiesKHR)
@@ -70,6 +71,40 @@
     }
 }
 
+// Picks the largest 2D image size the tests use from the device's global
+// memory size: below 4 GB -> 256x256, below 8 GB -> 512x256, else 1024x512.
+// On clGetDeviceInfo failure the error is returned and neither output is
+// modified.
+cl_int setMaxImageDimensions(cl_device_id deviceID, size_t &max_width,
+                             size_t &max_height)
+{
+    cl_ulong globalMemSize;
+    size_t returnedSize;
+
+    const cl_int status =
+        clGetDeviceInfo(deviceID, CL_DEVICE_GLOBAL_MEM_SIZE,
+                        sizeof(cl_ulong), &globalMemSize, &returnedSize);
+    if (status != CL_SUCCESS) return status;
+
+    // Default to the largest tier and shrink for smaller devices.
+    size_t width = 1024;
+    size_t height = 512;
+    if (globalMemSize < GB(4))
+    {
+        width = 256;
+        height = 256;
+    }
+    else if (globalMemSize < GB(8))
+    {
+        width = 512;
+        height = 256;
+    }
+
+    max_width = width;
+    max_height = height;
+    return status;
+}
+
 cl_int getCLFormatFromVkFormat(VkFormat vkFormat,
                                cl_image_format *clImageFormat)
 {
@@ -798,10 +833,10 @@
 
 clExternalSemaphore::~clExternalSemaphore()
 {
-    cl_int err = clReleaseSemaphoreObjectKHRptr(m_externalSemaphore);
+    cl_int err = clReleaseSemaphoreKHRptr(m_externalSemaphore);
     if (err != CL_SUCCESS)
     {
-        throw std::runtime_error("clReleaseSemaphoreObjectKHR failed!");
+        throw std::runtime_error("clReleaseSemaphoreKHR failed!");
     }
 }
 
diff --git a/test_conformance/vulkan/vulkan_interop_common/opencl_vulkan_wrapper.hpp b/test_conformance/vulkan/vulkan_interop_common/opencl_vulkan_wrapper.hpp
index c1d2a76..d9f8dcc 100644
--- a/test_conformance/vulkan/vulkan_interop_common/opencl_vulkan_wrapper.hpp
+++ b/test_conformance/vulkan/vulkan_interop_common/opencl_vulkan_wrapper.hpp
@@ -49,7 +49,7 @@
     cl_command_queue command_queue, cl_uint num_mem_objects,
     const cl_mem *mem_objects, cl_uint num_events_in_wait_list,
     const cl_event *event_wait_list, cl_event *event);
-typedef cl_int (*pfnclReleaseSemaphoreObjectKHR)(cl_semaphore_khr sema_object);
+typedef cl_int (*pfnclReleaseSemaphoreKHR)(cl_semaphore_khr sema_object);
 
 extern pfnclCreateSemaphoreWithPropertiesKHR
     clCreateSemaphoreWithPropertiesKHRptr;
@@ -59,7 +59,7 @@
     clEnqueueAcquireExternalMemObjectsKHRptr;
 extern pfnclEnqueueReleaseExternalMemObjectsKHR
     clEnqueueReleaseExternalMemObjectsKHRptr;
-extern pfnclReleaseSemaphoreObjectKHR clReleaseSemaphoreObjectKHRptr;
+extern pfnclReleaseSemaphoreKHR clReleaseSemaphoreKHRptr;
 
 cl_int getCLImageInfoFromVkImageInfo(const VkImageCreateInfo *, size_t,
                                      cl_image_format *, cl_image_desc *);
@@ -69,6 +69,8 @@
 cl_int check_external_semaphore_handle_type(
     cl_device_id deviceID,
     cl_external_semaphore_handle_type_khr requiredHandleType);
+cl_int setMaxImageDimensions(cl_device_id deviceID, size_t &width,
+                             size_t &height);
 
 class clExternalMemory {
 protected:
diff --git a/test_conformance/vulkan/vulkan_interop_common/vulkan_list_map.hpp b/test_conformance/vulkan/vulkan_interop_common/vulkan_list_map.hpp
index 831403e..5220677 100644
--- a/test_conformance/vulkan/vulkan_interop_common/vulkan_list_map.hpp
+++ b/test_conformance/vulkan/vulkan_interop_common/vulkan_list_map.hpp
@@ -37,7 +37,7 @@
     virtual size_t size() const;
     virtual const VulkanWrapper &operator[](size_t idx) const;
     virtual VulkanWrapper &operator[](size_t idx);
-    virtual operator const VulkanNative *() const;
+    virtual const VulkanNative *operator()() const;
 };
 
 template <class VulkanKey, class VulkanValue> class VulkanMap {
@@ -335,15 +335,12 @@
 template <class VulkanWrapper, class VulkanNative>
 VulkanWrapper &VulkanList<VulkanWrapper, VulkanNative>::operator[](size_t idx)
 {
-    if (idx < m_wrapperList.size())
-    {
-        // CHECK_LT(idx, m_wrapperList.size());
-        return m_wrapperList[idx].get();
-    }
+    // CHECK_LT(idx, m_wrapperList.size());
+    return m_wrapperList[idx].get();
 }
 
 template <class VulkanWrapper, class VulkanNative>
-VulkanList<VulkanWrapper, VulkanNative>::operator const VulkanNative *() const
+const VulkanNative *VulkanList<VulkanWrapper, VulkanNative>::operator()() const
 {
     return m_nativeList.data();
 }
diff --git a/test_conformance/vulkan/vulkan_interop_common/vulkan_utility.cpp b/test_conformance/vulkan/vulkan_interop_common/vulkan_utility.cpp
index 81e1262..1a313cc 100644
--- a/test_conformance/vulkan/vulkan_interop_common/vulkan_utility.cpp
+++ b/test_conformance/vulkan/vulkan_interop_common/vulkan_utility.cpp
@@ -18,6 +18,7 @@
 #include "vulkan_wrapper.hpp"
 #include <assert.h>
 #include <iostream>
+#include <fstream>
 #include <set>
 #include <string>
 #include <CL/cl.h>
@@ -182,7 +183,7 @@
     const VulkanInstance &instance = getVulkanInstance();
     const VulkanPhysicalDeviceList &physicalDeviceList =
         instance.getPhysicalDeviceList();
-    if (physicalDeviceList == NULL)
+    if (physicalDeviceList() == NULL)
     {
         std::cout << "physicalDeviceList is null, No GPUs found with "
                      "Vulkan support !!!\n";
@@ -541,59 +542,6 @@
     return (const char *)size_t(0);
 }
 
-const char *getVulkanFormatGLSLTypePrefix(VulkanFormat format)
-{
-    switch (format)
-    {
-        case VULKAN_FORMAT_R8_UINT:
-        case VULKAN_FORMAT_R8G8_UINT:
-        case VULKAN_FORMAT_R8G8B8A8_UINT:
-        case VULKAN_FORMAT_R16_UINT:
-        case VULKAN_FORMAT_R16G16_UINT:
-        case VULKAN_FORMAT_R16G16B16A16_UINT:
-        case VULKAN_FORMAT_R32_UINT:
-        case VULKAN_FORMAT_R32G32_UINT:
-        case VULKAN_FORMAT_R32G32B32A32_UINT: return "u";
-
-        case VULKAN_FORMAT_R8_SINT:
-        case VULKAN_FORMAT_R8G8_SINT:
-        case VULKAN_FORMAT_R8G8B8A8_SINT:
-        case VULKAN_FORMAT_R16_SINT:
-        case VULKAN_FORMAT_R16G16_SINT:
-        case VULKAN_FORMAT_R16G16B16A16_SINT:
-        case VULKAN_FORMAT_R32_SINT:
-        case VULKAN_FORMAT_R32G32_SINT:
-        case VULKAN_FORMAT_R32G32B32A32_SINT: return "i";
-
-        case VULKAN_FORMAT_R32_SFLOAT:
-        case VULKAN_FORMAT_R32G32_SFLOAT:
-        case VULKAN_FORMAT_R32G32B32A32_SFLOAT: return "";
-
-        default: ASSERT(0); std::cout << "Unknown format";
-    }
-
-    return "";
-}
-
-std::string prepareVulkanShader(
-    std::string shaderCode,
-    const std::map<std::string, std::string> &patternToSubstituteMap)
-{
-    for (std::map<std::string, std::string>::const_iterator psIt =
-             patternToSubstituteMap.begin();
-         psIt != patternToSubstituteMap.end(); ++psIt)
-    {
-        std::string::size_type pos = 0u;
-        while ((pos = shaderCode.find(psIt->first, pos)) != std::string::npos)
-        {
-            shaderCode.replace(pos, psIt->first.length(), psIt->second);
-            pos += psIt->second.length();
-        }
-    }
-
-    return shaderCode;
-}
-
 std::ostream &operator<<(std::ostream &os,
                          VulkanMemoryTypeProperty memoryTypeProperty)
 {
@@ -691,3 +639,54 @@
 
     return os;
 }
+
+// Looks for `filename` in each known shader directory and returns the first
+// path that can be opened for reading. The result is allocated with malloc()
+// and owned by the caller, who must free() it. Returns NULL when the file is
+// not found in any directory or when the allocation fails.
+static char *findFilePath(const std::string filename)
+{
+    const char *searchPath[] = {
+        "./", // Same dir
+        "./shaders/", // In shaders folder in same dir
+        "../test_conformance/vulkan/shaders/" // In src folder
+    };
+    const size_t numPaths = sizeof(searchPath) / sizeof(searchPath[0]);
+
+    for (size_t i = 0; i < numPaths; ++i)
+    {
+        const std::string path = std::string(searchPath[i]) + filename;
+
+        // Probe by opening: a successful fopen() means the file is readable.
+        FILE *fp = fopen(path.c_str(), "rb");
+        if (fp == NULL) continue;
+        fclose(fp);
+
+        char *file_path = (char *)malloc(path.length() + 1);
+        // Guard the copy: malloc() can fail, and strncpy() into NULL is UB.
+        if (file_path != NULL)
+            strncpy(file_path, path.c_str(), path.length() + 1);
+        return file_path;
+    }
+
+    return NULL; // File not found
+}
+
+// Reads shader binary `filename` (located via findFilePath) into a byte
+// buffer; throws std::runtime_error if it cannot be found or opened.
+std::vector<char> readFile(const std::string &filename)
+{
+    char *file_path = findFilePath(filename);
+    // NULL means not found: "" makes the open fail cleanly rather than
+    // passing a null pointer to std::ifstream.
+    const std::string path(file_path ? file_path : "");
+    free(file_path); // malloc'd by findFilePath and no longer needed
+    std::ifstream file(path.c_str(), std::ios::ate | std::ios::binary);
+    if (!file.is_open())
+        throw std::runtime_error("failed to open shader spv file!\n");
+    const size_t fileSize = (size_t)file.tellg(); // opened at end => size
+    std::vector<char> buffer(fileSize);
+    file.seekg(0);
+    file.read(buffer.data(), fileSize);
+    printf("filesize is %zu", fileSize); // %zu matches size_t (was %d)
+    return buffer;
+}
diff --git a/test_conformance/vulkan/vulkan_interop_common/vulkan_utility.hpp b/test_conformance/vulkan/vulkan_interop_common/vulkan_utility.hpp
index 7022fd5..04f5a59 100644
--- a/test_conformance/vulkan/vulkan_interop_common/vulkan_utility.hpp
+++ b/test_conformance/vulkan/vulkan_interop_common/vulkan_utility.hpp
@@ -66,4 +66,5 @@
            VulkanExternalSemaphoreHandleType externalSemaphoreHandleType);
 std::ostream& operator<<(std::ostream& os, VulkanFormat format);
 
+std::vector<char> readFile(const std::string& filename);
 #endif // _vulkan_utility_hpp_
diff --git a/test_conformance/vulkan/vulkan_interop_common/vulkan_wrapper.cpp b/test_conformance/vulkan/vulkan_interop_common/vulkan_wrapper.cpp
index c044e00..6209a74 100644
--- a/test_conformance/vulkan/vulkan_interop_common/vulkan_wrapper.cpp
+++ b/test_conformance/vulkan/vulkan_interop_common/vulkan_wrapper.cpp
@@ -201,7 +201,8 @@
 
     if (physicalDeviceCount == uint32_t(0))
     {
-        throw std::runtime_error("failed to find GPUs with Vulkan support!");
+        std::cout << "failed to find GPUs with Vulkan support!\n";
+        return;
     }
 
     std::vector<VkPhysicalDevice> vkPhysicalDeviceList(physicalDeviceCount,
@@ -625,12 +626,12 @@
     vkSubmitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
     vkSubmitInfo.pNext = NULL;
     vkSubmitInfo.waitSemaphoreCount = (uint32_t)waitSemaphoreList.size();
-    vkSubmitInfo.pWaitSemaphores = waitSemaphoreList;
+    vkSubmitInfo.pWaitSemaphores = waitSemaphoreList();
     vkSubmitInfo.pWaitDstStageMask = vkPipelineStageFlagsList.data();
     vkSubmitInfo.commandBufferCount = (uint32_t)commandBufferList.size();
-    vkSubmitInfo.pCommandBuffers = commandBufferList;
+    vkSubmitInfo.pCommandBuffers = commandBufferList();
     vkSubmitInfo.signalSemaphoreCount = (uint32_t)signalSemaphoreList.size();
-    vkSubmitInfo.pSignalSemaphores = signalSemaphoreList;
+    vkSubmitInfo.pSignalSemaphores = signalSemaphoreList();
 
     vkQueueSubmit(m_vkQueue, 1, &vkSubmitInfo, NULL);
 }
@@ -728,7 +729,8 @@
     vkDescriptorSetLayoutCreateInfo.flags = 0;
     vkDescriptorSetLayoutCreateInfo.bindingCount =
         (uint32_t)descriptorSetLayoutBindingList.size();
-    vkDescriptorSetLayoutCreateInfo.pBindings = descriptorSetLayoutBindingList;
+    vkDescriptorSetLayoutCreateInfo.pBindings =
+        descriptorSetLayoutBindingList();
 
     vkCreateDescriptorSetLayout(m_device, &vkDescriptorSetLayoutCreateInfo,
                                 NULL, &m_vkDescriptorSetLayout);
@@ -799,7 +801,7 @@
     vkPipelineLayoutCreateInfo.flags = 0;
     vkPipelineLayoutCreateInfo.setLayoutCount =
         (uint32_t)descriptorSetLayoutList.size();
-    vkPipelineLayoutCreateInfo.pSetLayouts = descriptorSetLayoutList;
+    vkPipelineLayoutCreateInfo.pSetLayouts = descriptorSetLayoutList();
     vkPipelineLayoutCreateInfo.pushConstantRangeCount = 0;
     vkPipelineLayoutCreateInfo.pPushConstantRanges = NULL;
 
@@ -846,23 +848,18 @@
 {}
 
 VulkanShaderModule::VulkanShaderModule(const VulkanDevice &device,
-                                       const std::string &code)
+                                       const std::vector<char> &code)
     : m_device(device)
 {
-    std::string paddedCode = code;
-    while (paddedCode.size() % 4)
-    {
-        paddedCode += " ";
-    }
 
     VkShaderModuleCreateInfo vkShaderModuleCreateInfo = {};
     vkShaderModuleCreateInfo.sType =
         VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO;
     vkShaderModuleCreateInfo.pNext = NULL;
     vkShaderModuleCreateInfo.flags = 0;
-    vkShaderModuleCreateInfo.codeSize = paddedCode.size();
+    vkShaderModuleCreateInfo.codeSize = code.size();
     vkShaderModuleCreateInfo.pCode =
-        (const uint32_t *)(void *)paddedCode.c_str();
+        reinterpret_cast<const uint32_t *>(code.data());
 
     vkCreateShaderModule(m_device, &vkShaderModuleCreateInfo, NULL,
                          &m_vkShaderModule);
@@ -1577,7 +1574,7 @@
     vkImageCreateInfo.queueFamilyIndexCount =
         (uint32_t)m_device.getPhysicalDevice().getQueueFamilyList().size();
     vkImageCreateInfo.pQueueFamilyIndices =
-        m_device.getPhysicalDevice().getQueueFamilyList();
+        m_device.getPhysicalDevice().getQueueFamilyList()();
     vkImageCreateInfo.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED;
 
     VkExternalMemoryImageCreateInfo vkExternalMemoryImageCreateInfo = {};
diff --git a/test_conformance/vulkan/vulkan_interop_common/vulkan_wrapper.hpp b/test_conformance/vulkan/vulkan_interop_common/vulkan_wrapper.hpp
index 1f68a92..37925ee 100644
--- a/test_conformance/vulkan/vulkan_interop_common/vulkan_wrapper.hpp
+++ b/test_conformance/vulkan/vulkan_interop_common/vulkan_wrapper.hpp
@@ -240,7 +240,8 @@
     VulkanShaderModule(const VulkanShaderModule &shaderModule);
 
 public:
-    VulkanShaderModule(const VulkanDevice &device, const std::string &code);
+    VulkanShaderModule(const VulkanDevice &device,
+                       const std::vector<char> &code);
     virtual ~VulkanShaderModule();
     operator VkShaderModule() const;
 };