Upgrade OpenCL-CTS to 90a5183ec499d5b4701f58f6134dd424d82c4dca am: aba6556309 am: f089d50c94 am: 85f8a87730
Original change: https://android-review.googlesource.com/c/platform/external/OpenCL-CTS/+/2271531
Change-Id: Ifc65a9be0007fffcbdb8a9d536b1dd0cda9e0e3b
Signed-off-by: Automerger Merge Worker <android-build-automerger-merge-worker@system.gserviceaccount.com>
diff --git a/CMakeLists.txt b/CMakeLists.txt
index b7c86ba..6a25d5b 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -113,6 +113,17 @@
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /D__SSE__")
endif()
+# Apply `flags` (a single string) as COMPILE_FLAGS on ${MODULE_NAME}_SOURCES when building with gcc or clang; no-op otherwise.
+macro(set_gnulike_module_compile_flags flags)
+ if(CMAKE_COMPILER_IS_GNUCC OR "${CMAKE_CXX_COMPILER_ID}" MATCHES "(Apple)?Clang")
+ set_source_files_properties(
+ ${${MODULE_NAME}_SOURCES}
+ PROPERTIES
+ COMPILE_FLAGS "${flags}" # quoted so an empty or ';'-containing value stays one property value
+ )
+ endif()
+endmacro()
+
if(MSVC)
# Don't warn when using standard non-secure functions.
add_compile_definitions(_CRT_SECURE_NO_WARNINGS)
diff --git a/METADATA b/METADATA
index 3f60212..235bc5a 100644
--- a/METADATA
+++ b/METADATA
@@ -1,13 +1,19 @@
-name: "OpenCL-CTS"
-description:
- "OpenCL Conformance Tests"
+# This project was upgraded with external_updater.
+# Usage: tools/external_updater/updater.sh update OpenCL-CTS
+# For more info, check https://cs.android.com/android/platform/superproject/+/master:tools/external_updater/README.md
+name: "OpenCL-CTS"
+description: "OpenCL Conformance Tests"
third_party {
url {
type: GIT
value: "https://github.com/KhronosGroup/OpenCL-CTS.git"
}
- version: "a87e686757f9fda5377baf73a32bb3c791eae70c"
- last_upgrade_date { year: 2022 month: 9 day: 18 }
+ version: "90a5183ec499d5b4701f58f6134dd424d82c4dca"
license_type: NOTICE
+ last_upgrade_date {
+ year: 2022
+ month: 10
+ day: 26
+ }
}
diff --git a/test_common/harness/conversions.cpp b/test_common/harness/conversions.cpp
index c773126..d52a2ac 100644
--- a/test_common/harness/conversions.cpp
+++ b/test_common/harness/conversions.cpp
@@ -14,6 +14,7 @@
// limitations under the License.
//
#include "conversions.h"
+#include <cinttypes>
#include <limits.h>
#include <time.h>
#include <assert.h>
@@ -50,10 +51,10 @@
case kInt: sprintf(string, "%d", *((cl_int *)data)); return;
case kUInt:
case kUnsignedInt: sprintf(string, "%u", *((cl_uint *)data)); return;
- case kLong: sprintf(string, "%lld", *((cl_long *)data)); return;
+ case kLong: sprintf(string, "%" PRId64 "", *((cl_long *)data)); return;
case kULong:
case kUnsignedLong:
- sprintf(string, "%llu", *((cl_ulong *)data));
+ sprintf(string, "%" PRIu64 "", *((cl_ulong *)data));
return;
case kFloat: sprintf(string, "%f", *((cl_float *)data)); return;
case kHalf: sprintf(string, "half"); return;
diff --git a/test_common/harness/imageHelpers.cpp b/test_common/harness/imageHelpers.cpp
index 3dbdffa..f1694e8 100644
--- a/test_common/harness/imageHelpers.cpp
+++ b/test_common/harness/imageHelpers.cpp
@@ -23,6 +23,7 @@
#include <malloc.h>
#endif
#include <algorithm>
+#include <cinttypes>
#include <iterator>
#if !defined(_WIN32)
#include <cmath>
@@ -421,7 +422,7 @@
(int)thirdDim, (int)imageInfo->rowPitch,
(int)imageInfo->rowPitch
- (int)imageInfo->width * (int)pixel_size);
- log_error("Failed at column: %ld ", where);
+ log_error("Failed at column: %zu ", where);
switch (pixel_size)
{
@@ -454,7 +455,7 @@
((cl_ushort *)destPixel)[1], ((cl_ushort *)destPixel)[2]);
break;
case 8:
- log_error("*0x%16.16llx vs. 0x%16.16llx\n",
+ log_error("*0x%16.16" PRIx64 " vs. 0x%16.16" PRIx64 "\n",
((cl_ulong *)sourcePixel)[0], ((cl_ulong *)destPixel)[0]);
break;
case 12:
@@ -473,7 +474,7 @@
((cl_uint *)destPixel)[2], ((cl_uint *)destPixel)[3]);
break;
default:
- log_error("Don't know how to print pixel size of %ld\n",
+ log_error("Don't know how to print pixel size of %zu\n",
pixel_size);
break;
}
diff --git a/test_common/harness/mt19937.h b/test_common/harness/mt19937.h
index 98eec84..447ca25 100644
--- a/test_common/harness/mt19937.h
+++ b/test_common/harness/mt19937.h
@@ -94,23 +94,42 @@
bool genrand_bool(MTdata /*data*/);
#include <cassert>
+#include <utility>
-struct MTdataHolder
-{
- MTdataHolder(cl_uint seed)
+class MTdataHolder {
+public:
+ MTdataHolder() = default;
+ explicit MTdataHolder(cl_uint seed)
{
m_mtdata = init_genrand(seed);
assert(m_mtdata != nullptr);
}
- MTdataHolder(MTdata mtdata): m_mtdata(mtdata) {}
+ // Forbid copy: the destructor frees m_mtdata, so two holders must never share one handle.
+ MTdataHolder(const MTdataHolder&) = delete;
+ MTdataHolder& operator=(const MTdataHolder&) = delete;
- ~MTdataHolder() { free_mtdata(m_mtdata); }
+ // Move by swapping handles; only a handle swap happens, so both operations are noexcept.
+ MTdataHolder(MTdataHolder&& h) noexcept { std::swap(m_mtdata, h.m_mtdata); }
+ MTdataHolder& operator=(MTdataHolder&& h) noexcept
+ {
+ std::swap(m_mtdata, h.m_mtdata);
+ return *this;
+ }
- operator MTdata() const { return m_mtdata; }
+ ~MTdataHolder()
+ {
+ if (m_mtdata) free_mtdata(m_mtdata);
+ }
+
+ operator MTdata() const
+ {
+ assert(m_mtdata && "Object wasn't initialised");
+ return m_mtdata;
+ }
private:
- MTdata m_mtdata;
+ MTdata m_mtdata = nullptr;
};
#endif // #ifdef __cplusplus
diff --git a/test_common/harness/propertyHelpers.cpp b/test_common/harness/propertyHelpers.cpp
index e368f9b..6a10c07 100644
--- a/test_common/harness/propertyHelpers.cpp
+++ b/test_common/harness/propertyHelpers.cpp
@@ -19,6 +19,7 @@
#include <assert.h>
#include <algorithm>
+#include <cinttypes>
#include <vector>
static bool findProperty(const std::vector<cl_properties>& props,
@@ -97,16 +98,16 @@
if (!found)
{
- log_error("ERROR: expected property 0x%llx not found!\n",
+ log_error("ERROR: expected property 0x%" PRIx64 " not found!\n",
check_prop);
return TEST_FAIL;
}
else if (check_value != queried_value)
{
- log_error(
- "ERROR: mis-matched value for property 0x%llx: wanted "
- "0x%llx, got 0x%llx\n",
- check_prop, check_value, queried_value);
+ log_error("ERROR: mis-matched value for property 0x%" PRIx64
+ ": wanted "
+ "0x%" PRIx64 ", got 0x%" PRIx64 "\n",
+ check_prop, check_value, queried_value);
return TEST_FAIL;
}
}
diff --git a/test_common/harness/testHarness.cpp b/test_common/harness/testHarness.cpp
index b386391..a309f53 100644
--- a/test_common/harness/testHarness.cpp
+++ b/test_common/harness/testHarness.cpp
@@ -60,6 +60,54 @@
#define DEFAULT_NUM_ELEMENTS 0x4000
+static int saveResultsToJson(const char *suiteName, test_definition testList[],
+ unsigned char selectedTestList[],
+ test_status resultTestList[], int testNum)
+{
+ char *fileName = getenv("CL_CONFORMANCE_RESULTS_FILENAME");
+ if (fileName == nullptr)
+ {
+ return EXIT_SUCCESS; // no results file requested: nothing to write
+ }
+
+ FILE *file = fopen(fileName, "w");
+ if (NULL == file)
+ {
+ log_error("ERROR: Failed to open '%s' for writing results.\n",
+ fileName);
+ return EXIT_FAILURE;
+ }
+
+ const char *save_map[] = { "success", "failure" };
+ const char *result_map[] = { "pass", "fail", "skip" };
+ const char *linebreak[] = { "", ",\n" };
+ int add_linebreak = 0;
+
+ fprintf(file, "{\n");
+ fprintf(file, "\t\"cmd\": \"%s\",\n", suiteName);
+ fprintf(file, "\t\"results\": {\n");
+
+ for (int i = 0; i < testNum; ++i)
+ {
+ if (selectedTestList[i])
+ {
+ fprintf(file, "%s\t\t\"%s\": \"%s\"", linebreak[add_linebreak],
+ testList[i].name, result_map[(int)resultTestList[i]]);
+ add_linebreak = 1;
+ }
+ }
+ fprintf(file, "\n");
+
+ fprintf(file, "\t}\n");
+ fprintf(file, "}\n");
+
+ int close_failed = fclose(file) ? 1 : 0;
+ // Index save_map with 0/1, not EXIT_*: the values of those macros are implementation-defined.
+ log_info("Saving results to %s: %s!\n", fileName, save_map[close_failed]);
+
+ return close_failed ? EXIT_FAILURE : EXIT_SUCCESS;
+}
+
int runTestHarness(int argc, const char *argv[], int testNum,
test_definition testList[], int forceNoContextCreation,
cl_command_queue_properties queueProps)
@@ -68,19 +116,28 @@
forceNoContextCreation, queueProps, NULL);
}
-int skip_init_info(int count)
+int suite_did_not_pass_init(const char *suiteName, test_status status,
+ int testNum, test_definition testList[])
{
- log_info("Test skipped while initialization\n");
- log_info("SKIPPED %d of %d tests.\n", count, count);
- return EXIT_SUCCESS;
+ std::vector<unsigned char> selectedTestList(testNum, 1);
+ std::vector<test_status> resultTestList(testNum, status);
+
+ int ret = saveResultsToJson(suiteName, testList, selectedTestList.data(),
+ resultTestList.data(), testNum);
+
+ log_info("Test %s while initialization\n",
+ status == TEST_SKIP ? "skipped" : "failed");
+ log_info("%s %d of %d tests.\n", status == TEST_SKIP ? "SKIPPED" : "FAILED",
+ testNum, testNum);
+
+ if (ret != EXIT_SUCCESS)
+ {
+ return ret;
+ }
+
+ return status == TEST_SKIP ? EXIT_SUCCESS : EXIT_FAILURE;
}
-int fail_init_info(int count)
-{
- log_info("Test failed while initialization\n");
- log_info("FAILED %d of %d tests.\n", count, count);
- return EXIT_FAILURE;
-}
void version_expected_info(const char *test_name, const char *api_name,
const char *expected_version,
const char *device_version)
@@ -470,6 +527,7 @@
log_error("Invalid device address bit size returned by device.\n");
return EXIT_FAILURE;
}
+ const char *suiteName = argv[0];
if (gCompilationMode == kSpir_v)
{
test_status spirv_readiness = check_spirv_compilation_readiness(device);
@@ -478,9 +536,15 @@
switch (spirv_readiness)
{
case TEST_PASS: break;
- case TEST_FAIL: return fail_init_info(testNum);
- case TEST_SKIP: return skip_init_info(testNum);
- case TEST_SKIPPED_ITSELF: return skip_init_info(testNum);
+ case TEST_FAIL:
+ return suite_did_not_pass_init(suiteName, TEST_FAIL,
+ testNum, testList);
+ case TEST_SKIP:
+ return suite_did_not_pass_init(suiteName, TEST_SKIP,
+ testNum, testList);
+ case TEST_SKIPPED_ITSELF:
+ return suite_did_not_pass_init(suiteName, TEST_SKIP,
+ testNum, testList);
}
}
}
@@ -492,9 +556,15 @@
switch (status)
{
case TEST_PASS: break;
- case TEST_FAIL: return fail_init_info(testNum);
- case TEST_SKIP: return skip_init_info(testNum);
- case TEST_SKIPPED_ITSELF: return skip_init_info(testNum);
+ case TEST_FAIL:
+ return suite_did_not_pass_init(suiteName, TEST_FAIL, testNum,
+ testList);
+ case TEST_SKIP:
+ return suite_did_not_pass_init(suiteName, TEST_SKIP, testNum,
+ testList);
+ case TEST_SKIPPED_ITSELF:
+ return suite_did_not_pass_init(suiteName, TEST_SKIP, testNum,
+ testList);
}
}
@@ -574,49 +644,6 @@
return EXIT_SUCCESS;
}
-static int saveResultsToJson(const char *fileName, const char *suiteName,
- test_definition testList[],
- unsigned char selectedTestList[],
- test_status resultTestList[], int testNum)
-{
- FILE *file = fopen(fileName, "w");
- if (NULL == file)
- {
- log_error("ERROR: Failed to open '%s' for writing results.\n",
- fileName);
- return EXIT_FAILURE;
- }
-
- const char *save_map[] = { "success", "failure" };
- const char *result_map[] = { "pass", "fail", "skip" };
- const char *linebreak[] = { "", ",\n" };
- int add_linebreak = 0;
-
- fprintf(file, "{\n");
- fprintf(file, "\t\"cmd\": \"%s\",\n", suiteName);
- fprintf(file, "\t\"results\": {\n");
-
- for (int i = 0; i < testNum; ++i)
- {
- if (selectedTestList[i])
- {
- fprintf(file, "%s\t\t\"%s\": \"%s\"", linebreak[add_linebreak],
- testList[i].name, result_map[(int)resultTestList[i]]);
- add_linebreak = 1;
- }
- }
- fprintf(file, "\n");
-
- fprintf(file, "\t}\n");
- fprintf(file, "}\n");
-
- int ret = fclose(file) ? 1 : 0;
-
- log_info("Saving results to %s: %s!\n", fileName, save_map[ret]);
-
- return ret;
-}
-
static void print_results(int failed, int count, const char *name)
{
if (count < failed)
@@ -658,7 +685,6 @@
int ret = EXIT_SUCCESS;
unsigned char *selectedTestList = (unsigned char *)calloc(testNum, 1);
- test_status *resultTestList = NULL;
if (argc == 1)
{
@@ -697,24 +723,19 @@
if (ret == EXIT_SUCCESS)
{
- resultTestList =
- (test_status *)calloc(testNum, sizeof(*resultTestList));
+ std::vector<test_status> resultTestList(testNum, TEST_PASS);
- callTestFunctions(testList, selectedTestList, resultTestList, testNum,
- device, forceNoContextCreation, num_elements,
+ callTestFunctions(testList, selectedTestList, resultTestList.data(),
+ testNum, device, forceNoContextCreation, num_elements,
queueProps);
print_results(gFailCount, gTestCount, "sub-test");
print_results(gTestsFailed, gTestsFailed + gTestsPassed, "test");
- char *filename = getenv("CL_CONFORMANCE_RESULTS_FILENAME");
- if (filename != NULL)
- {
- ret = saveResultsToJson(filename, argv[0], testList,
- selectedTestList, resultTestList, testNum);
- }
+ ret = saveResultsToJson(argv[0], testList, selectedTestList,
+ resultTestList.data(), testNum);
- if (std::any_of(resultTestList, resultTestList + testNum,
+ if (std::any_of(resultTestList.begin(), resultTestList.end(),
[](test_status result) {
switch (result)
{
@@ -730,7 +751,6 @@
}
free(selectedTestList);
- free(resultTestList);
return ret;
}
@@ -1178,7 +1198,7 @@
void PrintArch(void)
{
- vlog("sizeof( void*) = %ld\n", sizeof(void *));
+ vlog("sizeof( void*) = %zu\n", sizeof(void *));
#if defined(__ppc__)
vlog("ARCH:\tppc\n");
#elif defined(__ppc64__)
diff --git a/test_conformance/SVM/test_cross_buffer_pointers.cpp b/test_conformance/SVM/test_cross_buffer_pointers.cpp
index c1caebb..2baa7ad 100644
--- a/test_conformance/SVM/test_cross_buffer_pointers.cpp
+++ b/test_conformance/SVM/test_cross_buffer_pointers.cpp
@@ -162,7 +162,8 @@
test_error(error, "clCreateBuffer failed.");
// this buffer holds the index into the nodes buffer that is used for node allocation
- clMemWrapper allocator = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_int), NULL, &error);
+ clMemWrapper allocator = clCreateBuffer(context, CL_MEM_READ_WRITE,
+ sizeof(size_t), NULL, &error);
test_error(error, "clCreateBuffer failed.");
// this buffer holds the count of correct nodes which is computed by the verify kernel.
diff --git a/test_conformance/SVM/test_shared_sub_buffers.cpp b/test_conformance/SVM/test_shared_sub_buffers.cpp
index a79484c..2532886 100644
--- a/test_conformance/SVM/test_shared_sub_buffers.cpp
+++ b/test_conformance/SVM/test_shared_sub_buffers.cpp
@@ -182,7 +182,8 @@
// this buffer holds the index into the nodes buffer that is used for node allocation
- clMemWrapper allocator = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_int), NULL, &error);
+ clMemWrapper allocator = clCreateBuffer(context, CL_MEM_READ_WRITE,
+ sizeof(size_t), NULL, &error);
test_error(error, "clCreateBuffer failed.");
// this buffer holds the count of correct nodes which is computed by the verify kernel.
diff --git a/test_conformance/atomics/main.cpp b/test_conformance/atomics/main.cpp
index afdea37..987d6bf 100644
--- a/test_conformance/atomics/main.cpp
+++ b/test_conformance/atomics/main.cpp
@@ -1,6 +1,6 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
-//
+//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
@@ -24,6 +24,7 @@
#include <unistd.h>
#endif
+// clang-format off
test_definition test_list[] = {
ADD_TEST( atomic_add ),
ADD_TEST( atomic_sub ),
@@ -40,11 +41,11 @@
ADD_TEST( atomic_add_index ),
ADD_TEST( atomic_add_index_bin ),
};
+// clang-format on
-const int test_num = ARRAY_SIZE( test_list );
+const int test_num = ARRAY_SIZE(test_list);
int main(int argc, const char *argv[])
{
return runTestHarness(argc, argv, test_num, test_list, false, 0);
}
-
diff --git a/test_conformance/atomics/procs.h b/test_conformance/atomics/procs.h
index bf053f2..fa85aad 100644
--- a/test_conformance/atomics/procs.h
+++ b/test_conformance/atomics/procs.h
@@ -1,6 +1,6 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
-//
+//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
@@ -18,22 +18,35 @@
#include "harness/threadTesting.h"
#include "harness/typeWrappers.h"
-extern int create_program_and_kernel(const char *source, const char *kernel_name, cl_program *program_ret, cl_kernel *kernel_ret);
+extern int create_program_and_kernel(const char *source,
+ const char *kernel_name,
+ cl_program *program_ret,
+ cl_kernel *kernel_ret);
-extern int test_atomic_add(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
-extern int test_atomic_sub(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
-extern int test_atomic_xchg(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
-extern int test_atomic_min(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
-extern int test_atomic_max(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
-extern int test_atomic_inc(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
-extern int test_atomic_dec(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
-extern int test_atomic_cmpxchg(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
-extern int test_atomic_and(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
-extern int test_atomic_or(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
-extern int test_atomic_xor(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+extern int test_atomic_add(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue, int num_elements);
+extern int test_atomic_sub(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue, int num_elements);
+extern int test_atomic_xchg(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue, int num_elements);
+extern int test_atomic_min(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue, int num_elements);
+extern int test_atomic_max(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue, int num_elements);
+extern int test_atomic_inc(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue, int num_elements);
+extern int test_atomic_dec(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue, int num_elements);
+extern int test_atomic_cmpxchg(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue, int num_elements);
+extern int test_atomic_and(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue, int num_elements);
+extern int test_atomic_or(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue, int num_elements);
+extern int test_atomic_xor(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue, int num_elements);
-extern int test_atomic_add_index(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
-extern int test_atomic_add_index_bin(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
-
-
-
+extern int test_atomic_add_index(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue, int num_elements);
+extern int test_atomic_add_index_bin(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue, int num_elements);
diff --git a/test_conformance/atomics/testBase.h b/test_conformance/atomics/testBase.h
index ba67d14..22bce1d 100644
--- a/test_conformance/atomics/testBase.h
+++ b/test_conformance/atomics/testBase.h
@@ -1,6 +1,6 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
-//
+//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
@@ -26,6 +26,3 @@
#include "procs.h"
#endif // _testBase_h
-
-
-
diff --git a/test_conformance/atomics/test_atomics.cpp b/test_conformance/atomics/test_atomics.cpp
index c0c0136..caa4b78 100644
--- a/test_conformance/atomics/test_atomics.cpp
+++ b/test_conformance/atomics/test_atomics.cpp
@@ -1,6 +1,6 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
-//
+//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
@@ -19,10 +19,12 @@
#include <unistd.h>
#endif
+#include <cinttypes>
+
#define INT_TEST_VALUE 402258822
#define LONG_TEST_VALUE 515154531254381446LL
-
+// clang-format off
const char *atomic_global_pattern[] = {
"__kernel void test_atomic_fn(volatile __global %s *destMemory, __global %s *oldValues)\n"
"{\n"
@@ -36,19 +38,20 @@
"__kernel void test_atomic_fn(__global %s *finalDest, __global %s *oldValues, volatile __local %s *destMemory, int numDestItems )\n"
"{\n"
" int tid = get_global_id(0);\n"
- " int dstItemIdx;\n"
+ " int dstItemIdx;\n"
"\n"
" // Everybody does the following line(s), but it all has the same result. We still need to ensure we sync before the atomic op, though\n"
- " for( dstItemIdx = 0; dstItemIdx < numDestItems; dstItemIdx++ )\n"
+ " for( dstItemIdx = 0; dstItemIdx < numDestItems; dstItemIdx++ )\n"
" destMemory[ dstItemIdx ] = finalDest[ dstItemIdx ];\n"
" barrier( CLK_LOCAL_MEM_FENCE );\n"
"\n"
,
" barrier( CLK_LOCAL_MEM_FENCE );\n"
" // Finally, write out the last value. Again, we're synced, so everyone will be writing the same value\n"
- " for( dstItemIdx = 0; dstItemIdx < numDestItems; dstItemIdx++ )\n"
+ " for( dstItemIdx = 0; dstItemIdx < numDestItems; dstItemIdx++ )\n"
" finalDest[ dstItemIdx ] = destMemory[ dstItemIdx ];\n"
"}\n" };
+// clang-format on
#define TEST_COUNT 128 * 1024
@@ -56,41 +59,48 @@
struct TestFns
{
- cl_int mIntStartValue;
- cl_long mLongStartValue;
+ cl_int mIntStartValue;
+ cl_long mLongStartValue;
- size_t (*NumResultsFn)( size_t threadSize, ExplicitType dataType );
+ size_t (*NumResultsFn)(size_t threadSize, ExplicitType dataType);
// Integer versions
- cl_int (*ExpectedValueIntFn)( size_t size, cl_int *startRefValues, size_t whichDestValue );
- void (*GenerateRefsIntFn)( size_t size, cl_int *startRefValues, MTdata d );
- bool (*VerifyRefsIntFn)( size_t size, cl_int *refValues, cl_int finalValue );
+ cl_int (*ExpectedValueIntFn)(size_t size, cl_int *startRefValues,
+ size_t whichDestValue);
+ void (*GenerateRefsIntFn)(size_t size, cl_int *startRefValues, MTdata d);
+ bool (*VerifyRefsIntFn)(size_t size, cl_int *refValues, cl_int finalValue);
// Long versions
- cl_long (*ExpectedValueLongFn)( size_t size, cl_long *startRefValues, size_t whichDestValue );
- void (*GenerateRefsLongFn)( size_t size, cl_long *startRefValues, MTdata d );
- bool (*VerifyRefsLongFn)( size_t size, cl_long *refValues, cl_long finalValue );
+ cl_long (*ExpectedValueLongFn)(size_t size, cl_long *startRefValues,
+ size_t whichDestValue);
+ void (*GenerateRefsLongFn)(size_t size, cl_long *startRefValues, MTdata d);
+ bool (*VerifyRefsLongFn)(size_t size, cl_long *refValues,
+ cl_long finalValue);
// Float versions
- cl_float (*ExpectedValueFloatFn)( size_t size, cl_float *startRefValues, size_t whichDestValue );
- void (*GenerateRefsFloatFn)( size_t size, cl_float *startRefValues, MTdata d );
- bool (*VerifyRefsFloatFn)( size_t size, cl_float *refValues, cl_float finalValue );
+ cl_float (*ExpectedValueFloatFn)(size_t size, cl_float *startRefValues,
+ size_t whichDestValue);
+ void (*GenerateRefsFloatFn)(size_t size, cl_float *startRefValues,
+ MTdata d);
+ bool (*VerifyRefsFloatFn)(size_t size, cl_float *refValues,
+ cl_float finalValue);
};
-bool check_atomic_support( cl_device_id device, bool extended, bool isLocal, ExplicitType dataType )
+bool check_atomic_support(cl_device_id device, bool extended, bool isLocal,
+ ExplicitType dataType)
{
+ // clang-format off
const char *extensionNames[8] = {
"cl_khr_global_int32_base_atomics", "cl_khr_global_int32_extended_atomics",
"cl_khr_local_int32_base_atomics", "cl_khr_local_int32_extended_atomics",
"cl_khr_int64_base_atomics", "cl_khr_int64_extended_atomics",
"cl_khr_int64_base_atomics", "cl_khr_int64_extended_atomics" // this line intended to be the same as the last one
};
+ // clang-format on
size_t index = 0;
- if( extended )
- index += 1;
- if( isLocal )
- index += 2;
+ if (extended) index += 1;
+ if (isLocal) index += 2;
Version version = get_device_cl_version(device);
@@ -98,26 +108,28 @@
{
case kInt:
case kUInt:
- if( version >= Version(1,1) )
- return 1;
+ if (version >= Version(1, 1)) return 1;
break;
case kLong:
- case kULong:
- index += 4;
- break;
- case kFloat: // this has to stay separate since the float atomics arent in the 1.0 extensions
- return version >= Version(1,1);
+ case kULong: index += 4; break;
+ case kFloat: // this has to stay separate since the float atomics arent
+ // in the 1.0 extensions
+ return version >= Version(1, 1);
default:
- log_error( "ERROR: Unsupported data type (%d) in check_atomic_support\n", dataType );
+ log_error(
+ "ERROR: Unsupported data type (%d) in check_atomic_support\n",
+ dataType);
return 0;
}
- return is_extension_available( device, extensionNames[index] );
+ return is_extension_available(device, extensionNames[index]);
}
-int test_atomic_function(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, const char *programCore,
- TestFns testFns,
- bool extended, bool isLocal, ExplicitType dataType, bool matchGroupSize )
+int test_atomic_function(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue, int num_elements,
+ const char *programCore, TestFns testFns,
+ bool extended, bool isLocal, ExplicitType dataType,
+ bool matchGroupSize)
{
clProgramWrapper program;
clKernelWrapper kernel;
@@ -127,55 +139,65 @@
void *refValues, *startRefValues;
size_t threadSize, groupSize;
const char *programLines[4];
- char pragma[ 512 ];
- char programHeader[ 512 ];
+ char pragma[512];
+ char programHeader[512];
MTdata d;
- size_t typeSize = get_explicit_type_size( dataType );
+ size_t typeSize = get_explicit_type_size(dataType);
// Verify we can run first
- bool isUnsigned = ( dataType == kULong ) || ( dataType == kUInt );
- if( !check_atomic_support( deviceID, extended, isLocal, dataType ) )
+ bool isUnsigned = (dataType == kULong) || (dataType == kUInt);
+ if (!check_atomic_support(deviceID, extended, isLocal, dataType))
{
- // Only print for the signed (unsigned comes right after, and if signed isn't supported, unsigned isn't either)
- if( dataType == kFloat )
- log_info( "\t%s float not supported\n", isLocal ? "Local" : "Global" );
- else if( !isUnsigned )
- log_info( "\t%s %sint%d not supported\n", isLocal ? "Local" : "Global", isUnsigned ? "u" : "", (int)typeSize * 8 );
+ // Only print for the signed (unsigned comes right after, and if signed
+ // isn't supported, unsigned isn't either)
+ if (dataType == kFloat)
+ log_info("\t%s float not supported\n",
+ isLocal ? "Local" : "Global");
+ else if (!isUnsigned)
+ log_info("\t%s %sint%d not supported\n",
+ isLocal ? "Local" : "Global", isUnsigned ? "u" : "",
+ (int)typeSize * 8);
// Since we don't support the operation, they implicitly pass
return 0;
}
else
{
- if( dataType == kFloat )
- log_info( "\t%s float%s...", isLocal ? "local" : "global", isLocal ? " " : "" );
+ if (dataType == kFloat)
+ log_info("\t%s float%s...", isLocal ? "local" : "global",
+ isLocal ? " " : "");
else
- log_info( "\t%s %sint%d%s%s...", isLocal ? "local" : "global", isUnsigned ? "u" : "",
- (int)typeSize * 8, isUnsigned ? "" : " ", isLocal ? " " : "" );
+ log_info("\t%s %sint%d%s%s...", isLocal ? "local" : "global",
+ isUnsigned ? "u" : "", (int)typeSize * 8,
+ isUnsigned ? "" : " ", isLocal ? " " : "");
}
//// Set up the kernel code
// Create the pragma line for this kernel
- bool isLong = ( dataType == kLong || dataType == kULong );
- sprintf( pragma, "#pragma OPENCL EXTENSION cl_khr%s_int%s_%s_atomics : enable\n",
- isLong ? "" : (isLocal ? "_local" : "_global"), isLong ? "64" : "32",
- extended ? "extended" : "base" );
+ bool isLong = (dataType == kLong || dataType == kULong);
+ sprintf(pragma,
+ "#pragma OPENCL EXTENSION cl_khr%s_int%s_%s_atomics : enable\n",
+ isLong ? "" : (isLocal ? "_local" : "_global"),
+ isLong ? "64" : "32", extended ? "extended" : "base");
// Now create the program header
- const char *typeName = get_explicit_type_name( dataType );
- if( isLocal )
- sprintf( programHeader, atomic_local_pattern[ 0 ], typeName, typeName, typeName );
+ const char *typeName = get_explicit_type_name(dataType);
+ if (isLocal)
+ sprintf(programHeader, atomic_local_pattern[0], typeName, typeName,
+ typeName);
else
- sprintf( programHeader, atomic_global_pattern[ 0 ], typeName, typeName );
+ sprintf(programHeader, atomic_global_pattern[0], typeName, typeName);
// Set up our entire program now
- programLines[ 0 ] = pragma;
- programLines[ 1 ] = programHeader;
- programLines[ 2 ] = programCore;
- programLines[ 3 ] = ( isLocal ) ? atomic_local_pattern[ 1 ] : atomic_global_pattern[ 1 ];
+ programLines[0] = pragma;
+ programLines[1] = programHeader;
+ programLines[2] = programCore;
+ programLines[3] =
+ (isLocal) ? atomic_local_pattern[1] : atomic_global_pattern[1];
- if( create_single_kernel_helper( context, &program, &kernel, 4, programLines, "test_atomic_fn" ) )
+ if (create_single_kernel_helper(context, &program, &kernel, 4, programLines,
+ "test_atomic_fn"))
{
return -1;
}
@@ -183,29 +205,37 @@
//// Set up to actually run
threadSize = num_elements;
- error = get_max_common_work_group_size( context, kernel, threadSize, &groupSize );
- test_error( error, "Unable to get thread group max size" );
+ error =
+ get_max_common_work_group_size(context, kernel, threadSize, &groupSize);
+ test_error(error, "Unable to get thread group max size");
- if( matchGroupSize )
+ if (matchGroupSize)
// HACK because xchg and cmpxchg apparently are limited by hardware
threadSize = groupSize;
- if( isLocal )
+ if (isLocal)
{
- size_t maxSizes[3] = {0, 0, 0};
- error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_WORK_ITEM_SIZES, 3*sizeof(size_t), maxSizes, 0);
- test_error( error, "Unable to obtain max work item sizes for the device" );
+ size_t maxSizes[3] = { 0, 0, 0 };
+ error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_WORK_ITEM_SIZES,
+ 3 * sizeof(size_t), maxSizes, 0);
+ test_error(error,
+ "Unable to obtain max work item sizes for the device");
size_t workSize;
- error = clGetKernelWorkGroupInfo( kernel, deviceID, CL_KERNEL_WORK_GROUP_SIZE, sizeof( workSize ), &workSize, NULL );
- test_error( error, "Unable to obtain max work group size for device and kernel combo" );
+ error = clGetKernelWorkGroupInfo(kernel, deviceID,
+ CL_KERNEL_WORK_GROUP_SIZE,
+ sizeof(workSize), &workSize, NULL);
+ test_error(
+ error,
+ "Unable to obtain max work group size for device and kernel combo");
// Limit workSize to avoid extremely large local buffer size and slow
// run.
if (workSize > 65536) workSize = 65536;
- // "workSize" is limited to that of the first dimension as only a 1DRange is executed.
- if( maxSizes[0] < workSize )
+ // "workSize" is limited to that of the first dimension as only a
+ // 1DRange is executed.
+ if (maxSizes[0] < workSize)
{
workSize = maxSizes[0];
}
@@ -214,38 +244,43 @@
}
- log_info( "\t(thread count %d, group size %d)\n", (int)threadSize, (int)groupSize );
+ log_info("\t(thread count %d, group size %d)\n", (int)threadSize,
+ (int)groupSize);
- refValues = (cl_int *)malloc( typeSize * threadSize );
+ refValues = (cl_int *)malloc(typeSize * threadSize);
- if( testFns.GenerateRefsIntFn != NULL )
+ if (testFns.GenerateRefsIntFn != NULL)
{
// We have a ref generator provided
- d = init_genrand( gRandomSeed );
- startRefValues = malloc( typeSize * threadSize );
- if( typeSize == 4 )
- testFns.GenerateRefsIntFn( threadSize, (cl_int *)startRefValues, d );
+ d = init_genrand(gRandomSeed);
+ startRefValues = malloc(typeSize * threadSize);
+ if (typeSize == 4)
+ testFns.GenerateRefsIntFn(threadSize, (cl_int *)startRefValues, d);
else
- testFns.GenerateRefsLongFn( threadSize, (cl_long *)startRefValues, d );
+ testFns.GenerateRefsLongFn(threadSize, (cl_long *)startRefValues,
+ d);
free_mtdata(d);
d = NULL;
}
else
startRefValues = NULL;
- // If we're given a num_results function, we need to determine how many result objects we need. If
- // we don't have it, we assume it's just 1
- size_t numDestItems = ( testFns.NumResultsFn != NULL ) ? testFns.NumResultsFn( threadSize, dataType ) : 1;
+ // If we're given a num_results function, we need to determine how many
+ // result objects we need. If we don't have it, we assume it's just 1
+ size_t numDestItems = (testFns.NumResultsFn != NULL)
+ ? testFns.NumResultsFn(threadSize, dataType)
+ : 1;
- char * destItems = new char[ typeSize * numDestItems ];
- if( destItems == NULL )
+ char *destItems = new char[typeSize * numDestItems];
+ if (destItems == NULL)
{
- log_error( "ERROR: Unable to allocate memory!\n" );
+ log_error("ERROR: Unable to allocate memory!\n");
return -1;
}
- void * startValue = ( typeSize == 4 ) ? (void *)&testFns.mIntStartValue : (void *)&testFns.mLongStartValue;
- for( size_t i = 0; i < numDestItems; i++ )
- memcpy( destItems + i * typeSize, startValue, typeSize );
+ void *startValue = (typeSize == 4) ? (void *)&testFns.mIntStartValue
+ : (void *)&testFns.mLongStartValue;
+ for (size_t i = 0; i < numDestItems; i++)
+ memcpy(destItems + i * typeSize, startValue, typeSize);
streams[0] = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
typeSize * numDestItems, destItems, NULL);
@@ -265,82 +300,97 @@
}
/* Set the arguments */
- error = clSetKernelArg( kernel, 0, sizeof( streams[0] ), &streams[0] );
- test_error( error, "Unable to set indexed kernel arguments" );
- error = clSetKernelArg( kernel, 1, sizeof( streams[1] ), &streams[1] );
- test_error( error, "Unable to set indexed kernel arguments" );
+ error = clSetKernelArg(kernel, 0, sizeof(streams[0]), &streams[0]);
+ test_error(error, "Unable to set indexed kernel arguments");
+ error = clSetKernelArg(kernel, 1, sizeof(streams[1]), &streams[1]);
+ test_error(error, "Unable to set indexed kernel arguments");
- if( isLocal )
+ if (isLocal)
{
- error = clSetKernelArg( kernel, 2, typeSize * numDestItems, NULL );
- test_error( error, "Unable to set indexed local kernel argument" );
+ error = clSetKernelArg(kernel, 2, typeSize * numDestItems, NULL);
+ test_error(error, "Unable to set indexed local kernel argument");
cl_int numDestItemsInt = (cl_int)numDestItems;
- error = clSetKernelArg( kernel, 3, sizeof( cl_int ), &numDestItemsInt );
- test_error( error, "Unable to set indexed kernel argument" );
+ error = clSetKernelArg(kernel, 3, sizeof(cl_int), &numDestItemsInt);
+ test_error(error, "Unable to set indexed kernel argument");
}
/* Run the kernel */
threads[0] = threadSize;
- error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, &groupSize, 0, NULL, NULL );
- test_error( error, "Unable to execute test kernel" );
+ error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, threads, &groupSize,
+ 0, NULL, NULL);
+ test_error(error, "Unable to execute test kernel");
- error = clEnqueueReadBuffer( queue, streams[0], true, 0, typeSize * numDestItems, destItems, 0, NULL, NULL );
- test_error( error, "Unable to read result value!" );
+ error =
+ clEnqueueReadBuffer(queue, streams[0], true, 0, typeSize * numDestItems,
+ destItems, 0, NULL, NULL);
+ test_error(error, "Unable to read result value!");
- error = clEnqueueReadBuffer( queue, streams[1], true, 0, typeSize * threadSize, refValues, 0, NULL, NULL );
- test_error( error, "Unable to read reference values!" );
+ error =
+ clEnqueueReadBuffer(queue, streams[1], true, 0, typeSize * threadSize,
+ refValues, 0, NULL, NULL);
+ test_error(error, "Unable to read reference values!");
- // If we have an expectedFn, then we need to generate a final value to compare against. If we don't
- // have one, it's because we're comparing ref values only
- if( testFns.ExpectedValueIntFn != NULL )
+ // If we have an expectedFn, then we need to generate a final value to
+ // compare against. If we don't have one, it's because we're comparing ref
+ // values only
+ if (testFns.ExpectedValueIntFn != NULL)
{
- for( size_t i = 0; i < numDestItems; i++ )
+ for (size_t i = 0; i < numDestItems; i++)
{
- char expected[ 8 ];
+ char expected[8];
cl_int intVal;
cl_long longVal;
- if( typeSize == 4 )
+ if (typeSize == 4)
{
// Int version
- intVal = testFns.ExpectedValueIntFn( threadSize, (cl_int *)startRefValues, i );
- memcpy( expected, &intVal, sizeof( intVal ) );
+ intVal = testFns.ExpectedValueIntFn(
+ threadSize, (cl_int *)startRefValues, i);
+ memcpy(expected, &intVal, sizeof(intVal));
}
else
{
// Long version
- longVal = testFns.ExpectedValueLongFn( threadSize, (cl_long *)startRefValues, i );
- memcpy( expected, &longVal, sizeof( longVal ) );
+ longVal = testFns.ExpectedValueLongFn(
+ threadSize, (cl_long *)startRefValues, i);
+ memcpy(expected, &longVal, sizeof(longVal));
}
- if( memcmp( expected, destItems + i * typeSize, typeSize ) != 0 )
+ if (memcmp(expected, destItems + i * typeSize, typeSize) != 0)
{
- if( typeSize == 4 )
+ if (typeSize == 4)
{
- cl_int *outValue = (cl_int *)( destItems + i * typeSize );
- log_error( "ERROR: Result %ld from kernel does not validate! (should be %d, was %d)\n", i, intVal, *outValue );
+ cl_int *outValue = (cl_int *)(destItems + i * typeSize);
+ log_error("ERROR: Result %zu from kernel does not "
+ "validate! (should be %d, was %d)\n",
+ i, intVal, *outValue);
cl_int *startRefs = (cl_int *)startRefValues;
cl_int *refs = (cl_int *)refValues;
- for( i = 0; i < threadSize; i++ )
+ for (i = 0; i < threadSize; i++)
{
- if( startRefs != NULL )
- log_info( " --- %ld - %d --- %d\n", i, startRefs[i], refs[i] );
+ if (startRefs != NULL)
+ log_info(" --- %zu - %d --- %d\n", i, startRefs[i],
+ refs[i]);
else
- log_info( " --- %ld --- %d\n", i, refs[i] );
+ log_info(" --- %zu --- %d\n", i, refs[i]);
}
}
else
{
- cl_long *outValue = (cl_long *)( destItems + i * typeSize );
- log_error( "ERROR: Result %ld from kernel does not validate! (should be %lld, was %lld)\n", i, longVal, *outValue );
+ cl_long *outValue = (cl_long *)(destItems + i * typeSize);
+ log_error("ERROR: Result %zu from kernel does not "
+ "validate! (should be %" PRId64 ", was %" PRId64
+ ")\n",
+ i, longVal, *outValue);
cl_long *startRefs = (cl_long *)startRefValues;
cl_long *refs = (cl_long *)refValues;
- for( i = 0; i < threadSize; i++ )
+ for (i = 0; i < threadSize; i++)
{
- if( startRefs != NULL )
- log_info( " --- %ld - %lld --- %lld\n", i, startRefs[i], refs[i] );
+ if (startRefs != NULL)
+ log_info(" --- %zu - %" PRId64 " --- %" PRId64 "\n",
+ i, startRefs[i], refs[i]);
else
- log_info( " --- %ld --- %lld\n", i, refs[i] );
+ log_info(" --- %zu --- %" PRId64 "\n", i, refs[i]);
}
}
return -1;
@@ -348,104 +398,141 @@
}
}
- if( testFns.VerifyRefsIntFn != NULL )
+ if (testFns.VerifyRefsIntFn != NULL)
{
/* Use the verify function to also check the results */
- if( dataType == kFloat )
+ if (dataType == kFloat)
{
cl_float *outValue = (cl_float *)destItems;
- if( !testFns.VerifyRefsFloatFn( threadSize, (cl_float *)refValues, *outValue ) != 0 )
+ if (!testFns.VerifyRefsFloatFn(threadSize, (cl_float *)refValues,
+ *outValue)
+ != 0)
{
- log_error( "ERROR: Reference values did not validate!\n" );
+ log_error("ERROR: Reference values did not validate!\n");
return -1;
}
}
- else if( typeSize == 4 )
+ else if (typeSize == 4)
{
cl_int *outValue = (cl_int *)destItems;
- if( !testFns.VerifyRefsIntFn( threadSize, (cl_int *)refValues, *outValue ) != 0 )
+ if (!testFns.VerifyRefsIntFn(threadSize, (cl_int *)refValues,
+ *outValue)
+ != 0)
{
- log_error( "ERROR: Reference values did not validate!\n" );
+ log_error("ERROR: Reference values did not validate!\n");
return -1;
}
}
else
{
cl_long *outValue = (cl_long *)destItems;
- if( !testFns.VerifyRefsLongFn( threadSize, (cl_long *)refValues, *outValue ) != 0 )
+ if (!testFns.VerifyRefsLongFn(threadSize, (cl_long *)refValues,
+ *outValue)
+ != 0)
{
- log_error( "ERROR: Reference values did not validate!\n" );
+ log_error("ERROR: Reference values did not validate!\n");
return -1;
}
}
}
- else if( testFns.ExpectedValueIntFn == NULL )
+ else if (testFns.ExpectedValueIntFn == NULL)
{
- log_error( "ERROR: Test doesn't check total or refs; no values are verified!\n" );
+ log_error("ERROR: Test doesn't check total or refs; no values are "
+ "verified!\n");
return -1;
}
/* Re-write the starting value */
- for( size_t i = 0; i < numDestItems; i++ )
- memcpy( destItems + i * typeSize, startValue, typeSize );
- error = clEnqueueWriteBuffer( queue, streams[0], true, 0, typeSize * numDestItems, destItems, 0, NULL, NULL );
- test_error( error, "Unable to write starting values!" );
+ for (size_t i = 0; i < numDestItems; i++)
+ memcpy(destItems + i * typeSize, startValue, typeSize);
+ error =
+ clEnqueueWriteBuffer(queue, streams[0], true, 0,
+ typeSize * numDestItems, destItems, 0, NULL, NULL);
+ test_error(error, "Unable to write starting values!");
- /* Run the kernel once for a single thread, so we can verify that the returned value is the original one */
+ /* Run the kernel once for a single thread, so we can verify that the
+ * returned value is the original one */
threads[0] = 1;
- error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, threads, 0, NULL, NULL );
- test_error( error, "Unable to execute test kernel" );
+ error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, threads, threads, 0,
+ NULL, NULL);
+ test_error(error, "Unable to execute test kernel");
- error = clEnqueueReadBuffer( queue, streams[1], true, 0, typeSize, refValues, 0, NULL, NULL );
- test_error( error, "Unable to read reference values!" );
+ error = clEnqueueReadBuffer(queue, streams[1], true, 0, typeSize, refValues,
+ 0, NULL, NULL);
+ test_error(error, "Unable to read reference values!");
- if( memcmp( refValues, destItems, typeSize ) != 0 )
+ if (memcmp(refValues, destItems, typeSize) != 0)
{
- if( typeSize == 4 )
+ if (typeSize == 4)
{
cl_int *s = (cl_int *)destItems;
cl_int *r = (cl_int *)refValues;
- log_error( "ERROR: atomic function operated correctly but did NOT return correct 'old' value "
- " (should have been %d, returned %d)!\n", *s, *r );
+ log_error("ERROR: atomic function operated correctly but did NOT "
+ "return correct 'old' value "
+ " (should have been %d, returned %d)!\n",
+ *s, *r);
}
else
{
cl_long *s = (cl_long *)destItems;
cl_long *r = (cl_long *)refValues;
- log_error( "ERROR: atomic function operated correctly but did NOT return correct 'old' value "
- " (should have been %lld, returned %lld)!\n", *s, *r );
+ log_error("ERROR: atomic function operated correctly but did NOT "
+ "return correct 'old' value "
+ " (should have been %" PRId64 ", returned %" PRId64
+ ")!\n",
+ *s, *r);
}
return -1;
}
- delete [] destItems;
- free( refValues );
- if( startRefValues != NULL )
- free( startRefValues );
+ delete[] destItems;
+ free(refValues);
+ if (startRefValues != NULL) free(startRefValues);
return 0;
}
-int test_atomic_function_set(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, const char *programCore,
- TestFns testFns,
- bool extended, bool matchGroupSize, bool usingAtomicPrefix )
+int test_atomic_function_set(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue, int num_elements,
+ const char *programCore, TestFns testFns,
+ bool extended, bool matchGroupSize,
+ bool usingAtomicPrefix)
{
- log_info(" Testing %s functions...\n", usingAtomicPrefix ? "atomic_" : "atom_");
+ log_info(" Testing %s functions...\n",
+ usingAtomicPrefix ? "atomic_" : "atom_");
int errors = 0;
- errors |= test_atomic_function( deviceID, context, queue, num_elements, programCore, testFns, extended, false, kInt, matchGroupSize );
- errors |= test_atomic_function( deviceID, context, queue, num_elements, programCore, testFns, extended, false, kUInt, matchGroupSize );
- errors |= test_atomic_function( deviceID, context, queue, num_elements, programCore, testFns, extended, true, kInt, matchGroupSize );
- errors |= test_atomic_function( deviceID, context, queue, num_elements, programCore, testFns, extended, true, kUInt, matchGroupSize );
+ errors |= test_atomic_function(deviceID, context, queue, num_elements,
+ programCore, testFns, extended, false, kInt,
+ matchGroupSize);
+ errors |= test_atomic_function(deviceID, context, queue, num_elements,
+ programCore, testFns, extended, false, kUInt,
+ matchGroupSize);
+ errors |= test_atomic_function(deviceID, context, queue, num_elements,
+ programCore, testFns, extended, true, kInt,
+ matchGroupSize);
+ errors |= test_atomic_function(deviceID, context, queue, num_elements,
+ programCore, testFns, extended, true, kUInt,
+ matchGroupSize);
- // Only the 32 bit atomic functions use the "atomic" prefix in 1.1, the 64 bit functions still use the "atom" prefix.
- // The argument usingAtomicPrefix is set to true if programCore was generated with the "atomic" prefix.
- if (!usingAtomicPrefix) {
- errors |= test_atomic_function( deviceID, context, queue, num_elements, programCore, testFns, extended, false, kLong, matchGroupSize );
- errors |= test_atomic_function( deviceID, context, queue, num_elements, programCore, testFns, extended, false, kULong, matchGroupSize );
- errors |= test_atomic_function( deviceID, context, queue, num_elements, programCore, testFns, extended, true, kLong, matchGroupSize );
- errors |= test_atomic_function( deviceID, context, queue, num_elements, programCore, testFns, extended, true, kULong, matchGroupSize );
+ // Only the 32 bit atomic functions use the "atomic" prefix in 1.1, the 64
+ // bit functions still use the "atom" prefix. The argument usingAtomicPrefix
+ // is set to true if programCore was generated with the "atomic" prefix.
+ if (!usingAtomicPrefix)
+ {
+ errors |= test_atomic_function(deviceID, context, queue, num_elements,
+ programCore, testFns, extended, false,
+ kLong, matchGroupSize);
+ errors |= test_atomic_function(deviceID, context, queue, num_elements,
+ programCore, testFns, extended, false,
+ kULong, matchGroupSize);
+ errors |= test_atomic_function(deviceID, context, queue, num_elements,
+ programCore, testFns, extended, true,
+ kLong, matchGroupSize);
+ errors |= test_atomic_function(deviceID, context, queue, num_elements,
+ programCore, testFns, extended, true,
+ kULong, matchGroupSize);
}
return errors;
@@ -454,265 +541,346 @@
#pragma mark ---- add
const char atom_add_core[] =
-" oldValues[tid] = atom_add( &destMemory[0], tid + 3 );\n"
-" atom_add( &destMemory[0], tid + 3 );\n"
-" atom_add( &destMemory[0], tid + 3 );\n"
-" atom_add( &destMemory[0], tid + 3 );\n";
+ " oldValues[tid] = atom_add( &destMemory[0], tid + 3 );\n"
+ " atom_add( &destMemory[0], tid + 3 );\n"
+ " atom_add( &destMemory[0], tid + 3 );\n"
+ " atom_add( &destMemory[0], tid + 3 );\n";
const char atomic_add_core[] =
-" oldValues[tid] = atomic_add( &destMemory[0], tid + 3 );\n"
-" atomic_add( &destMemory[0], tid + 3 );\n"
-" atomic_add( &destMemory[0], tid + 3 );\n"
-" atomic_add( &destMemory[0], tid + 3 );\n";
+ " oldValues[tid] = atomic_add( &destMemory[0], tid + 3 );\n"
+ " atomic_add( &destMemory[0], tid + 3 );\n"
+ " atomic_add( &destMemory[0], tid + 3 );\n"
+ " atomic_add( &destMemory[0], tid + 3 );\n";
-cl_int test_atomic_add_result_int( size_t size, cl_int *startRefValues, size_t whichDestValue )
+cl_int test_atomic_add_result_int(size_t size, cl_int *startRefValues,
+ size_t whichDestValue)
{
cl_int total = 0;
- for( size_t i = 0; i < size; i++ )
- total += ( (cl_int)i + 3 ) * 4;
+ for (size_t i = 0; i < size; i++) total += ((cl_int)i + 3) * 4;
return total;
}
-cl_long test_atomic_add_result_long( size_t size, cl_long *startRefValues, size_t whichDestValue )
+cl_long test_atomic_add_result_long(size_t size, cl_long *startRefValues,
+ size_t whichDestValue)
{
cl_long total = 0;
- for( size_t i = 0; i < size; i++ )
- total += ( ( i + 3 ) * 4 );
+ for (size_t i = 0; i < size; i++) total += ((i + 3) * 4);
return total;
}
-int test_atomic_add(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+int test_atomic_add(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue, int num_elements)
{
- TestFns set = { 0, 0LL, NULL, test_atomic_add_result_int, NULL, NULL, test_atomic_add_result_long, NULL, NULL };
+ TestFns set = { 0,
+ 0LL,
+ NULL,
+ test_atomic_add_result_int,
+ NULL,
+ NULL,
+ test_atomic_add_result_long,
+ NULL,
+ NULL };
- if( test_atomic_function_set( deviceID, context, queue, num_elements, atom_add_core, set, false, /*matchGroupSize*/ false, /*usingAtomicPrefix*/ false ) != 0 )
+ if (test_atomic_function_set(
+ deviceID, context, queue, num_elements, atom_add_core, set, false,
+ /*matchGroupSize*/ false, /*usingAtomicPrefix*/ false)
+ != 0)
return -1;
- if( test_atomic_function_set( deviceID, context, queue, num_elements, atomic_add_core, set, false, /*matchGroupSize*/ false, /*usingAtomicPrefix*/ true ) != 0 )
- return -1;
+ if (test_atomic_function_set(
+ deviceID, context, queue, num_elements, atomic_add_core, set, false,
+ /*matchGroupSize*/ false, /*usingAtomicPrefix*/ true)
+ != 0)
+ return -1;
return 0;
}
#pragma mark ---- sub
-const char atom_sub_core[] = " oldValues[tid] = atom_sub( &destMemory[0], tid + 3 );\n";
+const char atom_sub_core[] =
+ " oldValues[tid] = atom_sub( &destMemory[0], tid + 3 );\n";
-const char atomic_sub_core[] = " oldValues[tid] = atomic_sub( &destMemory[0], tid + 3 );\n";
+const char atomic_sub_core[] =
+ " oldValues[tid] = atomic_sub( &destMemory[0], tid + 3 );\n";
-cl_int test_atomic_sub_result_int( size_t size, cl_int *startRefValues, size_t whichDestValue )
+cl_int test_atomic_sub_result_int(size_t size, cl_int *startRefValues,
+ size_t whichDestValue)
{
cl_int total = INT_TEST_VALUE;
- for( size_t i = 0; i < size; i++ )
- total -= (cl_int)i + 3;
+ for (size_t i = 0; i < size; i++) total -= (cl_int)i + 3;
return total;
}
-cl_long test_atomic_sub_result_long( size_t size, cl_long *startRefValues, size_t whichDestValue )
+cl_long test_atomic_sub_result_long(size_t size, cl_long *startRefValues,
+ size_t whichDestValue)
{
cl_long total = LONG_TEST_VALUE;
- for( size_t i = 0; i < size; i++ )
- total -= i + 3;
+ for (size_t i = 0; i < size; i++) total -= i + 3;
return total;
}
-int test_atomic_sub(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+int test_atomic_sub(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue, int num_elements)
{
- TestFns set = { INT_TEST_VALUE, LONG_TEST_VALUE, NULL, test_atomic_sub_result_int, NULL, NULL, test_atomic_sub_result_long, NULL, NULL };
+ TestFns set = { INT_TEST_VALUE,
+ LONG_TEST_VALUE,
+ NULL,
+ test_atomic_sub_result_int,
+ NULL,
+ NULL,
+ test_atomic_sub_result_long,
+ NULL,
+ NULL };
- if( test_atomic_function_set( deviceID, context, queue, num_elements, atom_sub_core, set, false, /*matchGroupSize*/ false, /*usingAtomicPrefix*/ false ) != 0 )
+ if (test_atomic_function_set(
+ deviceID, context, queue, num_elements, atom_sub_core, set, false,
+ /*matchGroupSize*/ false, /*usingAtomicPrefix*/ false)
+ != 0)
return -1;
- if( test_atomic_function_set( deviceID, context, queue, num_elements, atomic_sub_core, set, false, /*matchGroupSize*/ false, /*usingAtomicPrefix*/ true ) != 0 )
+ if (test_atomic_function_set(
+ deviceID, context, queue, num_elements, atomic_sub_core, set, false,
+ /*matchGroupSize*/ false, /*usingAtomicPrefix*/ true)
+ != 0)
return -1;
return 0;
}
#pragma mark ---- xchg
-const char atom_xchg_core[] = " oldValues[tid] = atom_xchg( &destMemory[0], tid );\n";
+const char atom_xchg_core[] =
+ " oldValues[tid] = atom_xchg( &destMemory[0], tid );\n";
-const char atomic_xchg_core[] = " oldValues[tid] = atomic_xchg( &destMemory[0], tid );\n";
-const char atomic_xchg_float_core[] = " oldValues[tid] = atomic_xchg( &destMemory[0], tid );\n";
+const char atomic_xchg_core[] =
+ " oldValues[tid] = atomic_xchg( &destMemory[0], tid );\n";
+const char atomic_xchg_float_core[] =
+ " oldValues[tid] = atomic_xchg( &destMemory[0], tid );\n";
-bool test_atomic_xchg_verify_int( size_t size, cl_int *refValues, cl_int finalValue )
+bool test_atomic_xchg_verify_int(size_t size, cl_int *refValues,
+ cl_int finalValue)
{
- /* For xchg, each value from 0 to size - 1 should have an entry in the ref array, and ONLY one entry */
+ /* For xchg, each value from 0 to size - 1 should have an entry in the ref
+ * array, and ONLY one entry */
char *valids;
size_t i;
char originalValidCount = 0;
- valids = (char *)malloc( sizeof( char ) * size );
- memset( valids, 0, sizeof( char ) * size );
+ valids = (char *)malloc(sizeof(char) * size);
+ memset(valids, 0, sizeof(char) * size);
- for( i = 0; i < size; i++ )
+ for (i = 0; i < size; i++)
{
- if( refValues[ i ] == INT_TEST_VALUE )
+ if (refValues[i] == INT_TEST_VALUE)
{
// Special initial value
originalValidCount++;
continue;
}
- if( refValues[ i ] < 0 || (size_t)refValues[ i ] >= size )
+ if (refValues[i] < 0 || (size_t)refValues[i] >= size)
{
- log_error( "ERROR: Reference value %ld outside of valid range! (%d)\n", i, refValues[ i ] );
+ log_error(
+ "ERROR: Reference value %zu outside of valid range! (%d)\n", i,
+ refValues[i]);
return false;
}
- valids[ refValues[ i ] ] ++;
+ valids[refValues[i]]++;
}
- /* Note: ONE entry will have zero count. It'll be the last one that executed, because that value should be
- the final value outputted */
- if( valids[ finalValue ] > 0 )
+ /* Note: ONE entry will have zero count. It'll be the last one that
+ executed, because that value should be the final value outputted */
+ if (valids[finalValue] > 0)
{
- log_error( "ERROR: Final value %d was also in ref list!\n", finalValue );
+ log_error("ERROR: Final value %d was also in ref list!\n", finalValue);
return false;
}
else
- valids[ finalValue ] = 1; // So the following loop will be okay
+ valids[finalValue] = 1; // So the following loop will be okay
/* Now check that every entry has one and only one count */
- if( originalValidCount != 1 )
+ if (originalValidCount != 1)
{
- log_error( "ERROR: Starting reference value %d did not occur once-and-only-once (occurred %d)\n", 65191, originalValidCount );
+ log_error("ERROR: Starting reference value %d did not occur "
+ "once-and-only-once (occurred %d)\n",
+ 65191, originalValidCount);
return false;
}
- for( i = 0; i < size; i++ )
+ for (i = 0; i < size; i++)
{
- if( valids[ i ] != 1 )
+ if (valids[i] != 1)
{
- log_error( "ERROR: Reference value %ld did not occur once-and-only-once (occurred %d)\n", i, valids[ i ] );
- for( size_t j = 0; j < size; j++ )
- log_info( "%d: %d\n", (int)j, (int)valids[ j ] );
+ log_error("ERROR: Reference value %zu did not occur "
+ "once-and-only-once (occurred %d)\n",
+ i, valids[i]);
+ for (size_t j = 0; j < size; j++)
+ log_info("%d: %d\n", (int)j, (int)valids[j]);
return false;
}
}
- free( valids );
+ free(valids);
return true;
}
-bool test_atomic_xchg_verify_long( size_t size, cl_long *refValues, cl_long finalValue )
+bool test_atomic_xchg_verify_long(size_t size, cl_long *refValues,
+ cl_long finalValue)
{
- /* For xchg, each value from 0 to size - 1 should have an entry in the ref array, and ONLY one entry */
+ /* For xchg, each value from 0 to size - 1 should have an entry in the ref
+ * array, and ONLY one entry */
char *valids;
size_t i;
char originalValidCount = 0;
- valids = (char *)malloc( sizeof( char ) * size );
- memset( valids, 0, sizeof( char ) * size );
+ valids = (char *)malloc(sizeof(char) * size);
+ memset(valids, 0, sizeof(char) * size);
- for( i = 0; i < size; i++ )
+ for (i = 0; i < size; i++)
{
- if( refValues[ i ] == LONG_TEST_VALUE )
+ if (refValues[i] == LONG_TEST_VALUE)
{
// Special initial value
originalValidCount++;
continue;
}
- if( refValues[ i ] < 0 || (size_t)refValues[ i ] >= size )
+ if (refValues[i] < 0 || (size_t)refValues[i] >= size)
{
- log_error( "ERROR: Reference value %ld outside of valid range! (%lld)\n", i, refValues[ i ] );
+ log_error(
+ "ERROR: Reference value %zu outside of valid range! (%" PRId64
+ ")\n",
+ i, refValues[i]);
return false;
}
- valids[ refValues[ i ] ] ++;
+ valids[refValues[i]]++;
}
- /* Note: ONE entry will have zero count. It'll be the last one that executed, because that value should be
- the final value outputted */
- if( valids[ finalValue ] > 0 )
+ /* Note: ONE entry will have zero count. It'll be the last one that
+ executed, because that value should be the final value outputted */
+ if (valids[finalValue] > 0)
{
- log_error( "ERROR: Final value %lld was also in ref list!\n", finalValue );
+ log_error("ERROR: Final value %" PRId64 " was also in ref list!\n",
+ finalValue);
return false;
}
else
- valids[ finalValue ] = 1; // So the following loop will be okay
+ valids[finalValue] = 1; // So the following loop will be okay
/* Now check that every entry has one and only one count */
- if( originalValidCount != 1 )
+ if (originalValidCount != 1)
{
- log_error( "ERROR: Starting reference value %d did not occur once-and-only-once (occurred %d)\n", 65191, originalValidCount );
+ log_error("ERROR: Starting reference value %d did not occur "
+ "once-and-only-once (occurred %d)\n",
+ 65191, originalValidCount);
return false;
}
- for( i = 0; i < size; i++ )
+ for (i = 0; i < size; i++)
{
- if( valids[ i ] != 1 )
+ if (valids[i] != 1)
{
- log_error( "ERROR: Reference value %ld did not occur once-and-only-once (occurred %d)\n", i, valids[ i ] );
- for( size_t j = 0; j < size; j++ )
- log_info( "%d: %d\n", (int)j, (int)valids[ j ] );
+ log_error("ERROR: Reference value %zu did not occur "
+ "once-and-only-once (occurred %d)\n",
+ i, valids[i]);
+ for (size_t j = 0; j < size; j++)
+ log_info("%d: %d\n", (int)j, (int)valids[j]);
return false;
}
}
- free( valids );
+ free(valids);
return true;
}
-bool test_atomic_xchg_verify_float( size_t size, cl_float *refValues, cl_float finalValue )
+bool test_atomic_xchg_verify_float(size_t size, cl_float *refValues,
+ cl_float finalValue)
{
- /* For xchg, each value from 0 to size - 1 should have an entry in the ref array, and ONLY one entry */
+ /* For xchg, each value from 0 to size - 1 should have an entry in the ref
+ * array, and ONLY one entry */
char *valids;
size_t i;
char originalValidCount = 0;
- valids = (char *)malloc( sizeof( char ) * size );
- memset( valids, 0, sizeof( char ) * size );
+ valids = (char *)malloc(sizeof(char) * size);
+ memset(valids, 0, sizeof(char) * size);
- for( i = 0; i < size; i++ )
+ for (i = 0; i < size; i++)
{
- cl_int *intRefValue = (cl_int *)( &refValues[ i ] );
- if( *intRefValue == INT_TEST_VALUE )
+ cl_int *intRefValue = (cl_int *)(&refValues[i]);
+ if (*intRefValue == INT_TEST_VALUE)
{
// Special initial value
originalValidCount++;
continue;
}
- if( refValues[ i ] < 0 || (size_t)refValues[ i ] >= size )
+ if (refValues[i] < 0 || (size_t)refValues[i] >= size)
{
- log_error( "ERROR: Reference value %ld outside of valid range! (%a)\n", i, refValues[ i ] );
+ log_error(
+ "ERROR: Reference value %zu outside of valid range! (%a)\n", i,
+ refValues[i]);
return false;
}
- valids[ (int)refValues[ i ] ] ++;
+ valids[(int)refValues[i]]++;
}
- /* Note: ONE entry will have zero count. It'll be the last one that executed, because that value should be
- the final value outputted */
- if( valids[ (int)finalValue ] > 0 )
+ /* Note: ONE entry will have zero count. It'll be the last one that
+ executed, because that value should be the final value outputted */
+ if (valids[(int)finalValue] > 0)
{
- log_error( "ERROR: Final value %a was also in ref list!\n", finalValue );
+ log_error("ERROR: Final value %a was also in ref list!\n", finalValue);
return false;
}
else
- valids[ (int)finalValue ] = 1; // So the following loop will be okay
+ valids[(int)finalValue] = 1; // So the following loop will be okay
/* Now check that every entry has one and only one count */
- if( originalValidCount != 1 )
+ if (originalValidCount != 1)
{
- log_error( "ERROR: Starting reference value %d did not occur once-and-only-once (occurred %d)\n", 65191, originalValidCount );
+ log_error("ERROR: Starting reference value %d did not occur "
+ "once-and-only-once (occurred %d)\n",
+ 65191, originalValidCount);
return false;
}
- for( i = 0; i < size; i++ )
+ for (i = 0; i < size; i++)
{
- if( valids[ i ] != 1 )
+ if (valids[i] != 1)
{
- log_error( "ERROR: Reference value %ld did not occur once-and-only-once (occurred %d)\n", i, valids[ i ] );
- for( size_t j = 0; j < size; j++ )
- log_info( "%d: %d\n", (int)j, (int)valids[ j ] );
+ log_error("ERROR: Reference value %zu did not occur "
+ "once-and-only-once (occurred %d)\n",
+ i, valids[i]);
+ for (size_t j = 0; j < size; j++)
+ log_info("%d: %d\n", (int)j, (int)valids[j]);
return false;
}
}
- free( valids );
+ free(valids);
return true;
}
-int test_atomic_xchg(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+int test_atomic_xchg(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue, int num_elements)
{
- TestFns set = { INT_TEST_VALUE, LONG_TEST_VALUE, NULL, NULL, NULL, test_atomic_xchg_verify_int, NULL, NULL, test_atomic_xchg_verify_long, NULL, NULL, test_atomic_xchg_verify_float };
+ TestFns set = { INT_TEST_VALUE,
+ LONG_TEST_VALUE,
+ NULL,
+ NULL,
+ NULL,
+ test_atomic_xchg_verify_int,
+ NULL,
+ NULL,
+ test_atomic_xchg_verify_long,
+ NULL,
+ NULL,
+ test_atomic_xchg_verify_float };
- int errors = test_atomic_function_set( deviceID, context, queue, num_elements, atom_xchg_core, set, false, true, /*usingAtomicPrefix*/ false );
- errors |= test_atomic_function_set( deviceID, context, queue, num_elements, atomic_xchg_core, set, false, true, /*usingAtomicPrefix*/ true );
+ int errors = test_atomic_function_set(
+ deviceID, context, queue, num_elements, atom_xchg_core, set, false,
+ true, /*usingAtomicPrefix*/ false);
+ errors |= test_atomic_function_set(deviceID, context, queue, num_elements,
+ atomic_xchg_core, set, false, true,
+ /*usingAtomicPrefix*/ true);
- errors |= test_atomic_function( deviceID, context, queue, num_elements, atomic_xchg_float_core, set, false, false, kFloat, true );
- errors |= test_atomic_function( deviceID, context, queue, num_elements, atomic_xchg_float_core, set, false, true, kFloat, true );
+ errors |= test_atomic_function(deviceID, context, queue, num_elements,
+ atomic_xchg_float_core, set, false, false,
+ kFloat, true);
+ errors |= test_atomic_function(deviceID, context, queue, num_elements,
+ atomic_xchg_float_core, set, false, true,
+ kFloat, true);
return errors;
}
@@ -720,51 +888,71 @@
#pragma mark ---- min
-const char atom_min_core[] = " oldValues[tid] = atom_min( &destMemory[0], oldValues[tid] );\n";
+const char atom_min_core[] =
+ " oldValues[tid] = atom_min( &destMemory[0], oldValues[tid] );\n";
-const char atomic_min_core[] = " oldValues[tid] = atomic_min( &destMemory[0], oldValues[tid] );\n";
+const char atomic_min_core[] =
+ " oldValues[tid] = atomic_min( &destMemory[0], oldValues[tid] );\n";
-cl_int test_atomic_min_result_int( size_t size, cl_int *startRefValues, size_t whichDestValue )
+cl_int test_atomic_min_result_int(size_t size, cl_int *startRefValues,
+ size_t whichDestValue)
{
cl_int total = 0x7fffffffL;
- for( size_t i = 0; i < size; i++ )
+ for (size_t i = 0; i < size; i++)
{
- if( startRefValues[ i ] < total )
- total = startRefValues[ i ];
+ if (startRefValues[i] < total) total = startRefValues[i];
}
return total;
}
-void test_atomic_min_gen_int( size_t size, cl_int *startRefValues, MTdata d )
+void test_atomic_min_gen_int(size_t size, cl_int *startRefValues, MTdata d)
{
- for( size_t i = 0; i < size; i++ )
- startRefValues[i] = (cl_int)( genrand_int32(d) % 0x3fffffff ) + 0x3fffffff;
+ for (size_t i = 0; i < size; i++)
+ startRefValues[i] =
+ (cl_int)(genrand_int32(d) % 0x3fffffff) + 0x3fffffff;
}
-cl_long test_atomic_min_result_long( size_t size, cl_long *startRefValues, size_t whichDestValue )
+cl_long test_atomic_min_result_long(size_t size, cl_long *startRefValues,
+ size_t whichDestValue)
{
cl_long total = 0x7fffffffffffffffLL;
- for( size_t i = 0; i < size; i++ )
+ for (size_t i = 0; i < size; i++)
{
- if( startRefValues[ i ] < total )
- total = startRefValues[ i ];
+ if (startRefValues[i] < total) total = startRefValues[i];
}
return total;
}
-void test_atomic_min_gen_long( size_t size, cl_long *startRefValues, MTdata d )
+void test_atomic_min_gen_long(size_t size, cl_long *startRefValues, MTdata d)
{
- for( size_t i = 0; i < size; i++ )
- startRefValues[i] = (cl_long)( genrand_int32(d) | ( ( (cl_long)genrand_int32(d) & 0x7fffffffL ) << 16 ) );
+ for (size_t i = 0; i < size; i++)
+ startRefValues[i] =
+ (cl_long)(genrand_int32(d)
+ | (((cl_long)genrand_int32(d) & 0x7fffffffL) << 16));
}
-int test_atomic_min(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+int test_atomic_min(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue, int num_elements)
{
- TestFns set = { 0x7fffffffL, 0x7fffffffffffffffLL, NULL, test_atomic_min_result_int, test_atomic_min_gen_int, NULL, test_atomic_min_result_long, test_atomic_min_gen_long, NULL };
+ TestFns set = { 0x7fffffffL,
+ 0x7fffffffffffffffLL,
+ NULL,
+ test_atomic_min_result_int,
+ test_atomic_min_gen_int,
+ NULL,
+ test_atomic_min_result_long,
+ test_atomic_min_gen_long,
+ NULL };
- if( test_atomic_function_set( deviceID, context, queue, num_elements, atom_min_core, set, true, /*matchGroupSize*/ false, /*usingAtomicPrefix*/ false ) != 0 )
+ if (test_atomic_function_set(
+ deviceID, context, queue, num_elements, atom_min_core, set, true,
+ /*matchGroupSize*/ false, /*usingAtomicPrefix*/ false)
+ != 0)
return -1;
- if( test_atomic_function_set( deviceID, context, queue, num_elements, atomic_min_core, set, true, /*matchGroupSize*/ false, /*usingAtomicPrefix*/ true ) != 0 )
+ if (test_atomic_function_set(
+ deviceID, context, queue, num_elements, atomic_min_core, set, true,
+ /*matchGroupSize*/ false, /*usingAtomicPrefix*/ true)
+ != 0)
return -1;
return 0;
}
@@ -772,79 +960,118 @@
#pragma mark ---- max
-const char atom_max_core[] = " oldValues[tid] = atom_max( &destMemory[0], oldValues[tid] );\n";
+const char atom_max_core[] =
+ " oldValues[tid] = atom_max( &destMemory[0], oldValues[tid] );\n";
-const char atomic_max_core[] = " oldValues[tid] = atomic_max( &destMemory[0], oldValues[tid] );\n";
+const char atomic_max_core[] =
+ " oldValues[tid] = atomic_max( &destMemory[0], oldValues[tid] );\n";
-cl_int test_atomic_max_result_int( size_t size, cl_int *startRefValues, size_t whichDestValue )
+cl_int test_atomic_max_result_int(size_t size, cl_int *startRefValues,
+ size_t whichDestValue)
{
cl_int total = 0;
- for( size_t i = 0; i < size; i++ )
+ for (size_t i = 0; i < size; i++)
{
- if( startRefValues[ i ] > total )
- total = startRefValues[ i ];
+ if (startRefValues[i] > total) total = startRefValues[i];
}
return total;
}
-void test_atomic_max_gen_int( size_t size, cl_int *startRefValues, MTdata d )
+void test_atomic_max_gen_int(size_t size, cl_int *startRefValues, MTdata d)
{
- for( size_t i = 0; i < size; i++ )
- startRefValues[i] = (cl_int)( genrand_int32(d) % 0x3fffffff ) + 0x3fffffff;
+ for (size_t i = 0; i < size; i++)
+ startRefValues[i] =
+ (cl_int)(genrand_int32(d) % 0x3fffffff) + 0x3fffffff;
}
-cl_long test_atomic_max_result_long( size_t size, cl_long *startRefValues, size_t whichDestValue )
+cl_long test_atomic_max_result_long(size_t size, cl_long *startRefValues,
+ size_t whichDestValue)
{
cl_long total = 0;
- for( size_t i = 0; i < size; i++ )
+ for (size_t i = 0; i < size; i++)
{
- if( startRefValues[ i ] > total )
- total = startRefValues[ i ];
+ if (startRefValues[i] > total) total = startRefValues[i];
}
return total;
}
-void test_atomic_max_gen_long( size_t size, cl_long *startRefValues, MTdata d )
+void test_atomic_max_gen_long(size_t size, cl_long *startRefValues, MTdata d)
{
- for( size_t i = 0; i < size; i++ )
- startRefValues[i] = (cl_long)( genrand_int32(d) | ( ( (cl_long)genrand_int32(d) & 0x7fffffffL ) << 16 ) );
+ for (size_t i = 0; i < size; i++)
+ startRefValues[i] =
+ (cl_long)(genrand_int32(d)
+ | (((cl_long)genrand_int32(d) & 0x7fffffffL) << 16));
}
-int test_atomic_max(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+int test_atomic_max(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue, int num_elements)
{
- TestFns set = { 0, 0, NULL, test_atomic_max_result_int, test_atomic_max_gen_int, NULL, test_atomic_max_result_long, test_atomic_max_gen_long, NULL };
+ TestFns set = { 0,
+ 0,
+ NULL,
+ test_atomic_max_result_int,
+ test_atomic_max_gen_int,
+ NULL,
+ test_atomic_max_result_long,
+ test_atomic_max_gen_long,
+ NULL };
- if( test_atomic_function_set( deviceID, context, queue, num_elements, atom_max_core, set, true, /*matchGroupSize*/ false, /*usingAtomicPrefix*/ false ) != 0 )
+ if (test_atomic_function_set(
+ deviceID, context, queue, num_elements, atom_max_core, set, true,
+ /*matchGroupSize*/ false, /*usingAtomicPrefix*/ false)
+ != 0)
return -1;
- if( test_atomic_function_set( deviceID, context, queue, num_elements, atomic_max_core, set, true, /*matchGroupSize*/ false, /*usingAtomicPrefix*/ true ) != 0 )
- return -1;
+ if (test_atomic_function_set(
+ deviceID, context, queue, num_elements, atomic_max_core, set, true,
+ /*matchGroupSize*/ false, /*usingAtomicPrefix*/ true)
+ != 0)
+ return -1;
return 0;
}
#pragma mark ---- inc
-const char atom_inc_core[] = " oldValues[tid] = atom_inc( &destMemory[0] );\n";
+const char atom_inc_core[] =
+ " oldValues[tid] = atom_inc( &destMemory[0] );\n";
-const char atomic_inc_core[] = " oldValues[tid] = atomic_inc( &destMemory[0] );\n";
+const char atomic_inc_core[] =
+ " oldValues[tid] = atomic_inc( &destMemory[0] );\n";
-cl_int test_atomic_inc_result_int( size_t size, cl_int *startRefValues, size_t whichDestValue )
+cl_int test_atomic_inc_result_int(size_t size, cl_int *startRefValues,
+ size_t whichDestValue)
{
return INT_TEST_VALUE + (cl_int)size;
}
-cl_long test_atomic_inc_result_long( size_t size, cl_long *startRefValues, size_t whichDestValue )
+cl_long test_atomic_inc_result_long(size_t size, cl_long *startRefValues,
+ size_t whichDestValue)
{
return LONG_TEST_VALUE + size;
}
-int test_atomic_inc(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+int test_atomic_inc(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue, int num_elements)
{
- TestFns set = { INT_TEST_VALUE, LONG_TEST_VALUE, NULL, test_atomic_inc_result_int, NULL, NULL, test_atomic_inc_result_long, NULL, NULL };
+ TestFns set = { INT_TEST_VALUE,
+ LONG_TEST_VALUE,
+ NULL,
+ test_atomic_inc_result_int,
+ NULL,
+ NULL,
+ test_atomic_inc_result_long,
+ NULL,
+ NULL };
- if( test_atomic_function_set( deviceID, context, queue, num_elements, atom_inc_core, set, false, /*matchGroupSize*/ false, /*usingAtomicPrefix*/ false ) != 0 )
+ if (test_atomic_function_set(
+ deviceID, context, queue, num_elements, atom_inc_core, set, false,
+ /*matchGroupSize*/ false, /*usingAtomicPrefix*/ false)
+ != 0)
return -1;
- if( test_atomic_function_set( deviceID, context, queue, num_elements, atomic_inc_core, set, false, /*matchGroupSize*/ false, /*usingAtomicPrefix*/ true ) != 0 )
+ if (test_atomic_function_set(
+ deviceID, context, queue, num_elements, atomic_inc_core, set, false,
+ /*matchGroupSize*/ false, /*usingAtomicPrefix*/ true)
+ != 0)
return -1;
return 0;
}
@@ -852,27 +1079,46 @@
#pragma mark ---- dec
-const char atom_dec_core[] = " oldValues[tid] = atom_dec( &destMemory[0] );\n";
+const char atom_dec_core[] =
+ " oldValues[tid] = atom_dec( &destMemory[0] );\n";
-const char atomic_dec_core[] = " oldValues[tid] = atomic_dec( &destMemory[0] );\n";
+const char atomic_dec_core[] =
+ " oldValues[tid] = atomic_dec( &destMemory[0] );\n";
-cl_int test_atomic_dec_result_int( size_t size, cl_int *startRefValues, size_t whichDestValue )
+cl_int test_atomic_dec_result_int(size_t size, cl_int *startRefValues,
+ size_t whichDestValue)
{
return INT_TEST_VALUE - (cl_int)size;
}
-cl_long test_atomic_dec_result_long( size_t size, cl_long *startRefValues, size_t whichDestValue )
+cl_long test_atomic_dec_result_long(size_t size, cl_long *startRefValues,
+ size_t whichDestValue)
{
return LONG_TEST_VALUE - size;
}
-int test_atomic_dec(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+int test_atomic_dec(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue, int num_elements)
{
- TestFns set = { INT_TEST_VALUE, LONG_TEST_VALUE, NULL, test_atomic_dec_result_int, NULL, NULL, test_atomic_dec_result_long, NULL, NULL };
+ TestFns set = { INT_TEST_VALUE,
+ LONG_TEST_VALUE,
+ NULL,
+ test_atomic_dec_result_int,
+ NULL,
+ NULL,
+ test_atomic_dec_result_long,
+ NULL,
+ NULL };
- if( test_atomic_function_set( deviceID, context, queue, num_elements, atom_dec_core, set, false, /*matchGroupSize*/ false, /*usingAtomicPrefix*/ false ) != 0 )
+ if (test_atomic_function_set(
+ deviceID, context, queue, num_elements, atom_dec_core, set, false,
+ /*matchGroupSize*/ false, /*usingAtomicPrefix*/ false)
+ != 0)
return -1;
- if( test_atomic_function_set( deviceID, context, queue, num_elements, atomic_dec_core, set, false, /*matchGroupSize*/ false, /*usingAtomicPrefix*/ true ) != 0 )
+ if (test_atomic_function_set(
+ deviceID, context, queue, num_elements, atomic_dec_core, set, false,
+ /*matchGroupSize*/ false, /*usingAtomicPrefix*/ true)
+ != 0)
return -1;
return 0;
}
@@ -881,129 +1127,159 @@
#pragma mark ---- cmpxchg
/* We test cmpxchg by implementing (the long way) atom_add */
+// clang-format off
const char atom_cmpxchg_core[] =
-" int oldValue, origValue, newValue;\n"
-" do { \n"
-" origValue = destMemory[0];\n"
-" newValue = origValue + tid + 2;\n"
-" oldValue = atom_cmpxchg( &destMemory[0], origValue, newValue );\n"
-" } while( oldValue != origValue );\n"
-" oldValues[tid] = oldValue;\n"
-;
+ " int oldValue, origValue, newValue;\n"
+ " do { \n"
+ " origValue = destMemory[0];\n"
+ " newValue = origValue + tid + 2;\n"
+ " oldValue = atom_cmpxchg( &destMemory[0], origValue, newValue );\n"
+ " } while( oldValue != origValue );\n"
+ " oldValues[tid] = oldValue;\n";
const char atom_cmpxchg64_core[] =
-" long oldValue, origValue, newValue;\n"
-" do { \n"
-" origValue = destMemory[0];\n"
-" newValue = origValue + tid + 2;\n"
-" oldValue = atom_cmpxchg( &destMemory[0], origValue, newValue );\n"
-" } while( oldValue != origValue );\n"
-" oldValues[tid] = oldValue;\n"
-;
+ " long oldValue, origValue, newValue;\n"
+ " do { \n"
+ " origValue = destMemory[0];\n"
+ " newValue = origValue + tid + 2;\n"
+ " oldValue = atom_cmpxchg( &destMemory[0], origValue, newValue );\n"
+ " } while( oldValue != origValue );\n"
+ " oldValues[tid] = oldValue;\n";
const char atomic_cmpxchg_core[] =
-" int oldValue, origValue, newValue;\n"
-" do { \n"
-" origValue = destMemory[0];\n"
-" newValue = origValue + tid + 2;\n"
-" oldValue = atomic_cmpxchg( &destMemory[0], origValue, newValue );\n"
-" } while( oldValue != origValue );\n"
-" oldValues[tid] = oldValue;\n"
-;
+ " int oldValue, origValue, newValue;\n"
+ " do { \n"
+ " origValue = destMemory[0];\n"
+ " newValue = origValue + tid + 2;\n"
+ " oldValue = atomic_cmpxchg( &destMemory[0], origValue, newValue );\n"
+ " } while( oldValue != origValue );\n"
+ " oldValues[tid] = oldValue;\n";
+// clang-format on
-cl_int test_atomic_cmpxchg_result_int( size_t size, cl_int *startRefValues, size_t whichDestValue )
+cl_int test_atomic_cmpxchg_result_int(size_t size, cl_int *startRefValues,
+ size_t whichDestValue)
{
cl_int total = INT_TEST_VALUE;
- for( size_t i = 0; i < size; i++ )
- total += (cl_int)i + 2;
+ for (size_t i = 0; i < size; i++) total += (cl_int)i + 2;
return total;
}
-cl_long test_atomic_cmpxchg_result_long( size_t size, cl_long *startRefValues, size_t whichDestValue )
+cl_long test_atomic_cmpxchg_result_long(size_t size, cl_long *startRefValues,
+ size_t whichDestValue)
{
cl_long total = LONG_TEST_VALUE;
- for( size_t i = 0; i < size; i++ )
- total += i + 2;
+ for (size_t i = 0; i < size; i++) total += i + 2;
return total;
}
-int test_atomic_cmpxchg(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+int test_atomic_cmpxchg(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue, int num_elements)
{
- TestFns set = { INT_TEST_VALUE, LONG_TEST_VALUE, NULL, test_atomic_cmpxchg_result_int, NULL, NULL, test_atomic_cmpxchg_result_long, NULL, NULL };
+ TestFns set = { INT_TEST_VALUE,
+ LONG_TEST_VALUE,
+ NULL,
+ test_atomic_cmpxchg_result_int,
+ NULL,
+ NULL,
+ test_atomic_cmpxchg_result_long,
+ NULL,
+ NULL };
int errors = 0;
log_info(" Testing atom_ functions...\n");
- errors |= test_atomic_function( deviceID, context, queue, num_elements, atom_cmpxchg_core, set, false, false, kInt, true );
- errors |= test_atomic_function( deviceID, context, queue, num_elements, atom_cmpxchg_core, set, false, false, kUInt, true );
- errors |= test_atomic_function( deviceID, context, queue, num_elements, atom_cmpxchg_core, set, false, true, kInt, true );
- errors |= test_atomic_function( deviceID, context, queue, num_elements, atom_cmpxchg_core, set, false, true, kUInt, true );
+ errors |=
+ test_atomic_function(deviceID, context, queue, num_elements,
+ atom_cmpxchg_core, set, false, false, kInt, true);
+ errors |=
+ test_atomic_function(deviceID, context, queue, num_elements,
+ atom_cmpxchg_core, set, false, false, kUInt, true);
+ errors |=
+ test_atomic_function(deviceID, context, queue, num_elements,
+ atom_cmpxchg_core, set, false, true, kInt, true);
+ errors |=
+ test_atomic_function(deviceID, context, queue, num_elements,
+ atom_cmpxchg_core, set, false, true, kUInt, true);
- errors |= test_atomic_function( deviceID, context, queue, num_elements, atom_cmpxchg64_core, set, false, false, kLong, true );
- errors |= test_atomic_function( deviceID, context, queue, num_elements, atom_cmpxchg64_core, set, false, false, kULong, true );
- errors |= test_atomic_function( deviceID, context, queue, num_elements, atom_cmpxchg64_core, set, false, true, kLong, true );
- errors |= test_atomic_function( deviceID, context, queue, num_elements, atom_cmpxchg64_core, set, false, true, kULong, true );
+ errors |= test_atomic_function(deviceID, context, queue, num_elements,
+ atom_cmpxchg64_core, set, false, false,
+ kLong, true);
+ errors |= test_atomic_function(deviceID, context, queue, num_elements,
+ atom_cmpxchg64_core, set, false, false,
+ kULong, true);
+ errors |= test_atomic_function(deviceID, context, queue, num_elements,
+ atom_cmpxchg64_core, set, false, true, kLong,
+ true);
+ errors |= test_atomic_function(deviceID, context, queue, num_elements,
+ atom_cmpxchg64_core, set, false, true,
+ kULong, true);
log_info(" Testing atomic_ functions...\n");
- errors |= test_atomic_function( deviceID, context, queue, num_elements, atomic_cmpxchg_core, set, false, false, kInt, true );
- errors |= test_atomic_function( deviceID, context, queue, num_elements, atomic_cmpxchg_core, set, false, false, kUInt, true );
- errors |= test_atomic_function( deviceID, context, queue, num_elements, atomic_cmpxchg_core, set, false, true, kInt, true );
- errors |= test_atomic_function( deviceID, context, queue, num_elements, atomic_cmpxchg_core, set, false, true, kUInt, true );
+ errors |= test_atomic_function(deviceID, context, queue, num_elements,
+ atomic_cmpxchg_core, set, false, false, kInt,
+ true);
+ errors |= test_atomic_function(deviceID, context, queue, num_elements,
+ atomic_cmpxchg_core, set, false, false,
+ kUInt, true);
+ errors |=
+ test_atomic_function(deviceID, context, queue, num_elements,
+ atomic_cmpxchg_core, set, false, true, kInt, true);
+ errors |= test_atomic_function(deviceID, context, queue, num_elements,
+ atomic_cmpxchg_core, set, false, true, kUInt,
+ true);
- if( errors )
- return -1;
+ if (errors) return -1;
return 0;
}
#pragma mark -------- Bitwise functions
-size_t test_bitwise_num_results( size_t threadCount, ExplicitType dataType )
+size_t test_bitwise_num_results(size_t threadCount, ExplicitType dataType)
{
- size_t numBits = get_explicit_type_size( dataType ) * 8;
+ size_t numBits = get_explicit_type_size(dataType) * 8;
- return ( threadCount + numBits - 1 ) / numBits;
+ return (threadCount + numBits - 1) / numBits;
}
#pragma mark ---- and
+// clang-format off
const char atom_and_core[] =
-" size_t numBits = sizeof( destMemory[0] ) * 8;\n"
-" int whichResult = tid / numBits;\n"
-" int bitIndex = tid - ( whichResult * numBits );\n"
-"\n"
-" oldValues[tid] = atom_and( &destMemory[whichResult], ~( 1L << bitIndex ) );\n"
-;
+ " size_t numBits = sizeof( destMemory[0] ) * 8;\n"
+ " int whichResult = tid / numBits;\n"
+ " int bitIndex = tid - ( whichResult * numBits );\n"
+ "\n"
+ " oldValues[tid] = atom_and( &destMemory[whichResult], ~( 1L << bitIndex ) );\n";
const char atomic_and_core[] =
-" size_t numBits = sizeof( destMemory[0] ) * 8;\n"
-" int whichResult = tid / numBits;\n"
-" int bitIndex = tid - ( whichResult * numBits );\n"
-"\n"
-" oldValues[tid] = atomic_and( &destMemory[whichResult], ~( 1L << bitIndex ) );\n"
-;
+ " size_t numBits = sizeof( destMemory[0] ) * 8;\n"
+ " int whichResult = tid / numBits;\n"
+ " int bitIndex = tid - ( whichResult * numBits );\n"
+ "\n"
+ " oldValues[tid] = atomic_and( &destMemory[whichResult], ~( 1L << bitIndex ) );\n";
+// clang-format on
-cl_int test_atomic_and_result_int( size_t size, cl_int *startRefValues, size_t whichResult )
+cl_int test_atomic_and_result_int(size_t size, cl_int *startRefValues,
+ size_t whichResult)
{
- size_t numThreads = ( (size_t)size + 31 ) / 32;
- if( whichResult < numThreads - 1 )
- return 0;
+ size_t numThreads = ((size_t)size + 31) / 32;
+ if (whichResult < numThreads - 1) return 0;
// Last item doesn't get and'ed on every bit, so we have to mask away
size_t numBits = (size_t)size - whichResult * 32;
cl_int bits = (cl_int)0xffffffffL;
- for( size_t i = 0; i < numBits; i++ )
- bits &= ~( 1 << i );
+ for (size_t i = 0; i < numBits; i++) bits &= ~(1 << i);
return bits;
}
-cl_long test_atomic_and_result_long( size_t size, cl_long *startRefValues, size_t whichResult )
+cl_long test_atomic_and_result_long(size_t size, cl_long *startRefValues,
+ size_t whichResult)
{
- size_t numThreads = ( (size_t)size + 63 ) / 64;
- if( whichResult < numThreads - 1 )
- return 0;
+ size_t numThreads = ((size_t)size + 63) / 64;
+ if (whichResult < numThreads - 1) return 0;
// Last item doesn't get and'ed on every bit, so we have to mask away
size_t numBits = (size_t)size - whichResult * 64;
@@ -1013,14 +1289,28 @@
return bits;
}
-int test_atomic_and(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+int test_atomic_and(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue, int num_elements)
{
- TestFns set = { 0xffffffff, 0xffffffffffffffffLL, test_bitwise_num_results,
- test_atomic_and_result_int, NULL, NULL, test_atomic_and_result_long, NULL, NULL };
+ TestFns set = { 0xffffffff,
+ 0xffffffffffffffffLL,
+ test_bitwise_num_results,
+ test_atomic_and_result_int,
+ NULL,
+ NULL,
+ test_atomic_and_result_long,
+ NULL,
+ NULL };
- if( test_atomic_function_set( deviceID, context, queue, num_elements, atom_and_core, set, true, /*matchGroupSize*/ false, /*usingAtomicPrefix*/ false ) != 0 )
+ if (test_atomic_function_set(
+ deviceID, context, queue, num_elements, atom_and_core, set, true,
+ /*matchGroupSize*/ false, /*usingAtomicPrefix*/ false)
+ != 0)
return -1;
- if( test_atomic_function_set( deviceID, context, queue, num_elements, atomic_and_core, set, true, /*matchGroupSize*/ false, /*usingAtomicPrefix*/ true ) != 0 )
+ if (test_atomic_function_set(
+ deviceID, context, queue, num_elements, atomic_and_core, set, true,
+ /*matchGroupSize*/ false, /*usingAtomicPrefix*/ true)
+ != 0)
return -1;
return 0;
}
@@ -1028,59 +1318,68 @@
#pragma mark ---- or
+// clang-format off
const char atom_or_core[] =
-" size_t numBits = sizeof( destMemory[0] ) * 8;\n"
-" int whichResult = tid / numBits;\n"
-" int bitIndex = tid - ( whichResult * numBits );\n"
-"\n"
-" oldValues[tid] = atom_or( &destMemory[whichResult], ( 1L << bitIndex ) );\n"
-;
+ " size_t numBits = sizeof( destMemory[0] ) * 8;\n"
+ " int whichResult = tid / numBits;\n"
+ " int bitIndex = tid - ( whichResult * numBits );\n"
+ "\n"
+ " oldValues[tid] = atom_or( &destMemory[whichResult], ( 1L << bitIndex ) );\n";
const char atomic_or_core[] =
-" size_t numBits = sizeof( destMemory[0] ) * 8;\n"
-" int whichResult = tid / numBits;\n"
-" int bitIndex = tid - ( whichResult * numBits );\n"
-"\n"
-" oldValues[tid] = atomic_or( &destMemory[whichResult], ( 1L << bitIndex ) );\n"
-;
+ " size_t numBits = sizeof( destMemory[0] ) * 8;\n"
+ " int whichResult = tid / numBits;\n"
+ " int bitIndex = tid - ( whichResult * numBits );\n"
+ "\n"
+ " oldValues[tid] = atomic_or( &destMemory[whichResult], ( 1L << bitIndex ) );\n";
+// clang-format on
-cl_int test_atomic_or_result_int( size_t size, cl_int *startRefValues, size_t whichResult )
+cl_int test_atomic_or_result_int(size_t size, cl_int *startRefValues,
+ size_t whichResult)
{
- size_t numThreads = ( (size_t)size + 31 ) / 32;
- if( whichResult < numThreads - 1 )
- return 0xffffffff;
+ size_t numThreads = ((size_t)size + 31) / 32;
+ if (whichResult < numThreads - 1) return 0xffffffff;
// Last item doesn't get and'ed on every bit, so we have to mask away
size_t numBits = (size_t)size - whichResult * 32;
cl_int bits = 0;
- for( size_t i = 0; i < numBits; i++ )
- bits |= ( 1 << i );
+ for (size_t i = 0; i < numBits; i++) bits |= (1 << i);
return bits;
}
-cl_long test_atomic_or_result_long( size_t size, cl_long *startRefValues, size_t whichResult )
+cl_long test_atomic_or_result_long(size_t size, cl_long *startRefValues,
+ size_t whichResult)
{
- size_t numThreads = ( (size_t)size + 63 ) / 64;
- if( whichResult < numThreads - 1 )
- return 0x0ffffffffffffffffLL;
+ size_t numThreads = ((size_t)size + 63) / 64;
+ if (whichResult < numThreads - 1) return 0x0ffffffffffffffffLL;
// Last item doesn't get and'ed on every bit, so we have to mask away
size_t numBits = (size_t)size - whichResult * 64;
cl_long bits = 0;
- for( size_t i = 0; i < numBits; i++ )
- bits |= ( 1LL << i );
+ for (size_t i = 0; i < numBits; i++) bits |= (1LL << i);
return bits;
}
-int test_atomic_or(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+int test_atomic_or(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue, int num_elements)
{
- TestFns set = { 0, 0LL, test_bitwise_num_results, test_atomic_or_result_int, NULL, NULL, test_atomic_or_result_long, NULL, NULL };
+ TestFns set = {
+ 0, 0LL, test_bitwise_num_results, test_atomic_or_result_int,
+ NULL, NULL, test_atomic_or_result_long, NULL,
+ NULL
+ };
- if( test_atomic_function_set( deviceID, context, queue, num_elements, atom_or_core, set, true, /*matchGroupSize*/ false, /*usingAtomicPrefix*/ false ) != 0 )
+ if (test_atomic_function_set(
+ deviceID, context, queue, num_elements, atom_or_core, set, true,
+ /*matchGroupSize*/ false, /*usingAtomicPrefix*/ false)
+ != 0)
return -1;
- if( test_atomic_function_set( deviceID, context, queue, num_elements, atomic_or_core, set, true, /*matchGroupSize*/ false, /*usingAtomicPrefix*/ true ) != 0 )
+ if (test_atomic_function_set(
+ deviceID, context, queue, num_elements, atomic_or_core, set, true,
+ /*matchGroupSize*/ false, /*usingAtomicPrefix*/ true)
+ != 0)
return -1;
return 0;
}
@@ -1100,33 +1399,44 @@
"\n"
" oldValues[tid] = atomic_xor( &destMemory[0], 1L << bitIndex );\n";
-cl_int test_atomic_xor_result_int( size_t size, cl_int *startRefValues, size_t whichResult )
+cl_int test_atomic_xor_result_int(size_t size, cl_int *startRefValues,
+ size_t whichResult)
{
cl_int total = 0x2f08ab41;
- for( size_t i = 0; i < size; i++ )
- total ^= ( 1 << ( i & 31 ) );
+ for (size_t i = 0; i < size; i++) total ^= (1 << (i & 31));
return total;
}
-cl_long test_atomic_xor_result_long( size_t size, cl_long *startRefValues, size_t whichResult )
+cl_long test_atomic_xor_result_long(size_t size, cl_long *startRefValues,
+ size_t whichResult)
{
cl_long total = 0x2f08ab418ba0541LL;
- for( size_t i = 0; i < size; i++ )
- total ^= ( 1LL << ( i & 63 ) );
+ for (size_t i = 0; i < size; i++) total ^= (1LL << (i & 63));
return total;
}
-int test_atomic_xor(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+int test_atomic_xor(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue, int num_elements)
{
- TestFns set = { 0x2f08ab41, 0x2f08ab418ba0541LL, NULL, test_atomic_xor_result_int, NULL, NULL, test_atomic_xor_result_long, NULL, NULL };
+ TestFns set = { 0x2f08ab41,
+ 0x2f08ab418ba0541LL,
+ NULL,
+ test_atomic_xor_result_int,
+ NULL,
+ NULL,
+ test_atomic_xor_result_long,
+ NULL,
+ NULL };
- if( test_atomic_function_set( deviceID, context, queue, num_elements, atom_xor_core, set, true, /*matchGroupSize*/ false, /*usingAtomicPrefix*/ false ) != 0 )
+ if (test_atomic_function_set(
+ deviceID, context, queue, num_elements, atom_xor_core, set, true,
+ /*matchGroupSize*/ false, /*usingAtomicPrefix*/ false)
+ != 0)
return -1;
- if( test_atomic_function_set( deviceID, context, queue, num_elements, atomic_xor_core, set, true, /*matchGroupSize*/ false, /*usingAtomicPrefix*/ true ) != 0 )
+ if (test_atomic_function_set(
+ deviceID, context, queue, num_elements, atomic_xor_core, set, true,
+ /*matchGroupSize*/ false, /*usingAtomicPrefix*/ true)
+ != 0)
return -1;
return 0;
}
-
-
-
-
diff --git a/test_conformance/atomics/test_indexed_cases.cpp b/test_conformance/atomics/test_indexed_cases.cpp
index b85e3d2..2bba3e2 100644
--- a/test_conformance/atomics/test_indexed_cases.cpp
+++ b/test_conformance/atomics/test_indexed_cases.cpp
@@ -1,6 +1,6 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
-//
+//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
@@ -16,48 +16,55 @@
#include "testBase.h"
#include "harness/conversions.h"
-const char * atomic_index_source =
-"#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable\n"
-"// Counter keeps track of which index in counts we are using.\n"
-"// We get that value, increment it, and then set that index in counts to our thread ID.\n"
-"// At the end of this we should have all thread IDs in some random location in counts\n"
-"// exactly once. If atom_add failed then we will write over various thread IDs and we\n"
-"// will be missing some.\n"
-"\n"
-"__kernel void add_index_test(__global int *counter, __global int *counts) {\n"
-" int tid = get_global_id(0);\n"
-" \n"
-" int counter_to_use = atom_add(counter, 1);\n"
-" counts[counter_to_use] = tid;\n"
-"}";
+// clang-format off
+const char *atomic_index_source =
+ "#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable\n"
+ "// Counter keeps track of which index in counts we are using.\n"
+ "// We get that value, increment it, and then set that index in counts to our thread ID.\n"
+ "// At the end of this we should have all thread IDs in some random location in counts\n"
+ "// exactly once. If atom_add failed then we will write over various thread IDs and we\n"
+ "// will be missing some.\n"
+ "\n"
+ "__kernel void add_index_test(__global int *counter, __global int *counts) {\n"
+ " int tid = get_global_id(0);\n"
+ " \n"
+ " int counter_to_use = atom_add(counter, 1);\n"
+ " counts[counter_to_use] = tid;\n"
+ "}";
+// clang-format on
-int test_atomic_add_index(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+int test_atomic_add_index(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue, int num_elements)
{
clProgramWrapper program;
clKernelWrapper kernel;
clMemWrapper counter, counters;
size_t numGlobalThreads, numLocalThreads;
- int fail = 0, succeed = 0, err;
+ int fail = 0, err;
- /* Check if atomics are supported. */
- if (!is_extension_available(deviceID, "cl_khr_global_int32_base_atomics")) {
- log_info("Base atomics not supported (cl_khr_global_int32_base_atomics). Skipping test.\n");
- return 0;
- }
+ /* Check if atomics are supported. */
+ if (!is_extension_available(deviceID, "cl_khr_global_int32_base_atomics"))
+ {
+ log_info("Base atomics not supported "
+ "(cl_khr_global_int32_base_atomics). Skipping test.\n");
+ return 0;
+ }
//===== add_index test
// The index test replicates what particles does.
- // It uses one memory location to keep track of the current index and then each thread
- // does an atomic add to it to get its new location. The threads then write to their
- // assigned location. At the end we check to make sure that each thread's ID shows up
- // exactly once in the output.
+ // It uses one memory location to keep track of the current index and then
+ // each thread does an atomic add to it to get its new location. The threads
+ // then write to their assigned location. At the end we check to make sure
+ // that each thread's ID shows up exactly once in the output.
numGlobalThreads = 2048;
- if( create_single_kernel_helper( context, &program, &kernel, 1, &atomic_index_source, "add_index_test" ) )
+ if (create_single_kernel_helper(context, &program, &kernel, 1,
+ &atomic_index_source, "add_index_test"))
return -1;
- if( get_max_common_work_group_size( context, kernel, numGlobalThreads, &numLocalThreads ) )
+ if (get_max_common_work_group_size(context, kernel, numGlobalThreads,
+ &numLocalThreads))
return -1;
log_info("Execute global_threads:%d local_threads:%d\n",
@@ -72,103 +79,148 @@
sizeof(cl_int) * numGlobalThreads, NULL, NULL);
// Reset all those locations to -1 to indciate they have not been used.
- cl_int *values = (cl_int*) malloc(sizeof(cl_int)*numGlobalThreads);
- if (values == NULL) {
- log_error("add_index_test FAILED to allocate memory for initial values.\n");
- fail = 1; succeed = -1;
- } else {
+ cl_int *values = (cl_int *)malloc(sizeof(cl_int) * numGlobalThreads);
+ if (values == NULL)
+ {
+ log_error(
+ "add_index_test FAILED to allocate memory for initial values.\n");
+ fail = 1;
+ }
+ else
+ {
memset(values, -1, numLocalThreads);
- unsigned int i=0;
- for (i=0; i<numGlobalThreads; i++)
- values[i] = -1;
- int init=0;
- err = clEnqueueWriteBuffer(queue, counters, true, 0, numGlobalThreads*sizeof(cl_int), values, 0, NULL, NULL);
- err |= clEnqueueWriteBuffer(queue, counter, true, 0,1*sizeof(cl_int), &init, 0, NULL, NULL);
- if (err) {
- log_error("add_index_test FAILED to write initial values to arrays: %d\n", err);
- fail=1; succeed=-1;
- } else {
+ unsigned int i = 0;
+ for (i = 0; i < numGlobalThreads; i++) values[i] = -1;
+ int init = 0;
+ err = clEnqueueWriteBuffer(queue, counters, true, 0,
+ numGlobalThreads * sizeof(cl_int), values, 0,
+ NULL, NULL);
+ err |= clEnqueueWriteBuffer(queue, counter, true, 0, 1 * sizeof(cl_int),
+ &init, 0, NULL, NULL);
+ if (err)
+ {
+ log_error(
+ "add_index_test FAILED to write initial values to arrays: %d\n",
+ err);
+ fail = 1;
+ }
+ else
+ {
err = clSetKernelArg(kernel, 0, sizeof(counter), &counter);
err |= clSetKernelArg(kernel, 1, sizeof(counters), &counters);
- if (err) {
- log_error("add_index_test FAILED to set kernel arguments: %d\n", err);
- fail=1; succeed=-1;
- } else {
- err = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, &numGlobalThreads, &numLocalThreads, 0, NULL, NULL );
- if (err) {
- log_error("add_index_test FAILED to execute kernel: %d\n", err);
- fail=1; succeed=-1;
- } else {
- err = clEnqueueReadBuffer( queue, counters, true, 0, sizeof(cl_int)*numGlobalThreads, values, 0, NULL, NULL );
- if (err) {
- log_error("add_index_test FAILED to read back results: %d\n", err);
- fail = 1; succeed=-1;
- } else {
+ if (err)
+ {
+ log_error("add_index_test FAILED to set kernel arguments: %d\n",
+ err);
+ fail = 1;
+ }
+ else
+ {
+ err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL,
+ &numGlobalThreads,
+ &numLocalThreads, 0, NULL, NULL);
+ if (err)
+ {
+ log_error("add_index_test FAILED to execute kernel: %d\n",
+ err);
+ fail = 1;
+ }
+ else
+ {
+ err = clEnqueueReadBuffer(queue, counters, true, 0,
+ sizeof(cl_int) * numGlobalThreads,
+ values, 0, NULL, NULL);
+ if (err)
+ {
+ log_error(
+ "add_index_test FAILED to read back results: %d\n",
+ err);
+ fail = 1;
+ }
+ else
+ {
unsigned int looking_for, index;
- for (looking_for=0; looking_for<numGlobalThreads; looking_for++) {
- int instances_found=0;
- for (index=0; index<numGlobalThreads; index++) {
- if (values[index]==(int)looking_for)
+ for (looking_for = 0; looking_for < numGlobalThreads;
+ looking_for++)
+ {
+ int instances_found = 0;
+ for (index = 0; index < numGlobalThreads; index++)
+ {
+ if (values[index] == (int)looking_for)
instances_found++;
}
- if (instances_found != 1) {
- log_error("add_index_test FAILED: wrong number of instances (%d!=1) for counter %d.\n", instances_found, looking_for);
- fail = 1; succeed=-1;
+ if (instances_found != 1)
+ {
+ log_error(
+ "add_index_test FAILED: wrong number of "
+ "instances (%d!=1) for counter %d.\n",
+ instances_found, looking_for);
+ fail = 1;
}
}
}
}
}
}
- if (!fail) {
- log_info("add_index_test passed. Each thread used exactly one index.\n");
+ if (!fail)
+ {
+ log_info(
+ "add_index_test passed. Each thread used exactly one index.\n");
}
free(values);
}
return fail;
}
+// clang-format off
const char *add_index_bin_kernel[] = {
-"#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable\n"
-"// This test assigns a bunch of values to bins and then tries to put them in the bins in parallel\n"
-"// using an atomic add to keep track of the current location to write into in each bin.\n"
-"// This is the same as the memory update for the particles demo.\n"
-"\n"
-"__kernel void add_index_bin_test(__global int *bin_counters, __global int *bins, __global int *bin_assignments, int max_counts_per_bin) {\n"
-" int tid = get_global_id(0);\n"
-"\n"
-" int location = bin_assignments[tid];\n"
-" int counter = atom_add(&bin_counters[location], 1);\n"
-" bins[location*max_counts_per_bin + counter] = tid;\n"
-"}" };
+ "#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable\n"
+ "// This test assigns a bunch of values to bins and then tries to put them in the bins in parallel\n"
+ "// using an atomic add to keep track of the current location to write into in each bin.\n"
+ "// This is the same as the memory update for the particles demo.\n"
+ "\n"
+ "__kernel void add_index_bin_test(__global int *bin_counters, __global int *bins, __global int *bin_assignments, int max_counts_per_bin) {\n"
+ " int tid = get_global_id(0);\n"
+ "\n"
+ " int location = bin_assignments[tid];\n"
+ " int counter = atom_add(&bin_counters[location], 1);\n"
+ " bins[location*max_counts_per_bin + counter] = tid;\n"
+ "}" };
+// clang-format on
-// This test assigns a bunch of values to bins and then tries to put them in the bins in parallel
-// using an atomic add to keep track of the current location to write into in each bin.
-// This is the same as the memory update for the particles demo.
-int add_index_bin_test(size_t *global_threads, cl_command_queue queue, cl_context context, MTdata d)
+// This test assigns a bunch of values to bins and then tries to put them in the
+// bins in parallel using an atomic add to keep track of the current location to
+// write into in each bin. This is the same as the memory update for the
+// particles demo.
+int add_index_bin_test(size_t *global_threads, cl_command_queue queue,
+ cl_context context, MTdata d)
{
int number_of_items = (int)global_threads[0];
size_t local_threads[1];
int divisor = 12;
- int number_of_bins = number_of_items/divisor;
- int max_counts_per_bin = divisor*2;
+ int number_of_bins = number_of_items / divisor;
+ int max_counts_per_bin = divisor * 2;
int fail = 0;
- int succeed = 0;
int err;
clProgramWrapper program;
clKernelWrapper kernel;
- // log_info("add_index_bin_test: %d items, into %d bins, with a max of %d items per bin (bins is %d long).\n",
- // number_of_items, number_of_bins, max_counts_per_bin, number_of_bins*max_counts_per_bin);
+ // log_info("add_index_bin_test: %d items, into %d bins, with a max of %d
+ // items per bin (bins is %d long).\n",
+ // number_of_items, number_of_bins, max_counts_per_bin,
+ // number_of_bins*max_counts_per_bin);
//===== add_index_bin test
// The index test replicates what particles does.
- err = create_single_kernel_helper(context, &program, &kernel, 1, add_index_bin_kernel, "add_index_bin_test" );
- test_error( err, "Unable to create testing kernel" );
+ err =
+ create_single_kernel_helper(context, &program, &kernel, 1,
+ add_index_bin_kernel, "add_index_bin_test");
+ test_error(err, "Unable to create testing kernel");
- if( get_max_common_work_group_size( context, kernel, global_threads[0], &local_threads[0] ) )
+ if (get_max_common_work_group_size(context, kernel, global_threads[0],
+ &local_threads[0]))
return -1;
log_info("Execute global_threads:%d local_threads:%d\n",
@@ -185,152 +237,228 @@
clCreateBuffer(context, CL_MEM_READ_ONLY,
sizeof(cl_int) * number_of_items, NULL, NULL);
- if (bin_counters == NULL) {
+ if (bin_counters == NULL)
+ {
log_error("add_index_bin_test FAILED to allocate bin_counters.\n");
return -1;
}
- if (bins == NULL) {
+ if (bins == NULL)
+ {
log_error("add_index_bin_test FAILED to allocate bins.\n");
return -1;
}
- if (bin_assignments == NULL) {
+ if (bin_assignments == NULL)
+ {
log_error("add_index_bin_test FAILED to allocate bin_assignments.\n");
return -1;
}
// Initialize our storage
- cl_int *l_bin_counts = (cl_int*)malloc(sizeof(cl_int)*number_of_bins);
- if (!l_bin_counts) {
- log_error("add_index_bin_test FAILED to allocate initial values for bin_counters.\n");
+ cl_int *l_bin_counts = (cl_int *)malloc(sizeof(cl_int) * number_of_bins);
+ if (!l_bin_counts)
+ {
+ log_error("add_index_bin_test FAILED to allocate initial values for "
+ "bin_counters.\n");
return -1;
}
int i;
- for (i=0; i<number_of_bins; i++)
- l_bin_counts[i] = 0;
- err = clEnqueueWriteBuffer(queue, bin_counters, true, 0, sizeof(cl_int)*number_of_bins, l_bin_counts, 0, NULL, NULL);
- if (err) {
- log_error("add_index_bin_test FAILED to set initial values for bin_counters: %d\n", err);
+ for (i = 0; i < number_of_bins; i++) l_bin_counts[i] = 0;
+ err = clEnqueueWriteBuffer(queue, bin_counters, true, 0,
+ sizeof(cl_int) * number_of_bins, l_bin_counts, 0,
+ NULL, NULL);
+ if (err)
+ {
+ log_error("add_index_bin_test FAILED to set initial values for "
+ "bin_counters: %d\n",
+ err);
return -1;
}
- cl_int *values = (cl_int*)malloc(sizeof(cl_int)*number_of_bins*max_counts_per_bin);
- if (!values) {
- log_error("add_index_bin_test FAILED to allocate initial values for bins.\n");
+ cl_int *values =
+ (cl_int *)malloc(sizeof(cl_int) * number_of_bins * max_counts_per_bin);
+ if (!values)
+ {
+ log_error(
+ "add_index_bin_test FAILED to allocate initial values for bins.\n");
return -1;
}
- for (i=0; i<number_of_bins*max_counts_per_bin; i++)
- values[i] = -1;
- err = clEnqueueWriteBuffer(queue, bins, true, 0, sizeof(cl_int)*number_of_bins*max_counts_per_bin, values, 0, NULL, NULL);
- if (err) {
- log_error("add_index_bin_test FAILED to set initial values for bins: %d\n", err);
+ for (i = 0; i < number_of_bins * max_counts_per_bin; i++) values[i] = -1;
+ err = clEnqueueWriteBuffer(queue, bins, true, 0,
+ sizeof(cl_int) * number_of_bins
+ * max_counts_per_bin,
+ values, 0, NULL, NULL);
+ if (err)
+ {
+ log_error(
+ "add_index_bin_test FAILED to set initial values for bins: %d\n",
+ err);
return -1;
}
free(values);
- cl_int *l_bin_assignments = (cl_int*)malloc(sizeof(cl_int)*number_of_items);
- if (!l_bin_assignments) {
- log_error("add_index_bin_test FAILED to allocate initial values for l_bin_assignments.\n");
+ cl_int *l_bin_assignments =
+ (cl_int *)malloc(sizeof(cl_int) * number_of_items);
+ if (!l_bin_assignments)
+ {
+ log_error("add_index_bin_test FAILED to allocate initial values for "
+ "l_bin_assignments.\n");
return -1;
}
- for (i=0; i<number_of_items; i++) {
- int bin = random_in_range(0, number_of_bins-1, d);
- while (l_bin_counts[bin] >= max_counts_per_bin) {
- bin = random_in_range(0, number_of_bins-1, d);
+ for (i = 0; i < number_of_items; i++)
+ {
+ int bin = random_in_range(0, number_of_bins - 1, d);
+ while (l_bin_counts[bin] >= max_counts_per_bin)
+ {
+ bin = random_in_range(0, number_of_bins - 1, d);
}
if (bin >= number_of_bins)
- log_error("add_index_bin_test internal error generating bin assignments: bin %d >= number_of_bins %d.\n", bin, number_of_bins);
- if (l_bin_counts[bin]+1 > max_counts_per_bin)
- log_error("add_index_bin_test internal error generating bin assignments: bin %d has more entries (%d) than max_counts_per_bin (%d).\n", bin, l_bin_counts[bin], max_counts_per_bin);
+ log_error("add_index_bin_test internal error generating bin "
+ "assignments: bin %d >= number_of_bins %d.\n",
+ bin, number_of_bins);
+ if (l_bin_counts[bin] + 1 > max_counts_per_bin)
+ log_error(
+ "add_index_bin_test internal error generating bin assignments: "
+ "bin %d has more entries (%d) than max_counts_per_bin (%d).\n",
+ bin, l_bin_counts[bin], max_counts_per_bin);
l_bin_counts[bin]++;
l_bin_assignments[i] = bin;
- // log_info("item %d assigned to bin %d (%d items)\n", i, bin, l_bin_counts[bin]);
+ // log_info("item %d assigned to bin %d (%d items)\n", i, bin,
+ // l_bin_counts[bin]);
}
- err = clEnqueueWriteBuffer(queue, bin_assignments, true, 0, sizeof(cl_int)*number_of_items, l_bin_assignments, 0, NULL, NULL);
- if (err) {
- log_error("add_index_bin_test FAILED to set initial values for bin_assignments: %d\n", err);
+ err = clEnqueueWriteBuffer(queue, bin_assignments, true, 0,
+ sizeof(cl_int) * number_of_items,
+ l_bin_assignments, 0, NULL, NULL);
+ if (err)
+ {
+ log_error("add_index_bin_test FAILED to set initial values for "
+ "bin_assignments: %d\n",
+ err);
return -1;
}
// Setup the kernel
err = clSetKernelArg(kernel, 0, sizeof(bin_counters), &bin_counters);
err |= clSetKernelArg(kernel, 1, sizeof(bins), &bins);
err |= clSetKernelArg(kernel, 2, sizeof(bin_assignments), &bin_assignments);
- err |= clSetKernelArg(kernel, 3, sizeof(max_counts_per_bin), &max_counts_per_bin);
- if (err) {
- log_error("add_index_bin_test FAILED to set kernel arguments: %d\n", err);
- fail=1; succeed=-1;
+ err |= clSetKernelArg(kernel, 3, sizeof(max_counts_per_bin),
+ &max_counts_per_bin);
+ if (err)
+ {
+ log_error("add_index_bin_test FAILED to set kernel arguments: %d\n",
+ err);
+ fail = 1;
return -1;
}
- err = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, global_threads, local_threads, 0, NULL, NULL );
- if (err) {
+ err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, global_threads,
+ local_threads, 0, NULL, NULL);
+ if (err)
+ {
log_error("add_index_bin_test FAILED to execute kernel: %d\n", err);
- fail=1; succeed=-1;
+ fail = 1;
}
- cl_int *final_bin_assignments = (cl_int*)malloc(sizeof(cl_int)*number_of_bins*max_counts_per_bin);
- if (!final_bin_assignments) {
- log_error("add_index_bin_test FAILED to allocate initial values for final_bin_assignments.\n");
+ cl_int *final_bin_assignments =
+ (cl_int *)malloc(sizeof(cl_int) * number_of_bins * max_counts_per_bin);
+ if (!final_bin_assignments)
+ {
+ log_error("add_index_bin_test FAILED to allocate initial values for "
+ "final_bin_assignments.\n");
return -1;
}
- err = clEnqueueReadBuffer( queue, bins, true, 0, sizeof(cl_int)*number_of_bins*max_counts_per_bin, final_bin_assignments, 0, NULL, NULL );
- if (err) {
+ err = clEnqueueReadBuffer(queue, bins, true, 0,
+ sizeof(cl_int) * number_of_bins
+ * max_counts_per_bin,
+ final_bin_assignments, 0, NULL, NULL);
+ if (err)
+ {
log_error("add_index_bin_test FAILED to read back bins: %d\n", err);
- fail = 1; succeed=-1;
+ fail = 1;
}
- cl_int *final_bin_counts = (cl_int*)malloc(sizeof(cl_int)*number_of_bins);
- if (!final_bin_counts) {
- log_error("add_index_bin_test FAILED to allocate initial values for final_bin_counts.\n");
+ cl_int *final_bin_counts =
+ (cl_int *)malloc(sizeof(cl_int) * number_of_bins);
+ if (!final_bin_counts)
+ {
+ log_error("add_index_bin_test FAILED to allocate initial values for "
+ "final_bin_counts.\n");
return -1;
}
- err = clEnqueueReadBuffer( queue, bin_counters, true, 0, sizeof(cl_int)*number_of_bins, final_bin_counts, 0, NULL, NULL );
- if (err) {
- log_error("add_index_bin_test FAILED to read back bin_counters: %d\n", err);
- fail = 1; succeed=-1;
+ err = clEnqueueReadBuffer(queue, bin_counters, true, 0,
+ sizeof(cl_int) * number_of_bins, final_bin_counts,
+ 0, NULL, NULL);
+ if (err)
+ {
+ log_error("add_index_bin_test FAILED to read back bin_counters: %d\n",
+ err);
+ fail = 1;
}
// Verification.
- int errors=0;
+ int errors = 0;
int current_bin;
int search;
// Print out all the contents of the bins.
// for (current_bin=0; current_bin<number_of_bins; current_bin++)
// for (search=0; search<max_counts_per_bin; search++)
- // log_info("[bin %d, entry %d] = %d\n", current_bin, search, final_bin_assignments[current_bin*max_counts_per_bin+search]);
+ // log_info("[bin %d, entry %d] = %d\n", current_bin, search,
+ // final_bin_assignments[current_bin*max_counts_per_bin+search]);
// First verify that there are the correct number in each bin.
- for (current_bin=0; current_bin<number_of_bins; current_bin++) {
+ for (current_bin = 0; current_bin < number_of_bins; current_bin++)
+ {
int expected_number = l_bin_counts[current_bin];
int actual_number = final_bin_counts[current_bin];
- if (expected_number != actual_number) {
- log_error("add_index_bin_test FAILED: bin %d reported %d entries when %d were expected.\n", current_bin, actual_number, expected_number);
+ if (expected_number != actual_number)
+ {
+ log_error("add_index_bin_test FAILED: bin %d reported %d entries "
+ "when %d were expected.\n",
+ current_bin, actual_number, expected_number);
errors++;
}
- for (search=0; search<expected_number; search++) {
- if (final_bin_assignments[current_bin*max_counts_per_bin+search] == -1) {
- log_error("add_index_bin_test FAILED: bin %d had no entry at position %d when it should have had %d entries.\n", current_bin, search, expected_number);
+ for (search = 0; search < expected_number; search++)
+ {
+ if (final_bin_assignments[current_bin * max_counts_per_bin + search]
+ == -1)
+ {
+ log_error("add_index_bin_test FAILED: bin %d had no entry at "
+ "position %d when it should have had %d entries.\n",
+ current_bin, search, expected_number);
errors++;
}
}
- for (search=expected_number; search<max_counts_per_bin; search++) {
- if (final_bin_assignments[current_bin*max_counts_per_bin+search] != -1) {
- log_error("add_index_bin_test FAILED: bin %d had an extra entry at position %d when it should have had only %d entries.\n", current_bin, search, expected_number);
+ for (search = expected_number; search < max_counts_per_bin; search++)
+ {
+ if (final_bin_assignments[current_bin * max_counts_per_bin + search]
+ != -1)
+ {
+ log_error(
+ "add_index_bin_test FAILED: bin %d had an extra entry at "
+ "position %d when it should have had only %d entries.\n",
+ current_bin, search, expected_number);
errors++;
}
}
}
// Now verify that the correct ones are in each bin
int index;
- for (index=0; index<number_of_items; index++) {
+ for (index = 0; index < number_of_items; index++)
+ {
int expected_bin = l_bin_assignments[index];
int found_it = 0;
- for (search=0; search<l_bin_counts[expected_bin]; search++) {
- if (final_bin_assignments[expected_bin*max_counts_per_bin+search] == index) {
+ for (search = 0; search < l_bin_counts[expected_bin]; search++)
+ {
+ if (final_bin_assignments[expected_bin * max_counts_per_bin
+ + search]
+ == index)
+ {
found_it = 1;
}
}
- if (found_it == 0) {
- log_error("add_index_bin_test FAILED: did not find item %d in bin %d.\n", index, expected_bin);
+ if (found_it == 0)
+ {
+ log_error(
+ "add_index_bin_test FAILED: did not find item %d in bin %d.\n",
+ index, expected_bin);
errors++;
}
}
@@ -341,41 +469,49 @@
clReleaseMemObject(bin_counters);
clReleaseMemObject(bins);
clReleaseMemObject(bin_assignments);
- if (errors == 0) {
- log_info("add_index_bin_test passed. Each item was put in the correct bin in parallel.\n");
+ if (errors == 0)
+ {
+ log_info("add_index_bin_test passed. Each item was put in the correct "
+ "bin in parallel.\n");
return 0;
- } else {
+ }
+ else
+ {
log_error("add_index_bin_test FAILED: %d errors.\n", errors);
return -1;
}
}
-int test_atomic_add_index_bin(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+int test_atomic_add_index_bin(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue, int num_elements)
{
//===== add_index_bin test
size_t numGlobalThreads = 2048;
- int iteration=0;
+ int iteration = 0;
int err, failed = 0;
- MTdata d = init_genrand( gRandomSeed );
+ MTdata d = init_genrand(gRandomSeed);
- /* Check if atomics are supported. */
- if (!is_extension_available(deviceID, "cl_khr_global_int32_base_atomics")) {
- log_info("Base atomics not supported (cl_khr_global_int32_base_atomics). Skipping test.\n");
- free_mtdata( d );
- return 0;
- }
+ /* Check if atomics are supported. */
+ if (!is_extension_available(deviceID, "cl_khr_global_int32_base_atomics"))
+ {
+ log_info("Base atomics not supported "
+ "(cl_khr_global_int32_base_atomics). Skipping test.\n");
+ free_mtdata(d);
+ return 0;
+ }
- for(iteration=0; iteration<10; iteration++) {
- log_info("add_index_bin_test with %d elements:\n", (int)numGlobalThreads);
- err = add_index_bin_test(&numGlobalThreads, queue, context, d);
- if (err) {
+ for (iteration = 0; iteration < 10; iteration++)
+ {
+ log_info("add_index_bin_test with %d elements:\n",
+ (int)numGlobalThreads);
+ err = add_index_bin_test(&numGlobalThreads, queue, context, d);
+ if (err)
+ {
failed++;
break;
}
- numGlobalThreads*=2;
+ numGlobalThreads *= 2;
}
- free_mtdata( d );
+ free_mtdata(d);
return failed;
}
-
-
diff --git a/test_conformance/basic/test_async_copy2D.cpp b/test_conformance/basic/test_async_copy2D.cpp
index 54633a3..bf3f155 100644
--- a/test_conformance/basic/test_async_copy2D.cpp
+++ b/test_conformance/basic/test_async_copy2D.cpp
@@ -53,7 +53,7 @@
for (int i = 0; i < lineCopiesPerWorkItem; i++) {
for (int j = 0; j < numElementsPerLine; j++) {
- const local_index = (get_local_id(0) * lineCopiesPerWorkItem + i) * dstStride + j;
+ const int local_index = (get_local_id(0) * lineCopiesPerWorkItem + i) * dstStride + j;
const int global_index = (get_global_id(0) * lineCopiesPerWorkItem + i) * dstStride + j;
dst[global_index] = localBuffer[local_index];
}
diff --git a/test_conformance/basic/test_enqueued_local_size.cpp b/test_conformance/basic/test_enqueued_local_size.cpp
index 91fe143..ea95df6 100644
--- a/test_conformance/basic/test_enqueued_local_size.cpp
+++ b/test_conformance/basic/test_enqueued_local_size.cpp
@@ -1,6 +1,6 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
-//
+//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
@@ -26,32 +26,33 @@
#include "procs.h"
-static const char *enqueued_local_size_2d_code =
-"__kernel void test_enqueued_local_size_2d(global int *dst)\n"
-"{\n"
-" if ((get_global_id(0) == 0) && (get_global_id(1) == 0))\n"
-" {\n"
-" dst[0] = (int)get_enqueued_local_size(0)\n;"
-" dst[1] = (int)get_enqueued_local_size(1)\n;"
-" }\n"
-"}\n";
+static const char *enqueued_local_size_2d_code = R"(
+__kernel void test_enqueued_local_size_2d(global int *dst)
+{
+ if ((get_global_id(0) == 0) && (get_global_id(1) == 0))
+ {
+ dst[0] = (int)get_enqueued_local_size(0);
+ dst[1] = (int)get_enqueued_local_size(1);
+ }
+}
+)";
-static const char *enqueued_local_size_1d_code =
-"__kernel void test_enqueued_local_size_1d(global int *dst)\n"
-"{\n"
-" int tid_x = get_global_id(0);\n"
-" if (get_global_id(0) == 0)\n"
-" {\n"
-" dst[tid_x] = (int)get_enqueued_local_size(0)\n;"
-" }\n"
-"}\n";
+static const char *enqueued_local_size_1d_code = R"(
+__kernel void test_enqueued_local_size_1d(global int *dst)
+{
+ int tid_x = get_global_id(0);
+ if (get_global_id(0) == 0)
+ {
+ dst[tid_x] = (int)get_enqueued_local_size(0);
+ }
+}
+)";
-static int
-verify_enqueued_local_size(int *result, size_t *expected, int n)
+static int verify_enqueued_local_size(int *result, size_t *expected, int n)
{
int i;
- for (i=0; i<n; i++)
+ for (i = 0; i < n; i++)
{
if (result[i] != (int)expected[i])
{
@@ -64,14 +65,14 @@
}
-int
-test_enqueued_local_size(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
+int test_enqueued_local_size(cl_device_id device, cl_context context,
+ cl_command_queue queue, int num_elements)
{
- cl_mem streams;
- cl_program program[2];
- cl_kernel kernel[2];
+ clMemWrapper stream;
+ clProgramWrapper program[2];
+ clKernelWrapper kernel[2];
- int *output_ptr;
+ cl_int output_ptr[2];
size_t globalsize[2];
size_t localsize[2];
int err;
@@ -97,34 +98,33 @@
}
}
- output_ptr = (int*)malloc(2 * sizeof(int));
-
- streams =
- clCreateBuffer(context, CL_MEM_READ_WRITE, 2 * sizeof(int), NULL, &err);
- test_error( err, "clCreateBuffer failed.");
+ stream = clCreateBuffer(context, CL_MEM_READ_WRITE, 2 * sizeof(cl_int),
+ nullptr, &err);
+ test_error(err, "clCreateBuffer failed.");
std::string cl_std = "-cl-std=CL";
cl_std += (get_device_cl_version(device) == Version(3, 0)) ? "3.0" : "2.0";
err = create_single_kernel_helper_with_build_options(
context, &program[0], &kernel[0], 1, &enqueued_local_size_1d_code,
"test_enqueued_local_size_1d", cl_std.c_str());
- test_error( err, "create_single_kernel_helper failed");
+ test_error(err, "create_single_kernel_helper failed");
err = create_single_kernel_helper_with_build_options(
context, &program[1], &kernel[1], 1, &enqueued_local_size_2d_code,
"test_enqueued_local_size_2d", cl_std.c_str());
- test_error( err, "create_single_kernel_helper failed");
+ test_error(err, "create_single_kernel_helper failed");
- err = clSetKernelArg(kernel[0], 0, sizeof streams, &streams);
- test_error( err, "clSetKernelArgs failed.");
- err = clSetKernelArg(kernel[1], 0, sizeof streams, &streams);
- test_error( err, "clSetKernelArgs failed.");
+ err = clSetKernelArg(kernel[0], 0, sizeof stream, &stream);
+ test_error(err, "clSetKernelArgs failed.");
+ err = clSetKernelArg(kernel[1], 0, sizeof stream, &stream);
+ test_error(err, "clSetKernelArgs failed.");
- globalsize[0] = (size_t)num_elements;
- globalsize[1] = (size_t)num_elements;
+ globalsize[0] = static_cast<size_t>(num_elements);
+ globalsize[1] = static_cast<size_t>(num_elements);
size_t max_wgs;
- err = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(max_wgs), &max_wgs, NULL);
- test_error( err, "clGetDeviceInfo failed.");
+ err = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_GROUP_SIZE,
+ sizeof(max_wgs), &max_wgs, nullptr);
+ test_error(err, "clGetDeviceInfo failed.");
localsize[0] = std::min<size_t>(16, max_wgs);
localsize[1] = std::min<size_t>(11, max_wgs / localsize[0]);
@@ -143,35 +143,31 @@
}
}
- err = clEnqueueNDRangeKernel(queue, kernel[1], 2, NULL, globalsize, localsize, 0, NULL, NULL);
- test_error( err, "clEnqueueNDRangeKernel failed.");
+ err = clEnqueueNDRangeKernel(queue, kernel[1], 2, nullptr, globalsize,
+ localsize, 0, nullptr, nullptr);
+ test_error(err, "clEnqueueNDRangeKernel failed.");
- err = clEnqueueReadBuffer(queue, streams, CL_TRUE, 0, 2*sizeof(int), output_ptr, 0, NULL, NULL);
- test_error( err, "clEnqueueReadBuffer failed.");
+ err = clEnqueueReadBuffer(queue, stream, CL_BLOCKING, 0, 2 * sizeof(int),
+ output_ptr, 0, nullptr, nullptr);
+ test_error(err, "clEnqueueReadBuffer failed.");
err = verify_enqueued_local_size(output_ptr, localsize, 2);
- globalsize[0] = (size_t)num_elements;
+ globalsize[0] = static_cast<size_t>(num_elements);
localsize[0] = 9;
if (use_uniform_work_groups && (globalsize[0] % localsize[0]))
{
globalsize[0] += (localsize[0] - (globalsize[0] % localsize[0]));
}
- err = clEnqueueNDRangeKernel(queue, kernel[1], 1, NULL, globalsize, localsize, 0, NULL, NULL);
- test_error( err, "clEnqueueNDRangeKernel failed.");
+ err = clEnqueueNDRangeKernel(queue, kernel[1], 1, nullptr, globalsize,
+ localsize, 0, nullptr, nullptr);
+ test_error(err, "clEnqueueNDRangeKernel failed.");
- err = clEnqueueReadBuffer(queue, streams, CL_TRUE, 0, 2*sizeof(int), output_ptr, 0, NULL, NULL);
- test_error( err, "clEnqueueReadBuffer failed.");
+ err = clEnqueueReadBuffer(queue, stream, CL_BLOCKING, 0, 2 * sizeof(int),
+ output_ptr, 0, nullptr, nullptr);
+ test_error(err, "clEnqueueReadBuffer failed.");
err = verify_enqueued_local_size(output_ptr, localsize, 1);
- // cleanup
- clReleaseMemObject(streams);
- clReleaseKernel(kernel[0]);
- clReleaseKernel(kernel[1]);
- clReleaseProgram(program[0]);
- clReleaseProgram(program[1]);
- free(output_ptr);
-
return err;
}
diff --git a/test_conformance/basic/test_progvar.cpp b/test_conformance/basic/test_progvar.cpp
index 9c872be..e202d27 100644
--- a/test_conformance/basic/test_progvar.cpp
+++ b/test_conformance/basic/test_progvar.cpp
@@ -15,12 +15,13 @@
//
#include "harness/compat.h"
-// Bug: Missing in spec: atomic_intptr_t is always supported if device is 32-bits.
+// Bug: Missing in spec: atomic_intptr_t is always supported if device is
+// 32-bits.
// Bug: Missing in spec: CL_DEVICE_GLOBAL_VARIABLE_PREFERRED_TOTAL_SIZE
#define FLUSH fflush(stdout)
-#define MAX_STR 16*1024
+#define MAX_STR 16 * 1024
#define ALIGNMENT 128
@@ -66,7 +67,11 @@
static size_t l_max_global_id0 = 0;
static cl_bool l_linker_available = false;
-#define check_error(errCode,msg,...) ((errCode != CL_SUCCESS) ? (log_error("ERROR: " msg "! (%s:%d)\n", ## __VA_ARGS__, __FILE__, __LINE__), 1) : 0)
+#define check_error(errCode, msg, ...) \
+ ((errCode != CL_SUCCESS) ? (log_error("ERROR: " msg "! (%s:%d)\n", \
+ ##__VA_ARGS__, __FILE__, __LINE__), \
+ 1) \
+ : 0)
////////////////////
// Info about types we can use for program scope variables.
@@ -75,110 +80,135 @@
class TypeInfo {
public:
- TypeInfo() :
- name(""),
- m_buf_elem_type(""),
- m_is_vecbase(false),
- m_is_atomic(false),
- m_is_like_size_t(false),
- m_is_bool(false),
- m_elem_type(0), m_num_elem(0),
- m_size(0),
- m_value_size(0)
- {}
- TypeInfo(const char* name_arg) :
- name(name_arg),
- m_buf_elem_type(name_arg),
- m_is_vecbase(false),
- m_is_atomic(false),
- m_is_like_size_t(false),
- m_is_bool(false),
- m_elem_type(0), m_num_elem(0),
- m_size(0),
- m_value_size(0)
- { }
+ TypeInfo()
+ : name(""), m_buf_elem_type(""), m_is_vecbase(false),
+ m_is_atomic(false), m_is_like_size_t(false), m_is_bool(false),
+ m_elem_type(0), m_num_elem(0), m_size(0), m_value_size(0)
+ {}
+ TypeInfo(const char* name_arg)
+ : name(name_arg), m_buf_elem_type(name_arg), m_is_vecbase(false),
+ m_is_atomic(false), m_is_like_size_t(false), m_is_bool(false),
+ m_elem_type(0), m_num_elem(0), m_size(0), m_value_size(0)
+ {}
// Vectors
- TypeInfo( TypeInfo* elem_type, int num_elem ) :
- m_is_vecbase(false),
- m_is_atomic(false),
- m_is_like_size_t(false),
- m_is_bool(false),
- m_elem_type(elem_type),
- m_num_elem(num_elem)
+ TypeInfo(TypeInfo* elem_type, int num_elem)
+ : m_is_vecbase(false), m_is_atomic(false), m_is_like_size_t(false),
+ m_is_bool(false), m_elem_type(elem_type), m_num_elem(num_elem)
{
- char the_name[10]; // long enough for longest vector type name "double16"
- snprintf(the_name,sizeof(the_name),"%s%d",elem_type->get_name_c_str(),m_num_elem);
+ char
+ the_name[10]; // long enough for longest vector type name "double16"
+ snprintf(the_name, sizeof(the_name), "%s%d",
+ elem_type->get_name_c_str(), m_num_elem);
this->name = std::string(the_name);
this->m_buf_elem_type = std::string(the_name);
this->m_value_size = num_elem * elem_type->get_size();
- if ( m_num_elem == 3 ) {
+ if (m_num_elem == 3)
+ {
this->m_size = 4 * elem_type->get_size();
- } else {
+ }
+ else
+ {
this->m_size = num_elem * elem_type->get_size();
}
}
const std::string& get_name(void) const { return name; }
const char* get_name_c_str(void) const { return name.c_str(); }
- TypeInfo& set_vecbase(void) { this->m_is_vecbase = true; return *this; }
- TypeInfo& set_atomic(void) { this->m_is_atomic = true; return *this; }
- TypeInfo& set_like_size_t(void) {
+ TypeInfo& set_vecbase(void)
+ {
+ this->m_is_vecbase = true;
+ return *this;
+ }
+ TypeInfo& set_atomic(void)
+ {
+ this->m_is_atomic = true;
+ return *this;
+ }
+ TypeInfo& set_like_size_t(void)
+ {
this->m_is_like_size_t = true;
- this->set_size( l_64bit_device ? 8 : 4 );
+ this->set_size(l_64bit_device ? 8 : 4);
this->m_buf_elem_type = l_64bit_device ? "ulong" : "uint";
return *this;
}
- TypeInfo& set_bool(void) { this->m_is_bool = true; return *this; }
- TypeInfo& set_size(size_t n) { this->m_value_size = this->m_size = n; return *this; }
- TypeInfo& set_buf_elem_type( const char* name ) { this->m_buf_elem_type = std::string(name); return *this; }
+ TypeInfo& set_bool(void)
+ {
+ this->m_is_bool = true;
+ return *this;
+ }
+ TypeInfo& set_size(size_t n)
+ {
+ this->m_value_size = this->m_size = n;
+ return *this;
+ }
+ TypeInfo& set_buf_elem_type(const char* name)
+ {
+ this->m_buf_elem_type = std::string(name);
+ return *this;
+ }
const TypeInfo* elem_type(void) const { return m_elem_type; }
int num_elem(void) const { return m_num_elem; }
- bool is_vecbase(void) const {return m_is_vecbase;}
- bool is_atomic(void) const {return m_is_atomic;}
- bool is_atomic_64bit(void) const {return m_is_atomic && m_size == 8;}
- bool is_like_size_t(void) const {return m_is_like_size_t;}
- bool is_bool(void) const {return m_is_bool;}
- size_t get_size(void) const {return m_size;}
- size_t get_value_size(void) const {return m_value_size;}
+ bool is_vecbase(void) const { return m_is_vecbase; }
+ bool is_atomic(void) const { return m_is_atomic; }
+ bool is_atomic_64bit(void) const { return m_is_atomic && m_size == 8; }
+ bool is_like_size_t(void) const { return m_is_like_size_t; }
+ bool is_bool(void) const { return m_is_bool; }
+ size_t get_size(void) const { return m_size; }
+ size_t get_value_size(void) const { return m_value_size; }
// When passing values of this type to a kernel, what buffer type
// should be used?
- const char* get_buf_elem_type(void) const { return m_buf_elem_type.c_str(); }
+ const char* get_buf_elem_type(void) const
+ {
+ return m_buf_elem_type.c_str();
+ }
- std::string as_string(const cl_uchar* value_ptr) const {
+ std::string as_string(const cl_uchar* value_ptr) const
+ {
// This method would be shorter if I had a real handle to element
// vector type.
- if ( this->is_bool() ) {
- std::string result( name );
+ if (this->is_bool())
+ {
+ std::string result(name);
result += "<";
result += (*value_ptr ? "true" : "false");
result += ", ";
char buf[10];
- sprintf(buf,"%02x",*value_ptr);
+ sprintf(buf, "%02x", *value_ptr);
result += buf;
result += ">";
return result;
- } else if ( this->num_elem() ) {
- std::string result( name );
+ }
+ else if (this->num_elem())
+ {
+ std::string result(name);
result += "<";
- for ( unsigned ielem = 0 ; ielem < this->num_elem() ; ielem++ ) {
+ for (unsigned ielem = 0; ielem < this->num_elem(); ielem++)
+ {
char buf[MAX_STR];
- if ( ielem ) result += ", ";
- for ( unsigned ibyte = 0; ibyte < this->m_elem_type->get_size() ; ibyte++ ) {
- sprintf(buf + 2*ibyte,"%02x", value_ptr[ ielem * this->m_elem_type->get_size() + ibyte ] );
+ if (ielem) result += ", ";
+ for (unsigned ibyte = 0; ibyte < this->m_elem_type->get_size();
+ ibyte++)
+ {
+ sprintf(buf + 2 * ibyte, "%02x",
+ value_ptr[ielem * this->m_elem_type->get_size()
+ + ibyte]);
}
result += buf;
}
result += ">";
return result;
- } else {
- std::string result( name );
+ }
+ else
+ {
+ std::string result(name);
result += "<";
char buf[MAX_STR];
- for ( unsigned ibyte = 0; ibyte < this->get_size() ; ibyte++ ) {
- sprintf(buf + 2*ibyte,"%02x", value_ptr[ ibyte ] );
+ for (unsigned ibyte = 0; ibyte < this->get_size(); ibyte++)
+ {
+ sprintf(buf + 2 * ibyte, "%02x", value_ptr[ibyte]);
}
result += buf;
result += ">";
@@ -189,51 +219,71 @@
// Initialize the given buffer to a constant value initialized as if it
// were from the INIT_VAR macro below.
// Only needs to support values 0 and 1.
- void init( cl_uchar* buf, cl_uchar val) const {
- if ( this->num_elem() ) {
- for ( unsigned ielem = 0 ; ielem < this->num_elem() ; ielem++ ) {
+ void init(cl_uchar* buf, cl_uchar val) const
+ {
+ if (this->num_elem())
+ {
+ for (unsigned ielem = 0; ielem < this->num_elem(); ielem++)
+ {
// Delegate!
- this->init_elem( buf + ielem * this->get_value_size()/this->num_elem(), val );
+ this->init_elem(
+ buf + ielem * this->get_value_size() / this->num_elem(),
+ val);
}
- } else {
- init_elem( buf, val );
+ }
+ else
+ {
+ init_elem(buf, val);
}
}
private:
- void init_elem( cl_uchar* buf, cl_uchar val ) const {
- size_t elem_size = this->num_elem() ? this->get_value_size()/this->num_elem() : this->get_size();
- memset(buf,0,elem_size);
- if ( val ) {
- if ( strstr( name.c_str(), "float" ) ) {
+ void init_elem(cl_uchar* buf, cl_uchar val) const
+ {
+ size_t elem_size = this->num_elem()
+ ? this->get_value_size() / this->num_elem()
+ : this->get_size();
+ memset(buf, 0, elem_size);
+ if (val)
+ {
+ if (strstr(name.c_str(), "float"))
+ {
*(float*)buf = (float)val;
return;
}
- if ( strstr( name.c_str(), "double" ) ) {
+ if (strstr(name.c_str(), "double"))
+ {
*(double*)buf = (double)val;
return;
}
- if ( this->is_bool() ) { *buf = (bool)val; return; }
+ if (this->is_bool())
+ {
+ *buf = (bool)val;
+ return;
+ }
// Write a single character value to the correct spot,
// depending on host endianness.
- if ( l_host_is_big_endian ) *(buf + elem_size-1) = (cl_uchar)val;
- else *buf = (cl_uchar)val;
+ if (l_host_is_big_endian)
+ *(buf + elem_size - 1) = (cl_uchar)val;
+ else
+ *buf = (cl_uchar)val;
}
}
-public:
- void dump(FILE* fp) const {
- fprintf(fp,"Type %s : <%d,%d,%s> ", name.c_str(),
- (int)m_size,
- (int)m_value_size,
- m_buf_elem_type.c_str() );
- if ( this->m_elem_type ) fprintf(fp, " vec(%s,%d)", this->m_elem_type->get_name_c_str(), this->num_elem() );
- if ( this->m_is_vecbase ) fprintf(fp, " vecbase");
- if ( this->m_is_bool ) fprintf(fp, " bool");
- if ( this->m_is_like_size_t ) fprintf(fp, " like-size_t");
- if ( this->m_is_atomic ) fprintf(fp, " atomic");
- fprintf(fp,"\n");
+public:
+ void dump(FILE* fp) const
+ {
+ fprintf(fp, "Type %s : <%d,%d,%s> ", name.c_str(), (int)m_size,
+ (int)m_value_size, m_buf_elem_type.c_str());
+ if (this->m_elem_type)
+ fprintf(fp, " vec(%s,%d)", this->m_elem_type->get_name_c_str(),
+ this->num_elem());
+ if (this->m_is_vecbase) fprintf(fp, " vecbase");
+ if (this->m_is_bool) fprintf(fp, " bool");
+ if (this->m_is_like_size_t) fprintf(fp, " like-size_t");
+ if (this->m_is_atomic) fprintf(fp, " atomic");
+ fprintf(fp, "\n");
fflush(fp);
}
@@ -246,7 +296,8 @@
bool m_is_like_size_t;
bool m_is_bool;
size_t m_size; // Number of bytes of storage occupied by this type.
- size_t m_value_size; // Number of bytes of value significant for this type. Differs for vec3.
+ size_t m_value_size; // Number of bytes of value significant for this type.
+ // Differs for vec3.
// When passing values of this type to a kernel, what buffer type
// should be used?
@@ -256,46 +307,65 @@
};
-#define NUM_SCALAR_TYPES (8+2) // signed and unsigned integral types, float and double
-#define NUM_VECTOR_SIZES (5) // 2,3,4,8,16
-#define NUM_PLAIN_TYPES \
- 5 /*boolean and size_t family */ \
- + NUM_SCALAR_TYPES \
- + NUM_SCALAR_TYPES*NUM_VECTOR_SIZES \
- + 10 /* atomic types */
+#define NUM_SCALAR_TYPES \
+ (8 + 2) // signed and unsigned integral types, float and double
+#define NUM_VECTOR_SIZES (5) // 2,3,4,8,16
+#define NUM_PLAIN_TYPES \
+ 5 /*boolean and size_t family */ \
+ + NUM_SCALAR_TYPES + NUM_SCALAR_TYPES* NUM_VECTOR_SIZES \
+ + 10 /* atomic types */
// Need room for plain, array, pointer, struct
-#define MAX_TYPES (4*NUM_PLAIN_TYPES)
+#define MAX_TYPES (4 * NUM_PLAIN_TYPES)
static TypeInfo type_info[MAX_TYPES];
static int num_type_info = 0; // Number of valid entries in type_info[]
-
-
// A helper class to form kernel source arguments for clCreateProgramWithSource.
class StringTable {
public:
- StringTable() : m_c_strs(NULL), m_lengths(NULL), m_frozen(false), m_strings() {}
+ StringTable(): m_c_strs(NULL), m_lengths(NULL), m_frozen(false), m_strings()
+ {}
~StringTable() { release_frozen(); }
- void add(std::string s) { release_frozen(); m_strings.push_back(s); }
+ void add(std::string s)
+ {
+ release_frozen();
+ m_strings.push_back(s);
+ }
- const size_t num_str() { freeze(); return m_strings.size(); }
- const char** strs() { freeze(); return m_c_strs; }
- const size_t* lengths() { freeze(); return m_lengths; }
+ const size_t num_str()
+ {
+ freeze();
+ return m_strings.size();
+ }
+ const char** strs()
+ {
+ freeze();
+ return m_c_strs;
+ }
+ const size_t* lengths()
+ {
+ freeze();
+ return m_lengths;
+ }
private:
- void freeze(void) {
- if ( !m_frozen ) {
+ void freeze(void)
+ {
+ if (!m_frozen)
+ {
release_frozen();
- m_c_strs = (const char**) malloc(sizeof(const char*) * m_strings.size());
- m_lengths = (size_t*) malloc(sizeof(size_t) * m_strings.size());
- assert( m_c_strs );
- assert( m_lengths );
+ m_c_strs =
+ (const char**)malloc(sizeof(const char*) * m_strings.size());
+ m_lengths = (size_t*)malloc(sizeof(size_t) * m_strings.size());
+ assert(m_c_strs);
+ assert(m_lengths);
- for ( size_t i = 0; i < m_strings.size() ; i++ ) {
+ for (size_t i = 0; i < m_strings.size(); i++)
+ {
m_c_strs[i] = m_strings[i].c_str();
m_lengths[i] = strlen(m_c_strs[i]);
}
@@ -303,9 +373,18 @@
m_frozen = true;
}
}
- void release_frozen(void) {
- if ( m_c_strs ) { free(m_c_strs); m_c_strs = 0; }
- if ( m_lengths ) { free(m_lengths); m_lengths = 0; }
+ void release_frozen(void)
+ {
+ if (m_c_strs)
+ {
+ free(m_c_strs);
+ m_c_strs = 0;
+ }
+ if (m_lengths)
+ {
+ free(m_lengths);
+ m_lengths = 0;
+ }
m_frozen = false;
}
@@ -325,11 +404,15 @@
static const char* l_get_cles_int64_pragma(void);
static int l_build_type_table(cl_device_id device);
-static int l_get_device_info(cl_device_id device, size_t* max_size_ret, size_t* pref_size_ret);
+static int l_get_device_info(cl_device_id device, size_t* max_size_ret,
+ size_t* pref_size_ret);
-static void l_set_randomly( cl_uchar* buf, size_t buf_size, RandomSeed& rand_state );
-static int l_compare( const cl_uchar* expected, const cl_uchar* received, unsigned num_values, const TypeInfo&ti );
-static int l_copy( cl_uchar* dest, unsigned dest_idx, const cl_uchar* src, unsigned src_idx, const TypeInfo&ti );
+static void l_set_randomly(cl_uchar* buf, size_t buf_size,
+ RandomSeed& rand_state);
+static int l_compare(const cl_uchar* expected, const cl_uchar* received,
+ unsigned num_values, const TypeInfo& ti);
+static int l_copy(cl_uchar* dest, unsigned dest_idx, const cl_uchar* src,
+ unsigned src_idx, const TypeInfo& ti);
static std::string conversion_functions(const TypeInfo& ti);
static std::string global_decls(const TypeInfo& ti, bool with_init);
@@ -337,90 +420,123 @@
static std::string writer_function(const TypeInfo& ti);
static std::string reader_function(const TypeInfo& ti);
-static int l_write_read( cl_device_id device, cl_context context, cl_command_queue queue );
-static int l_write_read_for_type( cl_device_id device, cl_context context, cl_command_queue queue, const TypeInfo& ti, RandomSeed& rand_state );
+static int l_write_read(cl_device_id device, cl_context context,
+ cl_command_queue queue);
+static int l_write_read_for_type(cl_device_id device, cl_context context,
+ cl_command_queue queue, const TypeInfo& ti,
+ RandomSeed& rand_state);
-static int l_init_write_read( cl_device_id device, cl_context context, cl_command_queue queue );
-static int l_init_write_read_for_type( cl_device_id device, cl_context context, cl_command_queue queue, const TypeInfo& ti, RandomSeed& rand_state );
+static int l_init_write_read(cl_device_id device, cl_context context,
+ cl_command_queue queue);
+static int l_init_write_read_for_type(cl_device_id device, cl_context context,
+ cl_command_queue queue,
+ const TypeInfo& ti,
+ RandomSeed& rand_state);
-static int l_capacity( cl_device_id device, cl_context context, cl_command_queue queue, size_t max_size );
-static int l_user_type( cl_device_id device, cl_context context, cl_command_queue queue, size_t max_size, bool separate_compilation );
-
+static int l_capacity(cl_device_id device, cl_context context,
+ cl_command_queue queue, size_t max_size);
+static int l_user_type(cl_device_id device, cl_context context,
+ cl_command_queue queue, size_t max_size,
+ bool separate_compilation);
////////////////////
// File scope function definitions
-static cl_int print_build_log(cl_program program, cl_uint num_devices, cl_device_id *device_list, cl_uint count, const char **strings, const size_t *lengths, const char* options)
+static cl_int print_build_log(cl_program program, cl_uint num_devices,
+ cl_device_id* device_list, cl_uint count,
+ const char** strings, const size_t* lengths,
+ const char* options)
{
cl_uint i;
cl_int error;
BufferOwningPtr<cl_device_id> devices;
- if(num_devices == 0 || device_list == NULL)
+ if (num_devices == 0 || device_list == NULL)
{
- error = clGetProgramInfo(program, CL_PROGRAM_NUM_DEVICES, sizeof(num_devices), &num_devices, NULL);
+ error = clGetProgramInfo(program, CL_PROGRAM_NUM_DEVICES,
+ sizeof(num_devices), &num_devices, NULL);
test_error(error, "clGetProgramInfo CL_PROGRAM_NUM_DEVICES failed");
- device_list = (cl_device_id*)malloc(sizeof(cl_device_id)*num_devices);
+ device_list = (cl_device_id*)malloc(sizeof(cl_device_id) * num_devices);
devices.reset(device_list);
memset(device_list, 0, sizeof(cl_device_id) * num_devices);
- error = clGetProgramInfo(program, CL_PROGRAM_DEVICES, sizeof(cl_device_id) * num_devices, device_list, NULL);
+ error = clGetProgramInfo(program, CL_PROGRAM_DEVICES,
+ sizeof(cl_device_id) * num_devices,
+ device_list, NULL);
test_error(error, "clGetProgramInfo CL_PROGRAM_DEVICES failed");
}
cl_uint z;
bool sourcePrinted = false;
- for(z = 0; z < num_devices; z++)
+ for (z = 0; z < num_devices; z++)
{
char deviceName[4096] = "";
- error = clGetDeviceInfo(device_list[z], CL_DEVICE_NAME, sizeof(deviceName), deviceName, NULL);
- check_error(error, "Device \"%d\" failed to return a name. clGetDeviceInfo CL_DEVICE_NAME failed", z);
+ error = clGetDeviceInfo(device_list[z], CL_DEVICE_NAME,
+ sizeof(deviceName), deviceName, NULL);
+ check_error(error,
+ "Device \"%d\" failed to return a name. clGetDeviceInfo "
+ "CL_DEVICE_NAME failed",
+ z);
cl_build_status buildStatus;
- error = clGetProgramBuildInfo(program, device_list[z], CL_PROGRAM_BUILD_STATUS, sizeof(buildStatus), &buildStatus, NULL);
- check_error(error, "clGetProgramBuildInfo CL_PROGRAM_BUILD_STATUS failed");
+ error = clGetProgramBuildInfo(program, device_list[z],
+ CL_PROGRAM_BUILD_STATUS,
+ sizeof(buildStatus), &buildStatus, NULL);
+ check_error(error,
+ "clGetProgramBuildInfo CL_PROGRAM_BUILD_STATUS failed");
- if(buildStatus != CL_BUILD_SUCCESS)
+ if (buildStatus != CL_BUILD_SUCCESS)
{
- if(!sourcePrinted)
+ if (!sourcePrinted)
{
log_error("Build options: %s\n", options);
- if(count && strings)
+ if (count && strings)
{
log_error("Original source is: ------------\n");
- for(i = 0; i < count; i++) log_error("%s", strings[i]);
+ for (i = 0; i < count; i++) log_error("%s", strings[i]);
}
sourcePrinted = true;
}
char statusString[64] = "";
if (buildStatus == (cl_build_status)CL_BUILD_SUCCESS)
- sprintf(statusString, "CL_BUILD_SUCCESS");
+ sprintf(statusString, "CL_BUILD_SUCCESS");
else if (buildStatus == (cl_build_status)CL_BUILD_NONE)
- sprintf(statusString, "CL_BUILD_NONE");
+ sprintf(statusString, "CL_BUILD_NONE");
else if (buildStatus == (cl_build_status)CL_BUILD_ERROR)
- sprintf(statusString, "CL_BUILD_ERROR");
+ sprintf(statusString, "CL_BUILD_ERROR");
else if (buildStatus == (cl_build_status)CL_BUILD_IN_PROGRESS)
- sprintf(statusString, "CL_BUILD_IN_PROGRESS");
+ sprintf(statusString, "CL_BUILD_IN_PROGRESS");
else
- sprintf(statusString, "UNKNOWN (%d)", buildStatus);
+ sprintf(statusString, "UNKNOWN (%d)", buildStatus);
- log_error("Build not successful for device \"%s\", status: %s\n", deviceName, statusString);
+ log_error("Build not successful for device \"%s\", status: %s\n",
+ deviceName, statusString);
size_t paramSize = 0;
- error = clGetProgramBuildInfo(program, device_list[z], CL_PROGRAM_BUILD_LOG, 0, NULL, &paramSize);
- if(check_error(error, "clGetProgramBuildInfo CL_PROGRAM_BUILD_LOG failed")) break;
+ error = clGetProgramBuildInfo(program, device_list[z],
+ CL_PROGRAM_BUILD_LOG, 0, NULL,
+ &paramSize);
+ if (check_error(
+ error, "clGetProgramBuildInfo CL_PROGRAM_BUILD_LOG failed"))
+ break;
std::string log;
- log.resize(paramSize/sizeof(char));
+ log.resize(paramSize / sizeof(char));
- error = clGetProgramBuildInfo(program, device_list[z], CL_PROGRAM_BUILD_LOG, paramSize, &log[0], NULL);
- if(check_error(error, "Device %d (%s) failed to return a build log", z, deviceName)) break;
- if(log[0] == 0) log_error("clGetProgramBuildInfo returned an empty log.\n");
+ error = clGetProgramBuildInfo(program, device_list[z],
+ CL_PROGRAM_BUILD_LOG, paramSize,
+ &log[0], NULL);
+ if (check_error(error,
+ "Device %d (%s) failed to return a build log", z,
+ deviceName))
+ break;
+ if (log[0] == 0)
+ log_error("clGetProgramBuildInfo returned an empty log.\n");
else
{
log_error("Build log:\n", deviceName);
@@ -433,25 +549,29 @@
static void l_load_abilities(cl_device_id device)
{
- l_has_half = is_extension_available(device,"cl_khr_fp16");
- l_has_double = is_extension_available(device,"cl_khr_fp64");
- l_has_cles_int64 = is_extension_available(device,"cles_khr_int64");
+ l_has_half = is_extension_available(device, "cl_khr_fp16");
+ l_has_double = is_extension_available(device, "cl_khr_fp64");
+ l_has_cles_int64 = is_extension_available(device, "cles_khr_int64");
- l_has_int64_atomics
- = is_extension_available(device,"cl_khr_int64_base_atomics")
- && is_extension_available(device,"cl_khr_int64_extended_atomics");
+ l_has_int64_atomics =
+ is_extension_available(device, "cl_khr_int64_base_atomics")
+ && is_extension_available(device, "cl_khr_int64_extended_atomics");
{
int status = CL_SUCCESS;
cl_uint addr_bits = 32;
- status = clGetDeviceInfo(device,CL_DEVICE_ADDRESS_BITS,sizeof(addr_bits),&addr_bits,0);
- l_64bit_device = ( status == CL_SUCCESS && addr_bits == 64 );
+ status = clGetDeviceInfo(device, CL_DEVICE_ADDRESS_BITS,
+ sizeof(addr_bits), &addr_bits, 0);
+ l_64bit_device = (status == CL_SUCCESS && addr_bits == 64);
}
// 32-bit devices always have intptr atomics.
l_has_intptr_atomics = !l_64bit_device || l_has_int64_atomics;
- union { char c[4]; int i; } probe;
+ union {
+ char c[4];
+ int i;
+ } probe;
probe.i = 1;
l_host_is_big_endian = !probe.c[0];
@@ -459,33 +579,40 @@
{
int status = CL_SUCCESS;
cl_uint max_dim = 0;
- status = clGetDeviceInfo(device,CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS,sizeof(max_dim),&max_dim,0);
- assert( status == CL_SUCCESS );
- assert( max_dim > 0 );
+ status = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS,
+ sizeof(max_dim), &max_dim, 0);
+ assert(status == CL_SUCCESS);
+ assert(max_dim > 0);
size_t max_id[3];
max_id[0] = 0;
- status = clGetDeviceInfo(device,CL_DEVICE_MAX_WORK_ITEM_SIZES,max_dim*sizeof(size_t),&max_id[0],0);
- assert( status == CL_SUCCESS );
+ status = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES,
+ max_dim * sizeof(size_t), &max_id[0], 0);
+ assert(status == CL_SUCCESS);
l_max_global_id0 = max_id[0];
}
{ // Is separate compilation supported?
int status = CL_SUCCESS;
l_linker_available = false;
- status = clGetDeviceInfo(device,CL_DEVICE_LINKER_AVAILABLE,sizeof(l_linker_available),&l_linker_available,0);
- assert( status == CL_SUCCESS );
+ status =
+ clGetDeviceInfo(device, CL_DEVICE_LINKER_AVAILABLE,
+ sizeof(l_linker_available), &l_linker_available, 0);
+ assert(status == CL_SUCCESS);
}
}
static const char* l_get_fp64_pragma(void)
{
- return l_has_double ? "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n" : "";
+ return l_has_double ? "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
+ : "";
}
static const char* l_get_cles_int64_pragma(void)
{
- return l_has_cles_int64 ? "#pragma OPENCL EXTENSION cles_khr_int64 : enable\n" : "";
+ return l_has_cles_int64
+ ? "#pragma OPENCL EXTENSION cles_khr_int64 : enable\n"
+ : "";
}
static const char* l_get_int64_atomic_pragma(void)
@@ -500,89 +627,83 @@
size_t iscalar = 0;
size_t ivecsize = 0;
int vecsizes[] = { 2, 3, 4, 8, 16 };
- const char* vecbase[] = {
- "uchar", "char",
- "ushort", "short",
- "uint", "int",
- "ulong", "long",
- "float",
- "double"
- };
- int vecbase_size[] = {
- 1, 1,
- 2, 2,
- 4, 4,
- 8, 8,
- 4,
- 8
- };
- const char* like_size_t[] = {
- "intptr_t",
- "uintptr_t",
- "size_t",
- "ptrdiff_t"
- };
+ const char* vecbase[] = { "uchar", "char", "ushort", "short", "uint",
+ "int", "ulong", "long", "float", "double" };
+ int vecbase_size[] = { 1, 1, 2, 2, 4, 4, 8, 8, 4, 8 };
+ const char* like_size_t[] = { "intptr_t", "uintptr_t", "size_t",
+ "ptrdiff_t" };
const char* atomics[] = {
- "atomic_int", "atomic_uint",
- "atomic_long", "atomic_ulong",
- "atomic_float",
- "atomic_double",
+ "atomic_int", "atomic_uint", "atomic_long",
+ "atomic_ulong", "atomic_float", "atomic_double",
};
- int atomics_size[] = {
- 4, 4,
- 8, 8,
- 4,
- 8
- };
- const char* intptr_atomics[] = {
- "atomic_intptr_t",
- "atomic_uintptr_t",
- "atomic_size_t",
- "atomic_ptrdiff_t"
- };
+ int atomics_size[] = { 4, 4, 8, 8, 4, 8 };
+ const char* intptr_atomics[] = { "atomic_intptr_t", "atomic_uintptr_t",
+ "atomic_size_t", "atomic_ptrdiff_t" };
l_load_abilities(device);
num_type_info = 0;
// Boolean.
- type_info[ num_type_info++ ] = TypeInfo( "bool" ).set_bool().set_size(1).set_buf_elem_type("uchar");
+ type_info[num_type_info++] =
+ TypeInfo("bool").set_bool().set_size(1).set_buf_elem_type("uchar");
// Vector types, and the related scalar element types.
- for ( iscalar=0; iscalar < sizeof(vecbase)/sizeof(vecbase[0]) ; ++iscalar ) {
- if ( !gHasLong && strstr(vecbase[iscalar],"long") ) continue;
- if ( !l_has_double && strstr(vecbase[iscalar],"double") ) continue;
+ for (iscalar = 0; iscalar < sizeof(vecbase) / sizeof(vecbase[0]); ++iscalar)
+ {
+ if (!gHasLong && strstr(vecbase[iscalar], "long")) continue;
+ if (!l_has_double && strstr(vecbase[iscalar], "double")) continue;
// Scalar
TypeInfo* elem_type = type_info + num_type_info++;
- *elem_type = TypeInfo( vecbase[iscalar] ).set_vecbase().set_size( vecbase_size[iscalar] );
+ *elem_type = TypeInfo(vecbase[iscalar])
+ .set_vecbase()
+ .set_size(vecbase_size[iscalar]);
// Vector
- for ( ivecsize=0; ivecsize < sizeof(vecsizes)/sizeof(vecsizes[0]) ; ivecsize++ ) {
- type_info[ num_type_info++ ] = TypeInfo( elem_type, vecsizes[ivecsize] );
+ for (ivecsize = 0; ivecsize < sizeof(vecsizes) / sizeof(vecsizes[0]);
+ ivecsize++)
+ {
+ type_info[num_type_info++] =
+ TypeInfo(elem_type, vecsizes[ivecsize]);
}
}
// Size_t-like types
- for ( iscalar=0; iscalar < sizeof(like_size_t)/sizeof(like_size_t[0]) ; ++iscalar ) {
- type_info[ num_type_info++ ] = TypeInfo( like_size_t[iscalar] ).set_like_size_t();
+ for (iscalar = 0; iscalar < sizeof(like_size_t) / sizeof(like_size_t[0]);
+ ++iscalar)
+ {
+ type_info[num_type_info++] =
+ TypeInfo(like_size_t[iscalar]).set_like_size_t();
}
// Atomic types.
- for ( iscalar=0; iscalar < sizeof(atomics)/sizeof(atomics[0]) ; ++iscalar ) {
- if ( !l_has_int64_atomics && strstr(atomics[iscalar],"long") ) continue;
- if ( !(l_has_int64_atomics && l_has_double) && strstr(atomics[iscalar],"double") ) continue;
+ for (iscalar = 0; iscalar < sizeof(atomics) / sizeof(atomics[0]); ++iscalar)
+ {
+ if (!l_has_int64_atomics && strstr(atomics[iscalar], "long")) continue;
+ if (!(l_has_int64_atomics && l_has_double)
+ && strstr(atomics[iscalar], "double"))
+ continue;
// The +7 is used to skip over the "atomic_" prefix.
const char* buf_type = atomics[iscalar] + 7;
- type_info[ num_type_info++ ] = TypeInfo( atomics[iscalar] ).set_atomic().set_size( atomics_size[iscalar] ).set_buf_elem_type( buf_type );
+ type_info[num_type_info++] = TypeInfo(atomics[iscalar])
+ .set_atomic()
+ .set_size(atomics_size[iscalar])
+ .set_buf_elem_type(buf_type);
}
- if ( l_has_intptr_atomics ) {
- for ( iscalar=0; iscalar < sizeof(intptr_atomics)/sizeof(intptr_atomics[0]) ; ++iscalar ) {
- type_info[ num_type_info++ ] = TypeInfo( intptr_atomics[iscalar] ).set_atomic().set_like_size_t();
+ if (l_has_intptr_atomics)
+ {
+ for (iscalar = 0;
+ iscalar < sizeof(intptr_atomics) / sizeof(intptr_atomics[0]);
+ ++iscalar)
+ {
+ type_info[num_type_info++] = TypeInfo(intptr_atomics[iscalar])
+ .set_atomic()
+ .set_like_size_t();
}
}
- assert( num_type_info <= MAX_TYPES ); // or increase MAX_TYPES
+ assert(num_type_info <= MAX_TYPES); // or increase MAX_TYPES
#if 0
for ( size_t i = 0 ; i < num_type_info ; i++ ) {
@@ -594,7 +715,7 @@
return status;
}
-static const TypeInfo& l_find_type( const char* name )
+static const TypeInfo& l_find_type(const char* name)
{
auto itr =
std::find_if(type_info, type_info + num_type_info,
@@ -604,36 +725,54 @@
}
+// Populate return parameters for max program variable size, preferred program
+// variable size.
-// Populate return parameters for max program variable size, preferred program variable size.
-
-static int l_get_device_info(cl_device_id device, size_t* max_size_ret, size_t* pref_size_ret)
+static int l_get_device_info(cl_device_id device, size_t* max_size_ret,
+ size_t* pref_size_ret)
{
int err = CL_SUCCESS;
size_t return_size = 0;
- err = clGetDeviceInfo(device, CL_DEVICE_MAX_GLOBAL_VARIABLE_SIZE, sizeof(*max_size_ret), max_size_ret, &return_size);
- if ( err != CL_SUCCESS ) {
- log_error("Error: Failed to get device info for CL_DEVICE_MAX_GLOBAL_VARIABLE_SIZE\n");
+ err = clGetDeviceInfo(device, CL_DEVICE_MAX_GLOBAL_VARIABLE_SIZE,
+ sizeof(*max_size_ret), max_size_ret, &return_size);
+ if (err != CL_SUCCESS)
+ {
+ log_error("Error: Failed to get device info for "
+ "CL_DEVICE_MAX_GLOBAL_VARIABLE_SIZE\n");
return err;
}
- if ( return_size != sizeof(size_t) ) {
- log_error("Error: Invalid size %d returned for CL_DEVICE_MAX_GLOBAL_VARIABLE_SIZE\n", (int)return_size );
+ if (return_size != sizeof(size_t))
+ {
+ log_error("Error: Invalid size %d returned for "
+ "CL_DEVICE_MAX_GLOBAL_VARIABLE_SIZE\n",
+ (int)return_size);
return 1;
}
- if ( return_size != sizeof(size_t) ) {
- log_error("Error: Invalid size %d returned for CL_DEVICE_MAX_GLOBAL_VARIABLE_SIZE\n", (int)return_size );
+ if (return_size != sizeof(size_t))
+ {
+ log_error("Error: Invalid size %d returned for "
+ "CL_DEVICE_MAX_GLOBAL_VARIABLE_SIZE\n",
+ (int)return_size);
return 1;
}
return_size = 0;
- err = clGetDeviceInfo(device, CL_DEVICE_GLOBAL_VARIABLE_PREFERRED_TOTAL_SIZE, sizeof(*pref_size_ret), pref_size_ret, &return_size);
- if ( err != CL_SUCCESS ) {
- log_error("Error: Failed to get device info for CL_DEVICE_GLOBAL_VARIABLE_PREFERRED_TOTAL_SIZE: %d\n",err);
+ err =
+ clGetDeviceInfo(device, CL_DEVICE_GLOBAL_VARIABLE_PREFERRED_TOTAL_SIZE,
+ sizeof(*pref_size_ret), pref_size_ret, &return_size);
+ if (err != CL_SUCCESS)
+ {
+ log_error("Error: Failed to get device info for "
+ "CL_DEVICE_GLOBAL_VARIABLE_PREFERRED_TOTAL_SIZE: %d\n",
+ err);
return err;
}
- if ( return_size != sizeof(size_t) ) {
- log_error("Error: Invalid size %d returned for CL_DEVICE_GLOBAL_VARIABLE_PREFERRED_TOTAL_SIZE\n", (int)return_size );
+ if (return_size != sizeof(size_t))
+ {
+ log_error("Error: Invalid size %d returned for "
+ "CL_DEVICE_GLOBAL_VARIABLE_PREFERRED_TOTAL_SIZE\n",
+ (int)return_size);
return 1;
}
@@ -641,11 +780,13 @@
}
-static void l_set_randomly( cl_uchar* buf, size_t buf_size, RandomSeed& rand_state )
+static void l_set_randomly(cl_uchar* buf, size_t buf_size,
+ RandomSeed& rand_state)
{
- assert( 0 == (buf_size % sizeof(cl_uint) ) );
- for ( size_t i = 0; i < buf_size ; i += sizeof(cl_uint) ) {
- *( (cl_uint*)(buf + i) ) = genrand_int32( rand_state );
+ assert(0 == (buf_size % sizeof(cl_uint)));
+ for (size_t i = 0; i < buf_size; i += sizeof(cl_uint))
+ {
+ *((cl_uint*)(buf + i)) = genrand_int32(rand_state);
}
#if 0
for ( size_t i = 0; i < buf_size ; i++ ) {
@@ -657,20 +798,23 @@
// Return num_value values of the given type.
// Returns CL_SUCCESS if they compared as equal.
-static int l_compare( const char* test_name, const cl_uchar* expected, const cl_uchar* received, size_t num_values, const TypeInfo&ti )
+static int l_compare(const char* test_name, const cl_uchar* expected,
+ const cl_uchar* received, size_t num_values,
+ const TypeInfo& ti)
{
// Compare only the valid returned bytes.
- for ( unsigned value_idx = 0; value_idx < num_values; value_idx++ ) {
+ for (unsigned value_idx = 0; value_idx < num_values; value_idx++)
+ {
const cl_uchar* expv = expected + value_idx * ti.get_size();
const cl_uchar* gotv = received + value_idx * ti.get_size();
- if ( memcmp( expv, gotv, ti.get_value_size() ) ) {
- std::string exp_str = ti.as_string( expv );
- std::string got_str = ti.as_string( gotv );
- log_error("Error: %s test for type %s, at index %d: Expected %s got %s\n",
- test_name,
- ti.get_name_c_str(), value_idx,
- exp_str.c_str(),
- got_str.c_str() );
+ if (memcmp(expv, gotv, ti.get_value_size()))
+ {
+ std::string exp_str = ti.as_string(expv);
+ std::string got_str = ti.as_string(gotv);
+ log_error(
+ "Error: %s test for type %s, at index %d: Expected %s got %s\n",
+ test_name, ti.get_name_c_str(), value_idx, exp_str.c_str(),
+ got_str.c_str());
return 1;
}
}
@@ -678,11 +822,12 @@
}
// Copy a target value from src[idx] to dest[idx]
-static int l_copy( cl_uchar* dest, unsigned dest_idx, const cl_uchar* src, unsigned src_idx, const TypeInfo&ti )
+static int l_copy(cl_uchar* dest, unsigned dest_idx, const cl_uchar* src,
+ unsigned src_idx, const TypeInfo& ti)
{
- cl_uchar* raw_dest = dest + dest_idx * ti.get_size();
- const cl_uchar* raw_src = src + src_idx * ti.get_size();
- memcpy( raw_dest, raw_src, ti.get_value_size() );
+ cl_uchar* raw_dest = dest + dest_idx * ti.get_size();
+ const cl_uchar* raw_src = src + src_idx * ti.get_size();
+ memcpy(raw_dest, raw_src, ti.get_value_size());
return 0;
}
@@ -694,59 +839,70 @@
static char buf[MAX_STR];
int num_printed = 0;
// The atomic types just use the base type.
- if ( ti.is_atomic() || 0 == strcmp( ti.get_buf_elem_type(), ti.get_name_c_str() ) ) {
+ if (ti.is_atomic()
+ || 0 == strcmp(ti.get_buf_elem_type(), ti.get_name_c_str()))
+ {
// The type is represented in a buffer by itself.
- num_printed = snprintf(buf,MAX_STR,
- "%s from_buf(%s a) { return a; }\n"
- "%s to_buf(%s a) { return a; }\n",
- ti.get_buf_elem_type(), ti.get_buf_elem_type(),
- ti.get_buf_elem_type(), ti.get_buf_elem_type() );
- } else {
+ num_printed = snprintf(buf, MAX_STR,
+ "%s from_buf(%s a) { return a; }\n"
+ "%s to_buf(%s a) { return a; }\n",
+ ti.get_buf_elem_type(), ti.get_buf_elem_type(),
+ ti.get_buf_elem_type(), ti.get_buf_elem_type());
+ }
+ else
+ {
// Just use C-style cast.
- num_printed = snprintf(buf,MAX_STR,
- "%s from_buf(%s a) { return (%s)a; }\n"
- "%s to_buf(%s a) { return (%s)a; }\n",
- ti.get_name_c_str(), ti.get_buf_elem_type(), ti.get_name_c_str(),
- ti.get_buf_elem_type(), ti.get_name_c_str(), ti.get_buf_elem_type() );
+ num_printed = snprintf(buf, MAX_STR,
+ "%s from_buf(%s a) { return (%s)a; }\n"
+ "%s to_buf(%s a) { return (%s)a; }\n",
+ ti.get_name_c_str(), ti.get_buf_elem_type(),
+ ti.get_name_c_str(), ti.get_buf_elem_type(),
+ ti.get_name_c_str(), ti.get_buf_elem_type());
}
// Add initializations.
- if ( ti.is_atomic() ) {
- num_printed += snprintf( buf + num_printed, MAX_STR-num_printed,
- "#define INIT_VAR(a) ATOMIC_VAR_INIT(a)\n" );
- } else {
- // This cast works even if the target type is a vector type.
- num_printed += snprintf( buf + num_printed, MAX_STR-num_printed,
- "#define INIT_VAR(a) ((%s)(a))\n", ti.get_name_c_str());
+ if (ti.is_atomic())
+ {
+ num_printed += snprintf(buf + num_printed, MAX_STR - num_printed,
+ "#define INIT_VAR(a) ATOMIC_VAR_INIT(a)\n");
}
- assert( num_printed < MAX_STR ); // or increase MAX_STR
+ else
+ {
+ // This cast works even if the target type is a vector type.
+ num_printed +=
+ snprintf(buf + num_printed, MAX_STR - num_printed,
+ "#define INIT_VAR(a) ((%s)(a))\n", ti.get_name_c_str());
+ }
+ assert(num_printed < MAX_STR); // or increase MAX_STR
result = buf;
return result;
}
-static std::string global_decls(const TypeInfo& ti, bool with_init )
+static std::string global_decls(const TypeInfo& ti, bool with_init)
{
const char* tn = ti.get_name_c_str();
const char* vol = (ti.is_atomic() ? " volatile " : " ");
static char decls[MAX_STR];
int num_printed = 0;
- if ( with_init ) {
- const char *decls_template_with_init =
+ if (with_init)
+ {
+ const char* decls_template_with_init =
"%s %s var = INIT_VAR(0);\n"
"global %s %s g_var = INIT_VAR(1);\n"
"%s %s a_var[2] = { INIT_VAR(1), INIT_VAR(1) };\n"
"volatile global %s %s* p_var = &a_var[1];\n\n";
- num_printed = snprintf(decls,sizeof(decls),decls_template_with_init,
- vol,tn,vol,tn,vol,tn,vol,tn);
- } else {
- const char *decls_template_no_init =
- "%s %s var;\n"
- "global %s %s g_var;\n"
- "%s %s a_var[2];\n"
- "global %s %s* p_var;\n\n";
- num_printed = snprintf(decls,sizeof(decls),decls_template_no_init,
- vol,tn,vol,tn,vol,tn,vol,tn);
+ num_printed = snprintf(decls, sizeof(decls), decls_template_with_init,
+ vol, tn, vol, tn, vol, tn, vol, tn);
}
- assert( num_printed < sizeof(decls) );
+ else
+ {
+ const char* decls_template_no_init = "%s %s var;\n"
+ "global %s %s g_var;\n"
+ "%s %s a_var[2];\n"
+ "global %s %s* p_var;\n\n";
+ num_printed = snprintf(decls, sizeof(decls), decls_template_no_init,
+ vol, tn, vol, tn, vol, tn, vol, tn);
+ }
+ assert(num_printed < sizeof(decls));
return std::string(decls);
}
@@ -761,18 +917,26 @@
// all() should only be used on vector inputs. For scalar comparison, the
// result of the equality operator can be used as a bool value.
- const bool is_scalar = ti.num_elem() == 0; // 0 is used to represent scalar types, not 1.
+ const bool is_scalar =
+ ti.num_elem() == 0; // 0 is used to represent scalar types, not 1.
const std::string is_equality_true = is_scalar ? "" : "all";
std::string code = "kernel void global_check(global int* out) {\n";
code += " const " + type_name + " zero = ((" + type_name + ")0);\n";
code += " bool status = true;\n";
- if (ti.is_atomic()) {
- code += " status &= " + is_equality_true + "(atomic_load(&var) == zero);\n";
- code += " status &= " + is_equality_true + "(atomic_load(&g_var) == zero);\n";
- code += " status &= " + is_equality_true + "(atomic_load(&a_var[0]) == zero);\n";
- code += " status &= " + is_equality_true + "(atomic_load(&a_var[1]) == zero);\n";
- } else {
+ if (ti.is_atomic())
+ {
+ code += " status &= " + is_equality_true
+ + "(atomic_load(&var) == zero);\n";
+ code += " status &= " + is_equality_true
+ + "(atomic_load(&g_var) == zero);\n";
+ code += " status &= " + is_equality_true
+ + "(atomic_load(&a_var[0]) == zero);\n";
+ code += " status &= " + is_equality_true
+ + "(atomic_load(&a_var[1]) == zero);\n";
+ }
+ else
+ {
code += " status &= " + is_equality_true + "(var == zero);\n";
code += " status &= " + is_equality_true + "(g_var == zero);\n";
code += " status &= " + is_equality_true + "(a_var[0] == zero);\n";
@@ -792,7 +956,8 @@
{
static char writer_src[MAX_STR];
int num_printed = 0;
- if ( !ti.is_atomic() ) {
+ if (!ti.is_atomic())
+ {
const char* writer_template_normal =
"kernel void writer( global %s* src, uint idx ) {\n"
" var = from_buf(src[0]);\n"
@@ -801,8 +966,11 @@
" a_var[1] = from_buf(src[3]);\n"
" p_var = a_var + idx;\n"
"}\n\n";
- num_printed = snprintf(writer_src,sizeof(writer_src),writer_template_normal,ti.get_buf_elem_type());
- } else {
+ num_printed = snprintf(writer_src, sizeof(writer_src),
+ writer_template_normal, ti.get_buf_elem_type());
+ }
+ else
+ {
const char* writer_template_atomic =
"kernel void writer( global %s* src, uint idx ) {\n"
" atomic_store( &var, from_buf(src[0]) );\n"
@@ -811,9 +979,10 @@
" atomic_store( &a_var[1], from_buf(src[3]) );\n"
" p_var = a_var + idx;\n"
"}\n\n";
- num_printed = snprintf(writer_src,sizeof(writer_src),writer_template_atomic,ti.get_buf_elem_type());
+ num_printed = snprintf(writer_src, sizeof(writer_src),
+ writer_template_atomic, ti.get_buf_elem_type());
}
- assert( num_printed < sizeof(writer_src) );
+ assert(num_printed < sizeof(writer_src));
std::string result = writer_src;
return result;
}
@@ -826,7 +995,8 @@
{
static char reader_src[MAX_STR];
int num_printed = 0;
- if ( !ti.is_atomic() ) {
+ if (!ti.is_atomic())
+ {
const char* reader_template_normal =
"kernel void reader( global %s* dest, %s ptr_write_val ) {\n"
" *p_var = from_buf(ptr_write_val);\n"
@@ -835,8 +1005,12 @@
" dest[2] = to_buf(a_var[0]);\n"
" dest[3] = to_buf(a_var[1]);\n"
"}\n\n";
- num_printed = snprintf(reader_src,sizeof(reader_src),reader_template_normal,ti.get_buf_elem_type(),ti.get_buf_elem_type());
- } else {
+ num_printed =
+ snprintf(reader_src, sizeof(reader_src), reader_template_normal,
+ ti.get_buf_elem_type(), ti.get_buf_elem_type());
+ }
+ else
+ {
const char* reader_template_atomic =
"kernel void reader( global %s* dest, %s ptr_write_val ) {\n"
" atomic_store( p_var, from_buf(ptr_write_val) );\n"
@@ -845,40 +1019,53 @@
" dest[2] = to_buf( atomic_load( &a_var[0] ) );\n"
" dest[3] = to_buf( atomic_load( &a_var[1] ) );\n"
"}\n\n";
- num_printed = snprintf(reader_src,sizeof(reader_src),reader_template_atomic,ti.get_buf_elem_type(),ti.get_buf_elem_type());
+ num_printed =
+ snprintf(reader_src, sizeof(reader_src), reader_template_atomic,
+ ti.get_buf_elem_type(), ti.get_buf_elem_type());
}
- assert( num_printed < sizeof(reader_src) );
+ assert(num_printed < sizeof(reader_src));
std::string result = reader_src;
return result;
}
// Check that all globals where appropriately default-initialized.
-static int check_global_initialization(cl_context context, cl_program program, cl_command_queue queue)
+static int check_global_initialization(cl_context context, cl_program program,
+ cl_command_queue queue)
{
int status = CL_SUCCESS;
// Create a buffer on device to store a unique integer.
cl_int is_init_valid = 0;
- clMemWrapper buffer(clCreateBuffer(context, CL_MEM_WRITE_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(is_init_valid), &is_init_valid, &status));
+ clMemWrapper buffer(
+ clCreateBuffer(context, CL_MEM_WRITE_ONLY | CL_MEM_COPY_HOST_PTR,
+ sizeof(is_init_valid), &is_init_valid, &status));
test_error_ret(status, "Failed to allocate buffer", status);
// Create, setup and invoke kernel.
- clKernelWrapper global_check(clCreateKernel(program, "global_check", &status));
+ clKernelWrapper global_check(
+ clCreateKernel(program, "global_check", &status));
test_error_ret(status, "Failed to create global_check kernel", status);
status = clSetKernelArg(global_check, 0, sizeof(cl_mem), &buffer);
- test_error_ret(status, "Failed to set up argument for the global_check kernel", status);
+ test_error_ret(status,
+ "Failed to set up argument for the global_check kernel",
+ status);
const cl_uint work_dim = 1;
const size_t global_work_offset[] = { 0 };
const size_t global_work_size[] = { 1 };
- status = clEnqueueNDRangeKernel(queue, global_check, work_dim, global_work_offset, global_work_size, nullptr, 0, nullptr, nullptr);
+ status = clEnqueueNDRangeKernel(queue, global_check, work_dim,
+ global_work_offset, global_work_size,
+ nullptr, 0, nullptr, nullptr);
test_error_ret(status, "Failed to run global_check kernel", status);
status = clFinish(queue);
test_error_ret(status, "clFinish() failed", status);
// Read back the memory buffer from the device.
- status = clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, sizeof(is_init_valid), &is_init_valid, 0, nullptr, nullptr);
+ status =
+ clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, sizeof(is_init_valid),
+ &is_init_valid, 0, nullptr, nullptr);
test_error_ret(status, "Failed to read buffer from device", status);
- if (is_init_valid == 0) {
+ if (is_init_valid == 0)
+ {
log_error("Unexpected default values were detected");
return 1;
}
@@ -887,58 +1074,75 @@
}
// Check write-then-read.
-static int l_write_read( cl_device_id device, cl_context context, cl_command_queue queue )
+static int l_write_read(cl_device_id device, cl_context context,
+ cl_command_queue queue)
{
int status = CL_SUCCESS;
int itype;
- RandomSeed rand_state( gRandomSeed );
+ RandomSeed rand_state(gRandomSeed);
- for ( itype = 0; itype < num_type_info ; itype++ ) {
- status = status | l_write_read_for_type(device,context,queue,type_info[itype], rand_state );
+ for (itype = 0; itype < num_type_info; itype++)
+ {
+ status = status
+ | l_write_read_for_type(device, context, queue, type_info[itype],
+ rand_state);
FLUSH;
}
return status;
}
-static int l_write_read_for_type( cl_device_id device, cl_context context, cl_command_queue queue, const TypeInfo& ti, RandomSeed& rand_state )
+static int l_write_read_for_type(cl_device_id device, cl_context context,
+ cl_command_queue queue, const TypeInfo& ti,
+ RandomSeed& rand_state)
{
int err = CL_SUCCESS;
- std::string type_name( ti.get_name() );
+ std::string type_name(ti.get_name());
const char* tn = type_name.c_str();
- log_info(" %s ",tn);
+ log_info(" %s ", tn);
StringTable ksrc;
- ksrc.add( l_get_fp64_pragma() );
- ksrc.add( l_get_cles_int64_pragma() );
- if (ti.is_atomic_64bit())
- ksrc.add( l_get_int64_atomic_pragma() );
- ksrc.add( conversion_functions(ti) );
- ksrc.add( global_decls(ti,false) );
- ksrc.add( global_check_function(ti) );
- ksrc.add( writer_function(ti) );
- ksrc.add( reader_function(ti) );
+ ksrc.add(l_get_fp64_pragma());
+ ksrc.add(l_get_cles_int64_pragma());
+ if (ti.is_atomic_64bit()) ksrc.add(l_get_int64_atomic_pragma());
+ ksrc.add(conversion_functions(ti));
+ ksrc.add(global_decls(ti, false));
+ ksrc.add(global_check_function(ti));
+ ksrc.add(writer_function(ti));
+ ksrc.add(reader_function(ti));
int status = CL_SUCCESS;
clProgramWrapper program;
clKernelWrapper writer;
- status = create_single_kernel_helper_with_build_options(context, &program, &writer, ksrc.num_str(), ksrc.strs(), "writer", OPTIONS);
- test_error_ret(status,"Failed to create program for read-after-write test",status);
+ status = create_single_kernel_helper_with_build_options(
+ context, &program, &writer, ksrc.num_str(), ksrc.strs(), "writer",
+ OPTIONS);
+ test_error_ret(status, "Failed to create program for read-after-write test",
+ status);
- clKernelWrapper reader( clCreateKernel( program, "reader", &status ) );
- test_error_ret(status,"Failed to create reader kernel for read-after-write test",status);
+ clKernelWrapper reader(clCreateKernel(program, "reader", &status));
+ test_error_ret(status,
+ "Failed to create reader kernel for read-after-write test",
+ status);
// Check size query.
size_t used_bytes = 0;
- status = clGetProgramBuildInfo( program, device, CL_PROGRAM_BUILD_GLOBAL_VARIABLE_TOTAL_SIZE, sizeof(used_bytes), &used_bytes, 0 );
- test_error_ret(status,"Failed to query global variable total size",status);
- size_t expected_used_bytes =
- (NUM_TESTED_VALUES-1)*ti.get_size() // Two regular variables and an array of 2 elements.
- + ( l_64bit_device ? 8 : 4 ); // The pointer
- if ( used_bytes < expected_used_bytes ) {
- log_error("Error program query for global variable total size query failed: Expected at least %llu but got %llu\n", (unsigned long long)expected_used_bytes, (unsigned long long)used_bytes );
+ status = clGetProgramBuildInfo(program, device,
+ CL_PROGRAM_BUILD_GLOBAL_VARIABLE_TOTAL_SIZE,
+ sizeof(used_bytes), &used_bytes, 0);
+ test_error_ret(status, "Failed to query global variable total size",
+ status);
+ size_t expected_used_bytes = (NUM_TESTED_VALUES - 1)
+ * ti.get_size() // Two regular variables and an array of 2 elements.
+ + (l_64bit_device ? 8 : 4); // The pointer
+ if (used_bytes < expected_used_bytes)
+ {
+ log_error("Error program query for global variable total size query "
+ "failed: Expected at least %llu but got %llu\n",
+ (unsigned long long)expected_used_bytes,
+ (unsigned long long)used_bytes);
err |= 1;
}
@@ -951,90 +1155,131 @@
cl_uchar* write_data = (cl_uchar*)align_malloc(write_data_size, ALIGNMENT);
cl_uchar* read_data = (cl_uchar*)align_malloc(read_data_size, ALIGNMENT);
- clMemWrapper write_mem( clCreateBuffer( context, CL_MEM_USE_HOST_PTR, write_data_size, write_data, &status ) );
- test_error_ret(status,"Failed to allocate write buffer",status);
- clMemWrapper read_mem( clCreateBuffer( context, CL_MEM_USE_HOST_PTR, read_data_size, read_data, &status ) );
- test_error_ret(status,"Failed to allocate read buffer",status);
+ clMemWrapper write_mem(clCreateBuffer(
+ context, CL_MEM_USE_HOST_PTR, write_data_size, write_data, &status));
+ test_error_ret(status, "Failed to allocate write buffer", status);
+ clMemWrapper read_mem(clCreateBuffer(context, CL_MEM_USE_HOST_PTR,
+ read_data_size, read_data, &status));
+ test_error_ret(status, "Failed to allocate read buffer", status);
- status = clSetKernelArg(writer,0,sizeof(cl_mem),&write_mem); test_error_ret(status,"set arg",status);
- status = clSetKernelArg(reader,0,sizeof(cl_mem),&read_mem); test_error_ret(status,"set arg",status);
+ status = clSetKernelArg(writer, 0, sizeof(cl_mem), &write_mem);
+ test_error_ret(status, "set arg", status);
+ status = clSetKernelArg(reader, 0, sizeof(cl_mem), &read_mem);
+ test_error_ret(status, "set arg", status);
// Boolean random data needs to be massaged a bit more.
- const int num_rounds = ti.is_bool() ? (1 << NUM_TESTED_VALUES ) : NUM_ROUNDS;
+ const int num_rounds = ti.is_bool() ? (1 << NUM_TESTED_VALUES) : NUM_ROUNDS;
unsigned bool_iter = 0;
- for ( int iround = 0; iround < num_rounds ; iround++ ) {
- for ( cl_uint iptr_idx = 0; iptr_idx < 2 ; iptr_idx++ ) { // Index into array, to write via pointer
+ for (int iround = 0; iround < num_rounds; iround++)
+ {
+ for (cl_uint iptr_idx = 0; iptr_idx < 2; iptr_idx++)
+ { // Index into array, to write via pointer
// Generate new random data to push through.
- // Generate 5 * 128 bytes all the time, even though the test for many types use less than all that.
+ // Generate 5 * 128 bytes all the time, even though the test for
+ // many types use less than all that.
- cl_uchar *write_ptr = (cl_uchar *)clEnqueueMapBuffer(queue, write_mem, CL_TRUE, CL_MAP_WRITE, 0, write_data_size, 0, 0, 0, 0);
+ cl_uchar* write_ptr = (cl_uchar*)clEnqueueMapBuffer(
+ queue, write_mem, CL_TRUE, CL_MAP_WRITE, 0, write_data_size, 0,
+ 0, 0, 0);
- if ( ti.is_bool() ) {
+ if (ti.is_bool())
+ {
// For boolean, random data cast to bool isn't very random.
// So use the bottom bit of bool_value_iter to get true
// diversity.
- for ( unsigned value_idx = 0; value_idx < NUM_TESTED_VALUES ; value_idx++ ) {
- write_data[value_idx] = (1<<value_idx) & bool_iter;
- //printf(" %s", (write_data[value_idx] ? "true" : "false" ));
+ for (unsigned value_idx = 0; value_idx < NUM_TESTED_VALUES;
+ value_idx++)
+ {
+ write_data[value_idx] = (1 << value_idx) & bool_iter;
+ // printf(" %s", (write_data[value_idx] ? "true" : "false"
+ // ));
}
bool_iter++;
- } else {
- l_set_randomly( write_data, write_data_size, rand_state );
}
- status = clSetKernelArg(writer,1,sizeof(cl_uint),&iptr_idx); test_error_ret(status,"set arg",status);
+ else
+ {
+ l_set_randomly(write_data, write_data_size, rand_state);
+ }
+ status = clSetKernelArg(writer, 1, sizeof(cl_uint), &iptr_idx);
+ test_error_ret(status, "set arg", status);
// The value to write via the pointer should be taken from the
// 5th typed slot of the write_data.
- status = clSetKernelArg(reader,1,ti.get_size(),write_data + (NUM_TESTED_VALUES-1)*ti.get_size()); test_error_ret(status,"set arg",status);
+ status = clSetKernelArg(
+ reader, 1, ti.get_size(),
+ write_data + (NUM_TESTED_VALUES - 1) * ti.get_size());
+ test_error_ret(status, "set arg", status);
// Determine the expected values.
cl_uchar expected[read_data_size];
- memset( expected, -1, sizeof(expected) );
- l_copy( expected, 0, write_data, 0, ti );
- l_copy( expected, 1, write_data, 1, ti );
- l_copy( expected, 2, write_data, 2, ti );
- l_copy( expected, 3, write_data, 3, ti );
- // But we need to take into account the value from the pointer write.
- // The 2 represents where the "a" array values begin in our read-back.
- l_copy( expected, 2 + iptr_idx, write_data, 4, ti );
+ memset(expected, -1, sizeof(expected));
+ l_copy(expected, 0, write_data, 0, ti);
+ l_copy(expected, 1, write_data, 1, ti);
+ l_copy(expected, 2, write_data, 2, ti);
+ l_copy(expected, 3, write_data, 3, ti);
+ // But we need to take into account the value from the pointer
+ // write. The 2 represents where the "a" array values begin in our
+ // read-back.
+ l_copy(expected, 2 + iptr_idx, write_data, 4, ti);
clEnqueueUnmapMemObject(queue, write_mem, write_ptr, 0, 0, 0);
- if ( ti.is_bool() ) {
+ if (ti.is_bool())
+ {
// Collapse down to one bit.
- for ( unsigned i = 0; i < NUM_TESTED_VALUES-1 ; i++ ) expected[i] = (bool)expected[i];
+ for (unsigned i = 0; i < NUM_TESTED_VALUES - 1; i++)
+ expected[i] = (bool)expected[i];
}
- cl_uchar *read_ptr = (cl_uchar *)clEnqueueMapBuffer(queue, read_mem, CL_TRUE, CL_MAP_READ, 0, read_data_size, 0, 0, 0, 0);
+ cl_uchar* read_ptr = (cl_uchar*)clEnqueueMapBuffer(
+ queue, read_mem, CL_TRUE, CL_MAP_READ, 0, read_data_size, 0, 0,
+ 0, 0);
memset(read_data, -1, read_data_size);
clEnqueueUnmapMemObject(queue, read_mem, read_ptr, 0, 0, 0);
// Now run the kernel
const size_t one = 1;
- status = clEnqueueNDRangeKernel(queue,writer,1,0,&one,0,0,0,0); test_error_ret(status,"enqueue writer",status);
- status = clEnqueueNDRangeKernel(queue,reader,1,0,&one,0,0,0,0); test_error_ret(status,"enqueue reader",status);
- status = clFinish(queue); test_error_ret(status,"finish",status);
+ status =
+ clEnqueueNDRangeKernel(queue, writer, 1, 0, &one, 0, 0, 0, 0);
+ test_error_ret(status, "enqueue writer", status);
+ status =
+ clEnqueueNDRangeKernel(queue, reader, 1, 0, &one, 0, 0, 0, 0);
+ test_error_ret(status, "enqueue reader", status);
+ status = clFinish(queue);
+ test_error_ret(status, "finish", status);
- read_ptr = (cl_uchar *)clEnqueueMapBuffer(queue, read_mem, CL_TRUE, CL_MAP_READ, 0, read_data_size, 0, 0, 0, 0);
+ read_ptr = (cl_uchar*)clEnqueueMapBuffer(
+ queue, read_mem, CL_TRUE, CL_MAP_READ, 0, read_data_size, 0, 0,
+ 0, 0);
- if ( ti.is_bool() ) {
+ if (ti.is_bool())
+ {
// Collapse down to one bit.
- for ( unsigned i = 0; i < NUM_TESTED_VALUES-1 ; i++ ) read_data[i] = (bool)read_data[i];
+ for (unsigned i = 0; i < NUM_TESTED_VALUES - 1; i++)
+ read_data[i] = (bool)read_data[i];
}
// Compare only the valid returned bytes.
- int compare_result = l_compare( "read-after-write", expected, read_data, NUM_TESTED_VALUES-1, ti );
- // log_info("Compared %d values each of size %llu. Result %d\n", NUM_TESTED_VALUES-1, (unsigned long long)ti.get_value_size(), compare_result );
+ int compare_result =
+ l_compare("read-after-write", expected, read_data,
+ NUM_TESTED_VALUES - 1, ti);
+ // log_info("Compared %d values each of size %llu. Result %d\n",
+ // NUM_TESTED_VALUES-1, (unsigned long long)ti.get_value_size(),
+ // compare_result );
err |= compare_result;
clEnqueueUnmapMemObject(queue, read_mem, read_ptr, 0, 0, 0);
- if ( err ) break;
+ if (err) break;
}
}
- if ( CL_SUCCESS == err ) { log_info("OK\n"); FLUSH; }
+ if (CL_SUCCESS == err)
+ {
+ log_info("OK\n");
+ FLUSH;
+ }
align_free(write_data);
align_free(read_data);
return err;
@@ -1042,74 +1287,97 @@
// Check initialization, then, read, then write, then read.
-static int l_init_write_read( cl_device_id device, cl_context context, cl_command_queue queue )
+static int l_init_write_read(cl_device_id device, cl_context context,
+ cl_command_queue queue)
{
int status = CL_SUCCESS;
int itype;
- RandomSeed rand_state( gRandomSeed );
+ RandomSeed rand_state(gRandomSeed);
- for ( itype = 0; itype < num_type_info ; itype++ ) {
- status = status | l_init_write_read_for_type(device,context,queue,type_info[itype], rand_state );
+ for (itype = 0; itype < num_type_info; itype++)
+ {
+ status = status
+ | l_init_write_read_for_type(device, context, queue,
+ type_info[itype], rand_state);
}
return status;
}
-static int l_init_write_read_for_type( cl_device_id device, cl_context context, cl_command_queue queue, const TypeInfo& ti, RandomSeed& rand_state )
+static int l_init_write_read_for_type(cl_device_id device, cl_context context,
+ cl_command_queue queue,
+ const TypeInfo& ti,
+ RandomSeed& rand_state)
{
int err = CL_SUCCESS;
- std::string type_name( ti.get_name() );
+ std::string type_name(ti.get_name());
const char* tn = type_name.c_str();
- log_info(" %s ",tn);
+ log_info(" %s ", tn);
StringTable ksrc;
- ksrc.add( l_get_fp64_pragma() );
- ksrc.add( l_get_cles_int64_pragma() );
- if (ti.is_atomic_64bit())
- ksrc.add( l_get_int64_atomic_pragma() );
- ksrc.add( conversion_functions(ti) );
- ksrc.add( global_decls(ti,true) );
- ksrc.add( writer_function(ti) );
- ksrc.add( reader_function(ti) );
+ ksrc.add(l_get_fp64_pragma());
+ ksrc.add(l_get_cles_int64_pragma());
+ if (ti.is_atomic_64bit()) ksrc.add(l_get_int64_atomic_pragma());
+ ksrc.add(conversion_functions(ti));
+ ksrc.add(global_decls(ti, true));
+ ksrc.add(writer_function(ti));
+ ksrc.add(reader_function(ti));
int status = CL_SUCCESS;
clProgramWrapper program;
clKernelWrapper writer;
- status = create_single_kernel_helper_with_build_options(context, &program, &writer, ksrc.num_str(), ksrc.strs(), "writer", OPTIONS);
- test_error_ret(status,"Failed to create program for init-read-after-write test",status);
+ status = create_single_kernel_helper_with_build_options(
+ context, &program, &writer, ksrc.num_str(), ksrc.strs(), "writer",
+ OPTIONS);
+ test_error_ret(status,
+ "Failed to create program for init-read-after-write test",
+ status);
- clKernelWrapper reader( clCreateKernel( program, "reader", &status ) );
- test_error_ret(status,"Failed to create reader kernel for init-read-after-write test",status);
+ clKernelWrapper reader(clCreateKernel(program, "reader", &status));
+ test_error_ret(
+ status, "Failed to create reader kernel for init-read-after-write test",
+ status);
// Check size query.
size_t used_bytes = 0;
- status = clGetProgramBuildInfo( program, device, CL_PROGRAM_BUILD_GLOBAL_VARIABLE_TOTAL_SIZE, sizeof(used_bytes), &used_bytes, 0 );
- test_error_ret(status,"Failed to query global variable total size",status);
- size_t expected_used_bytes =
- (NUM_TESTED_VALUES-1)*ti.get_size() // Two regular variables and an array of 2 elements.
- + ( l_64bit_device ? 8 : 4 ); // The pointer
- if ( used_bytes < expected_used_bytes ) {
- log_error("Error: program query for global variable total size query failed: Expected at least %llu but got %llu\n", (unsigned long long)expected_used_bytes, (unsigned long long)used_bytes );
+ status = clGetProgramBuildInfo(program, device,
+ CL_PROGRAM_BUILD_GLOBAL_VARIABLE_TOTAL_SIZE,
+ sizeof(used_bytes), &used_bytes, 0);
+ test_error_ret(status, "Failed to query global variable total size",
+ status);
+ size_t expected_used_bytes = (NUM_TESTED_VALUES - 1)
+ * ti.get_size() // Two regular variables and an array of 2 elements.
+ + (l_64bit_device ? 8 : 4); // The pointer
+ if (used_bytes < expected_used_bytes)
+ {
+ log_error("Error: program query for global variable total size query "
+ "failed: Expected at least %llu but got %llu\n",
+ (unsigned long long)expected_used_bytes,
+ (unsigned long long)used_bytes);
err |= 1;
}
// We need to create 5 random values of the given type,
// and read 4 of them back.
const size_t write_data_size = NUM_TESTED_VALUES * sizeof(cl_ulong16);
- const size_t read_data_size = (NUM_TESTED_VALUES-1) * sizeof(cl_ulong16);
+ const size_t read_data_size = (NUM_TESTED_VALUES - 1) * sizeof(cl_ulong16);
cl_uchar* write_data = (cl_uchar*)align_malloc(write_data_size, ALIGNMENT);
cl_uchar* read_data = (cl_uchar*)align_malloc(read_data_size, ALIGNMENT);
- clMemWrapper write_mem( clCreateBuffer( context, CL_MEM_USE_HOST_PTR, write_data_size, write_data, &status ) );
- test_error_ret(status,"Failed to allocate write buffer",status);
- clMemWrapper read_mem( clCreateBuffer( context, CL_MEM_USE_HOST_PTR, read_data_size, read_data, &status ) );
- test_error_ret(status,"Failed to allocate read buffer",status);
+ clMemWrapper write_mem(clCreateBuffer(
+ context, CL_MEM_USE_HOST_PTR, write_data_size, write_data, &status));
+ test_error_ret(status, "Failed to allocate write buffer", status);
+ clMemWrapper read_mem(clCreateBuffer(context, CL_MEM_USE_HOST_PTR,
+ read_data_size, read_data, &status));
+ test_error_ret(status, "Failed to allocate read buffer", status);
- status = clSetKernelArg(writer,0,sizeof(cl_mem),&write_mem); test_error_ret(status,"set arg",status);
- status = clSetKernelArg(reader,0,sizeof(cl_mem),&read_mem); test_error_ret(status,"set arg",status);
+ status = clSetKernelArg(writer, 0, sizeof(cl_mem), &write_mem);
+ test_error_ret(status, "set arg", status);
+ status = clSetKernelArg(reader, 0, sizeof(cl_mem), &read_mem);
+ test_error_ret(status, "set arg", status);
// Boolean random data needs to be massaged a bit more.
- const int num_rounds = ti.is_bool() ? (1 << NUM_TESTED_VALUES ) : NUM_ROUNDS;
+ const int num_rounds = ti.is_bool() ? (1 << NUM_TESTED_VALUES) : NUM_ROUNDS;
unsigned bool_iter = 0;
// We need to count iterations. We do something *different on the
@@ -1117,107 +1385,152 @@
// values.
unsigned iteration = 0;
- for ( int iround = 0; iround < num_rounds ; iround++ ) {
- for ( cl_uint iptr_idx = 0; iptr_idx < 2 ; iptr_idx++ ) { // Index into array, to write via pointer
+ for (int iround = 0; iround < num_rounds; iround++)
+ {
+ for (cl_uint iptr_idx = 0; iptr_idx < 2; iptr_idx++)
+ { // Index into array, to write via pointer
// Generate new random data to push through.
- // Generate 5 * 128 bytes all the time, even though the test for many types use less than all that.
+ // Generate 5 * 128 bytes all the time, even though the test for
+ // many types use less than all that.
- cl_uchar *write_ptr = (cl_uchar *)clEnqueueMapBuffer(queue, write_mem, CL_TRUE, CL_MAP_WRITE, 0, write_data_size, 0, 0, 0, 0);
+ cl_uchar* write_ptr = (cl_uchar*)clEnqueueMapBuffer(
+ queue, write_mem, CL_TRUE, CL_MAP_WRITE, 0, write_data_size, 0,
+ 0, 0, 0);
- if ( ti.is_bool() ) {
+ if (ti.is_bool())
+ {
// For boolean, random data cast to bool isn't very random.
// So use the bottom bit of bool_value_iter to get true
// diversity.
- for ( unsigned value_idx = 0; value_idx < NUM_TESTED_VALUES ; value_idx++ ) {
- write_data[value_idx] = (1<<value_idx) & bool_iter;
- //printf(" %s", (write_data[value_idx] ? "true" : "false" ));
+ for (unsigned value_idx = 0; value_idx < NUM_TESTED_VALUES;
+ value_idx++)
+ {
+ write_data[value_idx] = (1 << value_idx) & bool_iter;
+ // printf(" %s", (write_data[value_idx] ? "true" : "false"
+ // ));
}
bool_iter++;
- } else {
- l_set_randomly( write_data, write_data_size, rand_state );
}
- status = clSetKernelArg(writer,1,sizeof(cl_uint),&iptr_idx); test_error_ret(status,"set arg",status);
+ else
+ {
+ l_set_randomly(write_data, write_data_size, rand_state);
+ }
+ status = clSetKernelArg(writer, 1, sizeof(cl_uint), &iptr_idx);
+ test_error_ret(status, "set arg", status);
- if ( !iteration ) {
+ if (!iteration)
+ {
// On first iteration, the value we write via the last arg
// to the "reader" function is 0.
// It's way easier to code the test this way.
- ti.init( write_data + (NUM_TESTED_VALUES-1)*ti.get_size(), 0 );
+ ti.init(write_data + (NUM_TESTED_VALUES - 1) * ti.get_size(),
+ 0);
}
// The value to write via the pointer should be taken from the
// 5th typed slot of the write_data.
- status = clSetKernelArg(reader,1,ti.get_size(),write_data + (NUM_TESTED_VALUES-1)*ti.get_size()); test_error_ret(status,"set arg",status);
+ status = clSetKernelArg(
+ reader, 1, ti.get_size(),
+ write_data + (NUM_TESTED_VALUES - 1) * ti.get_size());
+ test_error_ret(status, "set arg", status);
// Determine the expected values.
cl_uchar expected[read_data_size];
- memset( expected, -1, sizeof(expected) );
- if ( iteration ) {
- l_copy( expected, 0, write_data, 0, ti );
- l_copy( expected, 1, write_data, 1, ti );
- l_copy( expected, 2, write_data, 2, ti );
- l_copy( expected, 3, write_data, 3, ti );
- // But we need to take into account the value from the pointer write.
- // The 2 represents where the "a" array values begin in our read-back.
- // But we need to take into account the value from the pointer write.
- l_copy( expected, 2 + iptr_idx, write_data, 4, ti );
- } else {
+ memset(expected, -1, sizeof(expected));
+ if (iteration)
+ {
+ l_copy(expected, 0, write_data, 0, ti);
+ l_copy(expected, 1, write_data, 1, ti);
+ l_copy(expected, 2, write_data, 2, ti);
+ l_copy(expected, 3, write_data, 3, ti);
+ // But we need to take into account the value from the pointer
+ // write. The 2 represents where the "a" array values begin in
+ // our read-back. But we need to take into account the value
+ // from the pointer write.
+ l_copy(expected, 2 + iptr_idx, write_data, 4, ti);
+ }
+ else
+ {
// On first iteration, expect these initialized values!
// See the decls_template_with_init above.
- ti.init( expected, 0 );
- ti.init( expected + ti.get_size(), 1 );
- ti.init( expected + 2*ti.get_size(), 1 );
+ ti.init(expected, 0);
+ ti.init(expected + ti.get_size(), 1);
+ ti.init(expected + 2 * ti.get_size(), 1);
// Emulate the effect of the write via the pointer.
// The value is 0, not 1 (see above).
// The pointer is always initialized to the second element
// of the array. So it goes into slot 3 of the "expected" array.
- ti.init( expected + 3*ti.get_size(), 0 );
+ ti.init(expected + 3 * ti.get_size(), 0);
}
- if ( ti.is_bool() ) {
+ if (ti.is_bool())
+ {
// Collapse down to one bit.
- for ( unsigned i = 0; i < NUM_TESTED_VALUES-1 ; i++ ) expected[i] = (bool)expected[i];
+ for (unsigned i = 0; i < NUM_TESTED_VALUES - 1; i++)
+ expected[i] = (bool)expected[i];
}
clEnqueueUnmapMemObject(queue, write_mem, write_ptr, 0, 0, 0);
- cl_uchar *read_ptr = (cl_uchar *)clEnqueueMapBuffer(queue, read_mem, CL_TRUE, CL_MAP_READ, 0, read_data_size, 0, 0, 0, 0);
- memset( read_data, -1, read_data_size );
+ cl_uchar* read_ptr = (cl_uchar*)clEnqueueMapBuffer(
+ queue, read_mem, CL_TRUE, CL_MAP_READ, 0, read_data_size, 0, 0,
+ 0, 0);
+ memset(read_data, -1, read_data_size);
clEnqueueUnmapMemObject(queue, read_mem, read_ptr, 0, 0, 0);
// Now run the kernel
const size_t one = 1;
- if ( iteration ) {
- status = clEnqueueNDRangeKernel(queue,writer,1,0,&one,0,0,0,0); test_error_ret(status,"enqueue writer",status);
- } else {
+ if (iteration)
+ {
+ status = clEnqueueNDRangeKernel(queue, writer, 1, 0, &one, 0, 0,
+ 0, 0);
+ test_error_ret(status, "enqueue writer", status);
+ }
+ else
+ {
// On first iteration, we should be picking up the
// initialized value. So don't enqueue the writer.
}
- status = clEnqueueNDRangeKernel(queue,reader,1,0,&one,0,0,0,0); test_error_ret(status,"enqueue reader",status);
- status = clFinish(queue); test_error_ret(status,"finish",status);
+ status =
+ clEnqueueNDRangeKernel(queue, reader, 1, 0, &one, 0, 0, 0, 0);
+ test_error_ret(status, "enqueue reader", status);
+ status = clFinish(queue);
+ test_error_ret(status, "finish", status);
- read_ptr = (cl_uchar *)clEnqueueMapBuffer(queue, read_mem, CL_TRUE, CL_MAP_READ, 0, read_data_size, 0, 0, 0, 0);
+ read_ptr = (cl_uchar*)clEnqueueMapBuffer(
+ queue, read_mem, CL_TRUE, CL_MAP_READ, 0, read_data_size, 0, 0,
+ 0, 0);
- if ( ti.is_bool() ) {
+ if (ti.is_bool())
+ {
// Collapse down to one bit.
- for ( unsigned i = 0; i < NUM_TESTED_VALUES-1 ; i++ ) read_data[i] = (bool)read_data[i];
+ for (unsigned i = 0; i < NUM_TESTED_VALUES - 1; i++)
+ read_data[i] = (bool)read_data[i];
}
// Compare only the valid returned bytes.
- //log_info(" Round %d ptr_idx %u\n", iround, iptr_idx );
- int compare_result = l_compare( "init-write-read", expected, read_data, NUM_TESTED_VALUES-1, ti );
- //log_info("Compared %d values each of size %llu. Result %d\n", NUM_TESTED_VALUES-1, (unsigned long long)ti.get_value_size(), compare_result );
+ // log_info(" Round %d ptr_idx %u\n", iround, iptr_idx );
+ int compare_result =
+ l_compare("init-write-read", expected, read_data,
+ NUM_TESTED_VALUES - 1, ti);
+ // log_info("Compared %d values each of size %llu. Result %d\n",
+ // NUM_TESTED_VALUES-1, (unsigned long long)ti.get_value_size(),
+ // compare_result );
err |= compare_result;
clEnqueueUnmapMemObject(queue, read_mem, read_ptr, 0, 0, 0);
- if ( err ) break;
+ if (err) break;
iteration++;
}
}
- if ( CL_SUCCESS == err ) { log_info("OK\n"); FLUSH; }
+ if (CL_SUCCESS == err)
+ {
+ log_info("OK\n");
+ FLUSH;
+ }
align_free(write_data);
align_free(read_data);
@@ -1226,12 +1539,14 @@
// Check that we can make at least one variable with size
-// max_size which is returned from the device info property : CL_DEVICE_MAX_GLOBAL_VARIABLE_SIZE.
-static int l_capacity( cl_device_id device, cl_context context, cl_command_queue queue, size_t max_size )
+// max_size which is returned from the device info property :
+// CL_DEVICE_MAX_GLOBAL_VARIABLE_SIZE.
+static int l_capacity(cl_device_id device, cl_context context,
+ cl_command_queue queue, size_t max_size)
{
int err = CL_SUCCESS;
// Just test one type.
- const TypeInfo ti( l_find_type("uchar") );
+ const TypeInfo ti(l_find_type("uchar"));
log_info(" l_capacity...");
const char prog_src_template[] =
@@ -1254,84 +1569,132 @@
" dest[get_global_linear_id()] = var[get_global_id(0)];\n"
"}\n\n";
char prog_src[MAX_STR];
- int num_printed = snprintf(prog_src,sizeof(prog_src),prog_src_template,max_size, max_size);
- assert( num_printed < MAX_STR ); // or increase MAX_STR
+ int num_printed = snprintf(prog_src, sizeof(prog_src), prog_src_template,
+ max_size, max_size);
+ assert(num_printed < MAX_STR); // or increase MAX_STR
(void)num_printed;
StringTable ksrc;
- ksrc.add( prog_src );
+ ksrc.add(prog_src);
int status = CL_SUCCESS;
clProgramWrapper program;
clKernelWrapper get_max_size;
- status = create_single_kernel_helper_with_build_options(context, &program, &get_max_size, ksrc.num_str(), ksrc.strs(), "get_max_size", OPTIONS);
- test_error_ret(status,"Failed to create program for capacity test",status);
+ status = create_single_kernel_helper_with_build_options(
+ context, &program, &get_max_size, ksrc.num_str(), ksrc.strs(),
+ "get_max_size", OPTIONS);
+ test_error_ret(status, "Failed to create program for capacity test",
+ status);
// Check size query.
size_t used_bytes = 0;
- status = clGetProgramBuildInfo( program, device, CL_PROGRAM_BUILD_GLOBAL_VARIABLE_TOTAL_SIZE, sizeof(used_bytes), &used_bytes, 0 );
- test_error_ret(status,"Failed to query global variable total size",status);
- if ( used_bytes < max_size ) {
- log_error("Error: program query for global variable total size query failed: Expected at least %llu but got %llu\n", (unsigned long long)max_size, (unsigned long long)used_bytes );
+ status = clGetProgramBuildInfo(program, device,
+ CL_PROGRAM_BUILD_GLOBAL_VARIABLE_TOTAL_SIZE,
+ sizeof(used_bytes), &used_bytes, 0);
+ test_error_ret(status, "Failed to query global variable total size",
+ status);
+ if (used_bytes < max_size)
+ {
+ log_error("Error: program query for global variable total size query "
+ "failed: Expected at least %llu but got %llu\n",
+ (unsigned long long)max_size, (unsigned long long)used_bytes);
err |= 1;
}
// Prepare to execute
- clKernelWrapper writer( clCreateKernel( program, "writer", &status ) );
- test_error_ret(status,"Failed to create writer kernel for capacity test",status);
- clKernelWrapper reader( clCreateKernel( program, "reader", &status ) );
- test_error_ret(status,"Failed to create reader kernel for capacity test",status);
+ clKernelWrapper writer(clCreateKernel(program, "writer", &status));
+ test_error_ret(status, "Failed to create writer kernel for capacity test",
+ status);
+ clKernelWrapper reader(clCreateKernel(program, "reader", &status));
+ test_error_ret(status, "Failed to create reader kernel for capacity test",
+ status);
cl_ulong max_size_ret = 0;
- const size_t arr_size = 10*1024*1024;
- cl_uchar* buffer = (cl_uchar*) align_malloc( arr_size, ALIGNMENT );
+ const size_t arr_size = 10 * 1024 * 1024;
+ cl_uchar* buffer = (cl_uchar*)align_malloc(arr_size, ALIGNMENT);
- if ( !buffer ) { log_error("Failed to allocate buffer\n"); return 1; }
+ if (!buffer)
+ {
+ log_error("Failed to allocate buffer\n");
+ return 1;
+ }
- clMemWrapper max_size_ret_mem( clCreateBuffer( context, CL_MEM_USE_HOST_PTR, sizeof(max_size_ret), &max_size_ret, &status ) );
- test_error_ret(status,"Failed to allocate size query buffer",status);
- clMemWrapper buffer_mem( clCreateBuffer( context, CL_MEM_READ_WRITE, arr_size, 0, &status ) );
- test_error_ret(status,"Failed to allocate write buffer",status);
+ clMemWrapper max_size_ret_mem(clCreateBuffer(context, CL_MEM_USE_HOST_PTR,
+ sizeof(max_size_ret),
+ &max_size_ret, &status));
+ test_error_ret(status, "Failed to allocate size query buffer", status);
+ clMemWrapper buffer_mem(
+ clCreateBuffer(context, CL_MEM_READ_WRITE, arr_size, 0, &status));
+ test_error_ret(status, "Failed to allocate write buffer", status);
- status = clSetKernelArg(get_max_size,0,sizeof(cl_mem),&max_size_ret_mem); test_error_ret(status,"set arg",status);
- status = clSetKernelArg(writer,0,sizeof(cl_mem),&buffer_mem); test_error_ret(status,"set arg",status);
- status = clSetKernelArg(reader,0,sizeof(cl_mem),&buffer_mem); test_error_ret(status,"set arg",status);
+ status = clSetKernelArg(get_max_size, 0, sizeof(cl_mem), &max_size_ret_mem);
+ test_error_ret(status, "set arg", status);
+ status = clSetKernelArg(writer, 0, sizeof(cl_mem), &buffer_mem);
+ test_error_ret(status, "set arg", status);
+ status = clSetKernelArg(reader, 0, sizeof(cl_mem), &buffer_mem);
+ test_error_ret(status, "set arg", status);
// Check the macro value of CL_DEVICE_MAX_GLOBAL_VARIABLE
const size_t one = 1;
- status = clEnqueueNDRangeKernel(queue,get_max_size,1,0,&one,0,0,0,0); test_error_ret(status,"enqueue size query",status);
- status = clFinish(queue); test_error_ret(status,"finish",status);
+ status =
+ clEnqueueNDRangeKernel(queue, get_max_size, 1, 0, &one, 0, 0, 0, 0);
+ test_error_ret(status, "enqueue size query", status);
+ status = clFinish(queue);
+ test_error_ret(status, "finish", status);
- cl_uchar *max_size_ret_ptr = (cl_uchar *)clEnqueueMapBuffer(queue, max_size_ret_mem, CL_TRUE, CL_MAP_READ, 0, sizeof(max_size_ret), 0, 0, 0, 0);
- if ( max_size_ret != max_size ) {
- log_error("Error: preprocessor definition for CL_DEVICE_MAX_GLOBAL_VARIABLE_SIZE is %llu and does not match device query value %llu\n",
- (unsigned long long) max_size_ret,
- (unsigned long long) max_size );
+ cl_uchar* max_size_ret_ptr = (cl_uchar*)clEnqueueMapBuffer(
+ queue, max_size_ret_mem, CL_TRUE, CL_MAP_READ, 0, sizeof(max_size_ret),
+ 0, 0, 0, 0);
+ if (max_size_ret != max_size)
+ {
+ log_error("Error: preprocessor definition for "
+ "CL_DEVICE_MAX_GLOBAL_VARIABLE_SIZE is %llu and does not "
+ "match device query value %llu\n",
+ (unsigned long long)max_size_ret,
+ (unsigned long long)max_size);
err |= 1;
}
clEnqueueUnmapMemObject(queue, max_size_ret_mem, max_size_ret_ptr, 0, 0, 0);
- RandomSeed rand_state_write( gRandomSeed );
- for ( size_t offset = 0; offset < max_size ; offset += arr_size ) {
- size_t curr_size = (max_size - offset) < arr_size ? (max_size - offset) : arr_size;
- l_set_randomly( buffer, curr_size, rand_state_write );
- status = clEnqueueWriteBuffer (queue, buffer_mem, CL_TRUE, 0, curr_size, buffer, 0, 0, 0);test_error_ret(status,"populate buffer_mem object",status);
- status = clEnqueueNDRangeKernel(queue,writer,1,&offset,&curr_size,0,0,0,0); test_error_ret(status,"enqueue writer",status);
- status = clFinish(queue); test_error_ret(status,"finish",status);
+ RandomSeed rand_state_write(gRandomSeed);
+ for (size_t offset = 0; offset < max_size; offset += arr_size)
+ {
+ size_t curr_size =
+ (max_size - offset) < arr_size ? (max_size - offset) : arr_size;
+ l_set_randomly(buffer, curr_size, rand_state_write);
+ status = clEnqueueWriteBuffer(queue, buffer_mem, CL_TRUE, 0, curr_size,
+ buffer, 0, 0, 0);
+ test_error_ret(status, "populate buffer_mem object", status);
+ status = clEnqueueNDRangeKernel(queue, writer, 1, &offset, &curr_size,
+ 0, 0, 0, 0);
+ test_error_ret(status, "enqueue writer", status);
+ status = clFinish(queue);
+ test_error_ret(status, "finish", status);
}
- RandomSeed rand_state_read( gRandomSeed );
- for ( size_t offset = 0; offset < max_size ; offset += arr_size ) {
- size_t curr_size = (max_size - offset) < arr_size ? (max_size - offset) : arr_size;
- status = clEnqueueNDRangeKernel(queue,reader,1,&offset,&curr_size,0,0,0,0); test_error_ret(status,"enqueue reader",status);
- cl_uchar* read_mem_ptr = (cl_uchar *)clEnqueueMapBuffer(queue, buffer_mem, CL_TRUE, CL_MAP_READ, 0, curr_size, 0, 0, 0, &status);test_error_ret(status,"map read data",status);
- l_set_randomly( buffer, curr_size, rand_state_read );
- err |= l_compare( "capacity", buffer, read_mem_ptr, curr_size, ti );
+ RandomSeed rand_state_read(gRandomSeed);
+ for (size_t offset = 0; offset < max_size; offset += arr_size)
+ {
+ size_t curr_size =
+ (max_size - offset) < arr_size ? (max_size - offset) : arr_size;
+ status = clEnqueueNDRangeKernel(queue, reader, 1, &offset, &curr_size,
+ 0, 0, 0, 0);
+ test_error_ret(status, "enqueue reader", status);
+ cl_uchar* read_mem_ptr = (cl_uchar*)clEnqueueMapBuffer(
+ queue, buffer_mem, CL_TRUE, CL_MAP_READ, 0, curr_size, 0, 0, 0,
+ &status);
+ test_error_ret(status, "map read data", status);
+ l_set_randomly(buffer, curr_size, rand_state_read);
+ err |= l_compare("capacity", buffer, read_mem_ptr, curr_size, ti);
clEnqueueUnmapMemObject(queue, buffer_mem, read_mem_ptr, 0, 0, 0);
}
- if ( CL_SUCCESS == err ) { log_info("OK\n"); FLUSH; }
+ if (CL_SUCCESS == err)
+ {
+ log_info("OK\n");
+ FLUSH;
+ }
align_free(buffer);
return err;
@@ -1339,32 +1702,33 @@
// Check operation on a user type.
-static int l_user_type( cl_device_id device, cl_context context, cl_command_queue queue, bool separate_compile )
+static int l_user_type(cl_device_id device, cl_context context,
+ cl_command_queue queue, bool separate_compile)
{
int err = CL_SUCCESS;
// Just test one type.
- const TypeInfo ti( l_find_type("uchar") );
- log_info(" l_user_type %s...", separate_compile ? "separate compilation" : "single source compilation" );
+ const TypeInfo ti(l_find_type("uchar"));
+ log_info(" l_user_type %s...",
+ separate_compile ? "separate compilation"
+ : "single source compilation");
- if ( separate_compile && ! l_linker_available ) {
+ if (separate_compile && !l_linker_available)
+ {
log_info("Separate compilation is not supported. Skipping test\n");
return err;
}
const char type_src[] =
"typedef struct { uchar c; uint i; } my_struct_t;\n\n";
- const char def_src[] =
- "my_struct_t var = { 'a', 42 };\n\n";
- const char decl_src[] =
- "extern my_struct_t var;\n\n";
+ const char def_src[] = "my_struct_t var = { 'a', 42 };\n\n";
+ const char decl_src[] = "extern my_struct_t var;\n\n";
// Don't use a host struct. We can't guarantee that the host
// compiler has the same structure layout as the device compiler.
- const char writer_src[] =
- "kernel void writer( uchar c, uint i ) {\n"
- " var.c = c;\n"
- " var.i = i;\n"
- "}\n\n";
+ const char writer_src[] = "kernel void writer( uchar c, uint i ) {\n"
+ " var.c = c;\n"
+ " var.i = i;\n"
+ "}\n\n";
const char reader_src[] =
"kernel void reader( global uchar* C, global uint* I ) {\n"
" *C = var.c;\n"
@@ -1373,36 +1737,53 @@
clProgramWrapper program;
- if ( separate_compile ) {
+ if (separate_compile)
+ {
// Separate compilation flow.
StringTable wksrc;
- wksrc.add( type_src );
- wksrc.add( def_src );
- wksrc.add( writer_src );
+ wksrc.add(type_src);
+ wksrc.add(def_src);
+ wksrc.add(writer_src);
StringTable rksrc;
- rksrc.add( type_src );
- rksrc.add( decl_src );
- rksrc.add( reader_src );
+ rksrc.add(type_src);
+ rksrc.add(decl_src);
+ rksrc.add(reader_src);
int status = CL_SUCCESS;
- clProgramWrapper writer_program( clCreateProgramWithSource( context, wksrc.num_str(), wksrc.strs(), wksrc.lengths(), &status ) );
- test_error_ret(status,"Failed to create writer program for user type test",status);
+ clProgramWrapper writer_program(clCreateProgramWithSource(
+ context, wksrc.num_str(), wksrc.strs(), wksrc.lengths(), &status));
+ test_error_ret(status,
+ "Failed to create writer program for user type test",
+ status);
- status = clCompileProgram( writer_program, 1, &device, OPTIONS, 0, 0, 0, 0, 0 );
- if(check_error(status, "Failed to compile writer program for user type test (%s)", IGetErrorString(status)))
+ status = clCompileProgram(writer_program, 1, &device, OPTIONS, 0, 0, 0,
+ 0, 0);
+ if (check_error(
+ status,
+ "Failed to compile writer program for user type test (%s)",
+ IGetErrorString(status)))
{
- print_build_log(writer_program, 1, &device, wksrc.num_str(), wksrc.strs(), wksrc.lengths(), OPTIONS);
+ print_build_log(writer_program, 1, &device, wksrc.num_str(),
+ wksrc.strs(), wksrc.lengths(), OPTIONS);
return status;
}
- clProgramWrapper reader_program( clCreateProgramWithSource( context, rksrc.num_str(), rksrc.strs(), rksrc.lengths(), &status ) );
- test_error_ret(status,"Failed to create reader program for user type test",status);
+ clProgramWrapper reader_program(clCreateProgramWithSource(
+ context, rksrc.num_str(), rksrc.strs(), rksrc.lengths(), &status));
+ test_error_ret(status,
+ "Failed to create reader program for user type test",
+ status);
- status = clCompileProgram( reader_program, 1, &device, OPTIONS, 0, 0, 0, 0, 0 );
- if(check_error(status, "Failed to compile reader program for user type test (%s)", IGetErrorString(status)))
+ status = clCompileProgram(reader_program, 1, &device, OPTIONS, 0, 0, 0,
+ 0, 0);
+ if (check_error(
+ status,
+ "Failed to compile reader program for user type test (%s)",
+ IGetErrorString(status)))
{
- print_build_log(reader_program, 1, &device, rksrc.num_str(), rksrc.strs(), rksrc.lengths(), OPTIONS);
+ print_build_log(reader_program, 1, &device, rksrc.num_str(),
+ rksrc.strs(), rksrc.lengths(), OPTIONS);
return status;
}
@@ -1410,33 +1791,45 @@
progs[0] = writer_program;
progs[1] = reader_program;
- program = clLinkProgram( context, 1, &device, "", 2, progs, 0, 0, &status );
- if(check_error(status, "Failed to link program for user type test (%s)", IGetErrorString(status)))
+ program =
+ clLinkProgram(context, 1, &device, "", 2, progs, 0, 0, &status);
+ if (check_error(status,
+ "Failed to link program for user type test (%s)",
+ IGetErrorString(status)))
{
print_build_log(program, 1, &device, 0, NULL, NULL, "");
return status;
}
- } else {
+ }
+ else
+ {
// Single compilation flow.
StringTable ksrc;
- ksrc.add( type_src );
- ksrc.add( def_src );
- ksrc.add( writer_src );
- ksrc.add( reader_src );
+ ksrc.add(type_src);
+ ksrc.add(def_src);
+ ksrc.add(writer_src);
+ ksrc.add(reader_src);
int status = CL_SUCCESS;
- status = create_single_kernel_helper_create_program(context, &program, ksrc.num_str(), ksrc.strs(), OPTIONS);
- if(check_error(status, "Failed to build program for user type test (%s)", IGetErrorString(status)))
+ status = create_single_kernel_helper_create_program(
+ context, &program, ksrc.num_str(), ksrc.strs(), OPTIONS);
+ if (check_error(status,
+ "Failed to build program for user type test (%s)",
+ IGetErrorString(status)))
{
- print_build_log(program, 1, &device, ksrc.num_str(), ksrc.strs(), ksrc.lengths(), OPTIONS);
+ print_build_log(program, 1, &device, ksrc.num_str(), ksrc.strs(),
+ ksrc.lengths(), OPTIONS);
return status;
}
status = clBuildProgram(program, 1, &device, OPTIONS, 0, 0);
- if(check_error(status, "Failed to compile program for user type test (%s)", IGetErrorString(status)))
+ if (check_error(status,
+ "Failed to compile program for user type test (%s)",
+ IGetErrorString(status)))
{
- print_build_log(program, 1, &device, ksrc.num_str(), ksrc.strs(), ksrc.lengths(), OPTIONS);
+ print_build_log(program, 1, &device, ksrc.num_str(), ksrc.strs(),
+ ksrc.lengths(), OPTIONS);
return status;
}
}
@@ -1444,48 +1837,71 @@
// Check size query.
size_t used_bytes = 0;
- int status = clGetProgramBuildInfo( program, device, CL_PROGRAM_BUILD_GLOBAL_VARIABLE_TOTAL_SIZE, sizeof(used_bytes), &used_bytes, 0 );
- test_error_ret(status,"Failed to query global variable total size",status);
+ int status = clGetProgramBuildInfo(
+ program, device, CL_PROGRAM_BUILD_GLOBAL_VARIABLE_TOTAL_SIZE,
+ sizeof(used_bytes), &used_bytes, 0);
+ test_error_ret(status, "Failed to query global variable total size",
+ status);
size_t expected_size = sizeof(cl_uchar) + sizeof(cl_uint);
- if ( used_bytes < expected_size ) {
- log_error("Error: program query for global variable total size query failed: Expected at least %llu but got %llu\n", (unsigned long long)expected_size, (unsigned long long)used_bytes );
+ if (used_bytes < expected_size)
+ {
+ log_error("Error: program query for global variable total size query "
+ "failed: Expected at least %llu but got %llu\n",
+ (unsigned long long)expected_size,
+ (unsigned long long)used_bytes);
err |= 1;
}
// Prepare to execute
- clKernelWrapper writer( clCreateKernel( program, "writer", &status ) );
- test_error_ret(status,"Failed to create writer kernel for user type test",status);
- clKernelWrapper reader( clCreateKernel( program, "reader", &status ) );
- test_error_ret(status,"Failed to create reader kernel for user type test",status);
+ clKernelWrapper writer(clCreateKernel(program, "writer", &status));
+ test_error_ret(status, "Failed to create writer kernel for user type test",
+ status);
+ clKernelWrapper reader(clCreateKernel(program, "reader", &status));
+ test_error_ret(status, "Failed to create reader kernel for user type test",
+ status);
// Set up data.
cl_uchar* uchar_data = (cl_uchar*)align_malloc(sizeof(cl_uchar), ALIGNMENT);
cl_uint* uint_data = (cl_uint*)align_malloc(sizeof(cl_uint), ALIGNMENT);
- clMemWrapper uchar_mem( clCreateBuffer( context, CL_MEM_USE_HOST_PTR, sizeof(cl_uchar), uchar_data, &status ) );
- test_error_ret(status,"Failed to allocate uchar buffer",status);
- clMemWrapper uint_mem( clCreateBuffer( context, CL_MEM_USE_HOST_PTR, sizeof(cl_uint), uint_data, &status ) );
- test_error_ret(status,"Failed to allocate uint buffer",status);
+ clMemWrapper uchar_mem(clCreateBuffer(
+ context, CL_MEM_USE_HOST_PTR, sizeof(cl_uchar), uchar_data, &status));
+ test_error_ret(status, "Failed to allocate uchar buffer", status);
+ clMemWrapper uint_mem(clCreateBuffer(context, CL_MEM_USE_HOST_PTR,
+ sizeof(cl_uint), uint_data, &status));
+ test_error_ret(status, "Failed to allocate uint buffer", status);
- status = clSetKernelArg(reader,0,sizeof(cl_mem),&uchar_mem); test_error_ret(status,"set arg",status);
- status = clSetKernelArg(reader,1,sizeof(cl_mem),&uint_mem); test_error_ret(status,"set arg",status);
+ status = clSetKernelArg(reader, 0, sizeof(cl_mem), &uchar_mem);
+ test_error_ret(status, "set arg", status);
+ status = clSetKernelArg(reader, 1, sizeof(cl_mem), &uint_mem);
+ test_error_ret(status, "set arg", status);
cl_uchar expected_uchar = 'a';
cl_uint expected_uint = 42;
- for ( unsigned iter = 0; iter < 5 ; iter++ ) { // Must go around at least twice
+ for (unsigned iter = 0; iter < 5; iter++)
+ { // Must go around at least twice
// Read back data
*uchar_data = -1;
*uint_data = -1;
const size_t one = 1;
- status = clEnqueueNDRangeKernel(queue,reader,1,0,&one,0,0,0,0); test_error_ret(status,"enqueue reader",status);
- status = clFinish(queue); test_error_ret(status,"finish",status);
+ status = clEnqueueNDRangeKernel(queue, reader, 1, 0, &one, 0, 0, 0, 0);
+ test_error_ret(status, "enqueue reader", status);
+ status = clFinish(queue);
+ test_error_ret(status, "finish", status);
- cl_uchar *uint_data_ptr = (cl_uchar *)clEnqueueMapBuffer(queue, uint_mem, CL_TRUE, CL_MAP_READ, 0, sizeof(cl_uint), 0, 0, 0, 0);
- cl_uchar *uchar_data_ptr = (cl_uchar *)clEnqueueMapBuffer(queue, uchar_mem, CL_TRUE, CL_MAP_READ, 0, sizeof(cl_uchar), 0, 0, 0, 0);
+ cl_uchar* uint_data_ptr =
+ (cl_uchar*)clEnqueueMapBuffer(queue, uint_mem, CL_TRUE, CL_MAP_READ,
+ 0, sizeof(cl_uint), 0, 0, 0, 0);
+ cl_uchar* uchar_data_ptr = (cl_uchar*)clEnqueueMapBuffer(
+ queue, uchar_mem, CL_TRUE, CL_MAP_READ, 0, sizeof(cl_uchar), 0, 0,
+ 0, 0);
- if ( expected_uchar != *uchar_data || expected_uint != *uint_data ) {
- log_error("FAILED: Iteration %d Got (0x%2x,%d) but expected (0x%2x,%d)\n",
- iter, (int)*uchar_data, *uint_data, (int)expected_uchar, expected_uint );
+ if (expected_uchar != *uchar_data || expected_uint != *uint_data)
+ {
+ log_error(
+ "FAILED: Iteration %d Got (0x%2x,%d) but expected (0x%2x,%d)\n",
+ iter, (int)*uchar_data, *uint_data, (int)expected_uchar,
+ expected_uint);
err |= 1;
}
@@ -1499,13 +1915,21 @@
// Write the new values into persistent store.
*uchar_data = expected_uchar;
*uint_data = expected_uint;
- status = clSetKernelArg(writer,0,sizeof(cl_uchar),uchar_data); test_error_ret(status,"set arg",status);
- status = clSetKernelArg(writer,1,sizeof(cl_uint),uint_data); test_error_ret(status,"set arg",status);
- status = clEnqueueNDRangeKernel(queue,writer,1,0,&one,0,0,0,0); test_error_ret(status,"enqueue writer",status);
- status = clFinish(queue); test_error_ret(status,"finish",status);
+ status = clSetKernelArg(writer, 0, sizeof(cl_uchar), uchar_data);
+ test_error_ret(status, "set arg", status);
+ status = clSetKernelArg(writer, 1, sizeof(cl_uint), uint_data);
+ test_error_ret(status, "set arg", status);
+ status = clEnqueueNDRangeKernel(queue, writer, 1, 0, &one, 0, 0, 0, 0);
+ test_error_ret(status, "enqueue writer", status);
+ status = clFinish(queue);
+ test_error_ret(status, "finish", status);
}
- if ( CL_SUCCESS == err ) { log_info("OK\n"); FLUSH; }
+ if (CL_SUCCESS == err)
+ {
+ log_info("OK\n");
+ FLUSH;
+ }
align_free(uchar_data);
align_free(uint_data);
return err;
@@ -1540,7 +1964,8 @@
// Test support for variables at program scope. Miscellaneous
-int test_progvar_prog_scope_misc(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
+int test_progvar_prog_scope_misc(cl_device_id device, cl_context context,
+ cl_command_queue queue, int num_elements)
{
cl_bool skip{ CL_FALSE };
auto error = should_skip(device, skip);
@@ -1559,19 +1984,20 @@
cl_int err = CL_SUCCESS;
- err = l_get_device_info( device, &max_size, &pref_size );
- err |= l_build_type_table( device );
+ err = l_get_device_info(device, &max_size, &pref_size);
+ err |= l_build_type_table(device);
- err |= l_capacity( device, context, queue, max_size );
- err |= l_user_type( device, context, queue, false );
- err |= l_user_type( device, context, queue, true );
+ err |= l_capacity(device, context, queue, max_size);
+ err |= l_user_type(device, context, queue, false);
+ err |= l_user_type(device, context, queue, true);
return err;
}
// Test support for variables at program scope. Unitialized data
-int test_progvar_prog_scope_uninit(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
+int test_progvar_prog_scope_uninit(cl_device_id device, cl_context context,
+ cl_command_queue queue, int num_elements)
{
cl_bool skip{ CL_FALSE };
auto error = should_skip(device, skip);
@@ -1591,16 +2017,17 @@
cl_int err = CL_SUCCESS;
- err = l_get_device_info( device, &max_size, &pref_size );
- err |= l_build_type_table( device );
+ err = l_get_device_info(device, &max_size, &pref_size);
+ err |= l_build_type_table(device);
- err |= l_write_read( device, context, queue );
+ err |= l_write_read(device, context, queue);
return err;
}
// Test support for variables at program scope. Initialized data.
-int test_progvar_prog_scope_init(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
+int test_progvar_prog_scope_init(cl_device_id device, cl_context context,
+ cl_command_queue queue, int num_elements)
{
cl_bool skip{ CL_FALSE };
auto error = should_skip(device, skip);
@@ -1619,17 +2046,18 @@
cl_int err = CL_SUCCESS;
- err = l_get_device_info( device, &max_size, &pref_size );
- err |= l_build_type_table( device );
+ err = l_get_device_info(device, &max_size, &pref_size);
+ err |= l_build_type_table(device);
- err |= l_init_write_read( device, context, queue );
+ err |= l_init_write_read(device, context, queue);
return err;
}
// A simple test for support of static variables inside a kernel.
-int test_progvar_func_scope(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
+int test_progvar_func_scope(cl_device_id device, cl_context context,
+ cl_command_queue queue, int num_elements)
{
cl_bool skip{ CL_FALSE };
auto error = should_skip(device, skip);
@@ -1649,48 +2077,64 @@
// Deliberately have two variables with the same name but in different
// scopes.
// Also, use a large initialized structure in both cases.
+ // clang-format off
const char prog_src[] =
"typedef struct { char c; int16 i; } mystruct_t;\n"
- "kernel void test_bump( global int* value, int which ) {\n"
- " if ( which ) {\n"
+ "kernel void test_bump(global int* value, int which) {\n"
+ " if (which) {\n"
// Explicit address space.
// Last element set to 0
- " static global mystruct_t persistent = {'a',(int16)(0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,0) };\n"
+ " static global mystruct_t persistent = { 'a', (int16)(0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,0) };\n"
" *value = persistent.i.sf++;\n"
" } else {\n"
// Implicitly global
// Last element set to 100
- " static mystruct_t persistent = {'b',(int16)(0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,100) };\n"
+ " static mystruct_t persistent = { 'b' , (int16)(0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,100) };\n"
" *value = persistent.i.sf++;\n"
" }\n"
"}\n";
+ // clang-format on
StringTable ksrc;
- ksrc.add( prog_src );
+ ksrc.add(prog_src);
int status = CL_SUCCESS;
clProgramWrapper program;
clKernelWrapper test_bump;
- status = create_single_kernel_helper_with_build_options(context, &program, &test_bump, ksrc.num_str(), ksrc.strs(), "test_bump", OPTIONS);
- test_error_ret(status, "Failed to create program for function static variable test", status);
+ status = create_single_kernel_helper_with_build_options(
+ context, &program, &test_bump, ksrc.num_str(), ksrc.strs(), "test_bump",
+ OPTIONS);
+ test_error_ret(status,
+ "Failed to create program for function static variable test",
+ status);
// Check size query.
size_t used_bytes = 0;
- status = clGetProgramBuildInfo( program, device, CL_PROGRAM_BUILD_GLOBAL_VARIABLE_TOTAL_SIZE, sizeof(used_bytes), &used_bytes, 0 );
- test_error_ret(status,"Failed to query global variable total size",status);
+ status = clGetProgramBuildInfo(program, device,
+ CL_PROGRAM_BUILD_GLOBAL_VARIABLE_TOTAL_SIZE,
+ sizeof(used_bytes), &used_bytes, 0);
+ test_error_ret(status, "Failed to query global variable total size",
+ status);
size_t expected_size = 2 * sizeof(cl_int); // Two ints.
- if ( used_bytes < expected_size ) {
- log_error("Error: program query for global variable total size query failed: Expected at least %llu but got %llu\n", (unsigned long long)expected_size, (unsigned long long)used_bytes );
+ if (used_bytes < expected_size)
+ {
+ log_error("Error: program query for global variable total size query "
+ "failed: Expected at least %llu but got %llu\n",
+ (unsigned long long)expected_size,
+ (unsigned long long)used_bytes);
err |= 1;
}
// Prepare the data.
cl_int counter_value = 0;
- clMemWrapper counter_value_mem( clCreateBuffer( context, CL_MEM_USE_HOST_PTR, sizeof(counter_value), &counter_value, &status ) );
- test_error_ret(status,"Failed to allocate counter query buffer",status);
+ clMemWrapper counter_value_mem(clCreateBuffer(context, CL_MEM_USE_HOST_PTR,
+ sizeof(counter_value),
+ &counter_value, &status));
+ test_error_ret(status, "Failed to allocate counter query buffer", status);
- status = clSetKernelArg(test_bump,0,sizeof(cl_mem),&counter_value_mem); test_error_ret(status,"set arg",status);
+ status = clSetKernelArg(test_bump, 0, sizeof(cl_mem), &counter_value_mem);
+ test_error_ret(status, "set arg", status);
// Go a few rounds, alternating between the two counters in the kernel.
@@ -1700,26 +2144,41 @@
cl_int expected_counter[2] = { 100, 0 };
const size_t one = 1;
- for ( int iround = 0; iround < 5 ; iround++ ) { // Must go at least twice around
- for ( int iwhich = 0; iwhich < 2 ; iwhich++ ) { // Cover both counters
- status = clSetKernelArg(test_bump,1,sizeof(iwhich),&iwhich); test_error_ret(status,"set arg",status);
- status = clEnqueueNDRangeKernel(queue,test_bump,1,0,&one,0,0,0,0); test_error_ret(status,"enqueue test_bump",status);
- status = clFinish(queue); test_error_ret(status,"finish",status);
+ for (int iround = 0; iround < 5; iround++)
+ { // Must go at least twice around
+ for (int iwhich = 0; iwhich < 2; iwhich++)
+ { // Cover both counters
+ status = clSetKernelArg(test_bump, 1, sizeof(iwhich), &iwhich);
+ test_error_ret(status, "set arg", status);
+ status = clEnqueueNDRangeKernel(queue, test_bump, 1, 0, &one, 0, 0,
+ 0, 0);
+ test_error_ret(status, "enqueue test_bump", status);
+ status = clFinish(queue);
+ test_error_ret(status, "finish", status);
- cl_uchar *counter_value_ptr = (cl_uchar *)clEnqueueMapBuffer(queue, counter_value_mem, CL_TRUE, CL_MAP_READ, 0, sizeof(counter_value), 0, 0, 0, 0);
+ cl_uchar* counter_value_ptr = (cl_uchar*)clEnqueueMapBuffer(
+ queue, counter_value_mem, CL_TRUE, CL_MAP_READ, 0,
+ sizeof(counter_value), 0, 0, 0, 0);
- if ( counter_value != expected_counter[iwhich] ) {
- log_error("Error: Round %d on counter %d: Expected %d but got %d\n",
- iround, iwhich, expected_counter[iwhich], counter_value );
+ if (counter_value != expected_counter[iwhich])
+ {
+ log_error(
+ "Error: Round %d on counter %d: Expected %d but got %d\n",
+ iround, iwhich, expected_counter[iwhich], counter_value);
err |= 1;
}
expected_counter[iwhich]++; // Emulate behaviour of the kernel.
- clEnqueueUnmapMemObject(queue, counter_value_mem, counter_value_ptr, 0, 0, 0);
+ clEnqueueUnmapMemObject(queue, counter_value_mem, counter_value_ptr,
+ 0, 0, 0);
}
}
- if ( CL_SUCCESS == err ) { log_info("OK\n"); FLUSH; }
+ if (CL_SUCCESS == err)
+ {
+ log_info("OK\n");
+ FLUSH;
+ }
return err;
}
diff --git a/test_conformance/basic/test_sizeof.cpp b/test_conformance/basic/test_sizeof.cpp
index 6b1ddb5..e980ed6 100644
--- a/test_conformance/basic/test_sizeof.cpp
+++ b/test_conformance/basic/test_sizeof.cpp
@@ -35,9 +35,9 @@
"}\n"
};
- cl_program p;
- cl_kernel k;
- cl_mem m;
+ clProgramWrapper p;
+ clKernelWrapper k;
+ clMemWrapper m;
cl_uint temp;
@@ -51,42 +51,19 @@
}
cl_int err = create_single_kernel_helper_with_build_options(
context, &p, &k, 4, sizeof_kernel_code, "test_sizeof", nullptr);
- if( err )
- return err;
+ test_error(err, "Failed to build kernel/program.");
m = clCreateBuffer( context, CL_MEM_WRITE_ONLY | CL_MEM_COPY_HOST_PTR, sizeof( cl_ulong ), size, &err );
- if( NULL == m )
- {
- clReleaseProgram( p );
- clReleaseKernel( k );
- log_error("\nclCreateBuffer FAILED\n");
- return err;
- }
+ test_error(err, "clCreateBuffer failed.");
err = clSetKernelArg( k, 0, sizeof( cl_mem ), &m );
- if( err )
- {
- clReleaseProgram( p );
- clReleaseKernel( k );
- clReleaseMemObject( m );
- log_error("\nclSetKernelArg FAILED\n");
- return err;
- }
+ test_error(err, "clSetKernelArg failed.");
err = clEnqueueTask( queue, k, 0, NULL, NULL );
- clReleaseProgram( p );
- clReleaseKernel( k );
- if( err )
- {
- clReleaseMemObject( m );
- log_error( "\nclEnqueueTask FAILED\n" );
- return err;
- }
+ test_error(err, "clEnqueueTask failed.");
err = clEnqueueReadBuffer( queue, m, CL_TRUE, 0, sizeof( cl_uint ), &temp, 0, NULL, NULL );
- clReleaseMemObject( m );
- if( err )
- log_error( "\nclEnqueueReadBuffer FAILED\n" );
+ test_error(err, "clEnqueueReadBuffer failed.");
*size = (cl_ulong) temp;
diff --git a/test_conformance/compiler/test_compiler_defines_for_extensions.cpp b/test_conformance/compiler/test_compiler_defines_for_extensions.cpp
index 94657d6..b95b0f5 100644
--- a/test_conformance/compiler/test_compiler_defines_for_extensions.cpp
+++ b/test_conformance/compiler/test_compiler_defines_for_extensions.cpp
@@ -79,11 +79,13 @@
"cl_khr_spirv_linkonce_odr",
"cl_khr_semaphore",
"cl_khr_external_semaphore",
- "cl_khr_external_semaphore_opaque_fd",
+ "cl_khr_external_semaphore_win32",
"cl_khr_external_semaphore_sync_fd",
- "cl_khr_command_buffer",
+ "cl_khr_external_semaphore_opaque_fd",
"cl_khr_external_memory",
+ "cl_khr_external_memory_win32",
"cl_khr_external_memory_opaque_fd",
+ "cl_khr_command_buffer",
"cl_khr_command_buffer_mutable_dispatch",
};
diff --git a/test_conformance/contractions/contractions.cpp b/test_conformance/contractions/contractions.cpp
index dddebb4..474fd36 100644
--- a/test_conformance/contractions/contractions.cpp
+++ b/test_conformance/contractions/contractions.cpp
@@ -1,6 +1,6 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
-//
+//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
@@ -434,7 +434,6 @@
gArgCount++;
}
}
- vlog( "\n\nTest binary built %s %s\n", __DATE__, __TIME__ );
PrintArch();
diff --git a/test_conformance/conversions/test_conversions.cpp b/test_conformance/conversions/test_conversions.cpp
index 788af99..2b18b92 100644
--- a/test_conformance/conversions/test_conversions.cpp
+++ b/test_conformance/conversions/test_conversions.cpp
@@ -1,6 +1,6 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
-//
+//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
@@ -343,7 +343,7 @@
static int ParseArgs( int argc, const char **argv )
{
int i;
- argList = (const char **)calloc( argc - 1, sizeof( char*) );
+ argList = (const char **)calloc(argc, sizeof(char *));
argCount = 0;
if( NULL == argList && argc > 1 )
@@ -484,8 +484,6 @@
vlog( "\n" );
- vlog( "Test binary built %s %s\n", __DATE__, __TIME__ );
-
PrintArch();
if( gWimpyMode )
diff --git a/test_conformance/half/main.cpp b/test_conformance/half/main.cpp
index 6600cc5..6bc7db9 100644
--- a/test_conformance/half/main.cpp
+++ b/test_conformance/half/main.cpp
@@ -1,6 +1,6 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
-//
+//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
@@ -131,8 +131,7 @@
static int ParseArgs( int argc, const char **argv )
{
int i;
- argList = (const char **)calloc( argc - 1, sizeof( char*) );
-
+ argList = (const char **)calloc(argc, sizeof(char *));
if( NULL == argList )
{
vlog_error( "Failed to allocate memory for argList.\n" );
@@ -222,7 +221,6 @@
gWimpyMode = 1;
}
- vlog( "Test binary built %s %s\n", __DATE__, __TIME__ );
PrintArch();
if( gWimpyMode )
{
@@ -248,4 +246,3 @@
vlog("\t\t%s\n", test_list[i].name );
}
}
-
diff --git a/test_conformance/images/clCopyImage/test_copy_1D.cpp b/test_conformance/images/clCopyImage/test_copy_1D.cpp
index 2c996c7..0f6f3ce 100644
--- a/test_conformance/images/clCopyImage/test_copy_1D.cpp
+++ b/test_conformance/images/clCopyImage/test_copy_1D.cpp
@@ -113,6 +113,7 @@
if (memSize > (cl_ulong)SIZE_MAX) {
memSize = (cl_ulong)SIZE_MAX;
+ maxAllocSize = (cl_ulong)SIZE_MAX;
}
if( gTestSmallImages )
diff --git a/test_conformance/images/clCopyImage/test_copy_1D_array.cpp b/test_conformance/images/clCopyImage/test_copy_1D_array.cpp
index 0b61693..f0b610b 100644
--- a/test_conformance/images/clCopyImage/test_copy_1D_array.cpp
+++ b/test_conformance/images/clCopyImage/test_copy_1D_array.cpp
@@ -118,6 +118,7 @@
if (memSize > (cl_ulong)SIZE_MAX) {
memSize = (cl_ulong)SIZE_MAX;
+ maxAllocSize = (cl_ulong)SIZE_MAX;
}
if( gTestSmallImages )
diff --git a/test_conformance/images/clCopyImage/test_copy_2D.cpp b/test_conformance/images/clCopyImage/test_copy_2D.cpp
index 1a69a1f..448b47f 100644
--- a/test_conformance/images/clCopyImage/test_copy_2D.cpp
+++ b/test_conformance/images/clCopyImage/test_copy_2D.cpp
@@ -125,6 +125,7 @@
if (memSize > (cl_ulong)SIZE_MAX) {
memSize = (cl_ulong)SIZE_MAX;
+ maxAllocSize = (cl_ulong)SIZE_MAX;
}
if( gTestSmallImages )
diff --git a/test_conformance/images/clCopyImage/test_copy_2D_2D_array.cpp b/test_conformance/images/clCopyImage/test_copy_2D_2D_array.cpp
index eb6dd55..1819d87 100644
--- a/test_conformance/images/clCopyImage/test_copy_2D_2D_array.cpp
+++ b/test_conformance/images/clCopyImage/test_copy_2D_2D_array.cpp
@@ -224,6 +224,7 @@
if (memSize > (cl_ulong)SIZE_MAX) {
memSize = (cl_ulong)SIZE_MAX;
+ maxAllocSize = (cl_ulong)SIZE_MAX;
}
if( gTestSmallImages )
diff --git a/test_conformance/images/clCopyImage/test_copy_2D_3D.cpp b/test_conformance/images/clCopyImage/test_copy_2D_3D.cpp
index 8a56c95..4ab6b42 100644
--- a/test_conformance/images/clCopyImage/test_copy_2D_3D.cpp
+++ b/test_conformance/images/clCopyImage/test_copy_2D_3D.cpp
@@ -230,6 +230,7 @@
if (memSize > (cl_ulong)SIZE_MAX) {
memSize = (cl_ulong)SIZE_MAX;
+ maxAllocSize = (cl_ulong)SIZE_MAX;
}
if( gTestSmallImages )
diff --git a/test_conformance/images/clCopyImage/test_copy_2D_array.cpp b/test_conformance/images/clCopyImage/test_copy_2D_array.cpp
index 6327ba5..3376bf9 100644
--- a/test_conformance/images/clCopyImage/test_copy_2D_array.cpp
+++ b/test_conformance/images/clCopyImage/test_copy_2D_array.cpp
@@ -71,6 +71,7 @@
if (memSize > (cl_ulong)SIZE_MAX) {
memSize = (cl_ulong)SIZE_MAX;
+ maxAllocSize = (cl_ulong)SIZE_MAX;
}
if( gTestSmallImages )
diff --git a/test_conformance/images/clCopyImage/test_copy_3D.cpp b/test_conformance/images/clCopyImage/test_copy_3D.cpp
index da6731d..cdfdcce 100644
--- a/test_conformance/images/clCopyImage/test_copy_3D.cpp
+++ b/test_conformance/images/clCopyImage/test_copy_3D.cpp
@@ -57,6 +57,7 @@
if (memSize > (cl_ulong)SIZE_MAX) {
memSize = (cl_ulong)SIZE_MAX;
+ maxAllocSize = (cl_ulong)SIZE_MAX;
}
if( gTestSmallImages )
diff --git a/test_conformance/images/clCopyImage/test_copy_3D_2D_array.cpp b/test_conformance/images/clCopyImage/test_copy_3D_2D_array.cpp
index c098f64..1da1e47 100644
--- a/test_conformance/images/clCopyImage/test_copy_3D_2D_array.cpp
+++ b/test_conformance/images/clCopyImage/test_copy_3D_2D_array.cpp
@@ -251,6 +251,7 @@
if (memSize > (cl_ulong)SIZE_MAX) {
memSize = (cl_ulong)SIZE_MAX;
+ maxAllocSize = (cl_ulong)SIZE_MAX;
}
if( gTestSmallImages )
diff --git a/test_conformance/images/clFillImage/test_fill_1D.cpp b/test_conformance/images/clFillImage/test_fill_1D.cpp
index c3f2318..b1550bf 100644
--- a/test_conformance/images/clFillImage/test_fill_1D.cpp
+++ b/test_conformance/images/clFillImage/test_fill_1D.cpp
@@ -80,6 +80,7 @@
if (memSize > (cl_ulong)SIZE_MAX) {
memSize = (cl_ulong)SIZE_MAX;
+ maxAllocSize = (cl_ulong)SIZE_MAX;
}
if ( gTestSmallImages )
diff --git a/test_conformance/images/clFillImage/test_fill_1D_array.cpp b/test_conformance/images/clFillImage/test_fill_1D_array.cpp
index b4347a4..be32ec6 100644
--- a/test_conformance/images/clFillImage/test_fill_1D_array.cpp
+++ b/test_conformance/images/clFillImage/test_fill_1D_array.cpp
@@ -83,6 +83,7 @@
if (memSize > (cl_ulong)SIZE_MAX) {
memSize = (cl_ulong)SIZE_MAX;
+ maxAllocSize = (cl_ulong)SIZE_MAX;
}
if ( gTestSmallImages )
diff --git a/test_conformance/images/clFillImage/test_fill_2D.cpp b/test_conformance/images/clFillImage/test_fill_2D.cpp
index bb66fc2..e941abc 100644
--- a/test_conformance/images/clFillImage/test_fill_2D.cpp
+++ b/test_conformance/images/clFillImage/test_fill_2D.cpp
@@ -83,6 +83,7 @@
if (memSize > (cl_ulong)SIZE_MAX) {
memSize = (cl_ulong)SIZE_MAX;
+ maxAllocSize = (cl_ulong)SIZE_MAX;
}
if ( gTestSmallImages )
diff --git a/test_conformance/images/clFillImage/test_fill_2D_array.cpp b/test_conformance/images/clFillImage/test_fill_2D_array.cpp
index 3265aab..38196cf 100644
--- a/test_conformance/images/clFillImage/test_fill_2D_array.cpp
+++ b/test_conformance/images/clFillImage/test_fill_2D_array.cpp
@@ -87,6 +87,7 @@
if (memSize > (cl_ulong)SIZE_MAX) {
memSize = (cl_ulong)SIZE_MAX;
+ maxAllocSize = (cl_ulong)SIZE_MAX;
}
if ( gTestSmallImages )
diff --git a/test_conformance/images/clFillImage/test_fill_3D.cpp b/test_conformance/images/clFillImage/test_fill_3D.cpp
index 9db0ac7..0b8e4e5 100644
--- a/test_conformance/images/clFillImage/test_fill_3D.cpp
+++ b/test_conformance/images/clFillImage/test_fill_3D.cpp
@@ -87,6 +87,7 @@
if (memSize > (cl_ulong)SIZE_MAX) {
memSize = (cl_ulong)SIZE_MAX;
+ maxAllocSize = (cl_ulong)SIZE_MAX;
}
if ( gTestSmallImages )
diff --git a/test_conformance/images/clGetInfo/test_1D.cpp b/test_conformance/images/clGetInfo/test_1D.cpp
index 0d704b8..7e04485 100644
--- a/test_conformance/images/clGetInfo/test_1D.cpp
+++ b/test_conformance/images/clGetInfo/test_1D.cpp
@@ -46,6 +46,7 @@
if (memSize > (cl_ulong)SIZE_MAX) {
memSize = (cl_ulong)SIZE_MAX;
+ maxAllocSize = (cl_ulong)SIZE_MAX;
}
if( gTestSmallImages )
diff --git a/test_conformance/images/clGetInfo/test_1D_2D_array.cpp b/test_conformance/images/clGetInfo/test_1D_2D_array.cpp
index 447fc7c..c35bf22 100644
--- a/test_conformance/images/clGetInfo/test_1D_2D_array.cpp
+++ b/test_conformance/images/clGetInfo/test_1D_2D_array.cpp
@@ -44,6 +44,7 @@
if (memSize > (cl_ulong)SIZE_MAX) {
memSize = (cl_ulong)SIZE_MAX;
+ maxAllocSize = (cl_ulong)SIZE_MAX;
}
if( gTestSmallImages )
@@ -168,6 +169,7 @@
if (memSize > (cl_ulong)SIZE_MAX) {
memSize = (cl_ulong)SIZE_MAX;
+ maxAllocSize = (cl_ulong)SIZE_MAX;
}
if( gTestSmallImages )
diff --git a/test_conformance/images/clGetInfo/test_2D.cpp b/test_conformance/images/clGetInfo/test_2D.cpp
index 74a6012..764b186 100644
--- a/test_conformance/images/clGetInfo/test_2D.cpp
+++ b/test_conformance/images/clGetInfo/test_2D.cpp
@@ -285,6 +285,7 @@
if (memSize > (cl_ulong)SIZE_MAX) {
memSize = (cl_ulong)SIZE_MAX;
+ maxAllocSize = (cl_ulong)SIZE_MAX;
}
if( gTestSmallImages )
diff --git a/test_conformance/images/clGetInfo/test_3D.cpp b/test_conformance/images/clGetInfo/test_3D.cpp
index af5062e..e126186 100644
--- a/test_conformance/images/clGetInfo/test_3D.cpp
+++ b/test_conformance/images/clGetInfo/test_3D.cpp
@@ -47,6 +47,7 @@
if (memSize > (cl_ulong)SIZE_MAX) {
memSize = (cl_ulong)SIZE_MAX;
+ maxAllocSize = (cl_ulong)SIZE_MAX;
}
if( gTestSmallImages )
diff --git a/test_conformance/images/clReadWriteImage/test_read_1D.cpp b/test_conformance/images/clReadWriteImage/test_read_1D.cpp
index 42933c0..2d94dc8 100644
--- a/test_conformance/images/clReadWriteImage/test_read_1D.cpp
+++ b/test_conformance/images/clReadWriteImage/test_read_1D.cpp
@@ -187,6 +187,7 @@
if (memSize > (cl_ulong)SIZE_MAX) {
memSize = (cl_ulong)SIZE_MAX;
+ maxAllocSize = (cl_ulong)SIZE_MAX;
}
if( gTestSmallImages )
diff --git a/test_conformance/images/clReadWriteImage/test_read_1D_array.cpp b/test_conformance/images/clReadWriteImage/test_read_1D_array.cpp
index efd2a79..cc90204 100644
--- a/test_conformance/images/clReadWriteImage/test_read_1D_array.cpp
+++ b/test_conformance/images/clReadWriteImage/test_read_1D_array.cpp
@@ -191,6 +191,7 @@
if (memSize > (cl_ulong)SIZE_MAX) {
memSize = (cl_ulong)SIZE_MAX;
+ maxAllocSize = (cl_ulong)SIZE_MAX;
}
if( gTestSmallImages )
diff --git a/test_conformance/images/clReadWriteImage/test_read_2D.cpp b/test_conformance/images/clReadWriteImage/test_read_2D.cpp
index b7f8553..b610287 100644
--- a/test_conformance/images/clReadWriteImage/test_read_2D.cpp
+++ b/test_conformance/images/clReadWriteImage/test_read_2D.cpp
@@ -194,6 +194,7 @@
if (memSize > (cl_ulong)SIZE_MAX) {
memSize = (cl_ulong)SIZE_MAX;
+ maxAllocSize = (cl_ulong)SIZE_MAX;
}
if( gTestSmallImages )
diff --git a/test_conformance/images/clReadWriteImage/test_read_2D_array.cpp b/test_conformance/images/clReadWriteImage/test_read_2D_array.cpp
index 5889ad6..401b0e4 100644
--- a/test_conformance/images/clReadWriteImage/test_read_2D_array.cpp
+++ b/test_conformance/images/clReadWriteImage/test_read_2D_array.cpp
@@ -169,6 +169,7 @@
if (memSize > (cl_ulong)SIZE_MAX) {
memSize = (cl_ulong)SIZE_MAX;
+ maxAllocSize = (cl_ulong)SIZE_MAX;
}
if( gTestSmallImages )
diff --git a/test_conformance/images/clReadWriteImage/test_read_3D.cpp b/test_conformance/images/clReadWriteImage/test_read_3D.cpp
index 6f73f42..ced04ab 100644
--- a/test_conformance/images/clReadWriteImage/test_read_3D.cpp
+++ b/test_conformance/images/clReadWriteImage/test_read_3D.cpp
@@ -174,6 +174,7 @@
if (memSize > (cl_ulong)SIZE_MAX) {
memSize = (cl_ulong)SIZE_MAX;
+ maxAllocSize = (cl_ulong)SIZE_MAX;
}
if( gTestSmallImages )
diff --git a/test_conformance/images/kernel_image_methods/test_1D.cpp b/test_conformance/images/kernel_image_methods/test_1D.cpp
index 0059d4c..934e78b 100644
--- a/test_conformance/images/kernel_image_methods/test_1D.cpp
+++ b/test_conformance/images/kernel_image_methods/test_1D.cpp
@@ -171,6 +171,7 @@
if (memSize > (cl_ulong)SIZE_MAX) {
memSize = (cl_ulong)SIZE_MAX;
+ maxAllocSize = (cl_ulong)SIZE_MAX;
}
if( gTestSmallImages )
diff --git a/test_conformance/images/kernel_image_methods/test_1D_array.cpp b/test_conformance/images/kernel_image_methods/test_1D_array.cpp
index 797161c..a824f08 100644
--- a/test_conformance/images/kernel_image_methods/test_1D_array.cpp
+++ b/test_conformance/images/kernel_image_methods/test_1D_array.cpp
@@ -181,6 +181,7 @@
if (memSize > (cl_ulong)SIZE_MAX) {
memSize = (cl_ulong)SIZE_MAX;
+ maxAllocSize = (cl_ulong)SIZE_MAX;
}
if( gTestSmallImages )
diff --git a/test_conformance/images/kernel_image_methods/test_2D.cpp b/test_conformance/images/kernel_image_methods/test_2D.cpp
index b0d4a70..07f8d92 100644
--- a/test_conformance/images/kernel_image_methods/test_2D.cpp
+++ b/test_conformance/images/kernel_image_methods/test_2D.cpp
@@ -232,6 +232,7 @@
if (memSize > (cl_ulong)SIZE_MAX) {
memSize = (cl_ulong)SIZE_MAX;
+ maxAllocSize = (cl_ulong)SIZE_MAX;
}
if( gTestSmallImages )
diff --git a/test_conformance/images/kernel_read_write/CMakeLists.txt b/test_conformance/images/kernel_read_write/CMakeLists.txt
index 6eb5dc7..ccd678c 100644
--- a/test_conformance/images/kernel_read_write/CMakeLists.txt
+++ b/test_conformance/images/kernel_read_write/CMakeLists.txt
@@ -21,13 +21,7 @@
# Make unused variables not fatal in this module; see
# https://github.com/KhronosGroup/OpenCL-CTS/issues/1484
-if(CMAKE_COMPILER_IS_GNUCC OR "${CMAKE_CXX_COMPILER_ID}" MATCHES "(Apple)?Clang")
- SET_SOURCE_FILES_PROPERTIES(
- ${${MODULE_NAME}_SOURCES}
- PROPERTIES
- COMPILE_FLAGS "-Wno-error=unused-variable"
- )
-endif()
+set_gnulike_module_compile_flags("-Wno-error=unused-variable")
include(../../CMakeCommon.txt)
diff --git a/test_conformance/images/kernel_read_write/test_cl_ext_image_from_buffer.cpp b/test_conformance/images/kernel_read_write/test_cl_ext_image_from_buffer.cpp
index 1b3b04b..2ce33a1 100644
--- a/test_conformance/images/kernel_read_write/test_cl_ext_image_from_buffer.cpp
+++ b/test_conformance/images/kernel_read_write/test_cl_ext_image_from_buffer.cpp
@@ -73,6 +73,12 @@
return TEST_SKIPPED_ITSELF;
}
+ if (!is_extension_available(device, "cl_ext_image_requirements_info"))
+ {
+ printf("Extension cl_ext_image_requirements_info not available");
+ return TEST_SKIPPED_ITSELF;
+ }
+
std::vector<cl_mem_object_type> imageTypes{
CL_MEM_OBJECT_IMAGE1D, CL_MEM_OBJECT_IMAGE2D,
CL_MEM_OBJECT_IMAGE3D, CL_MEM_OBJECT_IMAGE1D_BUFFER,
diff --git a/test_conformance/images/kernel_read_write/test_common.cpp b/test_conformance/images/kernel_read_write/test_common.cpp
index 62bd4ab..a22db19 100644
--- a/test_conformance/images/kernel_read_write/test_common.cpp
+++ b/test_conformance/images/kernel_read_write/test_common.cpp
@@ -34,122 +34,210 @@
return sampler;
}
-void InitFloatCoordsCommon(image_descriptor *imageInfo,
- image_sampler_data *imageSampler, float *xOffsets,
- float *yOffsets, float *zOffsets, float xfract,
- float yfract, float zfract, int normalized_coords,
- MTdata d, int lod)
+// Reports the iteration extents of an image, folding the array size of
+// array images into the height (1D array) or depth (2D array) slot.
+//
+// width, height and depth are always overwritten; extents that do not
+// apply to imageInfo->type are set to 1.
+//
+// Returns true on failure (unsupported image type, after logging an error)
+// and false on success.
+bool get_image_dimensions(image_descriptor *imageInfo, size_t &width,
+                          size_t &height, size_t &depth)
+{
+    width = imageInfo->width;
+    height = 1;
+    depth = 1;
+    switch (imageInfo->type)
+    {
+        case CL_MEM_OBJECT_IMAGE1D: break;
+        case CL_MEM_OBJECT_IMAGE1D_ARRAY: height = imageInfo->arraySize; break;
+        case CL_MEM_OBJECT_IMAGE2D: height = imageInfo->height; break;
+        case CL_MEM_OBJECT_IMAGE2D_ARRAY:
+            height = imageInfo->height;
+            depth = imageInfo->arraySize;
+            break;
+        case CL_MEM_OBJECT_IMAGE3D:
+            height = imageInfo->height;
+            depth = imageInfo->depth;
+            break;
+        default:
+            log_error("ERROR: Test does not support image type");
+            // The return type is bool, so state the failure explicitly
+            // instead of converting a test status code.
+            return true;
+    }
+    return false;
+}
+
+// Fills xOffsets/yOffsets/zOffsets with one sample coordinate per pixel.
+//
+// Loop extents come from get_image_dimensions(), so for array images the
+// array size takes the place of the height (1D array) or depth (2D array).
+// Each coordinate is the pixel index plus xfract/yfract/zfract and, unless
+// gDisableOffsets is set, plus random_in_range(-10, 10, d).
+// With CL_ADDRESS_NONE the coordinates are clamped to [0, extent - 1].
+// When normalized_coords or gTestMipmaps is set they are then divided by the
+// extent (lod == 0) or, for gTestMipmaps with lod != 0, by the lod-shifted
+// extent; the y axis of 1D arrays and the z axis of 2D arrays are left
+// undivided.
+//
+// Returns the result of get_image_dimensions(): true when the image type is
+// unsupported (no offsets are written), false otherwise.
+static bool InitFloatCoordsCommon(image_descriptor *imageInfo,
+ image_sampler_data *imageSampler,
+ float *xOffsets, float *yOffsets,
+ float *zOffsets, float xfract, float yfract,
+ float zfract, int normalized_coords, MTdata d,
+ int lod)
{
size_t i = 0;
- if (gDisableOffsets)
+ size_t width_loop, height_loop, depth_loop;
+ bool error =
+ get_image_dimensions(imageInfo, width_loop, height_loop, depth_loop);
+ if (!error)
{
- for (size_t z = 0; z < imageInfo->depth; z++)
+ if (gDisableOffsets)
{
- for (size_t y = 0; y < imageInfo->height; y++)
+ for (size_t z = 0; z < depth_loop; z++)
{
- for (size_t x = 0; x < imageInfo->width; x++, i++)
+ for (size_t y = 0; y < height_loop; y++)
{
- xOffsets[i] = (float)(xfract + (double)x);
- yOffsets[i] = (float)(yfract + (double)y);
- zOffsets[i] = (float)(zfract + (double)z);
- }
- }
- }
- }
- else
- {
- for (size_t z = 0; z < imageInfo->depth; z++)
- {
- for (size_t y = 0; y < imageInfo->height; y++)
- {
- for (size_t x = 0; x < imageInfo->width; x++, i++)
- {
- xOffsets[i] =
- (float)(xfract
- + (double)((int)x
- + random_in_range(-10, 10, d)));
- yOffsets[i] =
- (float)(yfract
- + (double)((int)y
- + random_in_range(-10, 10, d)));
- zOffsets[i] =
- (float)(zfract
- + (double)((int)z
- + random_in_range(-10, 10, d)));
- }
- }
- }
- }
-
- if (imageSampler->addressing_mode == CL_ADDRESS_NONE)
- {
- i = 0;
- for (size_t z = 0; z < imageInfo->depth; z++)
- {
- for (size_t y = 0; y < imageInfo->height; y++)
- {
- for (size_t x = 0; x < imageInfo->width; x++, i++)
- {
- xOffsets[i] = (float)CLAMP((double)xOffsets[i], 0.0,
- (double)imageInfo->width - 1.0);
- yOffsets[i] = (float)CLAMP((double)yOffsets[i], 0.0,
- (double)imageInfo->height - 1.0);
- zOffsets[i] = (float)CLAMP((double)zOffsets[i], 0.0,
- (double)imageInfo->depth - 1.0);
- }
- }
- }
- }
-
- if (normalized_coords || gTestMipmaps)
- {
- i = 0;
- if (lod == 0)
- {
- for (size_t z = 0; z < imageInfo->depth; z++)
- {
- for (size_t y = 0; y < imageInfo->height; y++)
- {
- for (size_t x = 0; x < imageInfo->width; x++, i++)
+ for (size_t x = 0; x < width_loop; x++, i++)
{
- xOffsets[i] = (float)((double)xOffsets[i]
- / (double)imageInfo->width);
- yOffsets[i] = (float)((double)yOffsets[i]
- / (double)imageInfo->height);
- zOffsets[i] = (float)((double)zOffsets[i]
- / (double)imageInfo->depth);
+ xOffsets[i] = (float)(xfract + (double)x);
+ yOffsets[i] = (float)(yfract + (double)y);
+ zOffsets[i] = (float)(zfract + (double)z);
}
}
}
}
- else if (gTestMipmaps)
+ else
{
- size_t width_lod, height_lod, depth_lod;
-
- width_lod =
- (imageInfo->width >> lod) ? (imageInfo->width >> lod) : 1;
- height_lod =
- (imageInfo->height >> lod) ? (imageInfo->height >> lod) : 1;
- depth_lod =
- (imageInfo->depth >> lod) ? (imageInfo->depth >> lod) : 1;
-
- for (size_t z = 0; z < depth_lod; z++)
+ for (size_t z = 0; z < depth_loop; z++)
{
- for (size_t y = 0; y < height_lod; y++)
+ for (size_t y = 0; y < height_loop; y++)
{
- for (size_t x = 0; x < width_lod; x++, i++)
+ for (size_t x = 0; x < width_loop; x++, i++)
{
xOffsets[i] =
- (float)((double)xOffsets[i] / (double)width_lod);
+ (float)(xfract
+ + (double)((int)x
+ + random_in_range(-10, 10, d)));
yOffsets[i] =
- (float)((double)yOffsets[i] / (double)height_lod);
+ (float)(yfract
+ + (double)((int)y
+ + random_in_range(-10, 10, d)));
zOffsets[i] =
- (float)((double)zOffsets[i] / (double)depth_lod);
+ (float)(zfract
+ + (double)((int)z
+ + random_in_range(-10, 10, d)));
+ }
+ }
+ }
+ }
+
+ if (imageSampler->addressing_mode == CL_ADDRESS_NONE)
+ {
+ i = 0;
+ for (size_t z = 0; z < depth_loop; z++)
+ {
+ for (size_t y = 0; y < height_loop; y++)
+ {
+ for (size_t x = 0; x < width_loop; x++, i++)
+ {
+ xOffsets[i] = (float)CLAMP((double)xOffsets[i], 0.0,
+ (double)width_loop - 1.0);
+ yOffsets[i] = (float)CLAMP((double)yOffsets[i], 0.0,
+ (double)height_loop - 1.0);
+ zOffsets[i] = (float)CLAMP((double)zOffsets[i], 0.0,
+ (double)depth_loop - 1.0);
+ }
+ }
+ }
+ }
+
+ if (normalized_coords || gTestMipmaps)
+ {
+ i = 0;
+ if (lod == 0)
+ {
+ for (size_t z = 0; z < depth_loop; z++)
+ {
+ for (size_t y = 0; y < height_loop; y++)
+ {
+ for (size_t x = 0; x < width_loop; x++, i++)
+ {
+ xOffsets[i] = (float)((double)xOffsets[i]
+ / (double)width_loop);
+ if (imageInfo->type != CL_MEM_OBJECT_IMAGE1D_ARRAY)
+ {
+ yOffsets[i] = (float)((double)yOffsets[i]
+ / (double)height_loop);
+ }
+ if (imageInfo->type != CL_MEM_OBJECT_IMAGE2D_ARRAY)
+ {
+ zOffsets[i] = (float)((double)zOffsets[i]
+ / (double)depth_loop);
+ }
+ }
+ }
+ }
+ }
+ else if (gTestMipmaps)
+ {
+ size_t width_lod =
+ (width_loop >> lod) ? (width_loop >> lod) : 1;
+ size_t height_lod = height_loop;
+ size_t depth_lod = depth_loop;
+ if (imageInfo->type != CL_MEM_OBJECT_IMAGE1D_ARRAY)
+ {
+ height_lod =
+ (height_loop >> lod) ? (height_loop >> lod) : 1;
+ }
+ if (imageInfo->type != CL_MEM_OBJECT_IMAGE2D_ARRAY)
+ {
+ depth_lod = (depth_loop >> lod) ? (depth_loop >> lod) : 1;
+ }
+
+ for (size_t z = 0; z < depth_lod; z++)
+ {
+ for (size_t y = 0; y < height_lod; y++)
+ {
+ for (size_t x = 0; x < width_lod; x++, i++)
+ {
+ xOffsets[i] = (float)((double)xOffsets[i]
+ / (double)width_lod);
+ if (imageInfo->type != CL_MEM_OBJECT_IMAGE1D_ARRAY)
+ {
+ yOffsets[i] = (float)((double)yOffsets[i]
+ / (double)height_lod);
+ }
+ if (imageInfo->type != CL_MEM_OBJECT_IMAGE2D_ARRAY)
+ {
+ zOffsets[i] = (float)((double)zOffsets[i]
+ / (double)depth_lod);
+ }
+ }
}
}
}
}
}
+ return error;
+}
+
+// Creates an image whose kind is selected by imageInfo->type.
+//
+// Only CL_MEM_OBJECT_IMAGE3D is implemented; it is forwarded to
+// create_image_3d() with the descriptor's format, width, height and depth
+// plus the given pitches and host pointer, and create_image_3d() reports
+// its status through *error.
+//
+// For any other image type an error is logged, *error (when non-null) is
+// set to CL_INVALID_VALUE so callers that only inspect the status code do
+// not see a stale value, and nullptr is returned.
+cl_mem create_image_of_type(cl_context context, cl_mem_flags mem_flags,
+                            image_descriptor *imageInfo, size_t row_pitch,
+                            size_t slice_pitch, void *host_ptr, cl_int *error)
+{
+    switch (imageInfo->type)
+    {
+        case CL_MEM_OBJECT_IMAGE3D:
+            return create_image_3d(context, mem_flags, imageInfo->format,
+                                   imageInfo->width, imageInfo->height,
+                                   imageInfo->depth, row_pitch, slice_pitch,
+                                   host_ptr, error);
+        default:
+            log_error("Implementation is incomplete, only 3D images are "
+                      "supported so far");
+            if (error != nullptr)
+            {
+                *error = CL_INVALID_VALUE;
+            }
+            return nullptr;
+    }
+}
+
+// Number of pixels in an image with the given extents.
+//
+// Only CL_MEM_OBJECT_IMAGE3D is implemented (width * height * depth); for
+// every other image type an error is logged and 0 is returned.
+// array_size is accepted but not used yet.
+static size_t get_image_num_pixels(image_descriptor *imageInfo, size_t width,
+                                   size_t height, size_t depth,
+                                   size_t array_size)
+{
+    if (imageInfo->type != CL_MEM_OBJECT_IMAGE3D)
+    {
+        log_error("Implementation is incomplete, only 3D images are "
+                  "supported so far");
+        return 0;
+    }
+    return width * height * depth;
}
int test_read_image(cl_context context, cl_command_queue queue,
@@ -161,6 +249,17 @@
size_t threads[3];
static int initHalf = 0;
+ size_t image_size =
+ get_image_num_pixels(imageInfo, imageInfo->width, imageInfo->height,
+ imageInfo->depth, imageInfo->arraySize);
+ test_assert_error(0 != image_size, "Invalid image size");
+ size_t width_size, height_size, depth_size;
+ if (get_image_dimensions(imageInfo, width_size, height_size, depth_size))
+ {
+ log_error("ERROR: invalid image dimensions");
+ return CL_INVALID_VALUE;
+ }
+
cl_mem_flags image_read_write_flags = CL_MEM_READ_ONLY;
clMemWrapper xOffsets, yOffsets, zOffsets, results;
@@ -169,14 +268,11 @@
// Create offset data
BufferOwningPtr<cl_float> xOffsetValues(
- malloc(sizeof(cl_float) * imageInfo->width * imageInfo->height
- * imageInfo->depth));
+ malloc(sizeof(cl_float) * image_size));
BufferOwningPtr<cl_float> yOffsetValues(
- malloc(sizeof(cl_float) * imageInfo->width * imageInfo->height
- * imageInfo->depth));
+ malloc(sizeof(cl_float) * image_size));
BufferOwningPtr<cl_float> zOffsetValues(
- malloc(sizeof(cl_float) * imageInfo->width * imageInfo->height
- * imageInfo->depth));
+ malloc(sizeof(cl_float) * image_size));
if (imageInfo->format->image_channel_data_type == CL_HALF_FLOAT)
if (DetectFloatToHalfRoundingMode(queue)) return 1;
@@ -207,26 +303,27 @@
{
generate_random_image_data(imageInfo,
maxImageUseHostPtrBackingStore, d);
- unprotImage = create_image_3d(
+ unprotImage = create_image_of_type(
context, image_read_write_flags | CL_MEM_USE_HOST_PTR,
- imageInfo->format, imageInfo->width, imageInfo->height,
- imageInfo->depth, (gEnablePitch ? imageInfo->rowPitch : 0),
+ imageInfo, (gEnablePitch ? imageInfo->rowPitch : 0),
(gEnablePitch ? imageInfo->slicePitch : 0),
maxImageUseHostPtrBackingStore, &error);
}
else
{
- error = protImage.Create(context, image_read_write_flags,
- imageInfo->format, imageInfo->width,
- imageInfo->height, imageInfo->depth);
+ error = protImage.Create(context, imageInfo->type,
+ image_read_write_flags, imageInfo->format,
+ imageInfo->width, imageInfo->height,
+ imageInfo->depth, imageInfo->arraySize);
}
if (error != CL_SUCCESS)
{
- log_error("ERROR: Unable to create 3D image of size %d x %d x %d "
+ log_error("ERROR: Unable to create image of size %d x %d x %d x %d "
"(pitch %d, %d ) (%s)",
(int)imageInfo->width, (int)imageInfo->height,
- (int)imageInfo->depth, (int)imageInfo->rowPitch,
- (int)imageInfo->slicePitch, IGetErrorString(error));
+ (int)imageInfo->depth, (int)imageInfo->arraySize,
+ (int)imageInfo->rowPitch, (int)imageInfo->slicePitch,
+ IGetErrorString(error));
return error;
}
if (gTestMaxImages)
@@ -238,18 +335,18 @@
{
// Don't use clEnqueueWriteImage; just use copy host ptr to get the data
// in
- unprotImage = create_image_3d(
- context, image_read_write_flags | CL_MEM_COPY_HOST_PTR,
- imageInfo->format, imageInfo->width, imageInfo->height,
- imageInfo->depth, (gEnablePitch ? imageInfo->rowPitch : 0),
+ unprotImage = create_image_of_type(
+ context, image_read_write_flags | CL_MEM_COPY_HOST_PTR, imageInfo,
+ (gEnablePitch ? imageInfo->rowPitch : 0),
(gEnablePitch ? imageInfo->slicePitch : 0), imageValues, &error);
if (error != CL_SUCCESS)
{
- log_error("ERROR: Unable to create 3D image of size %d x %d x %d "
+ log_error("ERROR: Unable to create image of size %d x %d x %d x %d "
"(pitch %d, %d ) (%s)",
(int)imageInfo->width, (int)imageInfo->height,
- (int)imageInfo->depth, (int)imageInfo->rowPitch,
- (int)imageInfo->slicePitch, IGetErrorString(error));
+ (int)imageInfo->depth, (int)imageInfo->arraySize,
+ (int)imageInfo->rowPitch, (int)imageInfo->slicePitch,
+ IGetErrorString(error));
return error;
}
image = unprotImage;
@@ -261,19 +358,19 @@
// specified, so we just do the same thing either way
if (!gTestMipmaps)
{
- unprotImage = create_image_3d(
- context, image_read_write_flags | gMemFlagsToUse,
- imageInfo->format, imageInfo->width, imageInfo->height,
- imageInfo->depth, (gEnablePitch ? imageInfo->rowPitch : 0),
+ unprotImage = create_image_of_type(
+ context, image_read_write_flags | gMemFlagsToUse, imageInfo,
+ (gEnablePitch ? imageInfo->rowPitch : 0),
(gEnablePitch ? imageInfo->slicePitch : 0), imageValues,
&error);
if (error != CL_SUCCESS)
{
- log_error("ERROR: Unable to create 3D image of size %d x %d x "
- "%d (pitch %d, %d ) (%s)",
+ log_error("ERROR: Unable to create image of size %d x %d x "
+ "%d x %d (pitch %d, %d ) (%s)",
(int)imageInfo->width, (int)imageInfo->height,
- (int)imageInfo->depth, (int)imageInfo->rowPitch,
- (int)imageInfo->slicePitch, IGetErrorString(error));
+ (int)imageInfo->depth, (int)imageInfo->arraySize,
+ (int)imageInfo->rowPitch, (int)imageInfo->slicePitch,
+ IGetErrorString(error));
return error;
}
image = unprotImage;
@@ -281,10 +378,11 @@
else
{
cl_image_desc image_desc = { 0 };
- image_desc.image_type = CL_MEM_OBJECT_IMAGE3D;
+ image_desc.image_type = imageInfo->type;
image_desc.image_width = imageInfo->width;
image_desc.image_height = imageInfo->height;
image_desc.image_depth = imageInfo->depth;
+ image_desc.image_array_size = imageInfo->arraySize;
image_desc.num_mip_levels = imageInfo->num_mip_levels;
@@ -293,23 +391,24 @@
imageInfo->format, &image_desc, NULL, &error);
if (error != CL_SUCCESS)
{
- log_error("ERROR: Unable to create %d level mipmapped 3D image "
- "of size %d x %d x %d (pitch %d, %d ) (%s)",
+ log_error("ERROR: Unable to create %d level mipmapped image "
+ "of size %d x %d x %d x %d (pitch %d, %d ) (%s)",
(int)imageInfo->num_mip_levels, (int)imageInfo->width,
(int)imageInfo->height, (int)imageInfo->depth,
- (int)imageInfo->rowPitch, (int)imageInfo->slicePitch,
- IGetErrorString(error));
+ (int)imageInfo->arraySize, (int)imageInfo->rowPitch,
+ (int)imageInfo->slicePitch, IGetErrorString(error));
return error;
}
image = unprotImage;
}
}
+ test_assert_error(nullptr != image, "Image creation failed");
+
if (gMemFlagsToUse != CL_MEM_COPY_HOST_PTR)
{
size_t origin[4] = { 0, 0, 0, 0 };
- size_t region[3] = { imageInfo->width, imageInfo->height,
- imageInfo->depth };
+ size_t region[3] = { width_size, height_size, depth_size };
if (gDebugTrace) log_info(" - Writing image...\n");
@@ -324,10 +423,10 @@
if (error != CL_SUCCESS)
{
- log_error("ERROR: Unable to write to 3D image of size %d x %d "
- "x %d \n",
+ log_error("ERROR: Unable to write to image of size %d x %d "
+ "x %d x %d\n",
(int)imageInfo->width, (int)imageInfo->height,
- (int)imageInfo->depth);
+ (int)imageInfo->depth, (int)imageInfo->arraySize);
return error;
}
}
@@ -339,17 +438,15 @@
{
origin[3] = i;
error = clEnqueueWriteImage(
- queue, image, CL_TRUE, origin, region,
- /*gEnablePitch ? imageInfo->rowPitch :*/ 0,
- /*gEnablePitch ? imageInfo->slicePitch :*/ 0,
+ queue, image, CL_TRUE, origin, region, 0, 0,
((char *)imageValues + nextLevelOffset), 0, NULL, NULL);
if (error != CL_SUCCESS)
{
- log_error("ERROR: Unable to write to %d level mipmapped 3D "
- "image of size %d x %d x %d\n",
+ log_error("ERROR: Unable to write to %d level mipmapped "
+ "image of size %d x %d x %d x %d\n",
(int)imageInfo->num_mip_levels,
(int)imageInfo->width, (int)imageInfo->height,
- (int)imageInfo->depth);
+ (int)imageInfo->depth, (int)imageInfo->arraySize);
return error;
}
nextLevelOffset += region[0] * region[1] * region[2]
@@ -362,26 +459,21 @@
}
}
- xOffsets = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
- sizeof(cl_float) * imageInfo->width
- * imageInfo->height * imageInfo->depth,
- xOffsetValues, &error);
+ xOffsets =
+ clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
+ sizeof(cl_float) * image_size, xOffsetValues, &error);
test_error(error, "Unable to create x offset buffer");
- yOffsets = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
- sizeof(cl_float) * imageInfo->width
- * imageInfo->height * imageInfo->depth,
- yOffsetValues, &error);
+ yOffsets =
+ clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
+ sizeof(cl_float) * image_size, yOffsetValues, &error);
test_error(error, "Unable to create y offset buffer");
- zOffsets = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
- sizeof(cl_float) * imageInfo->width
- * imageInfo->height * imageInfo->depth,
- zOffsetValues, &error);
+ zOffsets =
+ clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
+ sizeof(cl_float) * image_size, zOffsetValues, &error);
test_error(error, "Unable to create y offset buffer");
- results =
- clCreateBuffer(context, CL_MEM_READ_WRITE,
- get_explicit_type_size(outputType) * 4 * imageInfo->width
- * imageInfo->height * imageInfo->depth,
- NULL, &error);
+ results = clCreateBuffer(
+ context, CL_MEM_READ_WRITE,
+ get_explicit_type_size(outputType) * 4 * image_size, NULL, &error);
test_error(error, "Unable to create result buffer");
// Create sampler to use
@@ -444,16 +536,19 @@
}
int nextLevelOffset = 0;
- size_t width_lod = imageInfo->width, height_lod = imageInfo->height,
- depth_lod = imageInfo->depth;
+ size_t width_lod = width_size, height_lod = height_size,
+ depth_lod = depth_size;
// Loop over all mipmap levels, if we are testing mipmapped images.
for (int lod = 0; (gTestMipmaps && lod < imageInfo->num_mip_levels)
|| (!gTestMipmaps && lod < 1);
lod++)
{
- size_t resultValuesSize = width_lod * height_lod * depth_lod
- * get_explicit_type_size(outputType) * 4;
+ size_t image_lod_size = get_image_num_pixels(
+ imageInfo, width_lod, height_lod, depth_lod, imageInfo->arraySize);
+ test_assert_error(0 != image_lod_size, "Invalid image size");
+ size_t resultValuesSize =
+ image_lod_size * get_explicit_type_size(outputType) * 4;
BufferOwningPtr<char> resultValues(malloc(resultValuesSize));
float lod_float = (float)lod;
if (gTestMipmaps)
@@ -469,30 +564,25 @@
float offset = float_offsets[q % float_offset_count];
// Init the coordinates
- InitFloatCoordsCommon(imageInfo, imageSampler, xOffsetValues,
- yOffsetValues, zOffsetValues,
- q >= float_offset_count ? -offset : offset,
- q >= float_offset_count ? offset : -offset,
- q >= float_offset_count ? -offset : offset,
- imageSampler->normalized_coords, d, lod);
+ error = InitFloatCoordsCommon(
+ imageInfo, imageSampler, xOffsetValues, yOffsetValues,
+ zOffsetValues, q >= float_offset_count ? -offset : offset,
+ q >= float_offset_count ? offset : -offset,
+ q >= float_offset_count ? -offset : offset,
+ imageSampler->normalized_coords, d, lod);
+ test_error(error, "Unable to initialise coordinates");
- error =
- clEnqueueWriteBuffer(queue, xOffsets, CL_TRUE, 0,
- sizeof(cl_float) * imageInfo->height
- * imageInfo->width * imageInfo->depth,
- xOffsetValues, 0, NULL, NULL);
+ error = clEnqueueWriteBuffer(queue, xOffsets, CL_TRUE, 0,
+ sizeof(cl_float) * image_size,
+ xOffsetValues, 0, NULL, NULL);
test_error(error, "Unable to write x offsets");
- error =
- clEnqueueWriteBuffer(queue, yOffsets, CL_TRUE, 0,
- sizeof(cl_float) * imageInfo->height
- * imageInfo->width * imageInfo->depth,
- yOffsetValues, 0, NULL, NULL);
+ error = clEnqueueWriteBuffer(queue, yOffsets, CL_TRUE, 0,
+ sizeof(cl_float) * image_size,
+ yOffsetValues, 0, NULL, NULL);
test_error(error, "Unable to write y offsets");
- error =
- clEnqueueWriteBuffer(queue, zOffsets, CL_TRUE, 0,
- sizeof(cl_float) * imageInfo->height
- * imageInfo->width * imageInfo->depth,
- zOffsetValues, 0, NULL, NULL);
+ error = clEnqueueWriteBuffer(queue, zOffsets, CL_TRUE, 0,
+ sizeof(cl_float) * image_size,
+ zOffsetValues, 0, NULL, NULL);
test_error(error, "Unable to write z offsets");
@@ -511,11 +601,10 @@
test_error(error, "Unable to run kernel");
// Get results
- error = clEnqueueReadBuffer(queue, results, CL_TRUE, 0,
- width_lod * height_lod * depth_lod
- * get_explicit_type_size(outputType)
- * 4,
- resultValues, 0, NULL, NULL);
+ error = clEnqueueReadBuffer(
+ queue, results, CL_TRUE, 0,
+ image_lod_size * get_explicit_type_size(outputType) * 4,
+ resultValues, 0, NULL, NULL);
test_error(error, "Unable to read results from kernel");
if (gDebugTrace) log_info(" results read\n");
@@ -1540,8 +1629,14 @@
nextLevelOffset += width_lod * height_lod * depth_lod
* get_pixel_size(imageInfo->format);
width_lod = (width_lod >> 1) ? (width_lod >> 1) : 1;
- height_lod = (height_lod >> 1) ? (height_lod >> 1) : 1;
- depth_lod = (depth_lod >> 1) ? (depth_lod >> 1) : 1;
+ if (imageInfo->type != CL_MEM_OBJECT_IMAGE1D_ARRAY)
+ {
+ height_lod = (height_lod >> 1) ? (height_lod >> 1) : 1;
+ }
+ if (imageInfo->type != CL_MEM_OBJECT_IMAGE2D_ARRAY)
+ {
+ depth_lod = (depth_lod >> 1) ? (depth_lod >> 1) : 1;
+ }
}
}
diff --git a/test_conformance/images/kernel_read_write/test_common.h b/test_conformance/images/kernel_read_write/test_common.h
index 656c41f..fc95bee 100644
--- a/test_conformance/images/kernel_read_write/test_common.h
+++ b/test_conformance/images/kernel_read_write/test_common.h
@@ -42,12 +42,8 @@
bool useFloatCoords, ExplicitType outputType,
MTdata d);
-extern void InitFloatCoordsCommon(image_descriptor *imageInfo,
- image_sampler_data *imageSampler,
- float *xOffsets, float *yOffsets,
- float *zOffsets, float xfract, float yfract,
- float zfract, int normalized_coords, MTdata d,
- int lod);
+extern bool get_image_dimensions(image_descriptor *imageInfo, size_t &width,
+ size_t &height, size_t &depth);
template <class T>
int determine_validation_error_offset(
@@ -63,8 +59,12 @@
bool clampingErr = false, clamped = false, otherClampingBug = false;
int clampedX, clampedY, clampedZ;
- size_t imageWidth = imageInfo->width, imageHeight = imageInfo->height,
- imageDepth = imageInfo->depth;
+    size_t imageWidth, imageHeight, imageDepth;
+    // get_image_dimensions() signals failure with a true return value.
+    if (get_image_dimensions(imageInfo, imageWidth, imageHeight, imageDepth))
+    {
+        // Newline-terminate the message like the other log_error() calls in
+        // this function, so the next log line does not run on.
+        log_error("ERROR: invalid image dimensions\n");
+        return TEST_FAIL;
+    }
clamped = get_integer_coords_offset(x, y, z, xAddressOffset, yAddressOffset,
zAddressOffset, imageWidth, imageHeight,
@@ -147,82 +147,67 @@
}
if (!clampingErr)
{
- /* if( clamped && ( (int)x + (int)xOffsetValues[ j ] < 0 ||
- (int)y + (int)yOffsetValues[ j ] < 0 ) )
- {
- log_error( "NEGATIVE COORDINATE ERROR\n" );
- return -1;
- }
- */
- if (true) // gExtraValidateInfo )
+ if (printAsFloat)
{
- if (printAsFloat)
- {
- log_error("Sample %ld: coord {%f(%a),%f(%a),%f(%a)} did not "
- "validate!\n\tExpected (%g,%g,%g,%g),\n\t got "
- "(%g,%g,%g,%g), error of %g\n",
- j, x, x, y, y, z, z, (float)expected[0],
- (float)expected[1], (float)expected[2],
- (float)expected[3], (float)resultPtr[0],
- (float)resultPtr[1], (float)resultPtr[2],
- (float)resultPtr[3], error);
- }
- else
- {
- log_error("Sample %ld: coord {%f(%a),%f(%a),%f(%a)} did not "
- "validate!\n\tExpected (%x,%x,%x,%x),\n\t got "
- "(%x,%x,%x,%x)\n",
- j, x, x, y, y, z, z, (int)expected[0],
- (int)expected[1], (int)expected[2], (int)expected[3],
- (int)resultPtr[0], (int)resultPtr[1],
- (int)resultPtr[2], (int)resultPtr[3]);
- }
- log_error(
- "Integer coords resolve to %d,%d,%d with img size %d,%d,%d\n",
- clampedX, clampedY, clampedZ, (int)imageWidth, (int)imageHeight,
- (int)imageDepth);
+ log_error("Sample %ld: coord {%f(%a),%f(%a),%f(%a)} did not "
+ "validate!\n\tExpected (%g,%g,%g,%g),\n\t got "
+ "(%g,%g,%g,%g), error of %g\n",
+ j, x, x, y, y, z, z, (float)expected[0],
+ (float)expected[1], (float)expected[2],
+ (float)expected[3], (float)resultPtr[0],
+ (float)resultPtr[1], (float)resultPtr[2],
+ (float)resultPtr[3], error);
+ }
+ else
+ {
+ log_error("Sample %ld: coord {%f(%a),%f(%a),%f(%a)} did not "
+ "validate!\n\tExpected (%x,%x,%x,%x),\n\t got "
+ "(%x,%x,%x,%x)\n",
+ j, x, x, y, y, z, z, (int)expected[0], (int)expected[1],
+ (int)expected[2], (int)expected[3], (int)resultPtr[0],
+ (int)resultPtr[1], (int)resultPtr[2], (int)resultPtr[3]);
+ }
+ log_error(
+ "Integer coords resolve to %d,%d,%d with img size %d,%d,%d\n",
+ clampedX, clampedY, clampedZ, (int)imageWidth, (int)imageHeight,
+ (int)imageDepth);
- if (printAsFloat && gExtraValidateInfo)
+ if (printAsFloat && gExtraValidateInfo)
+ {
+ log_error("\nNearby values:\n");
+ for (int zOff = -1; zOff <= 1; zOff++)
{
- log_error("\nNearby values:\n");
- for (int zOff = -1; zOff <= 1; zOff++)
+ for (int yOff = -1; yOff <= 1; yOff++)
{
- for (int yOff = -1; yOff <= 1; yOff++)
- {
- float top[4], real[4], bot[4];
- read_image_pixel_float(imagePtr, imageInfo,
- clampedX - 1, clampedY + yOff,
- clampedZ + zOff, top);
- read_image_pixel_float(imagePtr, imageInfo, clampedX,
- clampedY + yOff, clampedZ + zOff,
- real);
- read_image_pixel_float(imagePtr, imageInfo,
- clampedX + 1, clampedY + yOff,
- clampedZ + zOff, bot);
- log_error("\t(%g,%g,%g,%g)", top[0], top[1], top[2],
- top[3]);
- log_error(" (%g,%g,%g,%g)", real[0], real[1], real[2],
- real[3]);
- log_error(" (%g,%g,%g,%g)\n", bot[0], bot[1], bot[2],
- bot[3]);
- }
+ float top[4], real[4], bot[4];
+ read_image_pixel_float(imagePtr, imageInfo, clampedX - 1,
+ clampedY + yOff, clampedZ + zOff,
+ top);
+ read_image_pixel_float(imagePtr, imageInfo, clampedX,
+ clampedY + yOff, clampedZ + zOff,
+ real);
+ read_image_pixel_float(imagePtr, imageInfo, clampedX + 1,
+ clampedY + yOff, clampedZ + zOff,
+ bot);
+ log_error("\t(%g,%g,%g,%g)", top[0], top[1], top[2],
+ top[3]);
+ log_error(" (%g,%g,%g,%g)", real[0], real[1], real[2],
+ real[3]);
+ log_error(" (%g,%g,%g,%g)\n", bot[0], bot[1], bot[2],
+ bot[3]);
}
}
- // }
- // else
- // log_error( "\n" );
- if (imageSampler->filter_mode != CL_FILTER_LINEAR)
- {
- if (found)
- log_error(
- "\tValue really found in image at %d,%d,%d (%s)\n",
- actualX, actualY, actualZ,
- (found > 1) ? "NOT unique!!" : "unique");
- else
- log_error("\tValue not actually found in image\n");
- }
- log_error("\n");
}
+ if (imageSampler->filter_mode != CL_FILTER_LINEAR)
+ {
+ if (found)
+ log_error("\tValue really found in image at %d,%d,%d (%s)\n",
+ actualX, actualY, actualZ,
+ (found > 1) ? "NOT unique!!" : "unique");
+ else
+ log_error("\tValue not actually found in image\n");
+ }
+ log_error("\n");
numClamped = -1; // We force the clamped counter to never work
if ((--numTries) == 0) return -1;
diff --git a/test_conformance/images/samplerlessReads/test_iterations.cpp b/test_conformance/images/samplerlessReads/test_iterations.cpp
index 55eaaf4..e2f89aa 100644
--- a/test_conformance/images/samplerlessReads/test_iterations.cpp
+++ b/test_conformance/images/samplerlessReads/test_iterations.cpp
@@ -215,6 +215,7 @@
if (memSize > (cl_ulong)SIZE_MAX) {
memSize = (cl_ulong)SIZE_MAX;
+ maxAllocSize = (cl_ulong)SIZE_MAX;
}
// Determine types
diff --git a/test_conformance/images/samplerlessReads/test_read_1D.cpp b/test_conformance/images/samplerlessReads/test_read_1D.cpp
index aa261b7..6ed9910 100644
--- a/test_conformance/images/samplerlessReads/test_read_1D.cpp
+++ b/test_conformance/images/samplerlessReads/test_read_1D.cpp
@@ -215,6 +215,7 @@
if (memSize > (cl_ulong)SIZE_MAX) {
memSize = (cl_ulong)SIZE_MAX;
+ maxAllocSize = (cl_ulong)SIZE_MAX;
}
// Determine types
diff --git a/test_conformance/images/samplerlessReads/test_read_1D_array.cpp b/test_conformance/images/samplerlessReads/test_read_1D_array.cpp
index fb0c263..677eb9f 100644
--- a/test_conformance/images/samplerlessReads/test_read_1D_array.cpp
+++ b/test_conformance/images/samplerlessReads/test_read_1D_array.cpp
@@ -214,6 +214,7 @@
if (memSize > (cl_ulong)SIZE_MAX) {
memSize = (cl_ulong)SIZE_MAX;
+ maxAllocSize = (cl_ulong)SIZE_MAX;
}
// Determine types
diff --git a/test_conformance/images/samplerlessReads/test_read_1D_buffer.cpp b/test_conformance/images/samplerlessReads/test_read_1D_buffer.cpp
index 7a3084d..c3a991a 100644
--- a/test_conformance/images/samplerlessReads/test_read_1D_buffer.cpp
+++ b/test_conformance/images/samplerlessReads/test_read_1D_buffer.cpp
@@ -219,6 +219,7 @@
if (memSize > (cl_ulong)SIZE_MAX) {
memSize = (cl_ulong)SIZE_MAX;
+ maxAllocSize = (cl_ulong)SIZE_MAX;
}
// note: image_buffer test uses image1D for results validation.
diff --git a/test_conformance/images/samplerlessReads/test_read_2D_array.cpp b/test_conformance/images/samplerlessReads/test_read_2D_array.cpp
index 99f2426..8273f53 100644
--- a/test_conformance/images/samplerlessReads/test_read_2D_array.cpp
+++ b/test_conformance/images/samplerlessReads/test_read_2D_array.cpp
@@ -202,6 +202,7 @@
if (memSize > (cl_ulong)SIZE_MAX) {
memSize = (cl_ulong)SIZE_MAX;
+ maxAllocSize = (cl_ulong)SIZE_MAX;
}
// Determine types
diff --git a/test_conformance/images/samplerlessReads/test_read_3D.cpp b/test_conformance/images/samplerlessReads/test_read_3D.cpp
index cf41140..0df46c8 100644
--- a/test_conformance/images/samplerlessReads/test_read_3D.cpp
+++ b/test_conformance/images/samplerlessReads/test_read_3D.cpp
@@ -206,6 +206,7 @@
if (memSize > (cl_ulong)SIZE_MAX) {
memSize = (cl_ulong)SIZE_MAX;
+ maxAllocSize = (cl_ulong)SIZE_MAX;
}
// Determine types
diff --git a/test_conformance/math_brute_force/CMakeLists.txt b/test_conformance/math_brute_force/CMakeLists.txt
index 28d2716..3281402 100644
--- a/test_conformance/math_brute_force/CMakeLists.txt
+++ b/test_conformance/math_brute_force/CMakeLists.txt
@@ -9,6 +9,7 @@
binary_operator_float.cpp
binary_two_results_i_double.cpp
binary_two_results_i_float.cpp
+ common.cpp
common.h
function_list.cpp
function_list.h
@@ -40,4 +41,8 @@
utility.h
)
+# math_brute_force compiles cleanly with -Wall (except for a few remaining
+# warnings), but the other tests do not (yet), so enable -Wall locally.
+set_gnulike_module_compile_flags("-Wall -Wno-strict-aliasing -Wno-unknown-pragmas")
+
include(../CMakeCommon.txt)
diff --git a/test_conformance/math_brute_force/binary_double.cpp b/test_conformance/math_brute_force/binary_double.cpp
index 1b1f7d4..f18d0b9 100644
--- a/test_conformance/math_brute_force/binary_double.cpp
+++ b/test_conformance/math_brute_force/binary_double.cpp
@@ -134,7 +134,7 @@
maxErrorValue; // position of the max error value (param 1). Init to 0.
double maxErrorValue2; // position of the max error value (param 2). Init
// to 0.
- MTdata d;
+ MTdataHolder d;
// Per thread command queue to improve performance
clCommandQueueWrapper tQueue;
@@ -630,7 +630,7 @@
{
if (gVerboseBruteForce)
{
- vlog("base:%14u step:%10u scale:%10zu buf_elements:%10u ulps:%5.3f "
+ vlog("base:%14u step:%10u scale:%10u buf_elements:%10zu ulps:%5.3f "
"ThreadCount:%2u\n",
base, job->step, job->scale, buffer_elements, job->ulps,
job->threadCount);
@@ -691,7 +691,7 @@
test_info.k[i].resize(test_info.threadCount, nullptr);
}
- test_info.tinfo.resize(test_info.threadCount, ThreadInfo{});
+ test_info.tinfo.resize(test_info.threadCount);
for (cl_uint i = 0; i < test_info.threadCount; i++)
{
cl_buffer_region region = {
@@ -740,7 +740,7 @@
goto exit;
}
- test_info.tinfo[i].d = init_genrand(genrand_int32(d));
+ test_info.tinfo[i].d = MTdataHolder(genrand_int32(d));
}
// Init the kernels
@@ -792,10 +792,5 @@
}
}
- for (auto &threadInfo : test_info.tinfo)
- {
- free_mtdata(threadInfo.d);
- }
-
return error;
}
diff --git a/test_conformance/math_brute_force/binary_float.cpp b/test_conformance/math_brute_force/binary_float.cpp
index d229a37..fe1491d 100644
--- a/test_conformance/math_brute_force/binary_float.cpp
+++ b/test_conformance/math_brute_force/binary_float.cpp
@@ -132,7 +132,7 @@
maxErrorValue; // position of the max error value (param 1). Init to 0.
double maxErrorValue2; // position of the max error value (param 2). Init
// to 0.
- MTdata d;
+ MTdataHolder d;
// Per thread command queue to improve performance
clCommandQueueWrapper tQueue;
@@ -755,7 +755,7 @@
{
vlog_error(
"\nERROR: %s%s: %f ulp error at {%a (0x%x), %a "
- "(0x%x)}: *%a vs. %a (0x%8.8x) at index: %d\n",
+ "(0x%x)}: *%a vs. %a (0x%8.8x) at index: %zu\n",
name, sizeNames[k], err, s[j], ((cl_uint *)s)[j],
s2[j], ((cl_uint *)s2)[j], r[j], test,
((cl_uint *)&test)[0], j);
@@ -787,7 +787,7 @@
{
if (gVerboseBruteForce)
{
- vlog("base:%14u step:%10u scale:%10zu buf_elements:%10u ulps:%5.3f "
+ vlog("base:%14u step:%10u scale:%10u buf_elements:%10zu ulps:%5.3f "
"ThreadCount:%2u\n",
base, job->step, job->scale, buffer_elements, job->ulps,
job->threadCount);
@@ -848,7 +848,7 @@
test_info.k[i].resize(test_info.threadCount, nullptr);
}
- test_info.tinfo.resize(test_info.threadCount, ThreadInfo{});
+ test_info.tinfo.resize(test_info.threadCount);
for (cl_uint i = 0; i < test_info.threadCount; i++)
{
cl_buffer_region region = {
@@ -897,7 +897,7 @@
goto exit;
}
- test_info.tinfo[i].d = init_genrand(genrand_int32(d));
+ test_info.tinfo[i].d = MTdataHolder(genrand_int32(d));
}
// Init the kernels
@@ -949,10 +949,5 @@
}
}
- for (auto &threadInfo : test_info.tinfo)
- {
- free_mtdata(threadInfo.d);
- }
-
return error;
}
diff --git a/test_conformance/math_brute_force/binary_i_double.cpp b/test_conformance/math_brute_force/binary_i_double.cpp
index 7baa21a..f8786e6 100644
--- a/test_conformance/math_brute_force/binary_i_double.cpp
+++ b/test_conformance/math_brute_force/binary_i_double.cpp
@@ -133,7 +133,7 @@
maxErrorValue; // position of the max error value (param 1). Init to 0.
cl_int maxErrorValue2; // position of the max error value (param 2). Init
// to 0.
- MTdata d;
+ MTdataHolder d;
// Per thread command queue to improve performance
clCommandQueueWrapper tQueue;
@@ -610,7 +610,7 @@
test_info.k[i].resize(test_info.threadCount, nullptr);
}
- test_info.tinfo.resize(test_info.threadCount, ThreadInfo{});
+ test_info.tinfo.resize(test_info.threadCount);
for (cl_uint i = 0; i < test_info.threadCount; i++)
{
cl_buffer_region region = {
@@ -662,7 +662,7 @@
goto exit;
}
- test_info.tinfo[i].d = init_genrand(genrand_int32(d));
+ test_info.tinfo[i].d = MTdataHolder(genrand_int32(d));
}
// Init the kernels
@@ -714,10 +714,5 @@
}
}
- for (auto &threadInfo : test_info.tinfo)
- {
- free_mtdata(threadInfo.d);
- }
-
return error;
}
diff --git a/test_conformance/math_brute_force/binary_i_float.cpp b/test_conformance/math_brute_force/binary_i_float.cpp
index 3f998e2..d855f44 100644
--- a/test_conformance/math_brute_force/binary_i_float.cpp
+++ b/test_conformance/math_brute_force/binary_i_float.cpp
@@ -131,7 +131,7 @@
maxErrorValue; // position of the max error value (param 1). Init to 0.
cl_int maxErrorValue2; // position of the max error value (param 2). Init
// to 0.
- MTdata d;
+ MTdataHolder d;
// Per thread command queue to improve performance
clCommandQueueWrapper tQueue;
@@ -516,7 +516,7 @@
{
vlog_error(
"\nERROR: %s%s: %f ulp error at {%a (0x%8.8x), %d}: "
- "*%a (0x%8.8x) vs. %a (0x%8.8x) at index: %d\n",
+ "*%a (0x%8.8x) vs. %a (0x%8.8x) at index: %zu\n",
name, sizeNames[k], err, s[j], ((uint32_t *)s)[j],
s2[j], r[j], ((uint32_t *)r)[j], test,
((cl_uint *)&test)[0], j);
@@ -545,7 +545,7 @@
{
if (gVerboseBruteForce)
{
- vlog("base:%14u step:%10u scale:%10zu buf_elements:%10u ulps:%5.3f "
+ vlog("base:%14u step:%10u scale:%10u buf_elements:%10zu ulps:%5.3f "
"ThreadCount:%2u\n",
base, job->step, job->scale, buffer_elements, job->ulps,
job->threadCount);
@@ -603,7 +603,7 @@
test_info.k[i].resize(test_info.threadCount, nullptr);
}
- test_info.tinfo.resize(test_info.threadCount, ThreadInfo{});
+ test_info.tinfo.resize(test_info.threadCount);
for (cl_uint i = 0; i < test_info.threadCount; i++)
{
cl_buffer_region region = {
@@ -655,7 +655,7 @@
goto exit;
}
- test_info.tinfo[i].d = init_genrand(genrand_int32(d));
+ test_info.tinfo[i].d = MTdataHolder(genrand_int32(d));
}
// Init the kernels
@@ -707,10 +707,5 @@
}
}
- for (auto &threadInfo : test_info.tinfo)
- {
- free_mtdata(threadInfo.d);
- }
-
return error;
}
diff --git a/test_conformance/math_brute_force/binary_operator_double.cpp b/test_conformance/math_brute_force/binary_operator_double.cpp
index 7488366..bbe5c43 100644
--- a/test_conformance/math_brute_force/binary_operator_double.cpp
+++ b/test_conformance/math_brute_force/binary_operator_double.cpp
@@ -133,7 +133,7 @@
maxErrorValue; // position of the max error value (param 1). Init to 0.
double maxErrorValue2; // position of the max error value (param 2). Init
// to 0.
- MTdata d;
+ MTdataHolder d;
// Per thread command queue to improve performance
clCommandQueueWrapper tQueue;
@@ -601,7 +601,7 @@
{
if (gVerboseBruteForce)
{
- vlog("base:%14u step:%10u scale:%10zu buf_elements:%10u ulps:%5.3f "
+ vlog("base:%14u step:%10u scale:%10u buf_elements:%10zu ulps:%5.3f "
"ThreadCount:%2u\n",
base, job->step, job->scale, buffer_elements, job->ulps,
job->threadCount);
@@ -658,7 +658,7 @@
test_info.k[i].resize(test_info.threadCount, nullptr);
}
- test_info.tinfo.resize(test_info.threadCount, ThreadInfo{});
+ test_info.tinfo.resize(test_info.threadCount);
for (cl_uint i = 0; i < test_info.threadCount; i++)
{
cl_buffer_region region = {
@@ -707,7 +707,7 @@
goto exit;
}
- test_info.tinfo[i].d = init_genrand(genrand_int32(d));
+ test_info.tinfo[i].d = MTdataHolder(genrand_int32(d));
}
// Init the kernels
@@ -759,10 +759,5 @@
}
}
- for (auto &threadInfo : test_info.tinfo)
- {
- free_mtdata(threadInfo.d);
- }
-
return error;
}
diff --git a/test_conformance/math_brute_force/binary_operator_float.cpp b/test_conformance/math_brute_force/binary_operator_float.cpp
index 56f293c..1a28d8d 100644
--- a/test_conformance/math_brute_force/binary_operator_float.cpp
+++ b/test_conformance/math_brute_force/binary_operator_float.cpp
@@ -131,7 +131,7 @@
maxErrorValue; // position of the max error value (param 1). Init to 0.
double maxErrorValue2; // position of the max error value (param 2). Init
// to 0.
- MTdata d;
+ MTdataHolder d;
// Per thread command queue to improve performance
clCommandQueueWrapper tQueue;
@@ -698,7 +698,7 @@
if (fail)
{
vlog_error("\nERROR: %s%s: %f ulp error at {%a, %a}: *%a "
- "vs. %a (0x%8.8x) at index: %d\n",
+ "vs. %a (0x%8.8x) at index: %zu\n",
name, sizeNames[k], err, s[j], s2[j], r[j], test,
((cl_uint *)&test)[0], j);
error = -1;
@@ -726,7 +726,7 @@
{
if (gVerboseBruteForce)
{
- vlog("base:%14u step:%10u scale:%10zu buf_elements:%10u ulps:%5.3f "
+ vlog("base:%14u step:%10u scale:%10u buf_elements:%10zu ulps:%5.3f "
"ThreadCount:%2u\n",
base, job->step, job->scale, buffer_elements, job->ulps,
job->threadCount);
@@ -785,7 +785,7 @@
test_info.k[i].resize(test_info.threadCount, nullptr);
}
- test_info.tinfo.resize(test_info.threadCount, ThreadInfo{});
+ test_info.tinfo.resize(test_info.threadCount);
for (cl_uint i = 0; i < test_info.threadCount; i++)
{
cl_buffer_region region = {
@@ -834,7 +834,7 @@
goto exit;
}
- test_info.tinfo[i].d = init_genrand(genrand_int32(d));
+ test_info.tinfo[i].d = MTdataHolder(genrand_int32(d));
}
// Init the kernels
@@ -886,10 +886,5 @@
}
}
- for (auto &threadInfo : test_info.tinfo)
- {
- free_mtdata(threadInfo.d);
- }
-
return error;
}
diff --git a/test_conformance/math_brute_force/binary_two_results_i_double.cpp b/test_conformance/math_brute_force/binary_two_results_i_double.cpp
index 59a5bfe..bbfd707 100644
--- a/test_conformance/math_brute_force/binary_two_results_i_double.cpp
+++ b/test_conformance/math_brute_force/binary_two_results_i_double.cpp
@@ -19,6 +19,7 @@
#include "test_functions.h"
#include "utility.h"
+#include <cinttypes>
#include <climits>
#include <cstring>
@@ -527,17 +528,20 @@
if (fail)
{
- vlog_error(
- "\nERROR: %sD%s: {%f, %lld} ulp error at {%.13la, "
- "%.13la} ({ 0x%16.16llx, 0x%16.16llx}): *{%.13la, "
- "%d} ({ 0x%16.16llx, 0x%8.8x}) vs. {%.13la, %d} ({ "
- "0x%16.16llx, 0x%8.8x})\n",
- f->name, sizeNames[k], err, iErr, ((double *)gIn)[j],
- ((double *)gIn2)[j], ((cl_ulong *)gIn)[j],
- ((cl_ulong *)gIn2)[j], ((double *)gOut_Ref)[j],
- ((int *)gOut_Ref2)[j], ((cl_ulong *)gOut_Ref)[j],
- ((cl_uint *)gOut_Ref2)[j], test, q2[j],
- ((cl_ulong *)q)[j], ((cl_uint *)q2)[j]);
+ vlog_error("\nERROR: %sD%s: {%f, %" PRId64
+ "} ulp error at {%.13la, "
+ "%.13la} ({ 0x%16.16" PRIx64 ", 0x%16.16" PRIx64
+ "}): *{%.13la, "
+ "%d} ({ 0x%16.16" PRIx64
+ ", 0x%8.8x}) vs. {%.13la, %d} ({ "
+ "0x%16.16" PRIx64 ", 0x%8.8x})\n",
+ f->name, sizeNames[k], err, iErr,
+ ((double *)gIn)[j], ((double *)gIn2)[j],
+ ((cl_ulong *)gIn)[j], ((cl_ulong *)gIn2)[j],
+ ((double *)gOut_Ref)[j], ((int *)gOut_Ref2)[j],
+ ((cl_ulong *)gOut_Ref)[j],
+ ((cl_uint *)gOut_Ref2)[j], test, q2[j],
+ ((cl_ulong *)q)[j], ((cl_uint *)q2)[j]);
error = -1;
goto exit;
}
@@ -548,8 +552,9 @@
{
if (gVerboseBruteForce)
{
- vlog("base:%14u step:%10zu bufferSize:%10zd \n", i, step,
- BUFFER_SIZE);
+ vlog("base:%14" PRIu64 " step:%10" PRIu64
+ " bufferSize:%10d \n",
+ i, step, BUFFER_SIZE);
}
else
{
@@ -566,8 +571,8 @@
else
vlog("passed");
- vlog("\t{%8.2f, %lld} @ {%a, %a}", maxError, maxError2, maxErrorVal,
- maxErrorVal2);
+ vlog("\t{%8.2f, %" PRId64 "} @ {%a, %a}", maxError, maxError2,
+ maxErrorVal, maxErrorVal2);
}
vlog("\n");
diff --git a/test_conformance/math_brute_force/binary_two_results_i_float.cpp b/test_conformance/math_brute_force/binary_two_results_i_float.cpp
index 6c1dd3b..0747337 100644
--- a/test_conformance/math_brute_force/binary_two_results_i_float.cpp
+++ b/test_conformance/math_brute_force/binary_two_results_i_float.cpp
@@ -19,6 +19,7 @@
#include "test_functions.h"
#include "utility.h"
+#include <cinttypes>
#include <climits>
#include <cstring>
@@ -513,16 +514,17 @@
if (fail)
{
- vlog_error(
- "\nERROR: %s%s: {%f, %lld} ulp error at {%a, %a} "
- "({0x%8.8x, 0x%8.8x}): *{%a, %d} ({0x%8.8x, "
- "0x%8.8x}) vs. {%a, %d} ({0x%8.8x, 0x%8.8x})\n",
- f->name, sizeNames[k], err, iErr, ((float *)gIn)[j],
- ((float *)gIn2)[j], ((cl_uint *)gIn)[j],
- ((cl_uint *)gIn2)[j], ((float *)gOut_Ref)[j],
- ((int *)gOut_Ref2)[j], ((cl_uint *)gOut_Ref)[j],
- ((cl_uint *)gOut_Ref2)[j], test, q2[j],
- ((cl_uint *)&test)[0], ((cl_uint *)q2)[j]);
+ vlog_error("\nERROR: %s%s: {%f, %" PRId64
+ "} ulp error at {%a, %a} "
+ "({0x%8.8x, 0x%8.8x}): *{%a, %d} ({0x%8.8x, "
+ "0x%8.8x}) vs. {%a, %d} ({0x%8.8x, 0x%8.8x})\n",
+ f->name, sizeNames[k], err, iErr,
+ ((float *)gIn)[j], ((float *)gIn2)[j],
+ ((cl_uint *)gIn)[j], ((cl_uint *)gIn2)[j],
+ ((float *)gOut_Ref)[j], ((int *)gOut_Ref2)[j],
+ ((cl_uint *)gOut_Ref)[j],
+ ((cl_uint *)gOut_Ref2)[j], test, q2[j],
+ ((cl_uint *)&test)[0], ((cl_uint *)q2)[j]);
error = -1;
goto exit;
}
@@ -533,8 +535,9 @@
{
if (gVerboseBruteForce)
{
- vlog("base:%14u step:%10zu bufferSize:%10zd \n", i, step,
- BUFFER_SIZE);
+ vlog("base:%14" PRIu64 " step:%10" PRIu64
+ " bufferSize:%10d \n",
+ i, step, BUFFER_SIZE);
}
else
{
@@ -551,8 +554,8 @@
else
vlog("passed");
- vlog("\t{%8.2f, %lld} @ {%a, %a}", maxError, maxError2, maxErrorVal,
- maxErrorVal2);
+ vlog("\t{%8.2f, %" PRId64 "} @ {%a, %a}", maxError, maxError2,
+ maxErrorVal, maxErrorVal2);
}
vlog("\n");
diff --git a/test_conformance/math_brute_force/common.cpp b/test_conformance/math_brute_force/common.cpp
new file mode 100644
index 0000000..f5e9f99
--- /dev/null
+++ b/test_conformance/math_brute_force/common.cpp
@@ -0,0 +1,170 @@
+//
+// Copyright (c) 2022 The Khronos Group Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+
+#include "common.h"
+
+#include "utility.h" // for sizeNames and sizeValues.
+
+#include <sstream>
+#include <string>
+
+namespace {
+
+// Spell a ParameterType as the matching OpenCL C scalar type name.
+const char *GetTypeName(ParameterType type)
+{
+    // Stays null only if `type` is not one of the enumerators handled below.
+    const char *spelling = nullptr;
+    switch (type)
+    {
+        case ParameterType::Float: spelling = "float"; break;
+        case ParameterType::Double: spelling = "double"; break;
+    }
+    return spelling;
+}
+
+// Return the text that an UNDEF macro for the given type expands to.
+const char *GetUndefValue(ParameterType type)
+{
+    // Stays null only if `type` is not one of the enumerators handled below.
+    const char *filler = nullptr;
+    switch (type)
+    {
+        // Both floating-point types share the same filler.
+        case ParameterType::Float:
+        case ParameterType::Double: filler = "NAN"; break;
+    }
+    return filler;
+}
+
+// Emit two macros for `type`: `name` expands to the type suffixed with the
+// vector size selected by vector_size_index, and `name`_SCALAR expands to the
+// unsuffixed scalar type.
+void EmitDefineType(std::ostringstream &kernel, const char *name,
+                    ParameterType type, int vector_size_index)
+{
+    const char *const scalar_type = GetTypeName(type);
+    const char *const width_suffix = sizeNames[vector_size_index];
+
+    kernel << "#define " << name << " " << scalar_type << width_suffix
+           << '\n';
+    kernel << "#define " << name << "_SCALAR " << scalar_type << '\n';
+}
+
+// Emit a macro `name` that expands to the undefined-value text for `type`.
+void EmitDefineUndef(std::ostringstream &kernel, const char *name,
+                     ParameterType type)
+{
+    const char *const undef_value = GetUndefValue(type);
+    kernel << "#define " << name << " " << undef_value << '\n';
+}
+
+// Emit the extension pragma, if any, that kernels using `type` require.
+void EmitEnableExtension(std::ostringstream &kernel, ParameterType type)
+{
+    switch (type)
+    {
+        case ParameterType::Float:
+            // No extension required.
+            return;
+
+        case ParameterType::Double:
+            kernel << "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n";
+            return;
+    }
+}
+
+} // anonymous namespace
+
+// Kernel entry-point name: "math_kernel" followed by the vector-size suffix
+// selected by vector_size_index.
+std::string GetKernelName(int vector_size_index)
+{
+    std::string kernel_name("math_kernel");
+    kernel_name += sizeNames[vector_size_index];
+    return kernel_name;
+}
+
+// Build the OpenCL C source of a kernel called `kernel_name` that applies the
+// three-operand `builtin` element-wise:
+//     out[i] = builtin(in1[i], in2[i], in3[i]).
+//
+// retType and type1..type3 select the element types of the result and of the
+// three inputs; vector_size_index indexes sizeNames/sizeValues to pick the
+// vector width. When that width is 3, a dedicated variant is emitted that
+// takes scalar pointers, uses vload3/vstore3, and handles the last work-item
+// separately so it only touches the elements that are left over.
+//
+// Returns the complete kernel source as a string.
+std::string GetTernaryKernel(const std::string &kernel_name,
+                             const char *builtin, ParameterType retType,
+                             ParameterType type1, ParameterType type2,
+                             ParameterType type3, int vector_size_index)
+{
+    // To keep the kernel code readable, use macros for types and undef values.
+    std::ostringstream kernel;
+    EmitDefineType(kernel, "RETTYPE", retType, vector_size_index);
+    EmitDefineType(kernel, "TYPE1", type1, vector_size_index);
+    EmitDefineType(kernel, "TYPE2", type2, vector_size_index);
+    EmitDefineType(kernel, "TYPE3", type3, vector_size_index);
+    EmitDefineUndef(kernel, "UNDEF1", type1);
+    EmitDefineUndef(kernel, "UNDEF2", type2);
+    EmitDefineUndef(kernel, "UNDEF3", type3);
+
+    // The kernel needs cl_khr_fp64 as soon as any of its types is double, not
+    // only when the first input is; checking type1 alone would leave the
+    // extension disabled for mixed-type signatures.
+    const bool uses_double = retType == ParameterType::Double
+        || type1 == ParameterType::Double || type2 == ParameterType::Double
+        || type3 == ParameterType::Double;
+    EmitEnableExtension(kernel,
+                        uses_double ? ParameterType::Double
+                                    : ParameterType::Float);
+
+    // The kernel text is split into chunks around the pieces that vary (the
+    // kernel name and the builtin); the chunks are concatenated below.
+    // clang-format off
+    const char *kernel_nonvec3[] = { R"(
+__kernel void )", kernel_name.c_str(), R"((__global RETTYPE* out,
+ __global TYPE1* in1,
+ __global TYPE2* in2,
+ __global TYPE3* in3)
+{
+ size_t i = get_global_id(0);
+ out[i] = )", builtin, R"((in1[i], in2[i], in3[i]);
+}
+)" };
+
+    const char *kernel_vec3[] = { R"(
+__kernel void )", kernel_name.c_str(), R"((__global RETTYPE_SCALAR* out,
+ __global TYPE1_SCALAR* in1,
+ __global TYPE2_SCALAR* in2,
+ __global TYPE3_SCALAR* in3)
+{
+ size_t i = get_global_id(0);
+
+ if (i + 1 < get_global_size(0))
+ {
+ TYPE1 a = vload3(0, in1 + 3 * i);
+ TYPE2 b = vload3(0, in2 + 3 * i);
+ TYPE3 c = vload3(0, in3 + 3 * i);
+ RETTYPE res = )", builtin, R"((a, b, c);
+ vstore3(res, 0, out + 3 * i);
+ }
+ else
+ {
+ // Figure out how many elements are left over after
+ // BUFFER_SIZE % (3 * sizeof(type)).
+ // Assume power of two buffer size.
+ size_t parity = i & 1;
+ TYPE1 a = (TYPE1)(UNDEF1, UNDEF1, UNDEF1);
+ TYPE2 b = (TYPE2)(UNDEF2, UNDEF2, UNDEF2);
+ TYPE3 c = (TYPE3)(UNDEF3, UNDEF3, UNDEF3);
+ switch (parity)
+ {
+ case 0:
+ a.y = in1[3 * i + 1];
+ b.y = in2[3 * i + 1];
+ c.y = in3[3 * i + 1];
+ // fall through
+ case 1:
+ a.x = in1[3 * i];
+ b.x = in2[3 * i];
+ c.x = in3[3 * i];
+ break;
+ }
+
+ RETTYPE res = )", builtin, R"((a, b, c);
+
+ switch (parity)
+ {
+ case 0:
+ out[3 * i + 1] = res.y;
+ // fall through
+ case 1:
+ out[3 * i] = res.x;
+ break;
+ }
+ }
+}
+)" };
+    // clang-format on
+
+    // Only the 3-element vector width needs the vload3/vstore3 variant.
+    if (sizeValues[vector_size_index] != 3)
+        for (const auto &chunk : kernel_nonvec3) kernel << chunk;
+    else
+        for (const auto &chunk : kernel_vec3) kernel << chunk;
+
+    return kernel.str();
+}
diff --git a/test_conformance/math_brute_force/common.h b/test_conformance/math_brute_force/common.h
index 6f17898..143814c 100644
--- a/test_conformance/math_brute_force/common.h
+++ b/test_conformance/math_brute_force/common.h
@@ -20,6 +20,7 @@
#include "utility.h"
#include <array>
+#include <string>
#include <vector>
// Array of thread-specific kernels for each vector size.
@@ -31,6 +32,22 @@
// Array of buffers for each vector size.
using Buffers = std::array<clMemWrapper, VECTOR_SIZE_COUNT>;
+// Types supported for kernel code generation (result and operand types of the generated kernels).
+enum class ParameterType
+{
+ Float, // spelled "float" in the generated OpenCL C; no extension pragma is emitted
+ Double, // spelled "double" in the generated OpenCL C; cl_khr_fp64 pragma is emitted
+};
+
+// Return kernel name suffixed with vector size.
+std::string GetKernelName(int vector_size_index);
+
+// Generate kernel code for the given builtin function/operator.
+std::string GetTernaryKernel(const std::string &kernel_name,
+ const char *builtin, ParameterType retType,
+ ParameterType type1, ParameterType type2,
+ ParameterType type3, int vector_size_index);
+
// Information to generate OpenCL kernels.
struct BuildKernelInfo
{
diff --git a/test_conformance/math_brute_force/i_unary_double.cpp b/test_conformance/math_brute_force/i_unary_double.cpp
index a05737d..0cbcf86 100644
--- a/test_conformance/math_brute_force/i_unary_double.cpp
+++ b/test_conformance/math_brute_force/i_unary_double.cpp
@@ -19,6 +19,7 @@
#include "test_functions.h"
#include "utility.h"
+#include <cinttypes>
#include <cstring>
namespace {
@@ -271,8 +272,9 @@
{
if (gVerboseBruteForce)
{
- vlog("base:%14u step:%10zu bufferSize:%10zd \n", i, step,
- BUFFER_SIZE);
+ vlog("base:%14" PRIu64 " step:%10" PRIu64
+ " bufferSize:%10d \n",
+ i, step, BUFFER_SIZE);
}
else
{
diff --git a/test_conformance/math_brute_force/i_unary_float.cpp b/test_conformance/math_brute_force/i_unary_float.cpp
index 13442e6..90bb1e1 100644
--- a/test_conformance/math_brute_force/i_unary_float.cpp
+++ b/test_conformance/math_brute_force/i_unary_float.cpp
@@ -19,6 +19,7 @@
#include "test_functions.h"
#include "utility.h"
+#include <cinttypes>
#include <cstring>
namespace {
@@ -268,8 +269,9 @@
{
if (gVerboseBruteForce)
{
- vlog("base:%14u step:%10zu bufferSize:%10zd \n", i, step,
- BUFFER_SIZE);
+ vlog("base:%14" PRIu64 " step:%10" PRIu64
+ " bufferSize:%10d \n",
+ i, step, BUFFER_SIZE);
}
else
{
diff --git a/test_conformance/math_brute_force/macro_binary_double.cpp b/test_conformance/math_brute_force/macro_binary_double.cpp
index a697a7b..412f210 100644
--- a/test_conformance/math_brute_force/macro_binary_double.cpp
+++ b/test_conformance/math_brute_force/macro_binary_double.cpp
@@ -19,6 +19,7 @@
#include "test_functions.h"
#include "utility.h"
+#include <cinttypes>
#include <cstring>
namespace {
@@ -127,7 +128,7 @@
clMemWrapper inBuf2;
Buffers outBuf;
- MTdata d;
+ MTdataHolder d;
// Per thread command queue to improve performance
clCommandQueueWrapper tQueue;
@@ -487,8 +488,9 @@
cl_ulong err = t[j] - q[j];
if (q[j] > t[j]) err = q[j] - t[j];
- vlog_error("\nERROR: %s: %lld ulp error at {%.13la, %.13la}: *%lld "
- "vs. %lld (index: %d)\n",
+ vlog_error("\nERROR: %s: %" PRId64
+ " ulp error at {%.13la, %.13la}: *%" PRId64 " "
+ "vs. %" PRId64 " (index: %zu)\n",
name, err, ((double *)s)[j], ((double *)s2)[j], t[j],
q[j], j);
error = -1;
@@ -535,8 +537,9 @@
cl_ulong err = -t[j] - q[j];
if (q[j] > -t[j]) err = q[j] + t[j];
- vlog_error("\nERROR: %sD%s: %lld ulp error at {%.13la, "
- "%.13la}: *%lld vs. %lld (index: %d)\n",
+ vlog_error("\nERROR: %sD%s: %" PRId64 " ulp error at {%.13la, "
+ "%.13la}: *%" PRId64 " vs. %" PRId64
+ " (index: %zu)\n",
name, sizeNames[k], err, ((double *)s)[j],
((double *)s2)[j], -t[j], q[j], j);
error = -1;
@@ -616,7 +619,7 @@
test_info.k[i].resize(test_info.threadCount, nullptr);
}
- test_info.tinfo.resize(test_info.threadCount, ThreadInfo{});
+ test_info.tinfo.resize(test_info.threadCount);
for (cl_uint i = 0; i < test_info.threadCount; i++)
{
cl_buffer_region region = {
@@ -665,7 +668,7 @@
goto exit;
}
- test_info.tinfo[i].d = init_genrand(genrand_int32(d));
+ test_info.tinfo[i].d = MTdataHolder(genrand_int32(d));
}
// Init the kernels
@@ -704,10 +707,5 @@
}
}
- for (auto &threadInfo : test_info.tinfo)
- {
- free_mtdata(threadInfo.d);
- }
-
return error;
}
diff --git a/test_conformance/math_brute_force/macro_binary_float.cpp b/test_conformance/math_brute_force/macro_binary_float.cpp
index 97e2f67..cb915fc 100644
--- a/test_conformance/math_brute_force/macro_binary_float.cpp
+++ b/test_conformance/math_brute_force/macro_binary_float.cpp
@@ -125,7 +125,7 @@
clMemWrapper inBuf2;
Buffers outBuf;
- MTdata d;
+ MTdataHolder d;
// Per thread command queue to improve performance
clCommandQueueWrapper tQueue;
@@ -478,7 +478,7 @@
uint32_t err = t[j] - q[j];
if (q[j] > t[j]) err = q[j] - t[j];
vlog_error("\nERROR: %s: %d ulp error at {%a, %a}: *0x%8.8x vs. "
- "0x%8.8x (index: %d)\n",
+ "0x%8.8x (index: %zu)\n",
name, err, ((float *)s)[j], ((float *)s2)[j], t[j], q[j],
j);
error = -1;
@@ -524,7 +524,7 @@
cl_uint err = -t[j] - q[j];
if (q[j] > -t[j]) err = q[j] + t[j];
vlog_error("\nERROR: %s%s: %d ulp error at {%a, %a}: *0x%8.8x "
- "vs. 0x%8.8x (index: %d)\n",
+ "vs. 0x%8.8x (index: %zu)\n",
name, sizeNames[k], err, ((float *)s)[j],
((float *)s2)[j], -t[j], q[j], j);
error = -1;
@@ -605,7 +605,7 @@
test_info.k[i].resize(test_info.threadCount, nullptr);
}
- test_info.tinfo.resize(test_info.threadCount, ThreadInfo{});
+ test_info.tinfo.resize(test_info.threadCount);
for (cl_uint i = 0; i < test_info.threadCount; i++)
{
cl_buffer_region region = {
@@ -654,7 +654,7 @@
goto exit;
}
- test_info.tinfo[i].d = init_genrand(genrand_int32(d));
+ test_info.tinfo[i].d = MTdataHolder(genrand_int32(d));
}
// Init the kernels
@@ -693,10 +693,5 @@
}
}
- for (auto &threadInfo : test_info.tinfo)
- {
- free_mtdata(threadInfo.d);
- }
-
return error;
}
diff --git a/test_conformance/math_brute_force/macro_unary_double.cpp b/test_conformance/math_brute_force/macro_unary_double.cpp
index 5a3ad35..c2e7cdc 100644
--- a/test_conformance/math_brute_force/macro_unary_double.cpp
+++ b/test_conformance/math_brute_force/macro_unary_double.cpp
@@ -19,6 +19,7 @@
#include "test_functions.h"
#include "utility.h"
+#include <cinttypes>
#include <cstring>
namespace {
@@ -297,7 +298,8 @@
cl_ulong err = t[j] - q[j];
if (q[j] > t[j]) err = q[j] - t[j];
- vlog_error("\nERROR: %sD: %zd ulp error at %.13la: *%zd vs. %zd\n",
+ vlog_error("\nERROR: %sD: %" PRIu64 // err is cl_ulong (unsigned)
+ " ulp error at %.13la: *%" PRId64 " vs. %" PRId64 "\n",
name, err, ((double *)gIn)[j], t[j], q[j]);
return -1;
}
@@ -323,7 +325,8 @@
cl_ulong err = -t[j] - q[j];
if (q[j] > -t[j]) err = q[j] + t[j];
vlog_error(
- "\nERROR: %sD%s: %zd ulp error at %.13la: *%zd vs. %zd\n",
+ "\nERROR: %sD%s: %" PRIu64 " ulp error at %.13la: *%" PRId64
+ " vs. %" PRId64 "\n", // err is cl_ulong, hence PRIu64
name, sizeNames[k], err, ((double *)gIn)[j], -t[j], q[j]);
return -1;
}
@@ -400,7 +403,7 @@
test_info.k[i].resize(test_info.threadCount, nullptr);
}
- test_info.tinfo.resize(test_info.threadCount, ThreadInfo{});
+ test_info.tinfo.resize(test_info.threadCount);
for (cl_uint i = 0; i < test_info.threadCount; i++)
{
cl_buffer_region region = {
diff --git a/test_conformance/math_brute_force/macro_unary_float.cpp b/test_conformance/math_brute_force/macro_unary_float.cpp
index d298215..6a1b9b9 100644
--- a/test_conformance/math_brute_force/macro_unary_float.cpp
+++ b/test_conformance/math_brute_force/macro_unary_float.cpp
@@ -414,7 +414,7 @@
test_info.k[i].resize(test_info.threadCount, nullptr);
}
- test_info.tinfo.resize(test_info.threadCount, ThreadInfo{});
+ test_info.tinfo.resize(test_info.threadCount);
for (cl_uint i = 0; i < test_info.threadCount; i++)
{
cl_buffer_region region = {
diff --git a/test_conformance/math_brute_force/mad_double.cpp b/test_conformance/math_brute_force/mad_double.cpp
index 3def6a8..8d8fec5 100644
--- a/test_conformance/math_brute_force/mad_double.cpp
+++ b/test_conformance/math_brute_force/mad_double.cpp
@@ -26,94 +26,13 @@
int BuildKernel(const char *name, int vectorSize, cl_kernel *k, cl_program *p,
bool relaxedMode)
{
- const char *c[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
- "__kernel void math_kernel",
- sizeNames[vectorSize],
- "( __global double",
- sizeNames[vectorSize],
- "* out, __global double",
- sizeNames[vectorSize],
- "* in1, __global double",
- sizeNames[vectorSize],
- "* in2, __global double",
- sizeNames[vectorSize],
- "* in3 )\n"
- "{\n"
- " size_t i = get_global_id(0);\n"
- " out[i] = ",
- name,
- "( in1[i], in2[i], in3[i] );\n"
- "}\n" };
-
- const char *c3[] = {
- "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
- "__kernel void math_kernel",
- sizeNames[vectorSize],
- "( __global double* out, __global double* in, __global double* in2, "
- "__global double* in3)\n"
- "{\n"
- " size_t i = get_global_id(0);\n"
- " if( i + 1 < get_global_size(0) )\n"
- " {\n"
- " double3 d0 = vload3( 0, in + 3 * i );\n"
- " double3 d1 = vload3( 0, in2 + 3 * i );\n"
- " double3 d2 = vload3( 0, in3 + 3 * i );\n"
- " d0 = ",
- name,
- "( d0, d1, d2 );\n"
- " vstore3( d0, 0, out + 3*i );\n"
- " }\n"
- " else\n"
- " {\n"
- " size_t parity = i & 1; // Figure out how many elements are "
- "left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two "
- "buffer size \n"
- " double3 d0;\n"
- " double3 d1;\n"
- " double3 d2;\n"
- " switch( parity )\n"
- " {\n"
- " case 1:\n"
- " d0 = (double3)( in[3*i], NAN, NAN ); \n"
- " d1 = (double3)( in2[3*i], NAN, NAN ); \n"
- " d2 = (double3)( in3[3*i], NAN, NAN ); \n"
- " break;\n"
- " case 0:\n"
- " d0 = (double3)( in[3*i], in[3*i+1], NAN ); \n"
- " d1 = (double3)( in2[3*i], in2[3*i+1], NAN ); \n"
- " d2 = (double3)( in3[3*i], in3[3*i+1], NAN ); \n"
- " break;\n"
- " }\n"
- " d0 = ",
- name,
- "( d0, d1, d2 );\n"
- " switch( parity )\n"
- " {\n"
- " case 0:\n"
- " out[3*i+1] = d0.y; \n"
- " // fall through\n"
- " case 1:\n"
- " out[3*i] = d0.x; \n"
- " break;\n"
- " }\n"
- " }\n"
- "}\n"
- };
-
- const char **kern = c;
- size_t kernSize = sizeof(c) / sizeof(c[0]);
-
- if (sizeValues[vectorSize] == 3)
- {
- kern = c3;
- kernSize = sizeof(c3) / sizeof(c3[0]);
- }
-
- char testName[32];
- snprintf(testName, sizeof(testName) - 1, "math_kernel%s",
- sizeNames[vectorSize]);
-
- return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode);
+ auto kernel_name = GetKernelName(vectorSize);
+ auto source = GetTernaryKernel(kernel_name, name, ParameterType::Double,
+ ParameterType::Double, ParameterType::Double,
+ ParameterType::Double, vectorSize);
+ std::array<const char *, 1> sources{ source.c_str() };
+ return MakeKernel(sources.data(), static_cast<cl_uint>(sources.size()),
+ kernel_name.c_str(), k, p, relaxedMode);
}
struct BuildKernelInfo2
diff --git a/test_conformance/math_brute_force/mad_float.cpp b/test_conformance/math_brute_force/mad_float.cpp
index 498f25e..04ac5aa 100644
--- a/test_conformance/math_brute_force/mad_float.cpp
+++ b/test_conformance/math_brute_force/mad_float.cpp
@@ -26,92 +26,13 @@
int BuildKernel(const char *name, int vectorSize, cl_kernel *k, cl_program *p,
bool relaxedMode)
{
- const char *c[] = { "__kernel void math_kernel",
- sizeNames[vectorSize],
- "( __global float",
- sizeNames[vectorSize],
- "* out, __global float",
- sizeNames[vectorSize],
- "* in1, __global float",
- sizeNames[vectorSize],
- "* in2, __global float",
- sizeNames[vectorSize],
- "* in3 )\n"
- "{\n"
- " size_t i = get_global_id(0);\n"
- " out[i] = ",
- name,
- "( in1[i], in2[i], in3[i] );\n"
- "}\n" };
-
- const char *c3[] = {
- "__kernel void math_kernel",
- sizeNames[vectorSize],
- "( __global float* out, __global float* in, __global float* in2, "
- "__global float* in3)\n"
- "{\n"
- " size_t i = get_global_id(0);\n"
- " if( i + 1 < get_global_size(0) )\n"
- " {\n"
- " float3 f0 = vload3( 0, in + 3 * i );\n"
- " float3 f1 = vload3( 0, in2 + 3 * i );\n"
- " float3 f2 = vload3( 0, in3 + 3 * i );\n"
- " f0 = ",
- name,
- "( f0, f1, f2 );\n"
- " vstore3( f0, 0, out + 3*i );\n"
- " }\n"
- " else\n"
- " {\n"
- " size_t parity = i & 1; // Figure out how many elements are "
- "left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two "
- "buffer size \n"
- " float3 f0;\n"
- " float3 f1;\n"
- " float3 f2;\n"
- " switch( parity )\n"
- " {\n"
- " case 1:\n"
- " f0 = (float3)( in[3*i], NAN, NAN ); \n"
- " f1 = (float3)( in2[3*i], NAN, NAN ); \n"
- " f2 = (float3)( in3[3*i], NAN, NAN ); \n"
- " break;\n"
- " case 0:\n"
- " f0 = (float3)( in[3*i], in[3*i+1], NAN ); \n"
- " f1 = (float3)( in2[3*i], in2[3*i+1], NAN ); \n"
- " f2 = (float3)( in3[3*i], in3[3*i+1], NAN ); \n"
- " break;\n"
- " }\n"
- " f0 = ",
- name,
- "( f0, f1, f2 );\n"
- " switch( parity )\n"
- " {\n"
- " case 0:\n"
- " out[3*i+1] = f0.y; \n"
- " // fall through\n"
- " case 1:\n"
- " out[3*i] = f0.x; \n"
- " break;\n"
- " }\n"
- " }\n"
- "}\n"
- };
-
- const char **kern = c;
- size_t kernSize = sizeof(c) / sizeof(c[0]);
-
- if (sizeValues[vectorSize] == 3)
- {
- kern = c3;
- kernSize = sizeof(c3) / sizeof(c3[0]);
- }
-
- char testName[32];
- snprintf(testName, sizeof(testName) - 1, "math_kernel%s",
- sizeNames[vectorSize]);
-
- return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode);
+ auto kernel_name = GetKernelName(vectorSize);
+ auto source = GetTernaryKernel(kernel_name, name, ParameterType::Float,
+ ParameterType::Float, ParameterType::Float,
+ ParameterType::Float, vectorSize);
+ std::array<const char *, 1> sources{ source.c_str() };
+ return MakeKernel(sources.data(), static_cast<cl_uint>(sources.size()),
+ kernel_name.c_str(), k, p, relaxedMode);
}
struct BuildKernelInfo2
diff --git a/test_conformance/math_brute_force/main.cpp b/test_conformance/math_brute_force/main.cpp
index ee3fcbd..64491bd 100644
--- a/test_conformance/math_brute_force/main.cpp
+++ b/test_conformance/math_brute_force/main.cpp
@@ -98,7 +98,7 @@
cl_mem gInBuffer3 = NULL;
cl_mem gOutBuffer[VECTOR_SIZE_COUNT] = { NULL, NULL, NULL, NULL, NULL, NULL };
cl_mem gOutBuffer2[VECTOR_SIZE_COUNT] = { NULL, NULL, NULL, NULL, NULL, NULL };
-static MTdata gMTdata;
+static MTdataHolder gMTdata;
cl_device_fp_config gFloatCapabilities = 0;
int gWimpyReductionFactor = 32;
int gVerboseBruteForce = 0;
@@ -132,7 +132,7 @@
if ((gStartTestNumber != ~0u && i < gStartTestNumber)
|| i > gEndTestNumber)
{
- vlog("Skipping function #%d\n", i);
+ vlog("Skipping function #%zu\n", i);
return 0;
}
@@ -326,7 +326,7 @@
vlog("\n-------------------------------------------------------------------"
"----------------------------------------\n");
- gMTdata = init_genrand(gRandomSeed);
+ gMTdata = MTdataHolder(gRandomSeed);
FPU_mode_type oldMode;
DisableFTZ(&oldMode);
@@ -336,8 +336,6 @@
RestoreFPState(&oldMode);
- free_mtdata(gMTdata);
-
if (gQueue)
{
int error_code = clFinish(gQueue);
@@ -504,8 +502,6 @@
gWimpyMode = 1;
}
- vlog("\nTest binary built %s %s\n", __DATE__, __TIME__);
-
PrintArch();
if (gWimpyMode)
diff --git a/test_conformance/math_brute_force/ternary_double.cpp b/test_conformance/math_brute_force/ternary_double.cpp
index 94fbe26..b5f1ab0 100644
--- a/test_conformance/math_brute_force/ternary_double.cpp
+++ b/test_conformance/math_brute_force/ternary_double.cpp
@@ -19,6 +19,7 @@
#include "test_functions.h"
#include "utility.h"
+#include <cinttypes>
#include <cstring>
#define CORRECTLY_ROUNDED 0
@@ -29,94 +30,13 @@
int BuildKernel(const char *name, int vectorSize, cl_kernel *k, cl_program *p,
bool relaxedMode)
{
- const char *c[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
- "__kernel void math_kernel",
- sizeNames[vectorSize],
- "( __global double",
- sizeNames[vectorSize],
- "* out, __global double",
- sizeNames[vectorSize],
- "* in1, __global double",
- sizeNames[vectorSize],
- "* in2, __global double",
- sizeNames[vectorSize],
- "* in3 )\n"
- "{\n"
- " size_t i = get_global_id(0);\n"
- " out[i] = ",
- name,
- "( in1[i], in2[i], in3[i] );\n"
- "}\n" };
-
- const char *c3[] = {
- "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
- "__kernel void math_kernel",
- sizeNames[vectorSize],
- "( __global double* out, __global double* in, __global double* in2, "
- "__global double* in3)\n"
- "{\n"
- " size_t i = get_global_id(0);\n"
- " if( i + 1 < get_global_size(0) )\n"
- " {\n"
- " double3 d0 = vload3( 0, in + 3 * i );\n"
- " double3 d1 = vload3( 0, in2 + 3 * i );\n"
- " double3 d2 = vload3( 0, in3 + 3 * i );\n"
- " d0 = ",
- name,
- "( d0, d1, d2 );\n"
- " vstore3( d0, 0, out + 3*i );\n"
- " }\n"
- " else\n"
- " {\n"
- " size_t parity = i & 1; // Figure out how many elements are "
- "left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two "
- "buffer size \n"
- " double3 d0;\n"
- " double3 d1;\n"
- " double3 d2;\n"
- " switch( parity )\n"
- " {\n"
- " case 1:\n"
- " d0 = (double3)( in[3*i], NAN, NAN ); \n"
- " d1 = (double3)( in2[3*i], NAN, NAN ); \n"
- " d2 = (double3)( in3[3*i], NAN, NAN ); \n"
- " break;\n"
- " case 0:\n"
- " d0 = (double3)( in[3*i], in[3*i+1], NAN ); \n"
- " d1 = (double3)( in2[3*i], in2[3*i+1], NAN ); \n"
- " d2 = (double3)( in3[3*i], in3[3*i+1], NAN ); \n"
- " break;\n"
- " }\n"
- " d0 = ",
- name,
- "( d0, d1, d2 );\n"
- " switch( parity )\n"
- " {\n"
- " case 0:\n"
- " out[3*i+1] = d0.y; \n"
- " // fall through\n"
- " case 1:\n"
- " out[3*i] = d0.x; \n"
- " break;\n"
- " }\n"
- " }\n"
- "}\n"
- };
-
- const char **kern = c;
- size_t kernSize = sizeof(c) / sizeof(c[0]);
-
- if (sizeValues[vectorSize] == 3)
- {
- kern = c3;
- kernSize = sizeof(c3) / sizeof(c3[0]);
- }
-
- char testName[32];
- snprintf(testName, sizeof(testName) - 1, "math_kernel%s",
- sizeNames[vectorSize]);
-
- return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode);
+ auto kernel_name = GetKernelName(vectorSize);
+ auto source = GetTernaryKernel(kernel_name, name, ParameterType::Double,
+ ParameterType::Double, ParameterType::Double,
+ ParameterType::Double, vectorSize);
+ std::array<const char *, 1> sources{ source.c_str() };
+ return MakeKernel(sources.data(), static_cast<cl_uint>(sources.size()),
+ kernel_name.c_str(), k, p, relaxedMode);
}
struct BuildKernelInfo2
@@ -708,8 +628,9 @@
{
if (gVerboseBruteForce)
{
- vlog("base:%14u step:%10zu bufferSize:%10zd \n", i, step,
- BUFFER_SIZE);
+ vlog("base:%14" PRIu64 " step:%10" PRIu64
+ " bufferSize:%10d \n",
+ i, step, BUFFER_SIZE);
}
else
{
diff --git a/test_conformance/math_brute_force/ternary_float.cpp b/test_conformance/math_brute_force/ternary_float.cpp
index 762c57d..cf36184 100644
--- a/test_conformance/math_brute_force/ternary_float.cpp
+++ b/test_conformance/math_brute_force/ternary_float.cpp
@@ -19,6 +19,7 @@
#include "test_functions.h"
#include "utility.h"
+#include <cinttypes>
#include <cstring>
#define CORRECTLY_ROUNDED 0
@@ -29,92 +30,13 @@
int BuildKernel(const char *name, int vectorSize, cl_kernel *k, cl_program *p,
bool relaxedMode)
{
- const char *c[] = { "__kernel void math_kernel",
- sizeNames[vectorSize],
- "( __global float",
- sizeNames[vectorSize],
- "* out, __global float",
- sizeNames[vectorSize],
- "* in1, __global float",
- sizeNames[vectorSize],
- "* in2, __global float",
- sizeNames[vectorSize],
- "* in3 )\n"
- "{\n"
- " size_t i = get_global_id(0);\n"
- " out[i] = ",
- name,
- "( in1[i], in2[i], in3[i] );\n"
- "}\n" };
-
- const char *c3[] = {
- "__kernel void math_kernel",
- sizeNames[vectorSize],
- "( __global float* out, __global float* in, __global float* in2, "
- "__global float* in3)\n"
- "{\n"
- " size_t i = get_global_id(0);\n"
- " if( i + 1 < get_global_size(0) )\n"
- " {\n"
- " float3 f0 = vload3( 0, in + 3 * i );\n"
- " float3 f1 = vload3( 0, in2 + 3 * i );\n"
- " float3 f2 = vload3( 0, in3 + 3 * i );\n"
- " f0 = ",
- name,
- "( f0, f1, f2 );\n"
- " vstore3( f0, 0, out + 3*i );\n"
- " }\n"
- " else\n"
- " {\n"
- " size_t parity = i & 1; // Figure out how many elements are "
- "left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two "
- "buffer size \n"
- " float3 f0;\n"
- " float3 f1;\n"
- " float3 f2;\n"
- " switch( parity )\n"
- " {\n"
- " case 1:\n"
- " f0 = (float3)( in[3*i], NAN, NAN ); \n"
- " f1 = (float3)( in2[3*i], NAN, NAN ); \n"
- " f2 = (float3)( in3[3*i], NAN, NAN ); \n"
- " break;\n"
- " case 0:\n"
- " f0 = (float3)( in[3*i], in[3*i+1], NAN ); \n"
- " f1 = (float3)( in2[3*i], in2[3*i+1], NAN ); \n"
- " f2 = (float3)( in3[3*i], in3[3*i+1], NAN ); \n"
- " break;\n"
- " }\n"
- " f0 = ",
- name,
- "( f0, f1, f2 );\n"
- " switch( parity )\n"
- " {\n"
- " case 0:\n"
- " out[3*i+1] = f0.y; \n"
- " // fall through\n"
- " case 1:\n"
- " out[3*i] = f0.x; \n"
- " break;\n"
- " }\n"
- " }\n"
- "}\n"
- };
-
- const char **kern = c;
- size_t kernSize = sizeof(c) / sizeof(c[0]);
-
- if (sizeValues[vectorSize] == 3)
- {
- kern = c3;
- kernSize = sizeof(c3) / sizeof(c3[0]);
- }
-
- char testName[32];
- snprintf(testName, sizeof(testName) - 1, "math_kernel%s",
- sizeNames[vectorSize]);
-
- return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode);
+ auto kernel_name = GetKernelName(vectorSize);
+ auto source = GetTernaryKernel(kernel_name, name, ParameterType::Float,
+ ParameterType::Float, ParameterType::Float,
+ ParameterType::Float, vectorSize);
+ std::array<const char *, 1> sources{ source.c_str() };
+ return MakeKernel(sources.data(), static_cast<cl_uint>(sources.size()),
+ kernel_name.c_str(), k, p, relaxedMode);
}
struct BuildKernelInfo2
@@ -843,8 +765,8 @@
{
if (gVerboseBruteForce)
{
- vlog("base:%14u step:%10u bufferSize:%10zd \n", i, step,
- BUFFER_SIZE);
+ vlog("base:%14" PRIu64 " step:%10" PRIu64 " bufferSize:%10d \n",
+ i, step, BUFFER_SIZE);
}
else
{
diff --git a/test_conformance/math_brute_force/unary_double.cpp b/test_conformance/math_brute_force/unary_double.cpp
index 7dfc12b..177cfe5 100644
--- a/test_conformance/math_brute_force/unary_double.cpp
+++ b/test_conformance/math_brute_force/unary_double.cpp
@@ -19,6 +19,7 @@
#include "test_functions.h"
#include "utility.h"
+#include <cinttypes>
#include <cstring>
namespace {
@@ -345,7 +346,7 @@
if (fail)
{
vlog_error("\nERROR: %s%s: %f ulp error at %.13la "
- "(0x%16.16llx): *%.13la vs. %.13la\n",
+ "(0x%16.16" PRIx64 "): *%.13la vs. %.13la\n",
job->f->name, sizeNames[k], err,
((cl_double *)gIn)[j], ((cl_ulong *)gIn)[j],
((cl_double *)gOut_Ref)[j], test);
@@ -427,7 +428,7 @@
test_info.k[i].resize(test_info.threadCount, nullptr);
}
- test_info.tinfo.resize(test_info.threadCount, ThreadInfo{});
+ test_info.tinfo.resize(test_info.threadCount);
for (cl_uint i = 0; i < test_info.threadCount; i++)
{
cl_buffer_region region = {
diff --git a/test_conformance/math_brute_force/unary_float.cpp b/test_conformance/math_brute_force/unary_float.cpp
index 6a5c353..4c1f1a1 100644
--- a/test_conformance/math_brute_force/unary_float.cpp
+++ b/test_conformance/math_brute_force/unary_float.cpp
@@ -580,7 +580,7 @@
test_info.k[i].resize(test_info.threadCount, nullptr);
}
- test_info.tinfo.resize(test_info.threadCount, ThreadInfo{});
+ test_info.tinfo.resize(test_info.threadCount);
for (cl_uint i = 0; i < test_info.threadCount; i++)
{
cl_buffer_region region = {
diff --git a/test_conformance/math_brute_force/unary_two_results_double.cpp b/test_conformance/math_brute_force/unary_two_results_double.cpp
index 858b2c3..6d7c61d 100644
--- a/test_conformance/math_brute_force/unary_two_results_double.cpp
+++ b/test_conformance/math_brute_force/unary_two_results_double.cpp
@@ -19,6 +19,7 @@
#include "test_functions.h"
#include "utility.h"
+#include <cinttypes>
#include <cstring>
namespace {
@@ -414,8 +415,9 @@
{
if (gVerboseBruteForce)
{
- vlog("base:%14u step:%10zu bufferSize:%10zd \n", i, step,
- BUFFER_SIZE);
+ vlog("base:%14" PRIu64 " step:%10" PRIu64
+ " bufferSize:%10d \n",
+ i, step, BUFFER_SIZE);
}
else
{
diff --git a/test_conformance/math_brute_force/unary_two_results_float.cpp b/test_conformance/math_brute_force/unary_two_results_float.cpp
index 85e5d01..42e858c 100644
--- a/test_conformance/math_brute_force/unary_two_results_float.cpp
+++ b/test_conformance/math_brute_force/unary_two_results_float.cpp
@@ -19,6 +19,7 @@
#include "test_functions.h"
#include "utility.h"
+#include <cinttypes>
#include <cstring>
namespace {
@@ -546,8 +547,9 @@
{
if (gVerboseBruteForce)
{
- vlog("base:%14u step:%10zu bufferSize:%10zd \n", i, step,
- BUFFER_SIZE);
+ vlog("base:%14" PRIu64 " step:%10" PRIu64
+ " bufferSize:%10d \n",
+ i, step, BUFFER_SIZE);
}
else
{
diff --git a/test_conformance/math_brute_force/unary_two_results_i_double.cpp b/test_conformance/math_brute_force/unary_two_results_i_double.cpp
index 4cfbca9..8b75194 100644
--- a/test_conformance/math_brute_force/unary_two_results_i_double.cpp
+++ b/test_conformance/math_brute_force/unary_two_results_i_double.cpp
@@ -19,6 +19,7 @@
#include "test_functions.h"
#include "utility.h"
+#include <cinttypes>
#include <climits>
#include <cstring>
@@ -386,8 +387,9 @@
{
if (gVerboseBruteForce)
{
- vlog("base:%14u step:%10zu bufferSize:%10zd \n", i, step,
- BUFFER_SIZE);
+ vlog("base:%14" PRIu64 " step:%10" PRIu64
+ " bufferSize:%10d \n",
+ i, step, BUFFER_SIZE);
}
else
{
@@ -404,8 +406,8 @@
else
vlog("passed");
- vlog("\t{%8.2f, %lld} @ {%a, %a}", maxError, maxError2, maxErrorVal,
- maxErrorVal2);
+ vlog("\t{%8.2f, %" PRId64 "} @ {%a, %a}", maxError, maxError2,
+ maxErrorVal, maxErrorVal2);
}
vlog("\n");
diff --git a/test_conformance/math_brute_force/unary_two_results_i_float.cpp b/test_conformance/math_brute_force/unary_two_results_i_float.cpp
index e324ad0..54843a2 100644
--- a/test_conformance/math_brute_force/unary_two_results_i_float.cpp
+++ b/test_conformance/math_brute_force/unary_two_results_i_float.cpp
@@ -19,6 +19,7 @@
#include "test_functions.h"
#include "utility.h"
+#include <cinttypes>
#include <climits>
#include <cstring>
@@ -384,8 +385,9 @@
{
if (gVerboseBruteForce)
{
- vlog("base:%14u step:%10zu bufferSize:%10zd \n", i, step,
- BUFFER_SIZE);
+ vlog("base:%14" PRIu64 " step:%10" PRIu64
+ " bufferSize:%10d \n",
+ i, step, BUFFER_SIZE);
}
else
{
@@ -402,8 +404,8 @@
else
vlog("passed");
- vlog("\t{%8.2f, %lld} @ {%a, %a}", maxError, maxError2, maxErrorVal,
- maxErrorVal2);
+ vlog("\t{%8.2f, %" PRId64 "} @ {%a, %a}", maxError, maxError2,
+ maxErrorVal, maxErrorVal2);
}
vlog("\n");
diff --git a/test_conformance/math_brute_force/unary_u_double.cpp b/test_conformance/math_brute_force/unary_u_double.cpp
index a0c6b79..9b60904 100644
--- a/test_conformance/math_brute_force/unary_u_double.cpp
+++ b/test_conformance/math_brute_force/unary_u_double.cpp
@@ -19,6 +19,7 @@
#include "test_functions.h"
#include "utility.h"
+#include <cinttypes>
#include <cstring>
namespace {
@@ -267,11 +268,11 @@
}
if (fail)
{
- vlog_error("\n%s%sD: %f ulp error at 0x%16.16llx: "
- "*%.13la vs. %.13la\n",
- f->name, sizeNames[k], err,
- ((uint64_t *)gIn)[j],
- ((double *)gOut_Ref)[j], test);
+ vlog_error(
+ "\n%s%sD: %f ulp error at 0x%16.16" PRIx64 ": "
+ "*%.13la vs. %.13la\n",
+ f->name, sizeNames[k], err, ((uint64_t *)gIn)[j],
+ ((double *)gOut_Ref)[j], test);
error = -1;
goto exit;
}
@@ -283,8 +284,9 @@
{
if (gVerboseBruteForce)
{
- vlog("base:%14u step:%10zu bufferSize:%10zd \n", i, step,
- BUFFER_SIZE);
+ vlog("base:%14" PRIu64 " step:%10" PRIu64
+ " bufferSize:%10d \n",
+ i, step, BUFFER_SIZE);
}
else
{
diff --git a/test_conformance/math_brute_force/unary_u_float.cpp b/test_conformance/math_brute_force/unary_u_float.cpp
index ccfbc3b..b67a9bd 100644
--- a/test_conformance/math_brute_force/unary_u_float.cpp
+++ b/test_conformance/math_brute_force/unary_u_float.cpp
@@ -19,6 +19,7 @@
#include "test_functions.h"
#include "utility.h"
+#include <cinttypes>
#include <cstring>
namespace {
@@ -285,8 +286,9 @@
{
if (gVerboseBruteForce)
{
- vlog("base:%14u step:%10zu bufferSize:%10zd \n", i, step,
- BUFFER_SIZE);
+ vlog("base:%14" PRIu64 " step:%10" PRIu64
+ " bufferSize:%10d \n",
+ i, step, BUFFER_SIZE);
}
else
{
diff --git a/test_conformance/pipes/test_pipe_limits.cpp b/test_conformance/pipes/test_pipe_limits.cpp
index 7e97925..e1048f5 100644
--- a/test_conformance/pipes/test_pipe_limits.cpp
+++ b/test_conformance/pipes/test_pipe_limits.cpp
@@ -69,7 +69,7 @@
}
}
)";
- // clang-format om
+ // clang-format on
}
stream << R"(
}
diff --git a/test_conformance/pipes/test_pipe_read_write.cpp b/test_conformance/pipes/test_pipe_read_write.cpp
index a502e03..425c7ae 100644
--- a/test_conformance/pipes/test_pipe_read_write.cpp
+++ b/test_conformance/pipes/test_pipe_read_write.cpp
@@ -414,9 +414,9 @@
static int verify_readwrite_double(void *ptr1, void *ptr2, int n)
{
int i;
- long long int sum_input = 0, sum_output = 0;
- long long int *inptr = (long long int *)ptr1;
- long long int *outptr = (long long int *)ptr2;
+ cl_long sum_input = 0, sum_output = 0;
+ cl_long *inptr = (cl_long *)ptr1;
+ cl_long *outptr = (cl_long *)ptr2;
for(i = 0; i < n; i++)
{
@@ -1075,7 +1075,8 @@
if(!is_extension_available(deviceID, "cl_khr_fp16"))
{
- log_info("cl_khr_fp16 is not supported on this platoform. Skipping test.\n");
+ log_info(
+ "cl_khr_fp16 is not supported on this platform. Skipping test.\n");
return CL_SUCCESS;
}
ptrSizes[0] = sizeof(cl_float) / 2;
@@ -1245,7 +1246,7 @@
size_t min_alignment = get_min_alignment(context);
- foo = verify_readwrite_long;
+ foo = verify_readwrite_double;
ptrSizes[0] = sizeof(cl_double);
ptrSizes[1] = ptrSizes[0] << 1;
@@ -1256,7 +1257,8 @@
//skip devices that don't support double
if(!is_extension_available(deviceID, "cl_khr_fp64"))
{
- log_info("cl_khr_fp64 is not supported on this platoform. Skipping test.\n");
+ log_info(
+ "cl_khr_fp64 is not supported on this platform. Skipping test.\n");
return CL_SUCCESS;
}
@@ -1403,7 +1405,8 @@
if(!is_extension_available(deviceID, "cl_khr_subgroups"))
{
- log_info("cl_khr_subgroups is not supported on this platoform. Skipping test.\n");
+ log_info("cl_khr_subgroups is not supported on this platform. Skipping "
+ "test.\n");
return CL_SUCCESS;
}
return test_pipe_readwrite_int(deviceID, context, queue, num_elements);
@@ -1417,7 +1420,8 @@
if(!is_extension_available(deviceID, "cl_khr_subgroups"))
{
- log_info("cl_khr_subgroups is not supported on this platoform. Skipping test.\n");
+ log_info("cl_khr_subgroups is not supported on this platform. Skipping "
+ "test.\n");
return CL_SUCCESS;
}
return test_pipe_readwrite_uint(deviceID, context, queue, num_elements);
@@ -1431,7 +1435,8 @@
if(!is_extension_available(deviceID, "cl_khr_subgroups"))
{
- log_info("cl_khr_subgroups is not supported on this platoform. Skipping test.\n");
+ log_info("cl_khr_subgroups is not supported on this platform. Skipping "
+ "test.\n");
return CL_SUCCESS;
}
return test_pipe_readwrite_short(deviceID, context, queue, num_elements);
@@ -1445,7 +1450,8 @@
if(!is_extension_available(deviceID, "cl_khr_subgroups"))
{
- log_info("cl_khr_subgroups is not supported on this platoform. Skipping test.\n");
+ log_info("cl_khr_subgroups is not supported on this platform. Skipping "
+ "test.\n");
return CL_SUCCESS;
}
return test_pipe_readwrite_ushort(deviceID, context, queue, num_elements);
@@ -1459,7 +1465,8 @@
if(!is_extension_available(deviceID, "cl_khr_subgroups"))
{
- log_info("cl_khr_subgroups is not supported on this platoform. Skipping test.\n");
+ log_info("cl_khr_subgroups is not supported on this platform. Skipping "
+ "test.\n");
return CL_SUCCESS;
}
return test_pipe_readwrite_char(deviceID, context, queue, num_elements);
@@ -1473,7 +1480,8 @@
if(!is_extension_available(deviceID, "cl_khr_subgroups"))
{
- log_info("cl_khr_subgroups is not supported on this platoform. Skipping test.\n");
+ log_info("cl_khr_subgroups is not supported on this platform. Skipping "
+ "test.\n");
return CL_SUCCESS;
}
return test_pipe_readwrite_uchar(deviceID, context, queue, num_elements);
@@ -1488,7 +1496,8 @@
if(!is_extension_available(deviceID, "cl_khr_subgroups"))
{
- log_info("cl_khr_subgroups is not supported on this platoform. Skipping test.\n");
+ log_info("cl_khr_subgroups is not supported on this platform. Skipping "
+ "test.\n");
return CL_SUCCESS;
}
return test_pipe_readwrite_float(deviceID, context, queue, num_elements);
@@ -1502,7 +1511,8 @@
if(!is_extension_available(deviceID, "cl_khr_subgroups"))
{
- log_info("cl_khr_subgroups is not supported on this platoform. Skipping test.\n");
+ log_info("cl_khr_subgroups is not supported on this platform. Skipping "
+ "test.\n");
return CL_SUCCESS;
}
return test_pipe_readwrite_half(deviceID, context, queue, num_elements);
@@ -1516,7 +1526,8 @@
if(!is_extension_available(deviceID, "cl_khr_subgroups"))
{
- log_info("cl_khr_subgroups is not supported on this platoform. Skipping test.\n");
+ log_info("cl_khr_subgroups is not supported on this platform. Skipping "
+ "test.\n");
return CL_SUCCESS;
}
return test_pipe_readwrite_long(deviceID, context, queue, num_elements);
@@ -1530,7 +1541,8 @@
if(!is_extension_available(deviceID, "cl_khr_subgroups"))
{
- log_info("cl_khr_subgroups is not supported on this platoform. Skipping test.\n");
+ log_info("cl_khr_subgroups is not supported on this platform. Skipping "
+ "test.\n");
return CL_SUCCESS;
}
return test_pipe_readwrite_ulong(deviceID, context, queue, num_elements);
@@ -1544,7 +1556,8 @@
if(!is_extension_available(deviceID, "cl_khr_subgroups"))
{
- log_info("cl_khr_subgroups is not supported on this platoform. Skipping test.\n");
+ log_info("cl_khr_subgroups is not supported on this platform. Skipping "
+ "test.\n");
return CL_SUCCESS;
}
return test_pipe_readwrite_double(deviceID, context, queue, num_elements);
@@ -1554,7 +1567,8 @@
{
if(!is_extension_available(deviceID, "cl_khr_subgroups"))
{
- log_info("cl_khr_subgroups is not supported on this platoform. Skipping test.\n");
+ log_info("cl_khr_subgroups is not supported on this platform. Skipping "
+ "test.\n");
return CL_SUCCESS;
}
const char *kernelNames[] = {"test_pipe_subgroup_write_struct","test_pipe_subgroup_read_struct"};
diff --git a/test_conformance/pipes/test_pipe_subgroups.cpp b/test_conformance/pipes/test_pipe_subgroups.cpp
index b3e1718..8e2f6e5 100644
--- a/test_conformance/pipes/test_pipe_subgroups.cpp
+++ b/test_conformance/pipes/test_pipe_subgroups.cpp
@@ -114,9 +114,8 @@
if (!is_extension_available(deviceID, "cl_khr_subgroups"))
{
- log_info(
- "cl_khr_subgroups is not supported on this platoform. Skipping "
- "test.\n");
+ log_info("cl_khr_subgroups is not supported on this platform. Skipping "
+ "test.\n");
return CL_SUCCESS;
}
diff --git a/test_conformance/printf/test_printf.cpp b/test_conformance/printf/test_printf.cpp
index a32ee4e..d638cd4 100644
--- a/test_conformance/printf/test_printf.cpp
+++ b/test_conformance/printf/test_printf.cpp
@@ -1,6 +1,6 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
-//
+//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
@@ -1030,8 +1030,6 @@
return TEST_SKIP;
}
- log_info( "Test binary built %s %s\n", __DATE__, __TIME__ );
-
gFd = acquireOutputStream(&err);
if (err != 0)
{
diff --git a/test_conformance/run_conformance.py b/test_conformance/run_conformance.py
index bb8f86f..974491e 100755
--- a/test_conformance/run_conformance.py
+++ b/test_conformance/run_conformance.py
@@ -16,7 +16,6 @@
import subprocess
import time
import tempfile
-import string
DEBUG = 0
@@ -27,7 +26,6 @@
# to the screen while the tests are running.
seconds_between_status_updates = 60 * 60 * 24 * 7 # effectively never
-
# Help info
def write_help_info():
print("run_conformance.py test_list [CL_DEVICE_TYPE(s) to test] [partial-test-names, ...] [log=path/to/log/file/]")
@@ -66,16 +64,16 @@
device_specific_match = re.search("^\s*(.+?)\s*,\s*(.+?)\s*,\s*(.+?)\s*$", line)
if device_specific_match:
if device_specific_match.group(1) in devices_to_test:
- test_path = string.replace(device_specific_match.group(3), '/', os.sep)
- test_name = string.replace(device_specific_match.group(2), '/', os.sep)
+ test_path = device_specific_match.group(3).replace('/', os.sep)
+ test_name = device_specific_match.group(2).replace('/', os.sep)
tests.append((test_name, test_path))
else:
print("Skipping " + device_specific_match.group(2) + " because " + device_specific_match.group(1) + " is not in the list of devices to test.")
continue
match = re.search("^\s*(.+?)\s*,\s*(.+?)\s*$", line)
if match:
- test_path = string.replace(match.group(2), '/', os.sep)
- test_name = string.replace(match.group(1), '/', os.sep)
+ test_path = match.group(2).replace('/', os.sep)
+ test_name = match.group(1).replace('/', os.sep)
tests.append((test_name, test_path))
return tests
@@ -243,7 +241,10 @@
# Catch an interrupt from the user
write_screen_log("\nFAILED: Execution interrupted. Killing test process, but not aborting full test run.")
os.kill(process_pid, 9)
- answer = raw_input("Abort all tests? (y/n)")
+ if sys.version_info[0] < 3:
+ answer = raw_input("Abort all tests? (y/n)")
+ else:
+ answer = input("Abort all tests? (y/n)")
if answer.find("y") != -1:
write_screen_log("\nUser chose to abort all tests.")
log_file.close()
diff --git a/test_conformance/select/test_select.cpp b/test_conformance/select/test_select.cpp
index 27ee5ff..972a53c 100644
--- a/test_conformance/select/test_select.cpp
+++ b/test_conformance/select/test_select.cpp
@@ -1,6 +1,6 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
-//
+//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
@@ -639,7 +639,6 @@
s_wimpy_mode = true;
}
- log_info( "Test binary built %s %s\n", __DATE__, __TIME__ );
if (s_wimpy_mode) {
log_info("\n");
log_info("*** WARNING: Testing in Wimpy mode! ***\n");
@@ -668,4 +667,3 @@
log_info( "\t%s\n", test_list[i].name );
}
}
-
diff --git a/test_conformance/subgroups/subhelpers.h b/test_conformance/subgroups/subhelpers.h
index 0944ffb..0a2c390 100644
--- a/test_conformance/subgroups/subhelpers.h
+++ b/test_conformance/subgroups/subhelpers.h
@@ -72,7 +72,7 @@
size_t subgroup_size;
cl_uint cluster_size;
bs128 work_items_mask;
- int dynsc;
+ size_t dynsc;
bool use_core_subgroups;
std::vector<bs128> all_work_item_masks;
int divergence_mask_arg;
@@ -1495,7 +1495,7 @@
{
size_t tmp;
cl_int error;
- int subgroup_size, num_subgroups;
+ size_t subgroup_size, num_subgroups;
size_t global = test_params.global_workgroup_size;
size_t local = test_params.local_workgroup_size;
clProgramWrapper program;
@@ -1580,7 +1580,7 @@
return TEST_FAIL;
}
- subgroup_size = (int)tmp;
+ subgroup_size = tmp;
error = clGetKernelSubGroupInfo_ptr(
kernel, device, CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE,
@@ -1593,11 +1593,11 @@
return TEST_FAIL;
}
- num_subgroups = (int)tmp;
+ num_subgroups = tmp;
// Make sure the number of sub groups is what we expect
if (num_subgroups != (local + subgroup_size - 1) / subgroup_size)
{
- log_error("ERROR: unexpected number of subgroups (%d) returned\n",
+ log_error("ERROR: unexpected number of subgroups (%zu) returned\n",
num_subgroups);
return TEST_FAIL;
}
@@ -1606,13 +1606,12 @@
std::vector<Ty> odata;
size_t input_array_size = global;
size_t output_array_size = global;
- int dynscl = test_params.dynsc;
+ size_t dynscl = test_params.dynsc;
if (dynscl != 0)
{
- input_array_size =
- (int)global / (int)local * num_subgroups * dynscl;
- output_array_size = (int)global / (int)local * dynscl;
+ input_array_size = global / local * num_subgroups * dynscl;
+ output_array_size = global / local * dynscl;
}
idata.resize(input_array_size);
diff --git a/test_conformance/vulkan/main.cpp b/test_conformance/vulkan/main.cpp
index 6cbde5c..2eeb0c3 100644
--- a/test_conformance/vulkan/main.cpp
+++ b/test_conformance/vulkan/main.cpp
@@ -134,7 +134,6 @@
const size_t bufsize = BUFFERSIZE;
char buf[BUFFERSIZE];
cl_uchar uuid[CL_UUID_SIZE_KHR];
-VulkanDevice vkDevice;
unsigned int numCQ;
bool multiImport;
bool multiCtx;
@@ -220,9 +219,12 @@
if (!checkVkSupport())
{
log_info("Vulkan supported GPU not found \n");
+ log_info("TEST SKIPPED \n");
return 0;
}
+ VulkanDevice vkDevice;
+
cl_device_type requestedDeviceType = CL_DEVICE_TYPE_GPU;
char *force_cpu = getenv("CL_DEVICE_TYPE");
if (force_cpu != NULL)
diff --git a/test_conformance/vulkan/shaders/buffer.comp b/test_conformance/vulkan/shaders/buffer.comp
new file mode 100644
index 0000000..d8756f9
--- /dev/null
+++ b/test_conformance/vulkan/shaders/buffer.comp
@@ -0,0 +1,28 @@
+#version 450
+#extension GL_ARB_separate_shader_objects : enable
+#extension GL_EXT_shader_explicit_arithmetic_types_int8 : enable
+#extension GL_EXT_shader_explicit_arithmetic_types_int32 : enable
+
+#define MAX_BUFFERS 5 // length of the bufferPtrList block array declared below
+
+layout(binding = 0) buffer Params
+{
+ uint32_t numBuffers; // how many bufferPtrList entries main() walks
+ uint32_t bufferSize; // upper bound (exclusive) on the byte index touched in each buffer
+ uint32_t interBufferOffset; // not read anywhere in this shader
+};
+layout(binding = 1) buffer Buffer
+{
+ uint8_t ptr[]; // runtime-sized array of bytes
+} bufferPtrList[MAX_BUFFERS];
+layout(local_size_x = 512) in;
+void main() { // each invocation increments a strided subset of bytes in every buffer
+ for (uint32_t bufIdx = 0; bufIdx < numBuffers; bufIdx++) { // NOTE(review): assumes numBuffers <= MAX_BUFFERS - confirm against the host code
+ uint32_t ptrIdx = gl_GlobalInvocationID.x; // first byte handled by this invocation
+ uint32_t limit = bufferSize;
+ while (ptrIdx < limit) {
+ bufferPtrList[bufIdx].ptr[ptrIdx]++;
+ ptrIdx += (gl_NumWorkGroups.x * gl_WorkGroupSize.x); // advance by the total number of invocations in x
+ }
+ }
+}
\ No newline at end of file
diff --git a/test_conformance/vulkan/shaders/buffer.spv b/test_conformance/vulkan/shaders/buffer.spv
new file mode 100644
index 0000000..685523b
--- /dev/null
+++ b/test_conformance/vulkan/shaders/buffer.spv
Binary files differ
diff --git a/test_conformance/vulkan/shaders/image2D.comp b/test_conformance/vulkan/shaders/image2D.comp
new file mode 100644
index 0000000..42fa2f7
--- /dev/null
+++ b/test_conformance/vulkan/shaders/image2D.comp
@@ -0,0 +1,31 @@
+#version 450
+#extension GL_ARB_separate_shader_objects : enable
+#extension GL_EXT_shader_explicit_arithmetic_types_int32 : enable
+
+#define MAX_2D_IMAGES 5
+#define MAX_2D_IMAGE_MIP_LEVELS 11
+#define MAX_2D_IMAGE_DESCRIPTORS MAX_2D_IMAGES * MAX_2D_IMAGE_MIP_LEVELS // NOTE(review): expansion is unparenthesized; it is only used as an array size in this file
+
+layout(binding = 0) buffer Params
+{
+ uint32_t numImage2DDescriptors; // how many image2DList entries main() processes
+};
+layout(binding = 1, rgba32f ) uniform image2D image2DList[ MAX_2D_IMAGE_DESCRIPTORS ]; // NOTE(review): only the rgba32f variant is spelled out here; confirm how the per-format image2D_*.spv files are generated
+layout(local_size_x = 32, local_size_y = 32) in;
+void main() { // flips every listed image vertically by swapping row r with row (height - r - 1)
+ uvec3 numThreads = gl_NumWorkGroups * gl_WorkGroupSize; // total invocations per dimension, used as the loop stride
+ for (uint32_t image2DIdx = 0; image2DIdx < numImage2DDescriptors; image2DIdx++) {
+ ivec2 imageDim = imageSize(image2DList[image2DIdx]);
+ uint32_t heightBy2 = imageDim.y / 2; // only the top half is iterated; each step also writes the mirrored row
+ for (uint32_t row = gl_GlobalInvocationID.y; row < heightBy2; row += numThreads.y) {
+ for (uint32_t col = gl_GlobalInvocationID.x; col < imageDim.x; col += numThreads.x) {
+ ivec2 coordsA = ivec2(col, row);
+ ivec2 coordsB = ivec2(col, imageDim.y - row - 1); // same column, mirrored row
+ vec4 dataA = imageLoad(image2DList[image2DIdx], coordsA);
+ vec4 dataB = imageLoad(image2DList[image2DIdx], coordsB);
+ imageStore(image2DList[image2DIdx], coordsA, dataB); // both texels are loaded before either store, so the swap is safe
+ imageStore(image2DList[image2DIdx], coordsB, dataA);
+ }
+ }
+ }
+}
\ No newline at end of file
diff --git a/test_conformance/vulkan/shaders/image2D_r16i.spv b/test_conformance/vulkan/shaders/image2D_r16i.spv
new file mode 100644
index 0000000..00c5c28
--- /dev/null
+++ b/test_conformance/vulkan/shaders/image2D_r16i.spv
Binary files differ
diff --git a/test_conformance/vulkan/shaders/image2D_r16ui.spv b/test_conformance/vulkan/shaders/image2D_r16ui.spv
new file mode 100644
index 0000000..87514d9
--- /dev/null
+++ b/test_conformance/vulkan/shaders/image2D_r16ui.spv
Binary files differ
diff --git a/test_conformance/vulkan/shaders/image2D_r32f.spv b/test_conformance/vulkan/shaders/image2D_r32f.spv
new file mode 100644
index 0000000..e82c9c1
--- /dev/null
+++ b/test_conformance/vulkan/shaders/image2D_r32f.spv
Binary files differ
diff --git a/test_conformance/vulkan/shaders/image2D_r32i.spv b/test_conformance/vulkan/shaders/image2D_r32i.spv
new file mode 100644
index 0000000..7ea8d26
--- /dev/null
+++ b/test_conformance/vulkan/shaders/image2D_r32i.spv
Binary files differ
diff --git a/test_conformance/vulkan/shaders/image2D_r32ui.spv b/test_conformance/vulkan/shaders/image2D_r32ui.spv
new file mode 100644
index 0000000..dbcdbc5
--- /dev/null
+++ b/test_conformance/vulkan/shaders/image2D_r32ui.spv
Binary files differ
diff --git a/test_conformance/vulkan/shaders/image2D_r8i.spv b/test_conformance/vulkan/shaders/image2D_r8i.spv
new file mode 100644
index 0000000..1a64147
--- /dev/null
+++ b/test_conformance/vulkan/shaders/image2D_r8i.spv
Binary files differ
diff --git a/test_conformance/vulkan/shaders/image2D_r8ui.spv b/test_conformance/vulkan/shaders/image2D_r8ui.spv
new file mode 100644
index 0000000..a90ccf9
--- /dev/null
+++ b/test_conformance/vulkan/shaders/image2D_r8ui.spv
Binary files differ
diff --git a/test_conformance/vulkan/shaders/image2D_rg16i.spv b/test_conformance/vulkan/shaders/image2D_rg16i.spv
new file mode 100644
index 0000000..0799617
--- /dev/null
+++ b/test_conformance/vulkan/shaders/image2D_rg16i.spv
Binary files differ
diff --git a/test_conformance/vulkan/shaders/image2D_rg16ui.spv b/test_conformance/vulkan/shaders/image2D_rg16ui.spv
new file mode 100644
index 0000000..f73e096
--- /dev/null
+++ b/test_conformance/vulkan/shaders/image2D_rg16ui.spv
Binary files differ
diff --git a/test_conformance/vulkan/shaders/image2D_rg32f.spv b/test_conformance/vulkan/shaders/image2D_rg32f.spv
new file mode 100644
index 0000000..1489660
--- /dev/null
+++ b/test_conformance/vulkan/shaders/image2D_rg32f.spv
Binary files differ
diff --git a/test_conformance/vulkan/shaders/image2D_rg32i.spv b/test_conformance/vulkan/shaders/image2D_rg32i.spv
new file mode 100644
index 0000000..b7d302f
--- /dev/null
+++ b/test_conformance/vulkan/shaders/image2D_rg32i.spv
Binary files differ
diff --git a/test_conformance/vulkan/shaders/image2D_rg32ui.spv b/test_conformance/vulkan/shaders/image2D_rg32ui.spv
new file mode 100644
index 0000000..6cf2f1b
--- /dev/null
+++ b/test_conformance/vulkan/shaders/image2D_rg32ui.spv
Binary files differ
diff --git a/test_conformance/vulkan/shaders/image2D_rg8i.spv b/test_conformance/vulkan/shaders/image2D_rg8i.spv
new file mode 100644
index 0000000..a71b9bf
--- /dev/null
+++ b/test_conformance/vulkan/shaders/image2D_rg8i.spv
Binary files differ
diff --git a/test_conformance/vulkan/shaders/image2D_rg8ui.spv b/test_conformance/vulkan/shaders/image2D_rg8ui.spv
new file mode 100644
index 0000000..2aca929
--- /dev/null
+++ b/test_conformance/vulkan/shaders/image2D_rg8ui.spv
Binary files differ
diff --git a/test_conformance/vulkan/shaders/image2D_rgba16i.spv b/test_conformance/vulkan/shaders/image2D_rgba16i.spv
new file mode 100644
index 0000000..0cb95df
--- /dev/null
+++ b/test_conformance/vulkan/shaders/image2D_rgba16i.spv
Binary files differ
diff --git a/test_conformance/vulkan/shaders/image2D_rgba16ui.spv b/test_conformance/vulkan/shaders/image2D_rgba16ui.spv
new file mode 100644
index 0000000..84c3d3d
--- /dev/null
+++ b/test_conformance/vulkan/shaders/image2D_rgba16ui.spv
Binary files differ
diff --git a/test_conformance/vulkan/shaders/image2D_rgba32f.spv b/test_conformance/vulkan/shaders/image2D_rgba32f.spv
new file mode 100644
index 0000000..35136c5
--- /dev/null
+++ b/test_conformance/vulkan/shaders/image2D_rgba32f.spv
Binary files differ
diff --git a/test_conformance/vulkan/shaders/image2D_rgba32i.spv b/test_conformance/vulkan/shaders/image2D_rgba32i.spv
new file mode 100644
index 0000000..4d1ae58
--- /dev/null
+++ b/test_conformance/vulkan/shaders/image2D_rgba32i.spv
Binary files differ
diff --git a/test_conformance/vulkan/shaders/image2D_rgba32ui.spv b/test_conformance/vulkan/shaders/image2D_rgba32ui.spv
new file mode 100644
index 0000000..bed86f0
--- /dev/null
+++ b/test_conformance/vulkan/shaders/image2D_rgba32ui.spv
Binary files differ
diff --git a/test_conformance/vulkan/shaders/image2D_rgba8i.spv b/test_conformance/vulkan/shaders/image2D_rgba8i.spv
new file mode 100644
index 0000000..edf8c58
--- /dev/null
+++ b/test_conformance/vulkan/shaders/image2D_rgba8i.spv
Binary files differ
diff --git a/test_conformance/vulkan/shaders/image2D_rgba8ui.spv b/test_conformance/vulkan/shaders/image2D_rgba8ui.spv
new file mode 100644
index 0000000..bb9a770
--- /dev/null
+++ b/test_conformance/vulkan/shaders/image2D_rgba8ui.spv
Binary files differ
diff --git a/test_conformance/vulkan/test_vulkan_api_consistency.cpp b/test_conformance/vulkan/test_vulkan_api_consistency.cpp
index 2987418..f22ac31 100644
--- a/test_conformance/vulkan/test_vulkan_api_consistency.cpp
+++ b/test_conformance/vulkan/test_vulkan_api_consistency.cpp
@@ -238,7 +238,7 @@
const VulkanMemoryTypeList& memoryTypeList = vkImage2D->getMemoryTypeList();
uint64_t totalImageMemSize = vkImage2D->getSize();
- log_info("Memory type index: %d\n", (uint32_t)memoryTypeList[0]);
+ log_info("Memory type index: %u\n", (uint32_t)memoryTypeList[0]); // argument is cast to uint32_t, so the matching conversion is %u
log_info("Memory type property: %d\n",
memoryTypeList[0].getMemoryTypeProperty());
log_info("Image size : %d\n", totalImageMemSize);
@@ -552,17 +552,17 @@
// Pass invalid object to release call
- errNum = clReleaseSemaphoreObjectKHRptr(NULL);
+ errNum = clReleaseSemaphoreKHRptr(NULL);
test_failure_error(errNum, CL_INVALID_VALUE,
- "clReleaseSemaphoreObjectKHRptr fails with "
+ "clReleaseSemaphoreKHRptr fails with "
"CL_INVALID_VALUE when NULL semaphore object is passed");
// Release both semaphore objects
- errNum = clReleaseSemaphoreObjectKHRptr(clVk2Clsemaphore);
- test_error(errNum, "clReleaseSemaphoreObjectKHRptr failed");
+ errNum = clReleaseSemaphoreKHRptr(clVk2Clsemaphore);
+ test_error(errNum, "clReleaseSemaphoreKHRptr failed");
- errNum = clReleaseSemaphoreObjectKHRptr(clCl2Vksemaphore);
- test_error(errNum, "clReleaseSemaphoreObjectKHRptr failed");
+ errNum = clReleaseSemaphoreKHRptr(clCl2Vksemaphore);
+ test_error(errNum, "clReleaseSemaphoreKHRptr failed");
return TEST_PASS;
}
diff --git a/test_conformance/vulkan/test_vulkan_interop_buffer.cpp b/test_conformance/vulkan/test_vulkan_interop_buffer.cpp
index 7daf96d..9b0bc9d 100644
--- a/test_conformance/vulkan/test_vulkan_interop_buffer.cpp
+++ b/test_conformance/vulkan/test_vulkan_interop_buffer.cpp
@@ -39,35 +39,6 @@
};
}
-static const char *vkBufferShader =
- "#version 450\n"
- "#extension GL_ARB_separate_shader_objects : enable\n"
- "#extension GL_NV_gpu_shader5 : enable\n"
- "layout(binding = 0) buffer Params\n"
- "{\n"
- " uint32_t numBuffers;\n"
- " uint32_t bufferSize;\n"
- " uint32_t interBufferOffset;\n"
- "};\n"
- "layout(binding = 1) buffer Buffer\n"
- "{\n"
- " uint8_t ptr[];\n"
- "} bufferPtrList[" STRING(
- MAX_BUFFERS) "];\n"
- "layout(local_size_x = 512) in;\n"
- "void main() {\n"
- " for (uint32_t bufIdx = 0; bufIdx < numBuffers;"
- " bufIdx++) {\n"
- " uint32_t ptrIdx = gl_GlobalInvocationID.x;\n"
- " uint32_t limit = bufferSize;\n"
- " while (ptrIdx < limit) {\n"
- " bufferPtrList[bufIdx].ptr[ptrIdx]++;\n"
- " ptrIdx += (gl_NumWorkGroups.x * "
- "gl_WorkGroupSize.x);\n"
- " }\n"
- " }\n"
- "}\n";
-
const char *kernel_text_numbuffer_1 = " \
__kernel void clUpdateBuffer(int bufferSize, __global unsigned char *a) { \n\
int gid = get_global_id(0); \n\
@@ -149,6 +120,8 @@
VulkanQueue &vkQueue = vkDevice.getQueue();
+ std::vector<char> vkBufferShader = readFile("buffer.spv");
+
VulkanShaderModule vkBufferShaderModule(vkDevice, vkBufferShader);
VulkanDescriptorSetLayoutBindingList vkDescriptorSetLayoutBindingList(
MAX_BUFFERS + 1, VULKAN_DESCRIPTOR_TYPE_STORAGE_BUFFER);
@@ -446,6 +419,7 @@
VulkanQueue &vkQueue = vkDevice.getQueue();
+ std::vector<char> vkBufferShader = readFile("buffer.spv");
VulkanShaderModule vkBufferShaderModule(vkDevice, vkBufferShader);
VulkanDescriptorSetLayoutBindingList vkDescriptorSetLayoutBindingList(
MAX_BUFFERS + 1, VULKAN_DESCRIPTOR_TYPE_STORAGE_BUFFER);
@@ -716,6 +690,8 @@
VulkanQueue &vkQueue = vkDevice.getQueue();
+ std::vector<char> vkBufferShader = readFile("buffer.spv");
+
VulkanShaderModule vkBufferShaderModule(vkDevice, vkBufferShader);
VulkanDescriptorSetLayoutBindingList vkDescriptorSetLayoutBindingList(
MAX_BUFFERS + 1, VULKAN_DESCRIPTOR_TYPE_STORAGE_BUFFER);
@@ -1050,6 +1026,8 @@
VulkanQueue &vkQueue = vkDevice.getQueue();
+ std::vector<char> vkBufferShader = readFile("buffer.spv");
+
VulkanShaderModule vkBufferShaderModule(vkDevice, vkBufferShader);
VulkanDescriptorSetLayoutBindingList vkDescriptorSetLayoutBindingList(
MAX_BUFFERS + 1, VULKAN_DESCRIPTOR_TYPE_STORAGE_BUFFER);
diff --git a/test_conformance/vulkan/test_vulkan_interop_image.cpp b/test_conformance/vulkan/test_vulkan_interop_image.cpp
index f1d0af1..7577de0 100644
--- a/test_conformance/vulkan/test_vulkan_interop_image.cpp
+++ b/test_conformance/vulkan/test_vulkan_interop_image.cpp
@@ -25,8 +25,6 @@
#define MAX_2D_IMAGE_ELEMENT_SIZE 16
#define MAX_2D_IMAGE_MIP_LEVELS 11
#define MAX_2D_IMAGE_DESCRIPTORS MAX_2D_IMAGES *MAX_2D_IMAGE_MIP_LEVELS
-#define GLSL_FORMAT_STRING "<GLSL_FORMAT>"
-#define GLSL_TYPE_PREFIX_STRING "<GLSL_TYPE_PREFIX>"
#define NUM_THREADS_PER_GROUP_X 32
#define NUM_THREADS_PER_GROUP_Y 32
#define NUM_BLOCKS(size, blockSize) \
@@ -54,61 +52,8 @@
}
static cl_uchar uuid[CL_UUID_SIZE_KHR];
static cl_device_id deviceId = NULL;
-
-static const char *vkImage2DShader =
- "#version 450\n"
- "#extension GL_ARB_separate_shader_objects : enable\n"
- "#extension GL_NV_gpu_shader5 : enable\n"
- "layout(binding = 0) buffer Params\n"
- "{\n"
- " uint32_t numImage2DDescriptors;\n"
- "};\n"
- "layout(binding = 1, " GLSL_FORMAT_STRING
- ") uniform " GLSL_TYPE_PREFIX_STRING "image2D image2DList[" STRING(
- MAX_2D_IMAGE_DESCRIPTORS) "];\n"
- "layout(local_size_x = 32, local_size_y = "
- "32) in;\n"
- "void main() {\n"
- " uvec3 numThreads = gl_NumWorkGroups * "
- "gl_WorkGroupSize;\n"
- " for (uint32_t image2DIdx = 0; "
- "image2DIdx < numImage2DDescriptors; "
- "image2DIdx++)"
- " {\n"
- " ivec2 imageDim = "
- "imageSize(image2DList[image2DIdx]);\n"
- " uint32_t heightBy2 = imageDim.y / "
- "2;\n"
- " for (uint32_t row = "
- "gl_GlobalInvocationID.y; row < heightBy2; "
- "row += numThreads.y)"
- " {\n"
- " for (uint32_t col = "
- "gl_GlobalInvocationID.x; col < imageDim.x; "
- "col += numThreads.x)"
- " {\n"
- " ivec2 coordsA = ivec2(col, "
- "row);\n"
- " ivec2 coordsB = ivec2(col, "
- "imageDim.y - row - 1);\n"
- " " GLSL_TYPE_PREFIX_STRING
- "vec4 dataA = "
- "imageLoad(image2DList[image2DIdx], "
- "coordsA);\n"
- " " GLSL_TYPE_PREFIX_STRING
- "vec4 dataB = "
- "imageLoad(image2DList[image2DIdx], "
- "coordsB);\n"
- " "
- "imageStore(image2DList[image2DIdx], "
- "coordsA, dataB);\n"
- " "
- "imageStore(image2DList[image2DIdx], "
- "coordsB, dataA);\n"
- " }\n"
- " }\n"
- " }\n"
- "}\n";
+size_t max_width = MAX_2D_IMAGE_WIDTH;
+size_t max_height = MAX_2D_IMAGE_HEIGHT;
const char *kernel_text_numImage_1 = " \
__constant sampler_t smpImg = CLK_NORMALIZED_COORDS_FALSE|CLK_ADDRESS_NONE|CLK_FILTER_NEAREST;\n\
@@ -268,8 +213,8 @@
VULKAN_MEMORY_TYPE_PROPERTY_HOST_VISIBLE_COHERENT));
vkParamsDeviceMemory.bindBuffer(vkParamsBuffer);
- uint64_t maxImage2DSize = MAX_2D_IMAGE_WIDTH * MAX_2D_IMAGE_HEIGHT
- * MAX_2D_IMAGE_ELEMENT_SIZE * 2;
+ uint64_t maxImage2DSize =
+ max_width * max_height * MAX_2D_IMAGE_ELEMENT_SIZE * 2;
VulkanBuffer vkSrcBuffer(vkDevice, maxImage2DSize);
VulkanDeviceMemory vkSrcBufferDeviceMemory(
vkDevice, vkSrcBuffer.getSize(),
@@ -310,6 +255,12 @@
clCl2VkExternalSemaphore = new clExternalSemaphore(
vkCl2VkSemaphore, context, vkExternalSemaphoreHandleType, deviceId);
+ std::vector<VulkanDeviceMemory *> vkNonDedicatedImage2DListDeviceMemory1;
+ std::vector<VulkanDeviceMemory *> vkNonDedicatedImage2DListDeviceMemory2;
+ std::vector<clExternalMemoryImage *> nonDedicatedExternalMemory1;
+ std::vector<clExternalMemoryImage *> nonDedicatedExternalMemory2;
+ std::vector<char> vkImage2DShader;
+
for (size_t fIdx = 0; fIdx < vkFormatList.size(); fIdx++)
{
VulkanFormat vkFormat = vkFormatList[fIdx];
@@ -317,15 +268,13 @@
uint32_t elementSize = getVulkanFormatElementSize(vkFormat);
ASSERT_LEQ(elementSize, (uint32_t)MAX_2D_IMAGE_ELEMENT_SIZE);
log_info("elementSize= %d\n", elementSize);
- std::map<std::string, std::string> patternToSubstituteMap;
- patternToSubstituteMap[GLSL_FORMAT_STRING] =
- getVulkanFormatGLSLFormat(vkFormat);
- patternToSubstituteMap[GLSL_TYPE_PREFIX_STRING] =
- getVulkanFormatGLSLTypePrefix(vkFormat);
- VulkanShaderModule vkImage2DShaderModule(
- vkDevice,
- prepareVulkanShader(vkImage2DShader, patternToSubstituteMap));
+ std::string fileName = "image2D_"
+ + std::string(getVulkanFormatGLSLFormat(vkFormat)) + ".spv";
+ log_info("Load %s file", fileName.c_str());
+ vkImage2DShader = readFile(fileName);
+ VulkanShaderModule vkImage2DShaderModule(vkDevice, vkImage2DShader);
+
VulkanComputePipeline vkComputePipeline(vkDevice, vkPipelineLayout,
vkImage2DShaderModule);
@@ -333,13 +282,13 @@
{
uint32_t width = widthList[wIdx];
log_info("Width: %d\n", width);
- ASSERT_LEQ(width, (uint32_t)MAX_2D_IMAGE_WIDTH);
+ if (width > max_width) continue;
region[0] = width;
for (size_t hIdx = 0; hIdx < ARRAY_SIZE(heightList); hIdx++)
{
uint32_t height = heightList[hIdx];
log_info("Height: %d", height);
- ASSERT_LEQ(height, (uint32_t)MAX_2D_IMAGE_HEIGHT);
+ if (height > max_height) continue;
region[1] = height;
uint32_t numMipLevels = 1;
@@ -418,14 +367,6 @@
const VulkanMemoryTypeList &memoryTypeList =
vkDummyImage2D.getMemoryTypeList();
- std::vector<VulkanDeviceMemory *>
- vkNonDedicatedImage2DListDeviceMemory1;
- std::vector<VulkanDeviceMemory *>
- vkNonDedicatedImage2DListDeviceMemory2;
- std::vector<clExternalMemoryImage *>
- nonDedicatedExternalMemory1;
- std::vector<clExternalMemoryImage *>
- nonDedicatedExternalMemory2;
for (size_t mtIdx = 0; mtIdx < memoryTypeList.size();
mtIdx++)
{
@@ -834,6 +775,8 @@
}
}
}
+
+ vkImage2DShader.clear();
}
CLEANUP:
if (clVk2CLExternalSemaphore) delete clVk2CLExternalSemaphore;
@@ -866,8 +809,8 @@
VULKAN_MEMORY_TYPE_PROPERTY_HOST_VISIBLE_COHERENT));
vkParamsDeviceMemory.bindBuffer(vkParamsBuffer);
- uint64_t maxImage2DSize = MAX_2D_IMAGE_WIDTH * MAX_2D_IMAGE_HEIGHT
- * MAX_2D_IMAGE_ELEMENT_SIZE * 2;
+ uint64_t maxImage2DSize =
+ max_width * max_height * MAX_2D_IMAGE_ELEMENT_SIZE * 2;
VulkanBuffer vkSrcBuffer(vkDevice, maxImage2DSize);
VulkanDeviceMemory vkSrcBufferDeviceMemory(
vkDevice, vkSrcBuffer.getSize(),
@@ -908,6 +851,12 @@
clCl2VkExternalSemaphore = new clExternalSemaphore(
vkCl2VkSemaphore, context, vkExternalSemaphoreHandleType, deviceId);
+ std::vector<VulkanDeviceMemory *> vkNonDedicatedImage2DListDeviceMemory1;
+ std::vector<VulkanDeviceMemory *> vkNonDedicatedImage2DListDeviceMemory2;
+ std::vector<clExternalMemoryImage *> nonDedicatedExternalMemory1;
+ std::vector<clExternalMemoryImage *> nonDedicatedExternalMemory2;
+ std::vector<char> vkImage2DShader;
+
for (size_t fIdx = 0; fIdx < vkFormatList.size(); fIdx++)
{
VulkanFormat vkFormat = vkFormatList[fIdx];
@@ -915,15 +864,13 @@
uint32_t elementSize = getVulkanFormatElementSize(vkFormat);
ASSERT_LEQ(elementSize, (uint32_t)MAX_2D_IMAGE_ELEMENT_SIZE);
log_info("elementSize= %d\n", elementSize);
- std::map<std::string, std::string> patternToSubstituteMap;
- patternToSubstituteMap[GLSL_FORMAT_STRING] =
- getVulkanFormatGLSLFormat(vkFormat);
- patternToSubstituteMap[GLSL_TYPE_PREFIX_STRING] =
- getVulkanFormatGLSLTypePrefix(vkFormat);
- VulkanShaderModule vkImage2DShaderModule(
- vkDevice,
- prepareVulkanShader(vkImage2DShader, patternToSubstituteMap));
+ std::string fileName = "image2D_"
+ + std::string(getVulkanFormatGLSLFormat(vkFormat)) + ".spv";
+ log_info("Load %s file", fileName.c_str());
+ vkImage2DShader = readFile(fileName);
+ VulkanShaderModule vkImage2DShaderModule(vkDevice, vkImage2DShader);
+
VulkanComputePipeline vkComputePipeline(vkDevice, vkPipelineLayout,
vkImage2DShaderModule);
@@ -931,13 +878,13 @@
{
uint32_t width = widthList[wIdx];
log_info("Width: %d\n", width);
- ASSERT_LEQ(width, (uint32_t)MAX_2D_IMAGE_WIDTH);
+ if (width > max_width) continue;
region[0] = width;
for (size_t hIdx = 0; hIdx < ARRAY_SIZE(heightList); hIdx++)
{
uint32_t height = heightList[hIdx];
log_info("Height: %d\n", height);
- ASSERT_LEQ(height, (uint32_t)MAX_2D_IMAGE_HEIGHT);
+ if (height > max_height) continue;
region[1] = height;
uint32_t numMipLevels = 1;
@@ -1016,14 +963,6 @@
const VulkanMemoryTypeList &memoryTypeList =
vkDummyImage2D.getMemoryTypeList();
- std::vector<VulkanDeviceMemory *>
- vkNonDedicatedImage2DListDeviceMemory1;
- std::vector<VulkanDeviceMemory *>
- vkNonDedicatedImage2DListDeviceMemory2;
- std::vector<clExternalMemoryImage *>
- nonDedicatedExternalMemory1;
- std::vector<clExternalMemoryImage *>
- nonDedicatedExternalMemory2;
for (size_t mtIdx = 0; mtIdx < memoryTypeList.size();
mtIdx++)
{
@@ -1368,6 +1307,7 @@
}
}
}
+ vkImage2DShader.clear();
}
CLEANUP:
if (clVk2CLExternalSemaphore) delete clVk2CLExternalSemaphore;
@@ -1494,6 +1434,14 @@
goto CLEANUP;
}
deviceId = devices[device_no];
+ err = setMaxImageDimensions(deviceId, max_width, max_height);
+ if (CL_SUCCESS != err)
+ {
+ print_error(err, "error setting max image dimensions");
+ goto CLEANUP;
+ }
+ log_info("Set max_width to %zu and max_height to %zu\n", max_width,
+ max_height); // both values are size_t, which is printed portably with %zu
context = clCreateContextFromType(contextProperties, CL_DEVICE_TYPE_GPU,
NULL, NULL, &err);
if (CL_SUCCESS != err)
diff --git a/test_conformance/vulkan/vulkan_interop_common/opencl_vulkan_wrapper.cpp b/test_conformance/vulkan/vulkan_interop_common/opencl_vulkan_wrapper.cpp
index 136818f..9d9a660 100644
--- a/test_conformance/vulkan/vulkan_interop_common/opencl_vulkan_wrapper.cpp
+++ b/test_conformance/vulkan/vulkan_interop_common/opencl_vulkan_wrapper.cpp
@@ -23,6 +23,7 @@
#include <stdexcept>
#define ASSERT(x) assert((x))
+#define GB(x) ((unsigned long long)(x) << 30)
pfnclCreateSemaphoreWithPropertiesKHR clCreateSemaphoreWithPropertiesKHRptr;
pfnclEnqueueWaitSemaphoresKHR clEnqueueWaitSemaphoresKHRptr;
@@ -31,7 +32,7 @@
clEnqueueAcquireExternalMemObjectsKHRptr;
pfnclEnqueueReleaseExternalMemObjectsKHR
clEnqueueReleaseExternalMemObjectsKHRptr;
-pfnclReleaseSemaphoreObjectKHR clReleaseSemaphoreObjectKHRptr;
+pfnclReleaseSemaphoreKHR clReleaseSemaphoreKHRptr;
void init_cl_vk_ext(cl_platform_id opencl_platform)
{
@@ -51,13 +52,13 @@
throw std::runtime_error("Failed to get the function pointer of "
"clEnqueueSignalSemaphoresKHRptr!");
}
- clReleaseSemaphoreObjectKHRptr = (pfnclReleaseSemaphoreObjectKHR)
- clGetExtensionFunctionAddressForPlatform(opencl_platform,
- "clReleaseSemaphoreObjectKHR");
- if (NULL == clReleaseSemaphoreObjectKHRptr)
+ clReleaseSemaphoreKHRptr =
+ (pfnclReleaseSemaphoreKHR)clGetExtensionFunctionAddressForPlatform(
+ opencl_platform, "clReleaseSemaphoreKHR");
+ if (NULL == clReleaseSemaphoreKHRptr)
{
throw std::runtime_error("Failed to get the function pointer of "
- "clReleaseSemaphoreObjectKHRptr!");
+ "clReleaseSemaphoreKHRptr!");
}
clCreateSemaphoreWithPropertiesKHRptr =
(pfnclCreateSemaphoreWithPropertiesKHR)
@@ -70,6 +71,40 @@
}
}
+// Chooses the maximum 2D image dimensions used by the interop image tests
+// from the device's CL_DEVICE_GLOBAL_MEM_SIZE. Returns the clGetDeviceInfo
+// status; max_width/max_height are left untouched when the query fails.
+cl_int setMaxImageDimensions(cl_device_id deviceID, size_t &max_width,
+                             size_t &max_height)
+{
+    cl_ulong val = 0;
+    // The returned size is not needed, so param_value_size_ret is NULL.
+    cl_int result = clGetDeviceInfo(deviceID, CL_DEVICE_GLOBAL_MEM_SIZE,
+                                    sizeof(cl_ulong), &val, NULL);
+    if (result != CL_SUCCESS)
+    {
+        return result;
+    }
+
+    if (val < GB(4))
+    {
+        max_width = 256;
+        max_height = 256;
+    }
+    else if (val < GB(8))
+    {
+        max_width = 512;
+        max_height = 256;
+    }
+    else
+    {
+        max_width = 1024;
+        max_height = 512;
+    }
+
+    return result;
+}
+
cl_int getCLFormatFromVkFormat(VkFormat vkFormat,
cl_image_format *clImageFormat)
{
@@ -798,10 +833,10 @@
clExternalSemaphore::~clExternalSemaphore()
{
- cl_int err = clReleaseSemaphoreObjectKHRptr(m_externalSemaphore);
+ cl_int err = clReleaseSemaphoreKHRptr(m_externalSemaphore);
if (err != CL_SUCCESS)
{
- throw std::runtime_error("clReleaseSemaphoreObjectKHR failed!");
+ throw std::runtime_error("clReleaseSemaphoreKHR failed!");
}
}
diff --git a/test_conformance/vulkan/vulkan_interop_common/opencl_vulkan_wrapper.hpp b/test_conformance/vulkan/vulkan_interop_common/opencl_vulkan_wrapper.hpp
index c1d2a76..d9f8dcc 100644
--- a/test_conformance/vulkan/vulkan_interop_common/opencl_vulkan_wrapper.hpp
+++ b/test_conformance/vulkan/vulkan_interop_common/opencl_vulkan_wrapper.hpp
@@ -49,7 +49,7 @@
cl_command_queue command_queue, cl_uint num_mem_objects,
const cl_mem *mem_objects, cl_uint num_events_in_wait_list,
const cl_event *event_wait_list, cl_event *event);
-typedef cl_int (*pfnclReleaseSemaphoreObjectKHR)(cl_semaphore_khr sema_object);
+typedef cl_int (*pfnclReleaseSemaphoreKHR)(cl_semaphore_khr sema_object);
extern pfnclCreateSemaphoreWithPropertiesKHR
clCreateSemaphoreWithPropertiesKHRptr;
@@ -59,7 +59,7 @@
clEnqueueAcquireExternalMemObjectsKHRptr;
extern pfnclEnqueueReleaseExternalMemObjectsKHR
clEnqueueReleaseExternalMemObjectsKHRptr;
-extern pfnclReleaseSemaphoreObjectKHR clReleaseSemaphoreObjectKHRptr;
+extern pfnclReleaseSemaphoreKHR clReleaseSemaphoreKHRptr;
cl_int getCLImageInfoFromVkImageInfo(const VkImageCreateInfo *, size_t,
cl_image_format *, cl_image_desc *);
@@ -69,6 +69,8 @@
cl_int check_external_semaphore_handle_type(
cl_device_id deviceID,
cl_external_semaphore_handle_type_khr requiredHandleType);
+cl_int setMaxImageDimensions(cl_device_id deviceID, size_t &width,
+ size_t &height);
class clExternalMemory {
protected:
diff --git a/test_conformance/vulkan/vulkan_interop_common/vulkan_list_map.hpp b/test_conformance/vulkan/vulkan_interop_common/vulkan_list_map.hpp
index 831403e..5220677 100644
--- a/test_conformance/vulkan/vulkan_interop_common/vulkan_list_map.hpp
+++ b/test_conformance/vulkan/vulkan_interop_common/vulkan_list_map.hpp
@@ -37,7 +37,7 @@
virtual size_t size() const;
virtual const VulkanWrapper &operator[](size_t idx) const;
virtual VulkanWrapper &operator[](size_t idx);
- virtual operator const VulkanNative *() const;
+ virtual const VulkanNative *operator()() const;
};
template <class VulkanKey, class VulkanValue> class VulkanMap {
@@ -335,15 +335,12 @@
template <class VulkanWrapper, class VulkanNative>
VulkanWrapper &VulkanList<VulkanWrapper, VulkanNative>::operator[](size_t idx)
{
- if (idx < m_wrapperList.size())
- {
- // CHECK_LT(idx, m_wrapperList.size());
- return m_wrapperList[idx].get();
- }
+ // CHECK_LT(idx, m_wrapperList.size());
+ return m_wrapperList[idx].get();
}
template <class VulkanWrapper, class VulkanNative>
-VulkanList<VulkanWrapper, VulkanNative>::operator const VulkanNative *() const
+const VulkanNative *VulkanList<VulkanWrapper, VulkanNative>::operator()() const
{
return m_nativeList.data();
}
diff --git a/test_conformance/vulkan/vulkan_interop_common/vulkan_utility.cpp b/test_conformance/vulkan/vulkan_interop_common/vulkan_utility.cpp
index 81e1262..1a313cc 100644
--- a/test_conformance/vulkan/vulkan_interop_common/vulkan_utility.cpp
+++ b/test_conformance/vulkan/vulkan_interop_common/vulkan_utility.cpp
@@ -18,6 +18,7 @@
#include "vulkan_wrapper.hpp"
#include <assert.h>
#include <iostream>
+#include <fstream>
#include <set>
#include <string>
#include <CL/cl.h>
@@ -182,7 +183,7 @@
const VulkanInstance &instance = getVulkanInstance();
const VulkanPhysicalDeviceList &physicalDeviceList =
instance.getPhysicalDeviceList();
- if (physicalDeviceList == NULL)
+ if (physicalDeviceList() == NULL)
{
std::cout << "physicalDeviceList is null, No GPUs found with "
"Vulkan support !!!\n";
@@ -541,59 +542,6 @@
return (const char *)size_t(0);
}
-const char *getVulkanFormatGLSLTypePrefix(VulkanFormat format)
-{
- switch (format)
- {
- case VULKAN_FORMAT_R8_UINT:
- case VULKAN_FORMAT_R8G8_UINT:
- case VULKAN_FORMAT_R8G8B8A8_UINT:
- case VULKAN_FORMAT_R16_UINT:
- case VULKAN_FORMAT_R16G16_UINT:
- case VULKAN_FORMAT_R16G16B16A16_UINT:
- case VULKAN_FORMAT_R32_UINT:
- case VULKAN_FORMAT_R32G32_UINT:
- case VULKAN_FORMAT_R32G32B32A32_UINT: return "u";
-
- case VULKAN_FORMAT_R8_SINT:
- case VULKAN_FORMAT_R8G8_SINT:
- case VULKAN_FORMAT_R8G8B8A8_SINT:
- case VULKAN_FORMAT_R16_SINT:
- case VULKAN_FORMAT_R16G16_SINT:
- case VULKAN_FORMAT_R16G16B16A16_SINT:
- case VULKAN_FORMAT_R32_SINT:
- case VULKAN_FORMAT_R32G32_SINT:
- case VULKAN_FORMAT_R32G32B32A32_SINT: return "i";
-
- case VULKAN_FORMAT_R32_SFLOAT:
- case VULKAN_FORMAT_R32G32_SFLOAT:
- case VULKAN_FORMAT_R32G32B32A32_SFLOAT: return "";
-
- default: ASSERT(0); std::cout << "Unknown format";
- }
-
- return "";
-}
-
-std::string prepareVulkanShader(
- std::string shaderCode,
- const std::map<std::string, std::string> &patternToSubstituteMap)
-{
- for (std::map<std::string, std::string>::const_iterator psIt =
- patternToSubstituteMap.begin();
- psIt != patternToSubstituteMap.end(); ++psIt)
- {
- std::string::size_type pos = 0u;
- while ((pos = shaderCode.find(psIt->first, pos)) != std::string::npos)
- {
- shaderCode.replace(pos, psIt->first.length(), psIt->second);
- pos += psIt->second.length();
- }
- }
-
- return shaderCode;
-}
-
std::ostream &operator<<(std::ostream &os,
VulkanMemoryTypeProperty memoryTypeProperty)
{
@@ -691,3 +639,54 @@
return os;
}
+
+// Returns a malloc'd copy of the first search path under which `filename`
+// opens for reading, or 0 if none does; the caller must free() the result.
+static char *findFilePath(const std::string filename)
+{
+    const char *searchPath[] = {
+        "./", // Same dir
+        "./shaders/", // In shaders folder in same dir
+        "../test_conformance/vulkan/shaders/" // In src folder
+    };
+    for (size_t i = 0; i < sizeof(searchPath) / sizeof(searchPath[0]); ++i)
+    {
+        std::string path(searchPath[i]);
+        path.append(filename);
+        // Probe by opening; a candidate that cannot be opened is skipped.
+        FILE *fp = fopen(path.c_str(), "rb");
+        if (fp == NULL)
+        {
+            continue;
+        }
+        fclose(fp);
+        // File found: return a heap copy (0 if the allocation itself fails).
+        char *file_path = (char *)(malloc(path.length() + 1));
+        if (file_path != NULL)
+        {
+            strncpy(file_path, path.c_str(), path.length() + 1);
+        }
+        return file_path;
+    }
+    // File not found
+    return 0;
+}
+
+// Returns the bytes of the file that findFilePath() locates for `filename`.
+// Throws std::runtime_error when no such file is found or it fails to open.
+std::vector<char> readFile(const std::string &filename)
+{
+    char *file_path = findFilePath(filename);
+    if (file_path == NULL) // an ifstream must not be opened on NULL
+        throw std::runtime_error("failed to open shader spv file!\n");
+    std::ifstream file(file_path, std::ios::ate | std::ios::binary);
+    free(file_path); // findFilePath() returns a malloc'd copy we own
+    if (!file.is_open())
+        throw std::runtime_error("failed to open shader spv file!\n");
+    size_t fileSize = (size_t)file.tellg(); // opened at end (ios::ate)
+    std::vector<char> buffer(fileSize);
+    file.seekg(0);
+    file.read(buffer.data(), fileSize);
+    printf("filesize is %zu", fileSize); // %zu matches size_t
+    return buffer;
+}
diff --git a/test_conformance/vulkan/vulkan_interop_common/vulkan_utility.hpp b/test_conformance/vulkan/vulkan_interop_common/vulkan_utility.hpp
index 7022fd5..04f5a59 100644
--- a/test_conformance/vulkan/vulkan_interop_common/vulkan_utility.hpp
+++ b/test_conformance/vulkan/vulkan_interop_common/vulkan_utility.hpp
@@ -66,4 +66,5 @@
VulkanExternalSemaphoreHandleType externalSemaphoreHandleType);
std::ostream& operator<<(std::ostream& os, VulkanFormat format);
+std::vector<char> readFile(const std::string& filename);
#endif // _vulkan_utility_hpp_
diff --git a/test_conformance/vulkan/vulkan_interop_common/vulkan_wrapper.cpp b/test_conformance/vulkan/vulkan_interop_common/vulkan_wrapper.cpp
index c044e00..6209a74 100644
--- a/test_conformance/vulkan/vulkan_interop_common/vulkan_wrapper.cpp
+++ b/test_conformance/vulkan/vulkan_interop_common/vulkan_wrapper.cpp
@@ -201,7 +201,8 @@
if (physicalDeviceCount == uint32_t(0))
{
- throw std::runtime_error("failed to find GPUs with Vulkan support!");
+ std::cout << "failed to find GPUs with Vulkan support!\n";
+ return;
}
std::vector<VkPhysicalDevice> vkPhysicalDeviceList(physicalDeviceCount,
@@ -625,12 +626,12 @@
vkSubmitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
vkSubmitInfo.pNext = NULL;
vkSubmitInfo.waitSemaphoreCount = (uint32_t)waitSemaphoreList.size();
- vkSubmitInfo.pWaitSemaphores = waitSemaphoreList;
+ vkSubmitInfo.pWaitSemaphores = waitSemaphoreList();
vkSubmitInfo.pWaitDstStageMask = vkPipelineStageFlagsList.data();
vkSubmitInfo.commandBufferCount = (uint32_t)commandBufferList.size();
- vkSubmitInfo.pCommandBuffers = commandBufferList;
+ vkSubmitInfo.pCommandBuffers = commandBufferList();
vkSubmitInfo.signalSemaphoreCount = (uint32_t)signalSemaphoreList.size();
- vkSubmitInfo.pSignalSemaphores = signalSemaphoreList;
+ vkSubmitInfo.pSignalSemaphores = signalSemaphoreList();
vkQueueSubmit(m_vkQueue, 1, &vkSubmitInfo, NULL);
}
@@ -728,7 +729,8 @@
vkDescriptorSetLayoutCreateInfo.flags = 0;
vkDescriptorSetLayoutCreateInfo.bindingCount =
(uint32_t)descriptorSetLayoutBindingList.size();
- vkDescriptorSetLayoutCreateInfo.pBindings = descriptorSetLayoutBindingList;
+ vkDescriptorSetLayoutCreateInfo.pBindings =
+ descriptorSetLayoutBindingList();
vkCreateDescriptorSetLayout(m_device, &vkDescriptorSetLayoutCreateInfo,
NULL, &m_vkDescriptorSetLayout);
@@ -799,7 +801,7 @@
vkPipelineLayoutCreateInfo.flags = 0;
vkPipelineLayoutCreateInfo.setLayoutCount =
(uint32_t)descriptorSetLayoutList.size();
- vkPipelineLayoutCreateInfo.pSetLayouts = descriptorSetLayoutList;
+ vkPipelineLayoutCreateInfo.pSetLayouts = descriptorSetLayoutList();
vkPipelineLayoutCreateInfo.pushConstantRangeCount = 0;
vkPipelineLayoutCreateInfo.pPushConstantRanges = NULL;
@@ -846,23 +848,18 @@
{}
VulkanShaderModule::VulkanShaderModule(const VulkanDevice &device,
- const std::string &code)
+ const std::vector<char> &code)
: m_device(device)
{
- std::string paddedCode = code;
- while (paddedCode.size() % 4)
- {
- paddedCode += " ";
- }
VkShaderModuleCreateInfo vkShaderModuleCreateInfo = {};
vkShaderModuleCreateInfo.sType =
VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO;
vkShaderModuleCreateInfo.pNext = NULL;
vkShaderModuleCreateInfo.flags = 0;
- vkShaderModuleCreateInfo.codeSize = paddedCode.size();
+ vkShaderModuleCreateInfo.codeSize = code.size();
vkShaderModuleCreateInfo.pCode =
- (const uint32_t *)(void *)paddedCode.c_str();
+ reinterpret_cast<const uint32_t *>(code.data());
vkCreateShaderModule(m_device, &vkShaderModuleCreateInfo, NULL,
&m_vkShaderModule);
@@ -1577,7 +1574,7 @@
vkImageCreateInfo.queueFamilyIndexCount =
(uint32_t)m_device.getPhysicalDevice().getQueueFamilyList().size();
vkImageCreateInfo.pQueueFamilyIndices =
- m_device.getPhysicalDevice().getQueueFamilyList();
+ m_device.getPhysicalDevice().getQueueFamilyList()();
vkImageCreateInfo.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED;
VkExternalMemoryImageCreateInfo vkExternalMemoryImageCreateInfo = {};
diff --git a/test_conformance/vulkan/vulkan_interop_common/vulkan_wrapper.hpp b/test_conformance/vulkan/vulkan_interop_common/vulkan_wrapper.hpp
index 1f68a92..37925ee 100644
--- a/test_conformance/vulkan/vulkan_interop_common/vulkan_wrapper.hpp
+++ b/test_conformance/vulkan/vulkan_interop_common/vulkan_wrapper.hpp
@@ -240,7 +240,8 @@
VulkanShaderModule(const VulkanShaderModule &shaderModule);
public:
- VulkanShaderModule(const VulkanDevice &device, const std::string &code);
+ VulkanShaderModule(const VulkanDevice &device,
+ const std::vector<char> &code);
virtual ~VulkanShaderModule();
operator VkShaderModule() const;
};