test_common/harness/imageHelpers.cpp - platform/external/OpenCL-CTS - Git at Google

 //
 // Copyright (c) 2017 The Khronos Group Inc.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 //    http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
 //
 #include "imageHelpers.h"
 #include <limits.h>
 #include <assert.h>
 #if defined( __APPLE__ )
 #include <sys/mman.h>
 #endif
 #if !defined (_WIN32) && !defined(__APPLE__)
 #include <malloc.h>
 #endif
 #include <algorithm>
 #include <iterator>
 #if !defined (_WIN32)
 #include <cmath>
 #endif

 RoundingMode gFloatToHalfRoundingMode = kDefaultRoundingMode;

 static cl_ushort float2half_rte( float f );
 static cl_ushort float2half_rtz( float f );

 cl_device_type gDeviceType = CL_DEVICE_TYPE_DEFAULT;
 bool gTestRounding = false;
 double
 sRGBmap(float fc)
 {
     double c = (double)fc;

 #if !defined (_WIN32)
     if (std::isnan(c))
         c = 0.0;
 #else
     if (_isnan(c))
         c = 0.0;
 #endif

     if (c > 1.0)
         c = 1.0;
     else if (c < 0.0)
         c = 0.0;
     else if (c < 0.0031308)
         c = 12.92 * c;
     else
         c = (1055.0/1000.0) * pow(c, 5.0/12.0) - (55.0/1000.0);

     return c * 255.0;
 }

 double
 sRGBunmap(float fc)
 {
     double c = (double)fc;
     double result;

     if (c <= 0.04045)
         result = c / 12.92;
     else
         result = pow((c + 0.055) / 1.055, 2.4);

     return result;
 }


 size_t get_format_type_size( const cl_image_format *format )
 {
     return get_channel_data_type_size( format->image_channel_data_type );
 }

 size_t get_channel_data_type_size( cl_channel_type channelType )
 {
     switch( channelType )
     {
         case CL_SNORM_INT8:
         case CL_UNORM_INT8:
         case CL_SIGNED_INT8:
         case CL_UNSIGNED_INT8:
             return 1;

         case CL_SNORM_INT16:
         case CL_UNORM_INT16:
         case CL_SIGNED_INT16:
         case CL_UNSIGNED_INT16:
         case CL_HALF_FLOAT:
 #ifdef CL_SFIXED14_APPLE
         case CL_SFIXED14_APPLE:
 #endif
             return sizeof( cl_short );

         case CL_SIGNED_INT32:
         case CL_UNSIGNED_INT32:
             return sizeof( cl_int );

         case CL_UNORM_SHORT_565:
         case CL_UNORM_SHORT_555:
 #ifdef OBSOLETE_FORAMT
         case CL_UNORM_SHORT_565_REV:
         case CL_UNORM_SHORT_555_REV:
 #endif
             return 2;

 #ifdef OBSOLETE_FORAMT
         case CL_UNORM_INT_8888:
         case CL_UNORM_INT_8888_REV:
             return 4;
 #endif

         case CL_UNORM_INT_101010:
 #ifdef OBSOLETE_FORAMT
         case CL_UNORM_INT_101010_REV:
 #endif
             return 4;

         case CL_FLOAT:
             return sizeof( cl_float );

         default:
             return 0;
     }
 }

 size_t get_format_channel_count( const cl_image_format *format )
 {
     return get_channel_order_channel_count( format->image_channel_order );
 }

 size_t get_channel_order_channel_count( cl_channel_order order )
 {
     switch( order )
     {
         case CL_R:
         case CL_A:
         case CL_Rx:
         case CL_INTENSITY:
         case CL_LUMINANCE:
         case CL_DEPTH:
         case CL_DEPTH_STENCIL:
             return 1;

         case CL_RG:
         case CL_RA:
         case CL_RGx:
             return 2;

         case CL_RGB:
         case CL_RGBx:
         case CL_sRGB:
         case CL_sRGBx:
             return 3;

         case CL_RGBA:
         case CL_ARGB:
         case CL_BGRA:
         case CL_sRGBA:
         case CL_sBGRA:
         case CL_ABGR:
 #ifdef CL_1RGB_APPLE
         case CL_1RGB_APPLE:
 #endif
 #ifdef CL_BGR1_APPLE
         case CL_BGR1_APPLE:
 #endif
 #ifdef CL_ABGR_APPLE
         case CL_ABGR_APPLE:
 #endif
           return 4;

         default:
           log_error("%s does not support 0x%x\n",__FUNCTION__,order);
           return 0;
     }
 }

 cl_channel_type  get_channel_type_from_name( const char *name )
 {
     struct {
         cl_channel_type type;
         const char *name;
     } typeNames[] = {
         { CL_SNORM_INT8, "CL_SNORM_INT8" },
         { CL_SNORM_INT16, "CL_SNORM_INT16" },
         { CL_UNORM_INT8, "CL_UNORM_INT8" },
         { CL_UNORM_INT16, "CL_UNORM_INT16" },
         { CL_UNORM_INT24, "CL_UNORM_INT24" },
         { CL_UNORM_SHORT_565, "CL_UNORM_SHORT_565" },
         { CL_UNORM_SHORT_555, "CL_UNORM_SHORT_555" },
         { CL_UNORM_INT_101010, "CL_UNORM_INT_101010" },
         { CL_SIGNED_INT8, "CL_SIGNED_INT8" },
         { CL_SIGNED_INT16, "CL_SIGNED_INT16" },
         { CL_SIGNED_INT32, "CL_SIGNED_INT32" },
         { CL_UNSIGNED_INT8, "CL_UNSIGNED_INT8" },
         { CL_UNSIGNED_INT16, "CL_UNSIGNED_INT16" },
         { CL_UNSIGNED_INT32, "CL_UNSIGNED_INT32" },
         { CL_HALF_FLOAT, "CL_HALF_FLOAT" },
         { CL_FLOAT, "CL_FLOAT" },
 #ifdef CL_SFIXED14_APPLE
         { CL_SFIXED14_APPLE, "CL_SFIXED14_APPLE" }
 #endif
     };
     for( size_t i = 0; i < sizeof( typeNames ) / sizeof( typeNames[ 0 ] ); i++ )
     {
         if( strcmp( typeNames[ i ].name, name ) == 0 || strcmp( typeNames[ i ].name + 3, name ) == 0 )
             return typeNames[ i ].type;
     }
     return (cl_channel_type)-1;
 }

 cl_channel_order  get_channel_order_from_name( const char *name )
 {
     const struct
     {
         cl_channel_order    order;
         const char          *name;
     }orderNames[] =
     {
         { CL_R, "CL_R" },
         { CL_A, "CL_A" },
         { CL_Rx, "CL_Rx" },
         { CL_RG, "CL_RG" },
         { CL_RA, "CL_RA" },
         { CL_RGx, "CL_RGx" },
         { CL_RGB, "CL_RGB" },
         { CL_RGBx, "CL_RGBx" },
         { CL_RGBA, "CL_RGBA" },
         { CL_BGRA, "CL_BGRA" },
         { CL_ARGB, "CL_ARGB" },
         { CL_INTENSITY, "CL_INTENSITY"},
         { CL_LUMINANCE, "CL_LUMINANCE"},
         { CL_DEPTH, "CL_DEPTH" },
         { CL_DEPTH_STENCIL, "CL_DEPTH_STENCIL" },
         { CL_sRGB, "CL_sRGB" },
         { CL_sRGBx, "CL_sRGBx" },
         { CL_sRGBA, "CL_sRGBA" },
         { CL_sBGRA, "CL_sBGRA" },
         { CL_ABGR, "CL_ABGR" },
 #ifdef CL_1RGB_APPLE
         { CL_1RGB_APPLE, "CL_1RGB_APPLE" },
 #endif
 #ifdef CL_BGR1_APPLE
         { CL_BGR1_APPLE, "CL_BGR1_APPLE" },
 #endif
     };

     for( size_t i = 0; i < sizeof( orderNames ) / sizeof( orderNames[ 0 ] ); i++ )
     {
         if( strcmp( orderNames[ i ].name, name ) == 0 || strcmp( orderNames[ i ].name + 3, name ) == 0 )
             return orderNames[ i ].order;
     }
     return (cl_channel_order)-1;
 }


 int is_format_signed( const cl_image_format *format )
 {
     switch( format->image_channel_data_type )
     {
         case CL_SNORM_INT8:
         case CL_SIGNED_INT8:
         case CL_SNORM_INT16:
         case CL_SIGNED_INT16:
         case CL_SIGNED_INT32:
         case CL_HALF_FLOAT:
         case CL_FLOAT:
 #ifdef CL_SFIXED14_APPLE
         case CL_SFIXED14_APPLE:
 #endif
             return 1;

         default:
             return 0;
     }
 }

 size_t get_pixel_size( cl_image_format *format )
 {
   switch( format->image_channel_data_type )
   {
     case CL_SNORM_INT8:
     case CL_UNORM_INT8:
     case CL_SIGNED_INT8:
     case CL_UNSIGNED_INT8:
       return get_format_channel_count( format );

     case CL_SNORM_INT16:
     case CL_UNORM_INT16:
     case CL_SIGNED_INT16:
     case CL_UNSIGNED_INT16:
     case CL_HALF_FLOAT:
 #ifdef  CL_SFIXED14_APPLE
         case CL_SFIXED14_APPLE:
 #endif
       return get_format_channel_count( format ) * sizeof( cl_ushort );

     case CL_SIGNED_INT32:
     case CL_UNSIGNED_INT32:
       return get_format_channel_count( format ) * sizeof( cl_int );

     case CL_UNORM_SHORT_565:
     case CL_UNORM_SHORT_555:
 #ifdef OBSOLETE_FORAMT
     case CL_UNORM_SHORT_565_REV:
     case CL_UNORM_SHORT_555_REV:
 #endif
       return 2;

 #ifdef OBSOLETE_FORAMT
     case CL_UNORM_INT_8888:
     case CL_UNORM_INT_8888_REV:
       return 4;
 #endif

     case CL_UNORM_INT_101010:
 #ifdef OBSOLETE_FORAMT
     case CL_UNORM_INT_101010_REV:
 #endif
       return 4;

     case CL_FLOAT:
       return get_format_channel_count( format ) * sizeof( cl_float );

     default:
       return 0;
   }
 }

 int get_8_bit_image_format( cl_context context, cl_mem_object_type objType, cl_mem_flags flags, size_t channelCount, cl_image_format *outFormat )
 {
     cl_image_format formatList[ 128 ];
     unsigned int outFormatCount, i;
     int error;


     /* Make sure each image format is supported */
     if ((error = clGetSupportedImageFormats( context, flags, objType, 128, formatList, &outFormatCount )))
     return error;


     /* Look for one that is an 8-bit format */
     for( i = 0; i < outFormatCount; i++ )
     {
         if( formatList[ i ].image_channel_data_type == CL_SNORM_INT8 ||
        formatList[ i ].image_channel_data_type == CL_UNORM_INT8 ||
            formatList[ i ].image_channel_data_type == CL_SIGNED_INT8 ||
            formatList[ i ].image_channel_data_type == CL_UNSIGNED_INT8 )
         {
       if ( !channelCount || ( channelCount && ( get_format_channel_count( &formatList[ i ] ) == channelCount ) ) )
       {
         *outFormat = formatList[ i ];
         return 0;
       }
         }
     }

     return -1;
 }

 int get_32_bit_image_format( cl_context context, cl_mem_object_type objType, cl_mem_flags flags, size_t channelCount, cl_image_format *outFormat )
 {
     cl_image_format formatList[ 128 ];
     unsigned int outFormatCount, i;
     int error;


   /* Make sure each image format is supported */
   if ((error = clGetSupportedImageFormats( context, flags, objType, 128, formatList, &outFormatCount )))
     return error;

   /* Look for one that is an 8-bit format */
   for( i = 0; i < outFormatCount; i++ )
   {
         if( formatList[ i ].image_channel_data_type == CL_UNORM_INT_101010 ||
             formatList[ i ].image_channel_data_type == CL_FLOAT ||
             formatList[ i ].image_channel_data_type == CL_SIGNED_INT32 ||
             formatList[ i ].image_channel_data_type == CL_UNSIGNED_INT32 )
     {
       if ( !channelCount || ( channelCount && ( get_format_channel_count( &formatList[ i ] ) == channelCount ) ) )
       {
         *outFormat = formatList[ i ];
         return 0;
       }
     }
     }

     return -1;
 }

 int random_log_in_range( int minV, int maxV, MTdata d  )
 {
     double v = log2( ( (double)genrand_int32(d) / (double)0xffffffff ) + 1 );
     int iv = (int)( (float)( maxV - minV ) * v );
     return iv + minV;
 }


 // Define the addressing functions
 typedef int (*AddressFn)( int value, size_t maxValue );

 int         NoAddressFn( int value, size_t maxValue )               { return value; }
 int         RepeatAddressFn( int value, size_t maxValue )
 {
     if( value < 0 )
         value += (int)maxValue;
     else if( value >= (int)maxValue )
         value -= (int)maxValue;
     return value;
 }
 int         MirroredRepeatAddressFn( int value, size_t maxValue )
 {
     if( value < 0 )
         value  = 0;
     else if( (size_t) value >= maxValue )
         value = (int) (maxValue - 1);
     return value;
 }
 int         ClampAddressFn( int value, size_t maxValue )            { return ( value < -1 ) ? -1 : ( ( value > (cl_long) maxValue ) ? (int)maxValue : value ); }
 int         ClampToEdgeNearestFn( int value, size_t maxValue )  { return ( value < 0 ) ? 0 : ( ( (size_t)value > maxValue - 1 ) ? (int)maxValue - 1 : value ); }
 AddressFn   ClampToEdgeLinearFn                                                 = ClampToEdgeNearestFn;

 // Note: normalized coords get repeated in normalized space, not unnormalized space! hence the special case here
 volatile float gFloatHome;
 float           RepeatNormalizedAddressFn( float fValue, size_t maxValue )
 {
 #ifndef _MSC_VER // Use original if not the VS compiler.
     // General computation for repeat
     return (fValue - floorf( fValue )) * (float) maxValue; // Reduce to [0, 1.f]
 #else // Otherwise, use this instead:
     // Home the subtraction to a float to break up the sequence of x87
     // instructions emitted by the VS compiler.
     gFloatHome = fValue - floorf(fValue);
     return gFloatHome * (float)maxValue;
 #endif
 }

 float           MirroredRepeatNormalizedAddressFn( float fValue, size_t maxValue )
 {
     // Round to nearest multiple of two
     float s_prime = 2.0f * rintf( fValue * 0.5f );        // Note halfway values flip flop here due to rte, but they both end up pointing the same place at the end of the day

     // Reduce to [-1, 1], Apply mirroring -> [0, 1]
     s_prime = fabsf( fValue - s_prime );

     // un-normalize
     return s_prime * (float) maxValue;
 }

 struct AddressingTable
 {
     AddressingTable()
     {
         ct_assert( ( CL_ADDRESS_MIRRORED_REPEAT - CL_ADDRESS_NONE < 6 ) );
         ct_assert( CL_FILTER_NEAREST - CL_FILTER_LINEAR < 2 );

         mTable[ CL_ADDRESS_NONE - CL_ADDRESS_NONE ][ CL_FILTER_NEAREST - CL_FILTER_NEAREST ]            = NoAddressFn;
         mTable[ CL_ADDRESS_NONE - CL_ADDRESS_NONE ][ CL_FILTER_LINEAR - CL_FILTER_NEAREST ]             = NoAddressFn;
         mTable[ CL_ADDRESS_REPEAT - CL_ADDRESS_NONE ][ CL_FILTER_NEAREST - CL_FILTER_NEAREST ]          = RepeatAddressFn;
         mTable[ CL_ADDRESS_REPEAT - CL_ADDRESS_NONE ][ CL_FILTER_LINEAR - CL_FILTER_NEAREST ]           = RepeatAddressFn;
         mTable[ CL_ADDRESS_CLAMP_TO_EDGE - CL_ADDRESS_NONE ][ CL_FILTER_NEAREST - CL_FILTER_NEAREST ]   = ClampToEdgeNearestFn;
         mTable[ CL_ADDRESS_CLAMP_TO_EDGE - CL_ADDRESS_NONE ][ CL_FILTER_LINEAR - CL_FILTER_NEAREST ]    = ClampToEdgeLinearFn;
         mTable[ CL_ADDRESS_CLAMP - CL_ADDRESS_NONE ][ CL_FILTER_NEAREST - CL_FILTER_NEAREST ]           = ClampAddressFn;
         mTable[ CL_ADDRESS_CLAMP - CL_ADDRESS_NONE ][ CL_FILTER_LINEAR - CL_FILTER_NEAREST ]            = ClampAddressFn;
         mTable[ CL_ADDRESS_MIRRORED_REPEAT - CL_ADDRESS_NONE ][ CL_FILTER_NEAREST - CL_FILTER_NEAREST ] = MirroredRepeatAddressFn;
         mTable[ CL_ADDRESS_MIRRORED_REPEAT - CL_ADDRESS_NONE ][ CL_FILTER_LINEAR - CL_FILTER_NEAREST ]  = MirroredRepeatAddressFn;
     }

     AddressFn operator[]( image_sampler_data *sampler )
     {
         return mTable[ (int)sampler->addressing_mode - CL_ADDRESS_NONE ][ (int)sampler->filter_mode - CL_FILTER_NEAREST ];
     }

     AddressFn mTable[ 6 ][ 2 ];
 };

 static AddressingTable  sAddressingTable;

 bool is_sRGBA_order(cl_channel_order image_channel_order){
     switch (image_channel_order) {
         case CL_sRGB:
         case CL_sRGBx:
         case CL_sRGBA:
         case CL_sBGRA:
             return true;
         default:
             return false;
     }
 }

 // Format helpers

 int has_alpha(cl_image_format *format) {
     switch (format->image_channel_order) {
         case CL_R:
             return 0;
         case CL_A:
             return 1;
         case CL_Rx:
             return 0;
         case CL_RG:
             return 0;
         case CL_RA:
             return 1;
         case CL_RGx:
             return 0;
         case CL_RGB:
         case CL_sRGB:
             return 0;
         case CL_RGBx:
         case CL_sRGBx:
             return 0;
         case CL_RGBA:
             return 1;
         case CL_BGRA:
             return 1;
         case CL_ARGB:
             return 1;
         case CL_INTENSITY:
             return 1;
         case CL_LUMINANCE:
             return 0;
 #ifdef CL_BGR1_APPLE
         case CL_BGR1_APPLE: return 1;
 #endif
 #ifdef CL_1RGB_APPLE
         case CL_1RGB_APPLE: return 1;
 #endif
         case CL_sRGBA:
         case CL_sBGRA:
             return 1;
         case CL_DEPTH:
             return 0;
         default:
             log_error("Invalid image channel order: %d\n", format->image_channel_order);
             return 0;
     }

 }

 #define PRINT_MAX_SIZE_LOGIC 0

 #define SWAP( _a, _b )      do{ _a ^= _b; _b ^= _a; _a ^= _b; }while(0)
 #ifndef MAX
     #define MAX( _a, _b )   ((_a) > (_b) ? (_a) : (_b))
 #endif

 void get_max_sizes(size_t *numberOfSizes, const int maxNumberOfSizes,
                    size_t sizes[][3], size_t maxWidth, size_t maxHeight, size_t maxDepth, size_t maxArraySize,
                    const cl_ulong maxIndividualAllocSize,       // CL_DEVICE_MAX_MEM_ALLOC_SIZE
                    const cl_ulong maxTotalAllocSize,            // CL_DEVICE_GLOBAL_MEM_SIZE
                    cl_mem_object_type image_type, cl_image_format *format, int usingMaxPixelSizeBuffer) {

     bool is3D = (image_type == CL_MEM_OBJECT_IMAGE3D);
     bool isArray = (image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY || image_type == CL_MEM_OBJECT_IMAGE2D_ARRAY);

     // Validate we have a reasonable max depth for 3D
     if (is3D && maxDepth < 2) {
         log_error("ERROR: Requesting max image sizes for 3D images when max depth is < 2.\n");
         *numberOfSizes = 0;
         return;
     }
     // Validate we have a reasonable max array size for 1D & 2D image arrays
     if (isArray && maxArraySize < 2) {
         log_error("ERROR: Requesting max image sizes for an image array when max array size is < 1.\n");
         *numberOfSizes = 0;
         return;
     }

     // Reduce the maximum because we are trying to test the max image dimensions, not the memory allocation
     cl_ulong adjustedMaxTotalAllocSize = maxTotalAllocSize / 4;
     cl_ulong adjustedMaxIndividualAllocSize = maxIndividualAllocSize / 4;
     log_info("Note: max individual allocation adjusted down from %gMB to %gMB and max total allocation adjusted down from %gMB to %gMB.\n",
              maxIndividualAllocSize/(1024.0*1024.0), adjustedMaxIndividualAllocSize/(1024.0*1024.0),
              maxTotalAllocSize/(1024.0*1024.0), adjustedMaxTotalAllocSize/(1024.0*1024.0));

     // Cap our max allocation to 1.0GB.
     // FIXME -- why?  In the interest of not taking a long time?  We should still test this stuff...
     if (adjustedMaxTotalAllocSize > (cl_ulong)1024*1024*1024) {
       adjustedMaxTotalAllocSize = (cl_ulong)1024*1024*1024;
       log_info("Limiting max total allocation size to %gMB (down from %gMB) for test.\n",
         adjustedMaxTotalAllocSize/(1024.0*1024.0), maxTotalAllocSize/(1024.0*1024.0));
     }

     cl_ulong maxAllocSize = adjustedMaxIndividualAllocSize;
     if (adjustedMaxTotalAllocSize < adjustedMaxIndividualAllocSize*2)
         maxAllocSize = adjustedMaxTotalAllocSize/2;

     size_t raw_pixel_size = get_pixel_size(format);
     // If the test will be creating input (src) buffer of type int4 or float4, number of pixels will be
     // governed by sizeof(int4 or float4) and not sizeof(dest fomat)
     // Also if pixel size is 12 bytes i.e. RGB or RGBx, we adjust it to 16 bytes as GPUs has no concept
     // of 3 channel images. GPUs expand these to four channel RGBA.
     if(usingMaxPixelSizeBuffer || raw_pixel_size == 12)
       raw_pixel_size = 16;
     size_t max_pixels = (size_t)maxAllocSize / raw_pixel_size;

     log_info("Maximums: [%ld x %ld x %ld], raw pixel size %lu bytes, per-allocation limit %gMB.\n",
              maxWidth, maxHeight, isArray ? maxArraySize : maxDepth, raw_pixel_size, (maxAllocSize/(1024.0*1024.0)));

   // Keep track of the maximum sizes for each dimension
   size_t maximum_sizes[] = { maxWidth, maxHeight, maxDepth };

   switch (image_type) {
     case CL_MEM_OBJECT_IMAGE1D_ARRAY:
       maximum_sizes[1] = maxArraySize;
       maximum_sizes[2] = 1;
       break;
     case CL_MEM_OBJECT_IMAGE2D_ARRAY:
       maximum_sizes[2] = maxArraySize;
       break;
   }


   // Given one fixed sized dimension, this code finds one or two other dimensions,
   // both with very small size, such that the size does not exceed the maximum
   // passed to this function

 #if defined(__x86_64) || defined (__arm64__) || defined (__ppc64__)
   size_t other_sizes[] = { 2, 3, 5, 6, 7, 9, 10, 11, 13, 15};
 #else
   size_t other_sizes[] = { 2, 3, 5, 6, 7, 9, 11, 13};
 #endif

   static size_t other_size = 0;
   enum { num_other_sizes = sizeof(other_sizes)/sizeof(size_t) };

   (*numberOfSizes) = 0;

   if (image_type == CL_MEM_OBJECT_IMAGE1D) {

     double M = maximum_sizes[0];

     // Store the size
     sizes[(*numberOfSizes)][0] = (size_t)M;
     sizes[(*numberOfSizes)][1] = 1;
     sizes[(*numberOfSizes)][2] = 1;
     ++(*numberOfSizes);
   }

   else if (image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY || image_type == CL_MEM_OBJECT_IMAGE2D) {

     for (int fixed_dim=0;fixed_dim<2;++fixed_dim) {

       // Determine the size of the fixed dimension
       double M = maximum_sizes[fixed_dim];
       double A = max_pixels;

       int x0_dim = !fixed_dim;
       double x0  = fmin(fmin(other_sizes[(other_size++)%num_other_sizes],A/M), maximum_sizes[x0_dim]);

       // Store the size
       sizes[(*numberOfSizes)][fixed_dim] = (size_t)M;
       sizes[(*numberOfSizes)][x0_dim]    = (size_t)x0;
       sizes[(*numberOfSizes)][2]         = 1;
       ++(*numberOfSizes);
     }
   }

   else if (image_type == CL_MEM_OBJECT_IMAGE2D_ARRAY || image_type == CL_MEM_OBJECT_IMAGE3D) {

     // Iterate over dimensions, finding sizes for the non-fixed dimension
     for (int fixed_dim=0;fixed_dim<3;++fixed_dim) {

       // Determine the size of the fixed dimension
       double M = maximum_sizes[fixed_dim];
       double A = max_pixels;

       // Find two other dimensions, x0 and x1
       int x0_dim = (fixed_dim == 0) ? 1 : 0;
       int x1_dim = (fixed_dim == 2) ? 1 : 2;

       // Choose two other sizes for these dimensions
       double x0 = fmin(fmin(A/M,maximum_sizes[x0_dim]),other_sizes[(other_size++)%num_other_sizes]);
       // GPUs have certain restrictions on minimum width (row alignment) of images which has given us issues
       // testing small widths in this test (say we set width to 3 for testing, and compute size based on this width and decide
       // it fits within vram ... but GPU driver decides that, due to row alignment requirements, it has to use
       // width of 16 which doesnt fit in vram). For this purpose we are not testing width < 16 for this test.
       if(x0_dim == 0 && x0 < 16)
         x0 = 16;
       double x1 = fmin(fmin(A/M/x0,maximum_sizes[x1_dim]),other_sizes[(other_size++)%num_other_sizes]);

       // Valid image sizes cannot be below 1. Due to the workaround for the xo_dim where x0 is overidden to 16
       // there might not be enough space left for x1 dimension. This could be a fractional 0.x size that when cast to
       // integer would result in a value 0. In these cases we clamp the size to a minimum of 1.
       if ( x1 < 1 )
         x1 = 1;

       // M and x0 cannot be '0' as they derive from clDeviceInfo calls
       assert(x0 > 0 && M > 0);

       // Store the size
       sizes[(*numberOfSizes)][fixed_dim] = (size_t)M;
       sizes[(*numberOfSizes)][x0_dim]    = (size_t)x0;
       sizes[(*numberOfSizes)][x1_dim]    = (size_t)x1;
       ++(*numberOfSizes);
     }
   }

   // Log the results
   for (int j=0; j<(int)(*numberOfSizes); j++) {
     switch (image_type) {
       case CL_MEM_OBJECT_IMAGE1D:
         log_info(" size[%d] = [%ld] (%g MB image)\n",
                  j, sizes[j][0], raw_pixel_size*sizes[j][0]*sizes[j][1]*sizes[j][2]/(1024.0*1024.0));
         break;
       case CL_MEM_OBJECT_IMAGE1D_ARRAY:
       case CL_MEM_OBJECT_IMAGE2D:
         log_info(" size[%d] = [%ld %ld] (%g MB image)\n",
                  j, sizes[j][0], sizes[j][1], raw_pixel_size*sizes[j][0]*sizes[j][1]*sizes[j][2]/(1024.0*1024.0));
         break;
       case CL_MEM_OBJECT_IMAGE2D_ARRAY:
       case CL_MEM_OBJECT_IMAGE3D:
         log_info(" size[%d] = [%ld %ld %ld] (%g MB image)\n",
                  j, sizes[j][0], sizes[j][1], sizes[j][2], raw_pixel_size*sizes[j][0]*sizes[j][1]*sizes[j][2]/(1024.0*1024.0));
         break;
     }
   }
 }

 float get_max_absolute_error( cl_image_format *format, image_sampler_data *sampler) {
     if (sampler->filter_mode == CL_FILTER_NEAREST)
         return 0.0f;

     switch (format->image_channel_data_type) {
         case CL_SNORM_INT8:
             return 1.0f/127.0f;
         case CL_UNORM_INT8:
             return 1.0f/255.0f;
         case CL_UNORM_INT16:
             return 1.0f/65535.0f;
         case CL_SNORM_INT16:
             return 1.0f/32767.0f;
         case CL_FLOAT:
             return CL_FLT_MIN;
 #ifdef  CL_SFIXED14_APPLE
         case CL_SFIXED14_APPLE:
             return 0x1.0p-14f;
 #endif
         default:
             return 0.0f;
     }
 }

 float get_max_relative_error( cl_image_format *format, image_sampler_data *sampler, int is3D, int isLinearFilter )
 {
     float maxError = 0.0f;
     float sampleCount = 1.0f;
     if( isLinearFilter )
         sampleCount =  is3D ? 8.0f : 4.0f;

     // Note that the ULP is defined here as the unit in the last place of the maximum
     // magnitude sample used for filtering.

     // Section 8.3
     switch( format->image_channel_data_type )
     {
             // The spec allows 2 ulps of error for normalized formats
         case CL_SNORM_INT8:
         case CL_UNORM_INT8:
         case CL_SNORM_INT16:
         case CL_UNORM_INT16:
         case CL_UNORM_SHORT_565:
         case CL_UNORM_SHORT_555:
         case CL_UNORM_INT_101010:
             maxError = 2*FLT_EPSILON*sampleCount;       // Maximum sampling error for round to zero normalization based on multiplication
             // by reciprocal (using reciprocal generated in round to +inf mode, so that 1.0 matches spec)
             break;

             // If the implementation supports these formats then it will have to allow rounding error here too,
             // because not all 32-bit ints are exactly representable in float
         case CL_SIGNED_INT32:
         case CL_UNSIGNED_INT32:
             maxError = 1*FLT_EPSILON;
             break;
     }


     // Section 8.2
     if( sampler->addressing_mode == CL_ADDRESS_REPEAT || sampler->addressing_mode == CL_ADDRESS_MIRRORED_REPEAT || sampler->filter_mode != CL_FILTER_NEAREST || sampler->normalized_coords )
 #if defined( __APPLE__ )
     {
         if( sampler->filter_mode != CL_FILTER_NEAREST )
         {
             // The maximum
             if( gDeviceType == CL_DEVICE_TYPE_GPU )
                 maxError += MAKE_HEX_FLOAT(0x1.0p-4f, 0x1L, -4);              // Some GPUs ain't so accurate
             else
                 // The standard method of 2d linear filtering delivers 4.0 ulps of error in round to nearest (8 in rtz).
                 maxError += 4.0f * FLT_EPSILON;
         }
         else
             maxError += 4.0f * FLT_EPSILON;    // normalized coordinates will introduce some error into the fractional part of the address, affecting results
     }
 #else
     {
 #if !defined(_WIN32)
 #warning Implementations will likely wish to pick a max allowable sampling error policy here that is better than the spec
 #endif
         // The spec allows linear filters to return any result most of the time.
         // That's fine for implementations but a problem for testing. After all
         // users aren't going to like garbage images.  We have "picked a number"
         // here that we are going to attempt to conform to. Implementations are
         // free to pick another number, like infinity, if they like.
         // We picked a number for you, to provide /some/ sanity
         maxError = MAKE_HEX_FLOAT(0x1.0p-7f, 0x1L, -7);
         // ...but this is what the spec allows:
         // maxError = INFINITY;
         // Please feel free to pick any positive number. (NaN wont work.)
     }
 #endif

     // The error calculation itself can introduce error
     maxError += FLT_EPSILON * 2;

     return maxError;
 }

 size_t get_format_max_int( cl_image_format *format )
 {
     switch( format->image_channel_data_type )
     {
         case CL_SNORM_INT8:
         case CL_SIGNED_INT8:
             return 127;
         case CL_UNORM_INT8:
         case CL_UNSIGNED_INT8:
             return 255;

         case CL_SNORM_INT16:
         case CL_SIGNED_INT16:
             return 32767;

         case CL_UNORM_INT16:
         case CL_UNSIGNED_INT16:
             return 65535;

         case CL_SIGNED_INT32:
             return 2147483647L;

         case CL_UNSIGNED_INT32:
             return 4294967295LL;

         case CL_UNORM_SHORT_565:
         case CL_UNORM_SHORT_555:
             return 31;

         case CL_UNORM_INT_101010:
             return 1023;

         case CL_HALF_FLOAT:
             return 1<<10;

 #ifdef CL_SFIXED14_APPLE
         case CL_SFIXED14_APPLE:
             return 16384;
 #endif
         default:
             return 0;
     }
 }

 int get_format_min_int( cl_image_format *format )
 {
     switch( format->image_channel_data_type )
     {
         case CL_SNORM_INT8:
         case CL_SIGNED_INT8:
             return -128;
         case CL_UNORM_INT8:
         case CL_UNSIGNED_INT8:
             return 0;

         case CL_SNORM_INT16:
         case CL_SIGNED_INT16:
             return -32768;

         case CL_UNORM_INT16:
         case CL_UNSIGNED_INT16:
             return 0;

         case CL_SIGNED_INT32:
             return -2147483648LL;

         case CL_UNSIGNED_INT32:
             return 0;

         case CL_UNORM_SHORT_565:
         case CL_UNORM_SHORT_555:
         case CL_UNORM_INT_101010:
             return 0;

         case CL_HALF_FLOAT: return -(1 << 10);

 #ifdef CL_SFIXED14_APPLE
         case CL_SFIXED14_APPLE:
             return -16384;
 #endif

         default:
             return 0;
     }
 }

 float convert_half_to_float( unsigned short halfValue )
 {
     // We have to take care of a few special cases, but in general, we just extract
     // the same components from the half that exist in the float and re-stuff them
     // For a description of the actual half format, see http://en.wikipedia.org/wiki/Half_precision
     // Note: we store these in 32-bit ints to make the bit manipulations easier later
     int sign =     ( halfValue >> 15 ) & 0x0001;
     int exponent = ( halfValue >> 10 ) & 0x001f;
     int mantissa = ( halfValue )       & 0x03ff;

     // Note: we use a union here to be able to access the bits of a float directly
     union
     {
         unsigned int bits;
         float floatValue;
     } outFloat;

     // Special cases first
     if( exponent == 0 )
     {
         if( mantissa == 0 )
         {
             // If both exponent and mantissa are 0, the number is +/- 0
             outFloat.bits  = sign << 31;
             return outFloat.floatValue; // Already done!
         }

         // If exponent is 0, it's a denormalized number, so we renormalize it
         // Note: this is not terribly efficient, but oh well
         while( ( mantissa & 0x00000400 ) == 0 )
         {
             mantissa <<= 1;
             exponent--;
         }

         // The first bit is implicit, so we take it off and inc the exponent accordingly
         exponent++;
         mantissa &= ~(0x00000400);
     }
     else if( exponent == 31 ) // Special-case "numbers"
     {
         // If the exponent is 31, it's a special case number (+/- infinity or NAN).
         // If the mantissa is 0, it's infinity, else it's NAN, but in either case, the packing
         // method is the same
         outFloat.bits = ( sign << 31 ) | 0x7f800000 | ( mantissa << 13 );
         return outFloat.floatValue;
     }

     // Plain ol' normalized number, so adjust to the ranges a 32-bit float expects and repack
     exponent += ( 127 - 15 );
     mantissa <<= 13;

     outFloat.bits = ( sign << 31 ) | ( exponent << 23 ) | mantissa;
     return outFloat.floatValue;
 }


 cl_ushort convert_float_to_half( float f )
 {
     switch( gFloatToHalfRoundingMode )
     {
         case kRoundToNearestEven:
             return float2half_rte( f );
         case kRoundTowardZero:
             return float2half_rtz( f );
         default:
             log_error( "ERROR: Test internal error -- unhandled or unknown float->half rounding mode.\n" );
             exit(-1);
             return 0xffff;
     }

 }

 cl_ushort float2half_rte( float f )
     {
     union{ float f; cl_uint u; } u = {f};
     cl_uint sign = (u.u >> 16) & 0x8000;
     float x = fabsf(f);

     //Nan
     if( x != x )
     {
         u.u >>= (24-11);
         u.u &= 0x7fff;
         u.u |= 0x0200;      //silence the NaN
         return u.u | sign;
                 }

     // overflow
     if( x >= MAKE_HEX_FLOAT(0x1.ffep15f, 0x1ffeL, 3) )
         return 0x7c00 | sign;

     // underflow
     if( x <= MAKE_HEX_FLOAT(0x1.0p-25f, 0x1L, -25) )
         return sign;    // The halfway case can return 0x0001 or 0. 0 is even.

     // very small
     if( x < MAKE_HEX_FLOAT(0x1.8p-24f, 0x18L, -28) )
         return sign | 1;

     // half denormal
     if( x < MAKE_HEX_FLOAT(0x1.0p-14f, 0x1L, -14) )
     {
         u.f = x * MAKE_HEX_FLOAT(0x1.0p-125f, 0x1L, -125);
         return sign | u.u;
         }

     u.f *= MAKE_HEX_FLOAT(0x1.0p13f, 0x1L, 13);
     u.u &= 0x7f800000;
     x += u.f;
     u.f = x - u.f;
     u.f *= MAKE_HEX_FLOAT(0x1.0p-112f, 0x1L, -112);

     return (u.u >> (24-11)) | sign;
     }

 cl_ushort float2half_rtz( float f )
     {
     union{ float f; cl_uint u; } u = {f};
     cl_uint sign = (u.u >> 16) & 0x8000;
     float x = fabsf(f);

     //Nan
     if( x != x )
         {
         u.u >>= (24-11);
         u.u &= 0x7fff;
         u.u |= 0x0200;      //silence the NaN
         return u.u | sign;
         }

     // overflow
     if( x >= MAKE_HEX_FLOAT(0x1.0p16f, 0x1L, 16) )
         {
         if( x == INFINITY )
             return 0x7c00 | sign;

         return 0x7bff | sign;
         }

     // underflow
     if( x < MAKE_HEX_FLOAT(0x1.0p-24f, 0x1L, -24) )
         return sign;    // The halfway case can return 0x0001 or 0. 0 is even.

     // half denormal
     if( x < MAKE_HEX_FLOAT(0x1.0p-14f, 0x1L, -14) )
     {
         x *= MAKE_HEX_FLOAT(0x1.0p24f, 0x1L, 24);
         return (cl_ushort)((int) x | sign);
     }

     u.u &= 0xFFFFE000U;
     u.u -= 0x38000000U;

     return (u.u >> (24-11)) | sign;
 }

 class TEST
 {
 public:
     TEST();
 };

 static TEST t;
 void  __vstore_half_rte(float f, size_t index, uint16_t *p)
 {
     union{ unsigned int u; float f;} u;

     u.f = f;
     unsigned short r = (u.u >> 16) & 0x8000;
     u.u &= 0x7fffffff;
     if( u.u >= 0x33000000U )
     {
         if( u.u >= 0x47800000 )
         {
             if( u.u <= 0x7f800000 )
                 r |= 0x7c00;
             else
             {
                 r |= 0x7e00 | ( (u.u >> 13) & 0x3ff );
             }
         }
         else
         {
             float x = u.f;
             if( u.u < 0x38800000 )
                 u.u = 0x3f000000;
             else
                 u.u += 0x06800000;
             u.u &= 0x7f800000U;
             x += u.f;
             x -= u.f;
             u.f = x * MAKE_HEX_FLOAT(0x1.0p-112f, 0x1L, -112);
             u.u >>= 13;
             r |= (unsigned short) u.u;
         }
     }

     ((unsigned short*)p)[index] = r;
 }

 TEST::TEST()
 {
     return;
     union
     {
         float f;
         uint32_t i;
     } test;
     uint16_t control, myval;

     log_info(" &&&&&&&&&&&&&&&&&&&&&&&&&&&& TESTING HALFS &&&&&&&&&&&&&&&&&&&&\n" );
     test.i = 0;
     do
     {
         if( ( test.i & 0xffffff ) == 0 )
         {
             if( ( test.i & 0xfffffff ) == 0 )
                 log_info( "*" );
             else
                 log_info( "." );
             fflush(stdout);
         }
         __vstore_half_rte( test.f, 0, &control );
         myval = convert_float_to_half( test.f );
         if( myval != control )
         {
             log_info( "\n******** ERROR: MyVal %04x control %04x source %12.24f\n", myval, control, test.f );
             log_info( "         source bits: %08x   %a\n", test.i, test.f );
             float t, c;
             c = convert_half_to_float( control );
             t = convert_half_to_float( myval );
             log_info( "         converted control: %12.24f myval: %12.24f\n", c, t );
         }
         test.i++;
     } while( test.i != 0 );
     log_info("\n &&&&&&&&&&&&&&&&&&&&&&&&&&&& TESTING HALFS &&&&&&&&&&&&&&&&&&&&\n" );

 }

 cl_ulong get_image_size( image_descriptor const *imageInfo )
 {
     cl_ulong imageSize;

     // Assumes rowPitch and slicePitch are always correctly defined
     if ( /*gTestMipmaps*/ imageInfo->num_mip_levels > 1 )
     {
       imageSize = (size_t) compute_mipmapped_image_size(*imageInfo);
     }
     else
     {
       switch (imageInfo->type)
       {
       case CL_MEM_OBJECT_IMAGE1D:
         imageSize = imageInfo->rowPitch;
         break;
       case CL_MEM_OBJECT_IMAGE2D:
         imageSize = imageInfo->height * imageInfo->rowPitch;
         break;
       case CL_MEM_OBJECT_IMAGE3D:
         imageSize = imageInfo->depth * imageInfo->slicePitch;
         break;
       case CL_MEM_OBJECT_IMAGE1D_ARRAY:
         imageSize = imageInfo->arraySize * imageInfo->slicePitch;
         break;
       case CL_MEM_OBJECT_IMAGE2D_ARRAY:
         imageSize = imageInfo->arraySize * imageInfo->slicePitch;
         break;
       default:
         log_error("ERROR: Cannot identify image type %x\n", imageInfo->type);
         abort();
       }
     }
     return imageSize;
 }

 // Calculate image size in megabytes (strictly, mebibytes). Result is rounded up.
 cl_ulong get_image_size_mb( image_descriptor const *imageInfo )
 {
     cl_ulong imageSize = get_image_size( imageInfo );
     cl_ulong mb = imageSize / ( 1024 * 1024 );
     if ( imageSize % ( 1024 * 1024 ) > 0 )
     {
         mb += 1;
     }
     return  mb;
 }


 uint64_t gRoundingStartValue = 0;


 void escape_inf_nan_values( char* data, size_t allocSize ) {
     // filter values with 8 not-quite-highest bits
     unsigned int *intPtr = (unsigned int *)data;
     for( size_t i = 0; i < allocSize >> 2; i++ )
     {
         if( ( intPtr[ i ] & 0x7F800000 ) == 0x7F800000 )
             intPtr[ i ] ^= 0x40000000;
     }

     // Ditto with half floats (16-bit numbers with the 5 not-quite-highest bits = 0x7C00 are special)
     unsigned short *shortPtr = (unsigned short *)data;
     for( size_t i = 0; i < allocSize >> 1; i++ )
     {
         if( ( shortPtr[ i ] & 0x7C00 ) == 0x7C00 )
             shortPtr[ i ] ^= 0x4000;
     }
 }

 char * generate_random_image_data( image_descriptor *imageInfo, BufferOwningPtr<char> &P, MTdata d )
 {
     size_t allocSize = get_image_size( imageInfo );
     size_t pixelRowBytes = imageInfo->width * get_pixel_size( imageInfo->format );
     size_t i;

     if (imageInfo->num_mip_levels > 1)
       allocSize = compute_mipmapped_image_size(*imageInfo);

 #if defined (__APPLE__ )
     char *data = NULL;
     if (gDeviceType == CL_DEVICE_TYPE_CPU) {
         size_t mapSize = ((allocSize + 4095L) & -4096L) + 8192;

         void *map = mmap(0, mapSize, PROT_READ | PROT_WRITE, MAP_ANON | MAP_PRIVATE, 0, 0);
         intptr_t data_end = (intptr_t)map + mapSize - 4096;
         data = (char *)(data_end - (intptr_t)allocSize);

         mprotect(map, 4096, PROT_NONE);
         mprotect((void *)((char *)map + mapSize - 4096), 4096, PROT_NONE);
         P.reset(data, map, mapSize,allocSize);
     } else {
         data = (char *)malloc(allocSize);
         P.reset(data,NULL,0,allocSize);
     }
 #else
     P.reset( NULL ); // Free already allocated memory first, then try to allocate new block.
     char *data = (char *)align_malloc(allocSize, get_pixel_size(imageInfo->format));
     P.reset(data,NULL,0,allocSize, true);
 #endif

     if (data == NULL) {
       log_error( "ERROR: Unable to malloc %lu bytes for generate_random_image_data\n", allocSize );
       return 0;
     }

     if( gTestRounding )
     {
         // Special case: fill with a ramp from 0 to the size of the type
         size_t typeSize = get_format_type_size( imageInfo->format );
         switch( typeSize )
         {
             case 1:
             {
                 char *ptr = data;
                 for( i = 0; i < allocSize; i++ )
                     ptr[i] = (cl_char) (i + gRoundingStartValue);
             }
                 break;
             case 2:
             {
                 cl_short *ptr = (cl_short*) data;
                 for( i = 0; i < allocSize / 2; i++ )
                     ptr[i] = (cl_short) (i +  gRoundingStartValue);
             }
                 break;
             case 4:
             {
                 cl_int *ptr = (cl_int*) data;
                 for( i = 0; i < allocSize / 4; i++ )
                     ptr[i] = (cl_int) (i +  gRoundingStartValue);
             }
                 break;
         }

         // Note: inf or nan float values would cause problems, although we don't know this will
         // actually be a float, so we just know what to look for
         escape_inf_nan_values( data, allocSize );
         return data;
     }

     // Otherwise, we should be able to just fill with random bits no matter what
     cl_uint *p = (cl_uint*) data;
     for( i = 0; i + 4 <= allocSize; i += 4 )
         p[ i / 4 ] = genrand_int32(d);

     for( ; i < allocSize; i++ )
         data[i] = genrand_int32(d);

     // Note: inf or nan float values would cause problems, although we don't know this will
     // actually be a float, so we just know what to look for
     escape_inf_nan_values( data, allocSize );

     if ( /*!gTestMipmaps*/ imageInfo->num_mip_levels < 2 )
     {
       // Fill unused edges with -1, NaN for float
       if (imageInfo->rowPitch > pixelRowBytes)
       {
           size_t height = 0;

           switch (imageInfo->type)
           {
               case CL_MEM_OBJECT_IMAGE2D:
               case CL_MEM_OBJECT_IMAGE3D:
               case CL_MEM_OBJECT_IMAGE2D_ARRAY:
                   height = imageInfo->height;
                   break;
               case CL_MEM_OBJECT_IMAGE1D_ARRAY:
                   height = imageInfo->arraySize;
                   break;
             }

             // Fill in the row padding regions
             for( i = 0; i < height; i++ )
             {
                 size_t offset = i * imageInfo->rowPitch + pixelRowBytes;
                 size_t length = imageInfo->rowPitch - pixelRowBytes;
                 memset( data + offset, 0xff, length );
             }
       }

       // Fill in the slice padding regions, if necessary:

       size_t slice_dimension = imageInfo->height;
       if (imageInfo->type == CL_MEM_OBJECT_IMAGE1D_ARRAY) {
           slice_dimension = imageInfo->arraySize;
       }

       if (imageInfo->slicePitch > slice_dimension*imageInfo->rowPitch)
       {
           size_t depth = 0;
           switch (imageInfo->type)
           {
             case CL_MEM_OBJECT_IMAGE2D:
             case CL_MEM_OBJECT_IMAGE3D:
                 depth = imageInfo->depth;
                 break;
             case CL_MEM_OBJECT_IMAGE1D_ARRAY:
             case CL_MEM_OBJECT_IMAGE2D_ARRAY:
                 depth = imageInfo->arraySize;
                 break;
           }

           for( i = 0; i < depth; i++ )
           {
               size_t offset = i * imageInfo->slicePitch + slice_dimension*imageInfo->rowPitch;
               size_t length = imageInfo->slicePitch - slice_dimension*imageInfo->rowPitch;
               memset( data + offset, 0xff, length );
           }
       }
     }

     return data;
 }

 #define CLAMP_FLOAT( v ) ( fmaxf( fminf( v, 1.f ), -1.f ) )


 void read_image_pixel_float( void *imageData, image_descriptor *imageInfo,
                             int x, int y, int z, float *outData, int lod )
 {
     size_t width_lod = imageInfo->width, height_lod = imageInfo->height, depth_lod = imageInfo->depth;
     size_t slice_pitch_lod = 0, row_pitch_lod = 0;

     if ( imageInfo->num_mip_levels > 1 )
     {
       switch(imageInfo->type)
       {
       case CL_MEM_OBJECT_IMAGE3D :
         depth_lod = ( imageInfo->depth >> lod ) ? ( imageInfo->depth >> lod ) : 1;
       case CL_MEM_OBJECT_IMAGE2D :
       case CL_MEM_OBJECT_IMAGE2D_ARRAY :
         height_lod = ( imageInfo->height >> lod ) ? ( imageInfo->height >> lod ) : 1;
       default :
         width_lod = ( imageInfo->width >> lod ) ? ( imageInfo->width >> lod ) : 1;
       }
       row_pitch_lod = width_lod * get_pixel_size(imageInfo->format);
       if ( imageInfo->type == CL_MEM_OBJECT_IMAGE1D_ARRAY )
         slice_pitch_lod = row_pitch_lod;
       else if ( imageInfo->type == CL_MEM_OBJECT_IMAGE3D || imageInfo->type == CL_MEM_OBJECT_IMAGE2D_ARRAY)
         slice_pitch_lod = row_pitch_lod * height_lod;
     }
     else
     {
       row_pitch_lod = imageInfo->rowPitch;
       slice_pitch_lod = imageInfo->slicePitch;
     }
     if ( x < 0 || y < 0 || z < 0 || x >= (int)width_lod
                || ( height_lod != 0 && y >= (int)height_lod )
                || ( depth_lod != 0 && z >= (int)depth_lod )
                || ( imageInfo->arraySize != 0 && z >= (int)imageInfo->arraySize ) )
     {
             outData[ 0 ] = outData[ 1 ] = outData[ 2 ] = outData[ 3 ] = 0;
             if (!has_alpha(imageInfo->format))
                 outData[3] = 1;
         return;
     }

     cl_image_format *format = imageInfo->format;

     unsigned int i;
     float tempData[ 4 ];

     // Advance to the right spot
     char *ptr = (char *)imageData;
     size_t pixelSize = get_pixel_size( format );

     ptr += z * slice_pitch_lod + y * row_pitch_lod + x * pixelSize;

     // OpenCL only supports reading floats from certain formats
     size_t channelCount = get_format_channel_count( format );
     switch( format->image_channel_data_type )
     {
         case CL_SNORM_INT8:
         {
             cl_char *dPtr = (cl_char *)ptr;
             for( i = 0; i < channelCount; i++ )
                 tempData[ i ] = CLAMP_FLOAT( (float)dPtr[ i ] / 127.0f );
             break;
         }

         case CL_UNORM_INT8:
         {
             unsigned char *dPtr = (unsigned char *)ptr;
             for( i = 0; i < channelCount; i++ ) {
                 if((is_sRGBA_order(imageInfo->format->image_channel_order)) && i<3) // only RGB need to be converted for sRGBA
                     tempData[ i ] = (float)sRGBunmap((float)dPtr[ i ] / 255.0f) ;
                 else
                     tempData[ i ] = (float)dPtr[ i ] / 255.0f;
             }
             break;
         }

         case CL_SIGNED_INT8:
         {
             cl_char *dPtr = (cl_char *)ptr;
             for( i = 0; i < channelCount; i++ )
                 tempData[ i ] =  (float)dPtr[ i ];
             break;
         }

         case CL_UNSIGNED_INT8:
         {
             cl_uchar *dPtr = (cl_uchar *)ptr;
             for( i = 0; i < channelCount; i++ )
                 tempData[ i ] = (float) dPtr[ i ];
             break;
         }

         case CL_SNORM_INT16:
         {
             cl_short *dPtr = (cl_short *)ptr;
             for( i = 0; i < channelCount; i++ )
                 tempData[ i ] = CLAMP_FLOAT( (float)dPtr[ i ] / 32767.0f );
             break;
         }

         case CL_UNORM_INT16:
         {
             cl_ushort *dPtr = (cl_ushort *)ptr;
             for( i = 0; i < channelCount; i++ )
                 tempData[ i ] = (float)dPtr[ i ] / 65535.0f;
             break;
         }

         case CL_SIGNED_INT16:
         {
             cl_short *dPtr = (cl_short *)ptr;
             for( i = 0; i < channelCount; i++ )
                 tempData[ i ] = (float)dPtr[ i ];
             break;
         }

         case CL_UNSIGNED_INT16:
         {
             cl_ushort *dPtr = (cl_ushort *)ptr;
             for( i = 0; i < channelCount; i++ )
                 tempData[ i ] = (float) dPtr[ i ];
             break;
         }

         case CL_HALF_FLOAT:
         {
             cl_ushort *dPtr = (cl_ushort *)ptr;
             for( i = 0; i < channelCount; i++ )
                 tempData[ i ] = convert_half_to_float( dPtr[ i ] );
             break;
         }

         case CL_SIGNED_INT32:
         {
             cl_int *dPtr = (cl_int *)ptr;
             for( i = 0; i < channelCount; i++ )
                 tempData[ i ] = (float)dPtr[ i ];
             break;
         }

         case CL_UNSIGNED_INT32:
         {
             cl_uint *dPtr = (cl_uint *)ptr;
             for( i = 0; i < channelCount; i++ )
                 tempData[ i ] = (float)dPtr[ i ];
             break;
         }

         case CL_UNORM_SHORT_565:
         {
             cl_ushort *dPtr = (cl_ushort *)ptr;
             tempData[ 0 ] = (float)( dPtr[ 0 ] >> 11 ) / (float)31;
             tempData[ 1 ] = (float)( ( dPtr[ 0 ] >> 5 ) & 63 ) / (float)63;
             tempData[ 2 ] = (float)( dPtr[ 0 ] & 31 ) / (float)31;
             break;
         }

         case CL_UNORM_SHORT_555:
         {
             cl_ushort *dPtr = (cl_ushort *)ptr;
             tempData[ 0 ] = (float)( ( dPtr[ 0 ] >> 10 ) & 31 ) / (float)31;
             tempData[ 1 ] = (float)( ( dPtr[ 0 ] >> 5 ) & 31 ) / (float)31;
             tempData[ 2 ] = (float)( dPtr[ 0 ] & 31 ) / (float)31;
             break;
         }

         case CL_UNORM_INT_101010:
         {
             cl_uint *dPtr = (cl_uint *)ptr;
             tempData[ 0 ] = (float)( ( dPtr[ 0 ] >> 20 ) & 0x3ff ) / (float)1023;
             tempData[ 1 ] = (float)( ( dPtr[ 0 ] >> 10 ) & 0x3ff ) / (float)1023;
             tempData[ 2 ] = (float)( dPtr[ 0 ] & 0x3ff ) / (float)1023;
             break;
         }

         case CL_FLOAT:
         {
             float *dPtr = (float *)ptr;
             for( i = 0; i < channelCount; i++ )
                 tempData[ i ] = (float)dPtr[ i ];
             break;
         }
 #ifdef  CL_SFIXED14_APPLE
         case CL_SFIXED14_APPLE:
         {
             cl_ushort *dPtr = (cl_ushort*) ptr;
             for( i = 0; i < channelCount; i++ )
                 tempData[i] = ((int) dPtr[i] - 16384) * 0x1.0p-14f;
             break;
         }
 #endif
     }


     outData[ 0 ] = outData[ 1 ] = outData[ 2 ] = 0;
     outData[ 3 ] = 1;

     switch( format->image_channel_order )
     {
         case CL_A:
             outData[ 3 ] = tempData[ 0 ];
             break;
         case CL_R:
         case CL_Rx:
             outData[ 0 ] = tempData[ 0 ];
             break;
         case CL_RA:
             outData[ 0 ] = tempData[ 0 ];
             outData[ 3 ] = tempData[ 1 ];
             break;
         case CL_RG:
         case CL_RGx:
             outData[ 0 ] = tempData[ 0 ];
             outData[ 1 ] = tempData[ 1 ];
             break;
         case CL_RGB:
         case CL_RGBx:
         case CL_sRGB:
         case CL_sRGBx:
             outData[ 0 ] = tempData[ 0 ];
             outData[ 1 ] = tempData[ 1 ];
             outData[ 2 ] = tempData[ 2 ];
             break;
         case CL_RGBA:
             outData[ 0 ] = tempData[ 0 ];
             outData[ 1 ] = tempData[ 1 ];
             outData[ 2 ] = tempData[ 2 ];
             outData[ 3 ] = tempData[ 3 ];
             break;
         case CL_ARGB:
             outData[ 0 ] = tempData[ 1 ];
             outData[ 1 ] = tempData[ 2 ];
             outData[ 2 ] = tempData[ 3 ];
             outData[ 3 ] = tempData[ 0 ];
             break;
         case CL_BGRA:
         case CL_sBGRA:
             outData[ 0 ] = tempData[ 2 ];
             outData[ 1 ] = tempData[ 1 ];
             outData[ 2 ] = tempData[ 0 ];
             outData[ 3 ] = tempData[ 3 ];
             break;
         case CL_INTENSITY:
             outData[ 0 ] = tempData[ 0 ];
             outData[ 1 ] = tempData[ 0 ];
             outData[ 2 ] = tempData[ 0 ];
             outData[ 3 ] = tempData[ 0 ];
             break;
         case CL_LUMINANCE:
             outData[ 0 ] = tempData[ 0 ];
             outData[ 1 ] = tempData[ 0 ];
             outData[ 2 ] = tempData[ 0 ];
             break;
 #ifdef CL_1RGB_APPLE
         case CL_1RGB_APPLE:
             outData[ 0 ] = tempData[ 1 ];
             outData[ 1 ] = tempData[ 2 ];
             outData[ 2 ] = tempData[ 3 ];
             outData[ 3 ] = 1.0f;
             break;
 #endif
 #ifdef CL_BGR1_APPLE
         case CL_BGR1_APPLE:
             outData[ 0 ] = tempData[ 2 ];
             outData[ 1 ] = tempData[ 1 ];
             outData[ 2 ] = tempData[ 0 ];
             outData[ 3 ] = 1.0f;
             break;
 #endif
         case CL_sRGBA:
             outData[ 0 ] = tempData[ 0 ];
             outData[ 1 ] = tempData[ 1 ];
             outData[ 2 ] = tempData[ 2 ];
             outData[ 3 ] = tempData[ 3 ];
             break;
         case CL_DEPTH:
             outData[ 0 ] = tempData[ 0 ];
             break;
         default:
             log_error("Invalid format:");
             print_header(format, true);
             break;
     }
 }

 void read_image_pixel_float( void *imageData, image_descriptor *imageInfo,
                             int x, int y, int z, float *outData )
 {
   read_image_pixel_float( imageData, imageInfo, x, y, z, outData, 0 );
 }

 bool get_integer_coords( float x, float y, float z, size_t width, size_t height, size_t depth, image_sampler_data *imageSampler, image_descriptor *imageInfo, int &outX, int &outY, int &outZ ) {
     return get_integer_coords_offset(x, y, z, 0.0f, 0.0f, 0.0f, width, height, depth, imageSampler, imageInfo, outX, outY, outZ);
 }

 bool get_integer_coords_offset( float x, float y, float z, float xAddressOffset, float yAddressOffset, float zAddressOffset,
                                size_t width, size_t height, size_t depth, image_sampler_data *imageSampler, image_descriptor *imageInfo, int &outX, int &outY, int &outZ )
 {
     AddressFn adFn = sAddressingTable[ imageSampler ];

     float refX = floorf( x ), refY = floorf( y ), refZ = floorf( z );

     // Handle sampler-directed coordinate normalization + clamping.  Note that
     // the array coordinate for image array types is expected to be
     // unnormalized, and is clamped to 0..arraySize-1.
     if( imageSampler->normalized_coords )
     {
         switch (imageSampler->addressing_mode)
         {
             case CL_ADDRESS_REPEAT:
                 x = RepeatNormalizedAddressFn( x, width );
                 if (height != 0) {
                     if (imageInfo->type != CL_MEM_OBJECT_IMAGE1D_ARRAY)
                         y = RepeatNormalizedAddressFn( y, height );
                 }
                 if (depth != 0) {
                     if (imageInfo->type != CL_MEM_OBJECT_IMAGE2D_ARRAY)
                         z = RepeatNormalizedAddressFn( z, depth );
                 }

                 if (xAddressOffset != 0.0) {
                     // Add in the offset
                     x += xAddressOffset;
                     // Handle wrapping
                     if (x > width)
                         x -= (float)width;
                     if (x < 0)
                         x += (float)width;
                 }
                 if ( (yAddressOffset != 0.0) && (imageInfo->type != CL_MEM_OBJECT_IMAGE1D_ARRAY) ) {
                     // Add in the offset
                     y += yAddressOffset;
                     // Handle wrapping
                     if (y > height)
                         y -= (float)height;
                     if (y < 0)
                         y += (float)height;
                 }
                 if ( (zAddressOffset != 0.0) && (imageInfo->type != CL_MEM_OBJECT_IMAGE2D_ARRAY) )  {
                     // Add in the offset
                     z += zAddressOffset;
                     // Handle wrapping
                     if (z > depth)
                         z -= (float)depth;
                     if (z < 0)
                         z += (float)depth;
                 }
                 break;

             case CL_ADDRESS_MIRRORED_REPEAT:
                 x = MirroredRepeatNormalizedAddressFn( x, width );
                 if (height != 0) {
                     if (imageInfo->type != CL_MEM_OBJECT_IMAGE1D_ARRAY)
                         y = MirroredRepeatNormalizedAddressFn( y, height );
                 }
                 if (depth != 0) {
                     if (imageInfo->type != CL_MEM_OBJECT_IMAGE2D_ARRAY)
                         z = MirroredRepeatNormalizedAddressFn( z, depth );
                 }

                 if (xAddressOffset != 0.0)
                 {
                     float temp = x + xAddressOffset;
                     if( temp > (float) width )
                         temp = (float) width - (temp - (float) width );
                     x = fabsf( temp );
                 }
                 if ( (yAddressOffset != 0.0) && (imageInfo->type != CL_MEM_OBJECT_IMAGE1D_ARRAY) ) {
                     float temp = y + yAddressOffset;
                     if( temp > (float) height )
                         temp = (float) height - (temp - (float) height );
                     y = fabsf( temp );
                 }
                 if ( (zAddressOffset != 0.0) && (imageInfo->type != CL_MEM_OBJECT_IMAGE2D_ARRAY) )  {
                     float temp = z + zAddressOffset;
                     if( temp > (float) depth )
                         temp = (float) depth - (temp - (float) depth );
                     z = fabsf( temp );
                 }
                 break;

             default:
                 // Also, remultiply to the original coords. This simulates any truncation in
                 // the pass to OpenCL
                 x *= (float)width;
                 x += xAddressOffset;

                 if (imageInfo->type != CL_MEM_OBJECT_IMAGE1D_ARRAY)
                 {
                     y *= (float)height;
                     y += yAddressOffset;
                 }

                 if (imageInfo->type != CL_MEM_OBJECT_IMAGE2D_ARRAY)
                 {
                     z *= (float)depth;
                     z += zAddressOffset;
                 }
                 break;
         }
     }

     // At this point, we're dealing with non-normalized coordinates.

     outX = adFn( floorf( x ), width );

     // 1D and 2D arrays require special care for the index coordinate:

     switch (imageInfo->type) {
         case CL_MEM_OBJECT_IMAGE1D_ARRAY:
             outY = calculate_array_index(y, (float)imageInfo->arraySize - 1.0f);
             outZ = 0.0f; /* don't care! */
             break;
         case CL_MEM_OBJECT_IMAGE2D_ARRAY:
             outY = adFn( floorf( y ), height );
             outZ = calculate_array_index(z, (float)imageInfo->arraySize - 1.0f);
             break;
         default:
             // legacy path:
             if (height != 0)
                 outY = adFn( floorf( y ), height );
             if( depth != 0 )
                 outZ = adFn( floorf( z ), depth );
     }

     return !( (int)refX == outX && (int)refY == outY && (int)refZ == outZ );
 }

 static float frac(float a) {
     return a - floorf(a);
 }

 static inline void pixelMax( const float a[4], const float b[4], float *results );
 static inline void pixelMax( const float a[4], const float b[4], float *results )
 {
     for( int i = 0; i < 4; i++ )
         results[i] = errMax( fabsf(a[i]), fabsf(b[i]) );
 }

 // If containsDenorms is NULL, flush denorms to zero
 // if containsDenorms is not NULL, record whether there are any denorms
 static inline void  check_for_denorms(float a[4], int *containsDenorms );
 static inline void  check_for_denorms(float a[4], int *containsDenorms )
 {
     if( NULL == containsDenorms )
     {
         for( int i = 0; i < 4; i++ )
         {
             if( IsFloatSubnormal( a[i] ) )
                 a[i] = copysignf( 0.0f, a[i] );
         }
     }
     else
     {
         for( int i = 0; i < 4; i++ )
         {
             if( IsFloatSubnormal( a[i] ) )
             {
                 *containsDenorms = 1;
                 break;
             }
         }
     }
 }

 inline float calculate_array_index( float coord, float extent ) {
     // from Section 8.4 of the 1.2 Spec 'Selecting an Image from an Image Array'
     //
     // given coordinate 'w' that represents an index:
     // layer_index = clamp( rint(w), 0, image_array_size - 1)

     float ret = rintf( coord );
     ret = ret > extent ? extent : ret;
     ret = ret < 0.0f ? 0.0f : ret;

     return ret;
 }

 /*
  * Utility function to unnormalized a coordinate given a particular sampler.
  *
  * name     - the name of the coordinate, used for verbose debugging only
  * coord    - the coordinate requiring unnormalization
  * offset   - an addressing offset to be added to the coordinate
  * extent   - the max value for this coordinate (e.g. width for x)
  */
 static float unnormalize_coordinate( const char* name, float coord,
     float offset, float extent, cl_addressing_mode addressing_mode, int verbose )
 {
     float ret = 0.0f;

     switch (addressing_mode) {
         case CL_ADDRESS_REPEAT:
             ret = RepeatNormalizedAddressFn( coord, extent );

             if ( verbose ) {
                 log_info( "\tRepeat filter denormalizes %s (%f) to %f\n",
                     name, coord, ret );
             }

             if (offset != 0.0) {
                 // Add in the offset, and handle wrapping.
                 ret += offset;
                 if (ret > extent) ret -= extent;
                 if (ret < 0.0) ret += extent;
             }

             if (verbose && offset != 0.0f) {
                 log_info( "\tAddress offset of %f added to get %f\n", offset, ret );
             }
             break;

         case CL_ADDRESS_MIRRORED_REPEAT:
             ret = MirroredRepeatNormalizedAddressFn( coord, extent );

             if ( verbose ) {
                 log_info( "\tMirrored repeat filter denormalizes %s (%f) to %f\n",
                     name, coord, ret );
             }

             if (offset != 0.0) {
                 float temp = ret + offset;
                 if( temp > extent )
                     temp = extent - (temp - extent );
                 ret = fabsf( temp );
             }

             if (verbose && offset != 0.0f) {
                 log_info( "\tAddress offset of %f added to get %f\n", offset, ret );
             }
             break;

         default:

             ret = coord * extent;

             if ( verbose ) {
                 log_info( "\tFilter denormalizes %s to %f (%f * %f)\n",
                     name, ret, coord, extent);
             }

             ret += offset;

             if (verbose && offset != 0.0f) {
                 log_info( "\tAddress offset of %f added to get %f\n", offset, ret );
             }
     }

     return ret;
 }

 FloatPixel sample_image_pixel_float( void *imageData, image_descriptor *imageInfo,
                                     float x, float y, float z,
                                     image_sampler_data *imageSampler, float *outData, int verbose, int *containsDenorms ) {
     return sample_image_pixel_float_offset(imageData, imageInfo, x, y, z, 0.0f, 0.0f, 0.0f, imageSampler, outData, verbose, containsDenorms);
 }

 // returns max pixel value of the pixels touched
 FloatPixel sample_image_pixel_float( void *imageData, image_descriptor *imageInfo,
                                     float x, float y, float z,
                                     image_sampler_data *imageSampler, float *outData, int verbose, int *containsDenorms , int lod) {
     return sample_image_pixel_float_offset(imageData, imageInfo, x, y, z, 0.0f, 0.0f, 0.0f, imageSampler, outData, verbose, containsDenorms, lod);
 }
 FloatPixel sample_image_pixel_float_offset( void *imageData, image_descriptor *imageInfo,
                                            float x, float y, float z, float xAddressOffset, float yAddressOffset, float zAddressOffset,
                                            image_sampler_data *imageSampler, float *outData, int verbose, int *containsDenorms , int lod)
 {
     AddressFn adFn = sAddressingTable[ imageSampler ];
     FloatPixel returnVal;
     size_t width_lod = imageInfo->width, height_lod = imageInfo->height, depth_lod = imageInfo->depth;
     size_t slice_pitch_lod = 0, row_pitch_lod = 0;

     if ( imageInfo->num_mip_levels > 1 )
     {
       switch(imageInfo->type)
       {
       case CL_MEM_OBJECT_IMAGE3D :
         depth_lod = ( imageInfo->depth >> lod ) ? ( imageInfo->depth >> lod ) : 1;
       case CL_MEM_OBJECT_IMAGE2D :
       case CL_MEM_OBJECT_IMAGE2D_ARRAY :
         height_lod = ( imageInfo->height >> lod ) ? ( imageInfo->height >> lod ) : 1;
       default :
         width_lod = ( imageInfo->width >> lod ) ? ( imageInfo->width >> lod ) : 1;
       }
       row_pitch_lod = width_lod * get_pixel_size(imageInfo->format);
       if ( imageInfo->type == CL_MEM_OBJECT_IMAGE1D_ARRAY )
         slice_pitch_lod = row_pitch_lod;
       else if ( imageInfo->type == CL_MEM_OBJECT_IMAGE3D || imageInfo->type == CL_MEM_OBJECT_IMAGE2D_ARRAY)
         slice_pitch_lod = row_pitch_lod * height_lod;
     }
     else
     {
       slice_pitch_lod = imageInfo->slicePitch;
       row_pitch_lod = imageInfo->rowPitch;
     }

     if( containsDenorms )
         *containsDenorms = 0;

     if( imageSampler->normalized_coords ) {

         // We need to unnormalize our coordinates differently depending on
         // the image type, but 'x' is always processed the same way.

         x = unnormalize_coordinate("x", x, xAddressOffset, (float)width_lod,
             imageSampler->addressing_mode, verbose);

         switch (imageInfo->type) {

             // The image array types require special care:

             case CL_MEM_OBJECT_IMAGE1D_ARRAY:
                 z = 0; // don't care -- unused for 1D arrays
                 break;

             case CL_MEM_OBJECT_IMAGE2D_ARRAY:
                 y = unnormalize_coordinate("y", y, yAddressOffset, (float)height_lod,
                     imageSampler->addressing_mode, verbose);
                 break;

             // Everybody else:

             default:
                 y = unnormalize_coordinate("y", y, yAddressOffset, (float)height_lod,
                     imageSampler->addressing_mode, verbose);
                 z = unnormalize_coordinate("z", z, zAddressOffset, (float)depth_lod,
                     imageSampler->addressing_mode, verbose);
         }

     } else if ( verbose ) {

         switch (imageInfo->type) {
             case CL_MEM_OBJECT_IMAGE1D_ARRAY:
                 log_info("Starting coordinate: %f, array index %f\n", x, y);
                 break;
             case CL_MEM_OBJECT_IMAGE2D_ARRAY:
                 log_info("Starting coordinate: %f, %f, array index %f\n", x, y, z);
                 break;
             case CL_MEM_OBJECT_IMAGE1D:
             case CL_MEM_OBJECT_IMAGE1D_BUFFER:
                 log_info("Starting coordinate: %f\b", x);
                 break;
             case CL_MEM_OBJECT_IMAGE2D:
                 log_info("Starting coordinate: %f, %f\n", x, y);
                 break;
             case CL_MEM_OBJECT_IMAGE3D:
             default:
                 log_info("Starting coordinate: %f, %f, %f\n", x, y, z);
         }
     }

     // At this point, we have unnormalized coordinates.

     if( imageSampler->filter_mode == CL_FILTER_NEAREST )
     {
         int ix, iy, iz;

         // We apply the addressing function to the now-unnormalized
         // coordinates.  Note that the array cases again require special
         // care, per section 8.4 in the OpenCL 1.2 Specification.

         ix = adFn( floorf( x ), width_lod );

         switch (imageInfo->type) {
             case CL_MEM_OBJECT_IMAGE1D_ARRAY:
                 iy = calculate_array_index( y, (float)(imageInfo->arraySize - 1) );
                 iz = 0;
                 if( verbose ) {
                   log_info("\tArray index %f evaluates to %d\n",y, iy );
                 }
                 break;
             case CL_MEM_OBJECT_IMAGE2D_ARRAY:
                 iy = adFn( floorf( y ), height_lod );
                 iz = calculate_array_index( z, (float)(imageInfo->arraySize - 1) );
                 if( verbose ) {
                     log_info("\tArray index %f evaluates to %d\n",z, iz );
                 }
                 break;
             default:
                 iy = adFn( floorf( y ), height_lod );
                 if( depth_lod != 0 )
                     iz = adFn( floorf( z ), depth_lod );
                 else
                     iz = 0;
         }

         if( verbose ) {
             if( iz )
                 log_info( "\tReference integer coords calculated: { %d, %d, %d }\n", ix, iy, iz );
             else
                 log_info( "\tReference integer coords calculated: { %d, %d }\n", ix, iy );
         }

         read_image_pixel_float( imageData, imageInfo, ix, iy, iz, outData, lod );
         check_for_denorms( outData, containsDenorms );
         for( int i = 0; i < 4; i++ )
             returnVal.p[i] = fabsf( outData[i] );
         return returnVal;
     }
     else
     {
         // Linear filtering cases.

         size_t width = width_lod, height = height_lod, depth = depth_lod;

         // Image arrays can use 2D filtering, but require us to walk into the
         // image a certain number of slices before reading.

         if( depth == 0 || imageInfo->type == CL_MEM_OBJECT_IMAGE2D_ARRAY ||
                           imageInfo->type == CL_MEM_OBJECT_IMAGE1D_ARRAY)
         {
             float array_index = 0;

             size_t layer_offset = 0;

             if (imageInfo->type == CL_MEM_OBJECT_IMAGE2D_ARRAY) {
                 array_index = calculate_array_index(z, (float)(imageInfo->arraySize - 1));
                 layer_offset = slice_pitch_lod * (size_t)array_index;
             }
             else if (imageInfo->type == CL_MEM_OBJECT_IMAGE1D_ARRAY) {
                 array_index = calculate_array_index(y, (float)(imageInfo->arraySize - 1));
                 layer_offset = slice_pitch_lod * (size_t)array_index;

                 // Set up y and height so that the filtering below is correct
                 // 1D filtering on a single slice.
                 height = 1;
             }

             int x1 = adFn( floorf( x - 0.5f ), width );
             int y1 = 0;
             int x2 = adFn( floorf( x - 0.5f ) + 1, width );
             int y2 = 0;
             if ((imageInfo->type != CL_MEM_OBJECT_IMAGE1D) &&
                 (imageInfo->type != CL_MEM_OBJECT_IMAGE1D_ARRAY) &&
                 (imageInfo->type != CL_MEM_OBJECT_IMAGE1D_BUFFER)) {
                 y1 = adFn( floorf( y - 0.5f ), height );
                 y2 = adFn( floorf( y - 0.5f ) + 1, height );
             } else {
               y = 0.5f;
             }

             if( verbose ) {
                 log_info( "\tActual integer coords used (i = floor(x-.5)): i0:{ %d, %d } and i1:{ %d, %d }\n", x1, y1, x2, y2 );
                 log_info( "\tArray coordinate is %f\n", array_index);
             }

             // Walk to beginning of the 'correct' slice, if needed.
             char* imgPtr = ((char*)imageData) + layer_offset;

             float upLeft[ 4 ], upRight[ 4 ], lowLeft[ 4 ], lowRight[ 4 ];
             float maxUp[4], maxLow[4];
             read_image_pixel_float( imgPtr, imageInfo, x1, y1, 0, upLeft, lod );
             read_image_pixel_float( imgPtr, imageInfo, x2, y1, 0, upRight, lod );
             check_for_denorms( upLeft, containsDenorms );
             check_for_denorms( upRight, containsDenorms );
             pixelMax( upLeft, upRight, maxUp );
             read_image_pixel_float( imgPtr, imageInfo, x1, y2, 0, lowLeft, lod );
             read_image_pixel_float( imgPtr, imageInfo, x2, y2, 0, lowRight, lod );
             check_for_denorms( lowLeft, containsDenorms );
             check_for_denorms( lowRight, containsDenorms );
             pixelMax( lowLeft, lowRight, maxLow );
             pixelMax( maxUp, maxLow, returnVal.p );

             if( verbose )
             {
                 if( NULL == containsDenorms )
                     log_info( "\tSampled pixels (rgba order, denorms flushed to zero):\n" );
                 else
                     log_info( "\tSampled pixels (rgba order):\n" );
                 log_info( "\t\tp00: %f, %f, %f, %f\n", upLeft[0], upLeft[1], upLeft[2], upLeft[3] );
                 log_info( "\t\tp01: %f, %f, %f, %f\n", upRight[0], upRight[1], upRight[2], upRight[3] );
                 log_info( "\t\tp10: %f, %f, %f, %f\n", lowLeft[0], lowLeft[1], lowLeft[2], lowLeft[3] );
                 log_info( "\t\tp11: %f, %f, %f, %f\n", lowRight[0], lowRight[1], lowRight[2], lowRight[3] );
             }

             bool printMe = false;
             if( x1 <= 0 || x2 <= 0 || x1 >= (int)width-1 || x2 >= (int)width-1 )
                 printMe = true;
             if( y1 <= 0 || y2 <= 0 || y1 >= (int)height-1 || y2 >= (int)height-1 )
                 printMe = true;

             double weights[ 2 ][ 2 ];

             weights[ 0 ][ 0 ] = weights[ 0 ][ 1 ] = 1.0 - frac( x - 0.5f );
             weights[ 1 ][ 0 ] = weights[ 1 ][ 1 ] = frac( x - 0.5f );
             weights[ 0 ][ 0 ] *= 1.0 - frac( y - 0.5f );
             weights[ 1 ][ 0 ] *= 1.0 - frac( y - 0.5f );
             weights[ 0 ][ 1 ] *= frac( y - 0.5f );
             weights[ 1 ][ 1 ] *= frac( y - 0.5f );

             if( verbose )
                 log_info( "\tfrac( x - 0.5f ) = %f,  frac( y - 0.5f ) = %f\n",  frac( x - 0.5f ), frac( y - 0.5f ) );

             for( int i = 0; i < 3; i++ )
             {
                 outData[ i ] = (float)( ( upLeft[ i ] * weights[ 0 ][ 0 ] ) +
                                         ( upRight[ i ] * weights[ 1 ][ 0 ] ) +
                                         ( lowLeft[ i ] * weights[ 0 ][ 1 ] ) +
                                         ( lowRight[ i ] * weights[ 1 ][ 1 ] ));
                 // flush subnormal results to zero if necessary
                 if( NULL == containsDenorms && fabs(outData[i]) < FLT_MIN )
                     outData[i] = copysignf( 0.0f, outData[i] );
             }
             outData[ 3 ] = (float)( ( upLeft[ 3 ] * weights[ 0 ][ 0 ] ) +
                                    ( upRight[ 3 ] * weights[ 1 ][ 0 ] ) +
                                    ( lowLeft[ 3 ] * weights[ 0 ][ 1 ] ) +
                                    ( lowRight[ 3 ] * weights[ 1 ][ 1 ] ));
             // flush subnormal results to zero if necessary
             if( NULL == containsDenorms && fabs(outData[3]) < FLT_MIN )
                 outData[3] = copysignf( 0.0f, outData[3] );
         }
         else
         {
             // 3D linear filtering
             int x1 = adFn( floorf( x - 0.5f ), width_lod );
             int y1 = adFn( floorf( y - 0.5f ), height_lod );
             int z1 = adFn( floorf( z - 0.5f ), depth_lod );
             int x2 = adFn( floorf( x - 0.5f ) + 1, width_lod );
             int y2 = adFn( floorf( y - 0.5f ) + 1, height_lod );
             int z2 = adFn( floorf( z - 0.5f ) + 1, depth_lod );

             if( verbose )
                 log_info( "\tActual integer coords used (i = floor(x-.5)): i0:{%d, %d, %d} and i1:{%d, %d, %d}\n", x1, y1, z1, x2, y2, z2 );

             float upLeftA[ 4 ], upRightA[ 4 ], lowLeftA[ 4 ], lowRightA[ 4 ];
             float upLeftB[ 4 ], upRightB[ 4 ], lowLeftB[ 4 ], lowRightB[ 4 ];
             float pixelMaxA[4], pixelMaxB[4];
             read_image_pixel_float( imageData, imageInfo, x1, y1, z1, upLeftA, lod );
             read_image_pixel_float( imageData, imageInfo, x2, y1, z1, upRightA, lod );
             check_for_denorms( upLeftA, containsDenorms );
             check_for_denorms( upRightA, containsDenorms );
             pixelMax( upLeftA, upRightA, pixelMaxA );
             read_image_pixel_float( imageData, imageInfo, x1, y2, z1, lowLeftA, lod );
             read_image_pixel_float( imageData, imageInfo, x2, y2, z1, lowRightA, lod );
             check_for_denorms( lowLeftA, containsDenorms );
             check_for_denorms( lowRightA, containsDenorms );
             pixelMax( lowLeftA, lowRightA, pixelMaxB );
             pixelMax( pixelMaxA, pixelMaxB, returnVal.p);
             read_image_pixel_float( imageData, imageInfo, x1, y1, z2, upLeftB, lod );
             read_image_pixel_float( imageData, imageInfo, x2, y1, z2, upRightB, lod );
             check_for_denorms( upLeftB, containsDenorms );
             check_for_denorms( upRightB, containsDenorms );
             pixelMax( upLeftB, upRightB, pixelMaxA );
             read_image_pixel_float( imageData, imageInfo, x1, y2, z2, lowLeftB, lod );
             read_image_pixel_float( imageData, imageInfo, x2, y2, z2, lowRightB, lod );
             check_for_denorms( lowLeftB, containsDenorms );
             check_for_denorms( lowRightB, containsDenorms );
             pixelMax( lowLeftB, lowRightB, pixelMaxB );
             pixelMax( pixelMaxA, pixelMaxB, pixelMaxA);
             pixelMax( pixelMaxA, returnVal.p, returnVal.p );

             if( verbose )
             {
                 if( NULL == containsDenorms )
                     log_info( "\tSampled pixels (rgba order, denorms flushed to zero):\n" );
                 else
                     log_info( "\tSampled pixels (rgba order):\n" );
                 log_info( "\t\tp000: %f, %f, %f, %f\n", upLeftA[0], upLeftA[1], upLeftA[2], upLeftA[3] );
                 log_info( "\t\tp001: %f, %f, %f, %f\n", upRightA[0], upRightA[1], upRightA[2], upRightA[3] );
                 log_info( "\t\tp010: %f, %f, %f, %f\n", lowLeftA[0], lowLeftA[1], lowLeftA[2], lowLeftA[3] );
                 log_info( "\t\tp011: %f, %f, %f, %f\n\n", lowRightA[0], lowRightA[1], lowRightA[2], lowRightA[3] );
                 log_info( "\t\tp100: %f, %f, %f, %f\n", upLeftB[0], upLeftB[1], upLeftB[2], upLeftB[3] );
                 log_info( "\t\tp101: %f, %f, %f, %f\n", upRightB[0], upRightB[1], upRightB[2], upRightB[3] );
                 log_info( "\t\tp110: %f, %f, %f, %f\n", lowLeftB[0], lowLeftB[1], lowLeftB[2], lowLeftB[3] );
                 log_info( "\t\tp111: %f, %f, %f, %f\n", lowRightB[0], lowRightB[1], lowRightB[2], lowRightB[3] );
             }

             double weights[ 2 ][ 2 ][ 2 ];

             float a = frac( x - 0.5f ), b = frac( y - 0.5f ), c = frac( z - 0.5f );
             weights[ 0 ][ 0 ][ 0 ] = weights[ 0 ][ 1 ][ 0 ] = weights[ 0 ][ 0 ][ 1 ] = weights[ 0 ][ 1 ][ 1 ] = 1.f - a;
             weights[ 1 ][ 0 ][ 0 ] = weights[ 1 ][ 1 ][ 0 ] = weights[ 1 ][ 0 ][ 1 ] = weights[ 1 ][ 1 ][ 1 ] = a;
             weights[ 0 ][ 0 ][ 0 ] *= 1.f - b;
             weights[ 1 ][ 0 ][ 0 ] *= 1.f - b;
             weights[ 0 ][ 0 ][ 1 ] *= 1.f - b;
             weights[ 1 ][ 0 ][ 1 ] *= 1.f - b;
             weights[ 0 ][ 1 ][ 0 ] *= b;
             weights[ 1 ][ 1 ][ 0 ] *= b;
             weights[ 0 ][ 1 ][ 1 ] *= b;
             weights[ 1 ][ 1 ][ 1 ] *= b;
             weights[ 0 ][ 0 ][ 0 ] *= 1.f - c;
             weights[ 0 ][ 1 ][ 0 ] *= 1.f - c;
             weights[ 1 ][ 0 ][ 0 ] *= 1.f - c;
             weights[ 1 ][ 1 ][ 0 ] *= 1.f - c;
             weights[ 0 ][ 0 ][ 1 ] *= c;
             weights[ 0 ][ 1 ][ 1 ] *= c;
             weights[ 1 ][ 0 ][ 1 ] *= c;
             weights[ 1 ][ 1 ][ 1 ] *= c;

             if( verbose )
                 log_info( "\tfrac( x - 0.5f ) = %f,  frac( y - 0.5f ) = %f, frac( z - 0.5f ) = %f\n",
                          frac( x - 0.5f ), frac( y - 0.5f ), frac( z - 0.5f )  );

             for( int i = 0; i < 3; i++ )
             {
                 outData[ i ] = (float)( ( upLeftA[ i ] * weights[ 0 ][ 0 ][ 0 ] ) +
                                         ( upRightA[ i ] * weights[ 1 ][ 0 ][ 0 ] ) +
                                         ( lowLeftA[ i ] * weights[ 0 ][ 1 ][ 0 ] ) +
                                         ( lowRightA[ i ] * weights[ 1 ][ 1 ][ 0 ] ) +
                                         ( upLeftB[ i ] * weights[ 0 ][ 0 ][ 1 ] ) +
                                         ( upRightB[ i ] * weights[ 1 ][ 0 ][ 1 ] ) +
                                         ( lowLeftB[ i ] * weights[ 0 ][ 1 ][ 1 ] ) +
                                         ( lowRightB[ i ] * weights[ 1 ][ 1 ][ 1 ] ));
                 // flush subnormal results to zero if necessary
                 if( NULL == containsDenorms && fabs(outData[i]) < FLT_MIN )
                     outData[i] = copysignf( 0.0f, outData[i] );
             }
             outData[ 3 ] = (float)( ( upLeftA[ 3 ] * weights[ 0 ][ 0 ][ 0 ] ) +
                                    ( upRightA[ 3 ] * weights[ 1 ][ 0 ][ 0 ] ) +
                                    ( lowLeftA[ 3 ] * weights[ 0 ][ 1 ][ 0 ] ) +
                                    ( lowRightA[ 3 ] * weights[ 1 ][ 1 ][ 0 ] ) +
                                    ( upLeftB[ 3 ] * weights[ 0 ][ 0 ][ 1 ] ) +
                                    ( upRightB[ 3 ] * weights[ 1 ][ 0 ][ 1 ] ) +
                                    ( lowLeftB[ 3 ] * weights[ 0 ][ 1 ][ 1 ] ) +
                                    ( lowRightB[ 3 ] * weights[ 1 ][ 1 ][ 1 ] ));
             // flush subnormal results to zero if necessary
             if( NULL == containsDenorms && fabs(outData[3]) < FLT_MIN )
                 outData[3] = copysignf( 0.0f, outData[3] );
         }

         return returnVal;
     }
 }

 FloatPixel sample_image_pixel_float_offset( void *imageData, image_descriptor *imageInfo,
                                            float x, float y, float z, float xAddressOffset, float yAddressOffset, float zAddressOffset,
                                            image_sampler_data *imageSampler, float *outData, int verbose, int *containsDenorms )
 {
   return sample_image_pixel_float_offset( imageData, imageInfo, x, y, z, xAddressOffset, yAddressOffset, zAddressOffset,
     imageSampler, outData, verbose, containsDenorms, 0);
 }


 int debug_find_vector_in_image( void *imagePtr, image_descriptor *imageInfo,
                                void *vectorToFind, size_t vectorSize, int *outX, int *outY, int *outZ, size_t lod )
 {
     int foundCount = 0;
     char *iPtr = (char *)imagePtr;
     size_t width;
     size_t depth;
     size_t height;
     size_t row_pitch;
     size_t slice_pitch;

     switch (imageInfo->type)
     {
     case CL_MEM_OBJECT_IMAGE1D:
       width = (imageInfo->width >> lod) ? (imageInfo->width >> lod) : 1;
       height = 1;
       depth = 1;
       break;
     case CL_MEM_OBJECT_IMAGE1D_ARRAY:
       width = (imageInfo->width >> lod) ? (imageInfo->width >> lod) : 1;
       height = 1;
       depth = imageInfo->arraySize;
       break;
     case CL_MEM_OBJECT_IMAGE2D:
       width = (imageInfo->width >> lod) ? (imageInfo->width >> lod) : 1;
       height = (imageInfo->height >> lod) ? (imageInfo->height >> lod) : 1;
       depth = 1;
       break;
     case CL_MEM_OBJECT_IMAGE2D_ARRAY:
       width = (imageInfo->width >> lod) ? (imageInfo->width >> lod) : 1;
       height = (imageInfo->height >> lod) ? (imageInfo->height >> lod) : 1;
       depth = imageInfo->arraySize;
       break;
     case CL_MEM_OBJECT_IMAGE3D:
       width = (imageInfo->width >> lod) ? (imageInfo->width >> lod) : 1;
       height = (imageInfo->height >> lod) ? (imageInfo->height >> lod) : 1;
       depth = (imageInfo->depth >> lod) ? (imageInfo->depth >> lod) : 1;
       break;
     }

     row_pitch = width * get_pixel_size( imageInfo->format );
     slice_pitch = row_pitch * height;

     for( size_t z = 0; z < depth; z++ )
     {
         for( size_t y = 0; y < height; y++ )
         {
             for( size_t x = 0; x < width; x++)
             {
                 if( memcmp( iPtr, vectorToFind, vectorSize ) == 0 )
                 {
                     if( foundCount == 0 )
                     {
                         *outX = (int)x;
                         if (outY != NULL)
                             *outY = (int)y;
                         if( outZ != NULL )
                             *outZ = (int)z;
                     }
                     foundCount++;
                 }
                 iPtr += vectorSize;
             }
             iPtr += row_pitch - ( width * vectorSize );
         }
         iPtr += slice_pitch - ( height * row_pitch );
     }
     return foundCount;
 }

 int debug_find_pixel_in_image( void *imagePtr, image_descriptor *imageInfo,
                               unsigned int *valuesToFind, int *outX, int *outY, int *outZ, int lod )
 {
     char vectorToFind[ 4 * 4 ];
     size_t vectorSize = get_format_channel_count( imageInfo->format );


     if( imageInfo->format->image_channel_data_type == CL_UNSIGNED_INT8 )
     {
         unsigned char *p = (unsigned char *)vectorToFind;
         for( unsigned int i = 0; i < vectorSize; i++ )
             p[i] = (unsigned char)valuesToFind[i];
     }
     else if( imageInfo->format->image_channel_data_type == CL_UNSIGNED_INT16 )
     {
         unsigned short *p = (unsigned short *)vectorToFind;
         for( unsigned int i = 0; i < vectorSize; i++ )
             p[i] = (unsigned short)valuesToFind[i];
         vectorSize *= 2;
     }
     else if( imageInfo->format->image_channel_data_type == CL_UNSIGNED_INT32 )
     {
         unsigned int *p = (unsigned int *)vectorToFind;
         for( unsigned int i = 0; i < vectorSize; i++ )
             p[i] = (unsigned int)valuesToFind[i];
         vectorSize *= 4;
     }
     else
     {
         log_info( "WARNING: Unable to search for debug pixel: invalid image format\n" );
         return false;
     }
     return debug_find_vector_in_image( imagePtr, imageInfo, vectorToFind, vectorSize, outX, outY, outZ, lod );
 }

 int debug_find_pixel_in_image( void *imagePtr, image_descriptor *imageInfo,
                               int *valuesToFind, int *outX, int *outY, int *outZ, int lod )
 {
     char vectorToFind[ 4 * 4 ];
     size_t vectorSize = get_format_channel_count( imageInfo->format );

     if( imageInfo->format->image_channel_data_type == CL_SIGNED_INT8 )
     {
         char *p = (char *)vectorToFind;
         for( unsigned int i = 0; i < vectorSize; i++ )
             p[i] = (char)valuesToFind[i];
     }
     else if( imageInfo->format->image_channel_data_type == CL_SIGNED_INT16 )
     {
         short *p = (short *)vectorToFind;
         for( unsigned int i = 0; i < vectorSize; i++ )
             p[i] = (short)valuesToFind[i];
         vectorSize *= 2;
     }
     else if( imageInfo->format->image_channel_data_type == CL_SIGNED_INT32 )
     {
         int *p = (int *)vectorToFind;
         for( unsigned int i = 0; i < vectorSize; i++ )
             p[i] = (int)valuesToFind[i];
         vectorSize *= 4;
     }
     else
     {
         log_info( "WARNING: Unable to search for debug pixel: invalid image format\n" );
         return false;
     }
     return debug_find_vector_in_image( imagePtr, imageInfo, vectorToFind, vectorSize, outX, outY, outZ, lod );
 }

 int debug_find_pixel_in_image( void *imagePtr, image_descriptor *imageInfo,
                               float *valuesToFind, int *outX, int *outY, int *outZ, int lod )
 {
     char vectorToFind[ 4 * 4 ];
     float swizzled[4];
     memcpy( swizzled, valuesToFind, sizeof( swizzled ) );
     size_t vectorSize = get_pixel_size( imageInfo->format );
     pack_image_pixel( swizzled, imageInfo->format, vectorToFind );
     return debug_find_vector_in_image( imagePtr, imageInfo, vectorToFind, vectorSize, outX, outY, outZ, lod );
 }

 template <class T> void swizzle_vector_for_image( T *srcVector, const cl_image_format *imageFormat )
 {
     T temp;
     switch( imageFormat->image_channel_order )
     {
         case CL_A:
             srcVector[ 0 ] = srcVector[ 3 ];
             break;
         case CL_R:
         case CL_Rx:
         case CL_RG:
         case CL_RGx:
         case CL_RGB:
         case CL_RGBx:
         case CL_RGBA:
         case CL_sRGB:
         case CL_sRGBx:
         case CL_sRGBA:
             break;
         case CL_RA:
             srcVector[ 1 ] = srcVector[ 3 ];
             break;
         case CL_ARGB:
             temp = srcVector[ 3 ];
             srcVector[ 3 ] = srcVector[ 2 ];
             srcVector[ 2 ] = srcVector[ 1 ];
             srcVector[ 1 ] = srcVector[ 0 ];
             srcVector[ 0 ] = temp;
             break;
         case CL_BGRA:
         case CL_sBGRA:
             temp = srcVector[ 0 ];
             srcVector[ 0 ] = srcVector[ 2 ];
             srcVector[ 2 ] = temp;
             break;
         case CL_INTENSITY:
             srcVector[ 3 ] = srcVector[ 0 ];
             srcVector[ 2 ] = srcVector[ 0 ];
             srcVector[ 1 ] = srcVector[ 0 ];
             break;
         case CL_LUMINANCE:
             srcVector[ 2 ] = srcVector[ 0 ];
             srcVector[ 1 ] = srcVector[ 0 ];
             break;
 #ifdef CL_1RGB_APPLE
         case CL_1RGB_APPLE:
             temp = srcVector[ 3 ];
             srcVector[ 3 ] = srcVector[ 2 ];
             srcVector[ 2 ] = srcVector[ 1 ];
             srcVector[ 1 ] = srcVector[ 0 ];
             srcVector[ 0 ] = temp;
             break;
 #endif
 #ifdef CL_BGR1_APPLE
         case CL_BGR1_APPLE:
             temp = srcVector[ 0 ];
             srcVector[ 0 ] = srcVector[ 2 ];
             srcVector[ 2 ] = temp;
             break;
 #endif
     }
 }

 #define SATURATE( v, min, max ) ( v < min ? min : ( v > max ? max : v ) )

 void pack_image_pixel( unsigned int *srcVector, const cl_image_format *imageFormat, void *outData )
 {
     swizzle_vector_for_image<unsigned int>( srcVector, imageFormat );
     size_t channelCount = get_format_channel_count( imageFormat );

     switch( imageFormat->image_channel_data_type )
     {
         case CL_UNSIGNED_INT8:
         {
             unsigned char *ptr = (unsigned char *)outData;
             for( unsigned int i = 0; i < channelCount; i++ )
                 ptr[ i ] = (unsigned char)SATURATE( srcVector[ i ], 0, 255 );
             break;
         }
         case CL_UNSIGNED_INT16:
         {
             unsigned short *ptr = (unsigned short *)outData;
             for( unsigned int i = 0; i < channelCount; i++ )
                 ptr[ i ] = (unsigned short)SATURATE( srcVector[ i ], 0, 65535 );
             break;
         }
         case CL_UNSIGNED_INT32:
         {
             unsigned int *ptr = (unsigned int *)outData;
             for( unsigned int i = 0; i < channelCount; i++ )
                 ptr[ i ] = (unsigned int)srcVector[ i ];
             break;
         }
         default:
             break;
     }
 }

 void pack_image_pixel( int *srcVector, const cl_image_format *imageFormat, void *outData )
 {
     swizzle_vector_for_image<int>( srcVector, imageFormat );
     size_t chanelCount = get_format_channel_count( imageFormat );

     switch( imageFormat->image_channel_data_type )
     {
         case CL_SIGNED_INT8:
         {
             char *ptr = (char *)outData;
             for( unsigned int i = 0; i < chanelCount; i++ )
                 ptr[ i ] = (char)SATURATE( srcVector[ i ], -128, 127 );
             break;
         }
         case CL_SIGNED_INT16:
         {
             short *ptr = (short *)outData;
             for( unsigned int i = 0; i < chanelCount; i++ )
                 ptr[ i ] = (short)SATURATE( srcVector[ i ], -32768, 32767 );
             break;
         }
         case CL_SIGNED_INT32:
         {
             int *ptr = (int *)outData;
             for( unsigned int i = 0; i < chanelCount; i++ )
                 ptr[ i ] = (int)srcVector[ i ];
             break;
         }
         default:
             break;
     }
 }

 int round_to_even( float v )
 {
     // clamp overflow
     if( v >= - (float) INT_MIN )
         return INT_MAX;
     if( v <= (float) INT_MIN )
         return INT_MIN;

     // round fractional values to integer value
     if( fabsf(v) < MAKE_HEX_FLOAT(0x1.0p23f, 0x1L, 23) )
     {
         static const float magic[2] = { MAKE_HEX_FLOAT(0x1.0p23f, 0x1L, 23), MAKE_HEX_FLOAT(-0x1.0p23f, -0x1L, 23) };
         float magicVal = magic[ v < 0.0f ];
         v += magicVal;
         v -= magicVal;
     }

     return (int) v;
 }

 void pack_image_pixel( float *srcVector, const cl_image_format *imageFormat, void *outData )
 {
     swizzle_vector_for_image<float>( srcVector, imageFormat );
     size_t channelCount = get_format_channel_count( imageFormat );
     switch( imageFormat->image_channel_data_type )
     {
         case CL_HALF_FLOAT:
         {
             cl_ushort *ptr = (cl_ushort *)outData;

             switch( gFloatToHalfRoundingMode )
             {
                 case kRoundToNearestEven:
             for( unsigned int i = 0; i < channelCount; i++ )
                         ptr[ i ] = float2half_rte( srcVector[ i ] );
             break;
                 case kRoundTowardZero:
                     for( unsigned int i = 0; i < channelCount; i++ )
                         ptr[ i ] = float2half_rtz( srcVector[ i ] );
                     break;
                 default:
                     log_error( "ERROR: Test internal error -- unhandled or unknown float->half rounding mode.\n" );
                     exit(-1);
                     break;
         }
             break;
         }

         case CL_FLOAT:
         {
             cl_float *ptr = (cl_float *)outData;
             for( unsigned int i = 0; i < channelCount; i++ )
                 ptr[ i ] = srcVector[ i ];
             break;
         }

         case CL_SNORM_INT8:
         {
             cl_char *ptr = (cl_char *)outData;
             for( unsigned int i = 0; i < channelCount; i++ )
                 ptr[ i ] = (cl_char)NORMALIZE_SIGNED( srcVector[ i ], -127.0f, 127.f );
             break;
         }
         case CL_SNORM_INT16:
         {
             cl_short *ptr = (cl_short *)outData;
             for( unsigned int i = 0; i < channelCount; i++ )
                 ptr[ i ] = (short)NORMALIZE_SIGNED( srcVector[ i ], -32767.f, 32767.f  );
             break;
         }
         case CL_UNORM_INT8:
         {
             cl_uchar *ptr = (cl_uchar *)outData;
             if ( is_sRGBA_order(imageFormat->image_channel_order) )
             {
                 ptr[ 0 ] = (unsigned char)( sRGBmap( srcVector[ 0 ] ) + 0.5 );
                 ptr[ 1 ] = (unsigned char)( sRGBmap( srcVector[ 1 ] ) + 0.5 );
                 ptr[ 2 ] = (unsigned char)( sRGBmap( srcVector[ 2 ] ) + 0.5 );
                 if (channelCount == 4)
                     ptr[ 3 ] = (unsigned char)NORMALIZE( srcVector[ 3 ], 255.f );
             }
             else
             {
                 for( unsigned int i = 0; i < channelCount; i++ )
                     ptr[ i ] = (unsigned char)NORMALIZE( srcVector[ i ], 255.f );
             }
 #ifdef CL_1RGB_APPLE
             if( imageFormat->image_channel_order == CL_1RGB_APPLE )
                 ptr[0] = 255.0f;
 #endif
 #ifdef CL_BGR1_APPLE
             if( imageFormat->image_channel_order == CL_BGR1_APPLE )
                 ptr[3] = 255.0f;
 #endif
             break;
         }
         case CL_UNORM_INT16:
         {
             cl_ushort *ptr = (cl_ushort *)outData;
             for( unsigned int i = 0; i < channelCount; i++ )
                 ptr[ i ] = (unsigned short)NORMALIZE( srcVector[ i ], 65535.f );
             break;
         }
         case CL_UNORM_SHORT_555:
         {
             cl_ushort *ptr = (cl_ushort *)outData;
             ptr[ 0 ] = ( ( (unsigned short)NORMALIZE( srcVector[ 0 ], 31.f ) & 31 ) << 10 ) |
             ( ( (unsigned short)NORMALIZE( srcVector[ 1 ], 31.f ) & 31 ) << 5 ) |
             ( ( (unsigned short)NORMALIZE( srcVector[ 2 ], 31.f ) & 31 ) << 0 );
             break;
         }
         case CL_UNORM_SHORT_565:
         {
             cl_ushort *ptr = (cl_ushort *)outData;
             ptr[ 0 ] = ( ( (unsigned short)NORMALIZE( srcVector[ 0 ], 31.f ) & 31 ) << 11 ) |
             ( ( (unsigned short)NORMALIZE( srcVector[ 1 ], 63.f ) & 63 ) << 5 ) |
             ( ( (unsigned short)NORMALIZE( srcVector[ 2 ], 31.f ) & 31 ) << 0 );
             break;
         }
         case CL_UNORM_INT_101010:
         {
             cl_uint *ptr = (cl_uint *)outData;
             ptr[ 0 ] = ( ( (unsigned int)NORMALIZE( srcVector[ 0 ], 1023.f ) & 1023 ) << 20 ) |
             ( ( (unsigned int)NORMALIZE( srcVector[ 1 ], 1023.f ) & 1023 ) << 10 ) |
             ( ( (unsigned int)NORMALIZE( srcVector[ 2 ], 1023.f ) & 1023 ) << 0 );
             break;
         }
         case CL_SIGNED_INT8:
         {
             cl_char *ptr = (cl_char *)outData;
             for( unsigned int i = 0; i < channelCount; i++ )
                 ptr[ i ] = (cl_char)CONVERT_INT( srcVector[ i ], -127.0f, 127.f, 127 );
             break;
         }
         case CL_SIGNED_INT16:
         {
             cl_short *ptr = (cl_short *)outData;
             for( unsigned int i = 0; i < channelCount; i++ )
                 ptr[ i ] = (short)CONVERT_INT( srcVector[ i ], -32767.f, 32767.f, 32767  );
             break;
         }
         case CL_SIGNED_INT32:
         {
             cl_int *ptr = (cl_int *)outData;
             for( unsigned int i = 0; i < channelCount; i++ )
                 ptr[ i ] = (int)CONVERT_INT( srcVector[ i ], MAKE_HEX_FLOAT( -0x1.0p31f, -1, 31), MAKE_HEX_FLOAT( 0x1.fffffep30f, 0x1fffffe, 30-23), CL_INT_MAX  );
             break;
         }
         case CL_UNSIGNED_INT8:
         {
             cl_uchar *ptr = (cl_uchar *)outData;
             for( unsigned int i = 0; i < channelCount; i++ )
                 ptr[ i ] = (cl_uchar)CONVERT_UINT( srcVector[ i ], 255.f, CL_UCHAR_MAX );
             break;
         }
         case CL_UNSIGNED_INT16:
         {
             cl_ushort *ptr = (cl_ushort *)outData;
             for( unsigned int i = 0; i < channelCount; i++ )
                 ptr[ i ] = (cl_ushort)CONVERT_UINT( srcVector[ i ], 32767.f, CL_USHRT_MAX );
             break;
         }
         case CL_UNSIGNED_INT32:
         {
             cl_uint *ptr = (cl_uint *)outData;
             for( unsigned int i = 0; i < channelCount; i++ )
                 ptr[ i ] = (cl_uint)CONVERT_UINT( srcVector[ i ], MAKE_HEX_FLOAT( 0x1.fffffep31f, 0x1fffffe, 31-23), CL_UINT_MAX  );
             break;
         }
 #ifdef CL_SFIXED14_APPLE
         case CL_SFIXED14_APPLE:
         {
             cl_ushort *ptr = (cl_ushort*)outData;
             for( unsigned int i = 0; i < channelCount; i++ )
             {
                 cl_float f = fmaxf( srcVector[i], -1.0f );
                 f = fminf( f, 3.0f );
                 cl_int d = rintf(f * 0x1.0p14f);
                 d += 16384;
                 if( d > CL_USHRT_MAX )
                     d = CL_USHRT_MAX;
                 ptr[i] = d;
             }
             break;
         }
 #endif
         default:
             log_error( "INTERNAL ERROR: unknown format (%d)\n", imageFormat->image_channel_data_type);
             exit(-1);
             break;
     }
 }

 void pack_image_pixel_error( const float *srcVector, const cl_image_format *imageFormat, const void *results, float *errors )
 {
     size_t channelCount = get_format_channel_count( imageFormat );
     switch( imageFormat->image_channel_data_type )
     {
         case CL_HALF_FLOAT:
         {
             const cl_ushort *ptr = (const cl_ushort *)results;

             for( unsigned int i = 0; i < channelCount; i++ )
                 errors[i] = Ulp_Error_Half( ptr[i], srcVector[i] );

             break;
         }

         case CL_FLOAT:
         {
             const cl_ushort *ptr = (const cl_ushort *)results;

             for( unsigned int i = 0; i < channelCount; i++ )
                 errors[i] = Ulp_Error( ptr[i], srcVector[i] );

             break;
         }

         case CL_SNORM_INT8:
         {
             const cl_char *ptr = (const cl_char *)results;

             for( unsigned int i = 0; i < channelCount; i++ )
                 errors[i] = ptr[i] - NORMALIZE_SIGNED_UNROUNDED( srcVector[ i ], -127.0f, 127.f );

             break;
         }
         case CL_SNORM_INT16:
         {
             const cl_short *ptr = (const cl_short *)results;

             for( unsigned int i = 0; i < channelCount; i++ )
                 errors[i] = ptr[i] - NORMALIZE_SIGNED_UNROUNDED( srcVector[ i ], -32767.f, 32767.f  );

             break;
         }
         case CL_UNORM_INT8:
         {
             const cl_uchar *ptr = (const cl_uchar *)results;

             for( unsigned int i = 0; i < channelCount; i++ )
                 errors[i] = ptr[i] - NORMALIZE_UNROUNDED( srcVector[ i ], 255.f  );

             break;
         }
         case CL_UNORM_INT16:
         {
             const cl_ushort *ptr = (const cl_ushort *)results;

             for( unsigned int i = 0; i < channelCount; i++ )
                 errors[i] = ptr[i] - NORMALIZE_UNROUNDED( srcVector[ i ], 65535.f  );

             break;
         }
         case CL_UNORM_SHORT_555:
         {
             const cl_ushort *ptr = (const cl_ushort *)results;

             errors[0] = ((ptr[0] >> 10) & 31) - NORMALIZE_UNROUNDED( srcVector[ 0 ], 31.f );
             errors[1] = ((ptr[0] >>  5) & 31) - NORMALIZE_UNROUNDED( srcVector[ 1 ], 31.f );
             errors[2] = ((ptr[0] >>  0) & 31) - NORMALIZE_UNROUNDED( srcVector[ 2 ], 31.f );

             break;
         }
         case CL_UNORM_SHORT_565:
         {
             const cl_ushort *ptr = (const cl_ushort *)results;

             errors[0] = ((ptr[0] >> 11) & 31) - NORMALIZE_UNROUNDED( srcVector[ 0 ], 31.f );
             errors[1] = ((ptr[0] >>  5) & 63) - NORMALIZE_UNROUNDED( srcVector[ 1 ], 63.f );
             errors[2] = ((ptr[0] >>  0) & 31) - NORMALIZE_UNROUNDED( srcVector[ 2 ], 31.f );

             break;
         }
         case CL_UNORM_INT_101010:
         {
             const cl_uint *ptr = (const cl_uint *)results;

             errors[0] = ((ptr[0] >> 20) & 1023) - NORMALIZE_UNROUNDED( srcVector[ 0 ], 1023.f );
             errors[1] = ((ptr[0] >> 10) & 1023) - NORMALIZE_UNROUNDED( srcVector[ 1 ], 1023.f );
             errors[2] = ((ptr[0] >>  0) & 1023) - NORMALIZE_UNROUNDED( srcVector[ 2 ], 1023.f );

             break;
         }
         case CL_SIGNED_INT8:
         {
             const cl_char *ptr = (const cl_char *)results;

             for( unsigned int i = 0; i < channelCount; i++ )
                 errors[ i ] = ptr[i] - CONVERT_INT( srcVector[ i ], -127.0f, 127.f, 127 );

             break;
         }
         case CL_SIGNED_INT16:
         {
             const cl_short *ptr = (const cl_short *)results;
             for( unsigned int i = 0; i < channelCount; i++ )
                 errors[i] = ptr[ i ] - CONVERT_INT( srcVector[ i ], -32767.f, 32767.f, 32767  );
             break;
         }
         case CL_SIGNED_INT32:
         {
             const cl_int *ptr = (const cl_int *)results;
             for( unsigned int i = 0; i < channelCount; i++ )
                 errors[i] = (cl_float)((cl_long) ptr[ i ] - (cl_long) CONVERT_INT( srcVector[ i ], MAKE_HEX_FLOAT( -0x1.0p31f, -1, 31), MAKE_HEX_FLOAT( 0x1.fffffep30f, 0x1fffffe, 30-23), CL_INT_MAX  ));
             break;
         }
         case CL_UNSIGNED_INT8:
         {
             const cl_uchar *ptr = (const cl_uchar *)results;
             for( unsigned int i = 0; i < channelCount; i++ )
                 errors[i] = (cl_int) ptr[ i ] - (cl_int) CONVERT_UINT( srcVector[ i ], 255.f, CL_UCHAR_MAX );
             break;
         }
         case CL_UNSIGNED_INT16:
         {
             const cl_ushort *ptr = (const cl_ushort *)results;
             for( unsigned int i = 0; i < channelCount; i++ )
                 errors[i] = (cl_int) ptr[ i ] - (cl_int) CONVERT_UINT( srcVector[ i ], 32767.f, CL_USHRT_MAX );
             break;
         }
         case CL_UNSIGNED_INT32:
         {
             const cl_uint *ptr = (const cl_uint *)results;
             for( unsigned int i = 0; i < channelCount; i++ )
                 errors[i] = (cl_float)((cl_long) ptr[ i ] - (cl_long)CONVERT_UINT( srcVector[ i ], MAKE_HEX_FLOAT( 0x1.fffffep31f, 0x1fffffe, 31-23), CL_UINT_MAX  ));
             break;
         }
 #ifdef CL_SFIXED14_APPLE
         case CL_SFIXED14_APPLE:
         {
             const cl_ushort *ptr = (const cl_ushort *)results;

             for( unsigned int i = 0; i < channelCount; i++ )
                 errors[i] = ptr[i] - NORMALIZE_SIGNED_UNROUNDED( ((int) srcVector[ i ] - 16384), -16384.f, 49151.f  );

             break;
         }
 #endif
         default:
             log_error( "INTERNAL ERROR: unknown format (%d)\n", imageFormat->image_channel_data_type);
             exit(-1);
             break;
     }
 }


 //
 //  Autodetect which rounding mode is used for image writes to CL_HALF_FLOAT
 //  This should be called lazily before attempting to verify image writes, otherwise an error will occur.
 //
 int  DetectFloatToHalfRoundingMode( cl_command_queue q )  // Returns CL_SUCCESS on success
 {
     cl_int err = CL_SUCCESS;

     if( gFloatToHalfRoundingMode == kDefaultRoundingMode )
     {
         // Some numbers near 0.5f, that we look at to see how the values are rounded.
         static const cl_uint  inData[4*4] = {   0x3f000fffU, 0x3f001000U, 0x3f001001U, 0U, 0x3f001fffU, 0x3f002000U, 0x3f002001U, 0U,
                                                 0x3f002fffU, 0x3f003000U, 0x3f003001U, 0U, 0x3f003fffU, 0x3f004000U, 0x3f004001U, 0U    };
         static const size_t count = sizeof( inData ) / (4*sizeof( inData[0] ));
         const float *inp = (const float*) inData;
         cl_context context = NULL;

     // Create an input buffer
         err = clGetCommandQueueInfo( q, CL_QUEUE_CONTEXT, sizeof(context), &context, NULL );
         if( err )
         {
             log_error( "Error:  could not get context from command queue in DetectFloatToHalfRoundingMode  (%d)", err );
             return err;
         }

         cl_mem inBuf = clCreateBuffer( context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR | CL_MEM_ALLOC_HOST_PTR, sizeof( inData ), (void*) inData, &err );
         if( NULL == inBuf || err )
         {
             log_error( "Error:  could not create input buffer in DetectFloatToHalfRoundingMode  (err: %d)", err );
             return err;
         }

     // Create a small output image
         cl_image_format fmt = { CL_RGBA, CL_HALF_FLOAT };
         cl_mem outImage = create_image_2d( context, CL_MEM_WRITE_ONLY, &fmt, count, 1, 0, NULL, &err );
         if( NULL == outImage || err )
         {
             log_error( "Error:  could not create half float out image in DetectFloatToHalfRoundingMode  (err: %d)", err );
             clReleaseMemObject( inBuf );
             return err;
         }

     // Create our program, and a kernel
         const char *kernel[1] = {
             "kernel void detect_round( global float4 *in, write_only image2d_t out )\n"
             "{\n"
             "   write_imagef( out, (int2)(get_global_id(0),0), in[get_global_id(0)] );\n"
             "}\n" };

         clProgramWrapper program;
         err = create_single_kernel_helper_create_program(context, &program, 1, kernel);

         if( NULL == program || err )
         {
             log_error( "Error:  could not create program in DetectFloatToHalfRoundingMode (err: %d)", err );
             clReleaseMemObject( inBuf );
             clReleaseMemObject( outImage );
             return err;
         }

         cl_device_id device = NULL;
         err = clGetCommandQueueInfo( q, CL_QUEUE_DEVICE, sizeof(device), &device, NULL );
         if( err )
         {
             log_error( "Error:  could not get device from command queue in DetectFloatToHalfRoundingMode  (%d)", err );
             clReleaseMemObject( inBuf );
             clReleaseMemObject( outImage );
             return err;
         }

         err = clBuildProgram( program, 1, &device, "", NULL, NULL );
         if( err )
         {
             log_error( "Error:  could not build program in DetectFloatToHalfRoundingMode  (%d)", err );
             clReleaseMemObject( inBuf );
             clReleaseMemObject( outImage );
             return err;
         }

         cl_kernel k = clCreateKernel( program, "detect_round", &err );
         if( NULL == k || err )
         {
             log_error( "Error:  could not create kernel in DetectFloatToHalfRoundingMode  (%d)", err );
             clReleaseMemObject( inBuf );
             clReleaseMemObject( outImage );
             return err;
         }

         err = clSetKernelArg( k, 0, sizeof( cl_mem ), &inBuf );
         if( err )
         {
             log_error( "Error: could not set argument 0 of kernel in DetectFloatToHalfRoundingMode (%d)", err );
             clReleaseMemObject( inBuf );
             clReleaseMemObject( outImage );
             clReleaseKernel( k );
             return err;
         }

         err = clSetKernelArg( k, 1, sizeof( cl_mem ), &outImage );
         if( err )
         {
             log_error( "Error: could not set argument 1 of kernel in DetectFloatToHalfRoundingMode (%d)", err );
             clReleaseMemObject( inBuf );
             clReleaseMemObject( outImage );
             clReleaseKernel( k );
             return err;
         }

     // Run the kernel
         size_t global_work_size = count;
         err = clEnqueueNDRangeKernel( q, k, 1, NULL, &global_work_size, NULL, 0, NULL, NULL );
         if( err )
         {
             log_error( "Error: could not enqueue kernel in DetectFloatToHalfRoundingMode (%d)", err );
             clReleaseMemObject( inBuf );
             clReleaseMemObject( outImage );
             clReleaseKernel( k );
             return err;
         }

     // read the results
         cl_ushort outBuf[count*4];
         memset( outBuf, -1, sizeof( outBuf ) );
         size_t origin[3] = {0,0,0};
         size_t region[3] = {count,1,1};
         err = clEnqueueReadImage( q, outImage, CL_TRUE, origin, region, 0, 0, outBuf, 0, NULL, NULL );
         if( err )
         {
             log_error( "Error: could not read output image in DetectFloatToHalfRoundingMode (%d)", err );
             clReleaseMemObject( inBuf );
             clReleaseMemObject( outImage );
             clReleaseKernel( k );
             return err;
         }

     // Generate our list of reference results
         cl_ushort rte_ref[count*4];
         cl_ushort rtz_ref[count*4];
         for( size_t i = 0; i < 4 * count; i++ )
         {
             rte_ref[i] = float2half_rte( inp[i] );
             rtz_ref[i] = float2half_rtz( inp[i] );
         }

     // Verify that we got something in either rtz or rte mode
         if( 0 == memcmp( rte_ref, outBuf, sizeof( rte_ref )) )
         {
             log_info( "Autodetected float->half rounding mode to be rte\n" );
             gFloatToHalfRoundingMode = kRoundToNearestEven;
         }
         else if ( 0 == memcmp( rtz_ref, outBuf, sizeof( rtz_ref )) )
         {
             log_info( "Autodetected float->half rounding mode to be rtz\n" );
             gFloatToHalfRoundingMode = kRoundTowardZero;
         }
         else
         {
             log_error( "ERROR: float to half conversions proceed with invalid rounding mode!\n" );
             log_info( "\nfor:" );
             for( size_t i = 0; i < count; i++ )
                 log_info( " {%a, %a, %a, %a},", inp[4*i], inp[4*i+1], inp[4*i+2], inp[4*i+3] );
             log_info( "\ngot:" );
             for( size_t i = 0; i < count; i++ )
                 log_info( " {0x%4.4x, 0x%4.4x, 0x%4.4x, 0x%4.4x},", outBuf[4*i], outBuf[4*i+1], outBuf[4*i+2], outBuf[4*i+3] );
             log_info( "\nrte:" );
             for( size_t i = 0; i < count; i++ )
                 log_info( " {0x%4.4x, 0x%4.4x, 0x%4.4x, 0x%4.4x},", rte_ref[4*i], rte_ref[4*i+1], rte_ref[4*i+2], rte_ref[4*i+3] );
             log_info( "\nrtz:" );
             for( size_t i = 0; i < count; i++ )
                 log_info( " {0x%4.4x, 0x%4.4x, 0x%4.4x, 0x%4.4x},", rtz_ref[4*i], rtz_ref[4*i+1], rtz_ref[4*i+2], rtz_ref[4*i+3] );
             log_info( "\n" );
             err = -1;
             gFloatToHalfRoundingMode = kRoundingModeCount;  // illegal value
         }

     // clean up
         clReleaseMemObject( inBuf );
         clReleaseMemObject( outImage );
         clReleaseKernel( k );
         return err;
     }

     // Make sure that the rounding mode was successfully detected, if we checked earlier
     if( gFloatToHalfRoundingMode != kRoundToNearestEven && gFloatToHalfRoundingMode != kRoundTowardZero)
         return -2;

     return err;
 }

 char *create_random_image_data( ExplicitType dataType, image_descriptor *imageInfo, BufferOwningPtr<char> &P, MTdata d, bool image2DFromBuffer )
 {
   size_t allocSize, numPixels;
   if ( /*gTestMipmaps*/ imageInfo->num_mip_levels > 1 )
   {
     allocSize = (size_t) (compute_mipmapped_image_size(*imageInfo) * 4 * get_explicit_type_size( dataType ))/get_pixel_size(imageInfo->format);
     numPixels = allocSize / (get_explicit_type_size( dataType ) * 4);
   }
   else
   {
     numPixels = (image2DFromBuffer? imageInfo->rowPitch: imageInfo->width) * imageInfo->height
       * (imageInfo->depth ? imageInfo->depth : 1)
       * (imageInfo->arraySize ? imageInfo->arraySize : 1);
     allocSize = numPixels * 4 * get_explicit_type_size( dataType );
   }

 #if 0 // DEBUG
     {
       fprintf(stderr,"--- create_random_image_data:\n");
       fprintf(stderr,"allocSize = %zu\n",allocSize);
       fprintf(stderr,"numPixels = %zu\n",numPixels);
       fprintf(stderr,"width = %zu\n",imageInfo->width);
       fprintf(stderr,"height = %zu\n",imageInfo->height);
       fprintf(stderr,"depth = %zu\n",imageInfo->depth);
       fprintf(stderr,"rowPitch = %zu\n",imageInfo->rowPitch);
       fprintf(stderr,"slicePitch = %zu\n",imageInfo->slicePitch);
       fprintf(stderr,"arraySize = %zu\n",imageInfo->arraySize);
       fprintf(stderr,"explicit_type_size = %zu\n",get_explicit_type_size(dataType));
     }
 #endif

 #if defined( __APPLE__ )
     char *data = NULL;
     if (gDeviceType == CL_DEVICE_TYPE_CPU) {
       size_t mapSize = ((allocSize + 4095L) & -4096L) + 8192; // alloc two extra pages.

       void *map = mmap(0, mapSize, PROT_READ | PROT_WRITE, MAP_ANON | MAP_PRIVATE, 0, 0);
       if (map == MAP_FAILED)
       {
         perror("create_random_image_data: mmap");
         log_error("%s:%d: mmap failed, mapSize = %zu\n",__FILE__,__LINE__,mapSize);
       }
       intptr_t data_end = (intptr_t)map + mapSize - 4096;
       data = (char *)(data_end - (intptr_t)allocSize);

       mprotect(map, 4096, PROT_NONE);
       mprotect((void *)((char *)map + mapSize - 4096), 4096, PROT_NONE);
       P.reset(data, map, mapSize);
     } else {
       data = (char *)malloc(allocSize);
       P.reset(data);
     }
 #else
     char *data = (char *)align_malloc(allocSize, get_pixel_size(imageInfo->format));
     P.reset(data,NULL,0,allocSize,true);
 #endif

     if (data == NULL) {
         log_error( "ERROR: Unable to malloc %lu bytes for create_random_image_data\n", allocSize );
         return NULL;
     }

     switch( dataType )
     {
         case kFloat:
         {
             float *inputValues = (float *)data;
             switch (imageInfo->format->image_channel_data_type)
             {
                 case CL_HALF_FLOAT:
                     {
                         // Generate data that is (mostly) inside the range of a half float
                         // const float HALF_MIN = 5.96046448e-08f;
                         const float HALF_MAX = 65504.0f;

                         size_t i = 0;
                         inputValues[ i++ ] = 0.f;
                         inputValues[ i++ ] = 1.f;
                         inputValues[ i++ ] = -1.f;
                         inputValues[ i++ ] = 2.f;
                         for( ; i < numPixels * 4; i++ )
                             inputValues[ i ] = get_random_float( -HALF_MAX - 2.f, HALF_MAX + 2.f, d );
                     }
                     break;
 #ifdef CL_SFIXED14_APPLE
                 case CL_SFIXED14_APPLE:
                     {
                         size_t i = 0;
                         if( numPixels * 4 >= 8 )
                         {
                             inputValues[ i++ ] = INFINITY;
                             inputValues[ i++ ] = 0x1.0p14f;
                             inputValues[ i++ ] = 0x1.0p31f;
                             inputValues[ i++ ] = 0x1.0p32f;
                             inputValues[ i++ ] = -INFINITY;
                             inputValues[ i++ ] = -0x1.0p14f;
                             inputValues[ i++ ] = -0x1.0p31f;
                             inputValues[ i++ ] = -0x1.1p31f;
                         }
                         for( ; i < numPixels * 4; i++ )
                             inputValues[ i ] = get_random_float( -1.1f, 3.1f, d );
                     }
                     break;
 #endif
                 case CL_FLOAT:
                     {
                         size_t i = 0;
                         inputValues[ i++ ] = INFINITY;
                         inputValues[ i++ ] = -INFINITY;
                         inputValues[ i++ ] = 0.0f;
                         inputValues[ i++ ] = 0.0f;
                         cl_uint *p = (cl_uint *)data;
                         for( ; i < numPixels * 4; i++ )
                             p[ i ] = genrand_int32(d);
                     }
                     break;

                 default:
                     size_t i = 0;
                     if( numPixels * 4 >= 36 )
                     {
                         inputValues[ i++ ] = 0.0f;
                         inputValues[ i++ ] = 0.5f;
                         inputValues[ i++ ] = 31.5f;
                         inputValues[ i++ ] = 32.0f;
                         inputValues[ i++ ] = 127.5f;
                         inputValues[ i++ ] = 128.0f;
                         inputValues[ i++ ] = 255.5f;
                         inputValues[ i++ ] = 256.0f;
                         inputValues[ i++ ] = 1023.5f;
                         inputValues[ i++ ] = 1024.0f;
                         inputValues[ i++ ] = 32767.5f;
                         inputValues[ i++ ] = 32768.0f;
                         inputValues[ i++ ] = 65535.5f;
                         inputValues[ i++ ] = 65536.0f;
                         inputValues[ i++ ] = 2147483648.0f;
                         inputValues[ i++ ] = 4294967296.0f;
                         inputValues[ i++ ] = MAKE_HEX_FLOAT( 0x1.0p63f, 1, 63 );
                         inputValues[ i++ ] = MAKE_HEX_FLOAT( 0x1.0p64f, 1, 64 );
                         inputValues[ i++ ] = -0.0f;
                         inputValues[ i++ ] = -0.5f;
                         inputValues[ i++ ] = -31.5f;
                         inputValues[ i++ ] = -32.0f;
                         inputValues[ i++ ] = -127.5f;
                         inputValues[ i++ ] = -128.0f;
                         inputValues[ i++ ] = -255.5f;
                         inputValues[ i++ ] = -256.0f;
                         inputValues[ i++ ] = -1023.5f;
                         inputValues[ i++ ] = -1024.0f;
                         inputValues[ i++ ] = -32767.5f;
                         inputValues[ i++ ] = -32768.0f;
                         inputValues[ i++ ] = -65535.5f;
                         inputValues[ i++ ] = -65536.0f;
                         inputValues[ i++ ] = -2147483648.0f;
                         inputValues[ i++ ] = -4294967296.0f;
                         inputValues[ i++ ] = -MAKE_HEX_FLOAT( 0x1.0p63f, 1, 63 );
                         inputValues[ i++ ] = -MAKE_HEX_FLOAT( 0x1.0p64f, 1, 64 );
                     }
                     if( is_format_signed(imageInfo->format) )
                     {
                         for( ; i < numPixels * 4; i++ )
                             inputValues[ i ] = get_random_float( -1.1f, 1.1f, d );
                     }
                     else
                     {
                         for( ; i < numPixels * 4; i++ )
                             inputValues[ i ] = get_random_float( -0.1f, 1.1f, d );
                     }
                     break;
             }
             break;
         }

         case kInt:
         {
             int *imageData = (int *)data;

             // We want to generate ints (mostly) in range of the target format
             int formatMin = get_format_min_int( imageInfo->format );
             size_t formatMax = get_format_max_int( imageInfo->format );
             if( formatMin == 0 )
             {
                 // Unsigned values, but we are only an int, so cap the actual max at the max of signed ints
                 if( formatMax > 2147483647L )
                     formatMax = 2147483647L;
             }
             // If the final format is small enough, give us a bit of room for out-of-range values to test
             if( formatMax < 2147483647L )
                 formatMax += 2;
             if( formatMin > -2147483648LL )
                 formatMin -= 2;

             // Now gen
             for( size_t i = 0; i < numPixels * 4; i++ )
             {
                 imageData[ i ] = random_in_range( formatMin, (int)formatMax, d );
             }
             break;
         }

         case kUInt:
         case kUnsignedInt:
         {
             unsigned int *imageData = (unsigned int *)data;

             // We want to generate ints (mostly) in range of the target format
             int formatMin = get_format_min_int( imageInfo->format );
             size_t formatMax = get_format_max_int( imageInfo->format );
             if( formatMin < 0 )
                 formatMin = 0;
             // If the final format is small enough, give us a bit of room for out-of-range values to test
             if( formatMax < 4294967295LL )
                 formatMax += 2;

             // Now gen
             for( size_t i = 0; i < numPixels * 4; i++ )
             {
                 imageData[ i ] = random_in_range( formatMin, (int)formatMax, d );
             }
             break;
         }
         default:
             // Unsupported source format
             delete [] data;
             return NULL;
     }

     return data;
 }

 /*
     deprecated
 bool clamp_image_coord( image_sampler_data *imageSampler, float value, size_t max, int &outValue )
 {
     int v = (int)value;

     switch(imageSampler->addressing_mode)
     {
         case CL_ADDRESS_REPEAT:
             outValue = v;
             while( v < 0 )
                 v += (int)max;
             while( v >= (int)max )
                 v -= (int)max;
             if( v != outValue )
             {
                 outValue = v;
                 return true;
             }
             return false;

         case CL_ADDRESS_MIRRORED_REPEAT:
             log_info( "ERROR: unimplemented for CL_ADDRESS_MIRRORED_REPEAT. Do we ever use this?
             exit(-1);

         default:
             if( v < 0 )
             {
                 outValue = 0;
                 return true;
             }
             if( v >= (int)max )
             {
                 outValue = (int)max - 1;
                 return true;
             }
             outValue = v;
             return false;
     }

 }
 */

 void get_sampler_kernel_code( image_sampler_data *imageSampler, char *outLine )
 {
     const char *normalized;
     const char *addressMode;
     const char *filterMode;

     if( imageSampler->addressing_mode == CL_ADDRESS_CLAMP )
         addressMode = "CLK_ADDRESS_CLAMP";
     else if( imageSampler->addressing_mode == CL_ADDRESS_CLAMP_TO_EDGE )
         addressMode = "CLK_ADDRESS_CLAMP_TO_EDGE";
     else if( imageSampler->addressing_mode == CL_ADDRESS_REPEAT )
         addressMode = "CLK_ADDRESS_REPEAT";
     else if( imageSampler->addressing_mode == CL_ADDRESS_MIRRORED_REPEAT )
         addressMode = "CLK_ADDRESS_MIRRORED_REPEAT";
     else if( imageSampler->addressing_mode == CL_ADDRESS_NONE )
         addressMode = "CLK_ADDRESS_NONE";
     else
     {
         log_error( "**Error: Unknown addressing mode! Aborting...\n" );
         abort();
     }

     if( imageSampler->normalized_coords )
         normalized = "CLK_NORMALIZED_COORDS_TRUE";
     else
         normalized = "CLK_NORMALIZED_COORDS_FALSE";

     if( imageSampler->filter_mode == CL_FILTER_LINEAR )
         filterMode = "CLK_FILTER_LINEAR";
     else
         filterMode = "CLK_FILTER_NEAREST";

     sprintf( outLine, "    const sampler_t imageSampler = %s | %s | %s;\n", addressMode, filterMode, normalized );
 }

 void copy_image_data( image_descriptor *srcImageInfo, image_descriptor *dstImageInfo, void *imageValues, void *destImageValues,
                      const size_t sourcePos[], const size_t destPos[], const size_t regionSize[] )
 {
   //  assert( srcImageInfo->format == dstImageInfo->format );

   size_t src_mip_level_offset = 0, dst_mip_level_offset = 0;
   size_t sourcePos_lod[3], destPos_lod[3], src_lod, dst_lod;
   size_t src_row_pitch_lod, src_slice_pitch_lod;
   size_t dst_row_pitch_lod, dst_slice_pitch_lod;

   size_t pixelSize = get_pixel_size( srcImageInfo->format );

   sourcePos_lod[0] = sourcePos[0];
   sourcePos_lod[1] = sourcePos[1];
   sourcePos_lod[2] = sourcePos[2];
   destPos_lod[0] = destPos[0];
   destPos_lod[1] = destPos[1];
   destPos_lod[2] = destPos[2];
   src_row_pitch_lod = srcImageInfo->rowPitch;
   dst_row_pitch_lod = dstImageInfo->rowPitch;
   src_slice_pitch_lod = srcImageInfo->slicePitch;
   dst_slice_pitch_lod = dstImageInfo->slicePitch;

   if( srcImageInfo->num_mip_levels > 1)
   {
     size_t src_width_lod = 1/*srcImageInfo->width*/;
     size_t src_height_lod = 1/*srcImageInfo->height*/;
     size_t src_depth_lod = 1/*srcImageInfo->depth*/;

     switch( srcImageInfo->type )
     {
     case CL_MEM_OBJECT_IMAGE1D:
       src_lod = sourcePos[1];
       sourcePos_lod[1] = sourcePos_lod[2] = 0;
       src_width_lod = (srcImageInfo->width >> src_lod ) ? ( srcImageInfo->width >> src_lod ): 1;
       break;
     case CL_MEM_OBJECT_IMAGE1D_ARRAY:
     case CL_MEM_OBJECT_IMAGE2D:
       src_lod = sourcePos[2];
       sourcePos_lod[1] = sourcePos[1];
       sourcePos_lod[2] = 0;
       src_width_lod = (srcImageInfo->width >> src_lod ) ? ( srcImageInfo->width >> src_lod ): 1;
       if( srcImageInfo->type == CL_MEM_OBJECT_IMAGE2D )
         src_height_lod = (srcImageInfo->height >> src_lod ) ? ( srcImageInfo->height >> src_lod ): 1;
       break;
     case CL_MEM_OBJECT_IMAGE2D_ARRAY:
     case CL_MEM_OBJECT_IMAGE3D:
       src_lod = sourcePos[3];
       sourcePos_lod[1] = sourcePos[1];
       sourcePos_lod[2] = sourcePos[2];
       src_width_lod = (srcImageInfo->width >> src_lod ) ? ( srcImageInfo->width >> src_lod ): 1;
       src_height_lod = (srcImageInfo->height >> src_lod ) ? ( srcImageInfo->height >> src_lod ): 1;
       if( srcImageInfo->type == CL_MEM_OBJECT_IMAGE3D )
         src_depth_lod = (srcImageInfo->depth >> src_lod ) ? ( srcImageInfo->depth >> src_lod ): 1;
       break;

     }
     src_mip_level_offset = compute_mip_level_offset( srcImageInfo, src_lod );
     src_row_pitch_lod = src_width_lod * get_pixel_size( srcImageInfo->format );
     src_slice_pitch_lod = src_row_pitch_lod * src_height_lod;
   }

   if( dstImageInfo->num_mip_levels > 1)
   {
     size_t dst_width_lod = 1/*dstImageInfo->width*/;
     size_t dst_height_lod = 1/*dstImageInfo->height*/;
     size_t dst_depth_lod = 1 /*dstImageInfo->depth*/;
     switch( dstImageInfo->type )
     {
     case CL_MEM_OBJECT_IMAGE1D:
       dst_lod = destPos[1];
       destPos_lod[1] = destPos_lod[2] = 0;
       dst_width_lod = (dstImageInfo->width >> dst_lod ) ? ( dstImageInfo->width >> dst_lod ): 1;
       break;
     case CL_MEM_OBJECT_IMAGE1D_ARRAY:
     case CL_MEM_OBJECT_IMAGE2D:
       dst_lod = destPos[2];
       destPos_lod[1] = destPos[1];
       destPos_lod[2] = 0;
       dst_width_lod = (dstImageInfo->width >> dst_lod ) ? ( dstImageInfo->width >> dst_lod ): 1;
       if( dstImageInfo->type == CL_MEM_OBJECT_IMAGE2D )
         dst_height_lod = (dstImageInfo->height >> dst_lod ) ? ( dstImageInfo->height >> dst_lod ): 1;
       break;
     case CL_MEM_OBJECT_IMAGE2D_ARRAY:
     case CL_MEM_OBJECT_IMAGE3D:
       dst_lod = destPos[3];
       destPos_lod[1] = destPos[1];
       destPos_lod[2] = destPos[2];
       dst_width_lod = (dstImageInfo->width >> dst_lod ) ? ( dstImageInfo->width >> dst_lod ): 1;
       dst_height_lod = (dstImageInfo->height >> dst_lod ) ? ( dstImageInfo->height >> dst_lod ): 1;
       if( dstImageInfo->type == CL_MEM_OBJECT_IMAGE3D )
         dst_depth_lod = (dstImageInfo->depth >> dst_lod ) ? ( dstImageInfo->depth >> dst_lod ): 1;
       break;

     }
     dst_mip_level_offset = compute_mip_level_offset( dstImageInfo, dst_lod );
     dst_row_pitch_lod = dst_width_lod * get_pixel_size( dstImageInfo->format);
     dst_slice_pitch_lod = dst_row_pitch_lod * dst_height_lod;
   }

   // Get initial pointers
   char *sourcePtr = (char *)imageValues + sourcePos_lod[ 2 ] * src_slice_pitch_lod + sourcePos_lod[ 1 ] * src_row_pitch_lod + pixelSize * sourcePos_lod[ 0 ] + src_mip_level_offset;
   char *destPtr = (char *)destImageValues + destPos_lod[ 2 ] * dst_slice_pitch_lod + destPos_lod[ 1 ] * dst_row_pitch_lod + pixelSize * destPos_lod[ 0 ] + dst_mip_level_offset;

   for( size_t z = 0; z < ( regionSize[ 2 ] > 0 ? regionSize[ 2 ] : 1 ); z++ )
   {
     char *rowSourcePtr = sourcePtr;
     char *rowDestPtr = destPtr;
     for( size_t y = 0; y < regionSize[ 1 ]; y++ )
     {
       memcpy( rowDestPtr, rowSourcePtr, pixelSize * regionSize[ 0 ] );
       rowSourcePtr += src_row_pitch_lod;
       rowDestPtr += dst_row_pitch_lod;
     }

     sourcePtr += src_slice_pitch_lod;
     destPtr += dst_slice_pitch_lod;
   }
 }

 float random_float(float low, float high, MTdata d)
 {
     float t = (float) genrand_real1(d);
     return (1.0f - t) * low + t * high;
 }

 CoordWalker::CoordWalker( void * coords, bool useFloats, size_t vecSize )
 {
     if( useFloats )
     {
         mFloatCoords = (cl_float *)coords;
         mIntCoords = NULL;
     }
     else
     {
         mFloatCoords = NULL;
         mIntCoords = (cl_int *)coords;
     }
     mVecSize = vecSize;
 }

 CoordWalker::~CoordWalker()
 {
 }

 cl_float CoordWalker::Get( size_t idx, size_t el )
 {
     if( mIntCoords != NULL )
         return (cl_float)mIntCoords[ idx * mVecSize + el ];
     else
         return mFloatCoords[ idx * mVecSize + el ];
 }


 void print_read_header( cl_image_format *format, image_sampler_data *sampler, bool err, int t )
 {
     const char *addressMode = NULL;
     const char *normalizedNames[2] = { "UNNORMALIZED", "NORMALIZED" };

     if( sampler->addressing_mode == CL_ADDRESS_CLAMP )
         addressMode = "CL_ADDRESS_CLAMP";
     else if( sampler->addressing_mode == CL_ADDRESS_CLAMP_TO_EDGE )
         addressMode = "CL_ADDRESS_CLAMP_TO_EDGE";
     else if( sampler->addressing_mode == CL_ADDRESS_REPEAT )
         addressMode = "CL_ADDRESS_REPEAT";
     else if( sampler->addressing_mode == CL_ADDRESS_MIRRORED_REPEAT )
         addressMode = "CL_ADDRESS_MIRRORED_REPEAT";
     else
         addressMode = "CL_ADDRESS_NONE";

     if( t )
     {
         if( err )
             log_error( "[%-7s %-24s %d] - %s - %s - %s - %s\n", GetChannelOrderName( format->image_channel_order ),
                       GetChannelTypeName( format->image_channel_data_type ),
                       (int)get_format_channel_count( format ),
                       sampler->filter_mode == CL_FILTER_NEAREST ? "CL_FILTER_NEAREST" : "CL_FILTER_LINEAR",
                       addressMode,
                       normalizedNames[sampler->normalized_coords ? 1 : 0],
                       t == 1 ? "TRANSPOSED" : "NON-TRANSPOSED" );
         else
             log_info( "[%-7s %-24s %d] - %s - %s - %s - %s\n", GetChannelOrderName( format->image_channel_order ),
                      GetChannelTypeName( format->image_channel_data_type ),
                      (int)get_format_channel_count( format ),
                      sampler->filter_mode == CL_FILTER_NEAREST ? "CL_FILTER_NEAREST" : "CL_FILTER_LINEAR",
                      addressMode,
                      normalizedNames[sampler->normalized_coords ? 1 : 0],
                      t == 1 ? "TRANSPOSED" : "NON-TRANSPOSED" );
     }
     else
     {
         if( err )
             log_error( "[%-7s %-24s %d] - %s - %s - %s\n", GetChannelOrderName( format->image_channel_order ),
                       GetChannelTypeName( format->image_channel_data_type ),
                       (int)get_format_channel_count( format ),
                       sampler->filter_mode == CL_FILTER_NEAREST ? "CL_FILTER_NEAREST" : "CL_FILTER_LINEAR",
                       addressMode,
                       normalizedNames[sampler->normalized_coords ? 1 : 0] );
         else
             log_info( "[%-7s %-24s %d] - %s - %s - %s\n", GetChannelOrderName( format->image_channel_order ),
                      GetChannelTypeName( format->image_channel_data_type ),
                      (int)get_format_channel_count( format ),
                      sampler->filter_mode == CL_FILTER_NEAREST ? "CL_FILTER_NEAREST" : "CL_FILTER_LINEAR",
                      addressMode,
                      normalizedNames[sampler->normalized_coords ? 1 : 0] );
     }

 }

 void print_write_header( cl_image_format *format, bool err = false)
 {
     if( err )
         log_error( "[%-7s %-24s %d]\n", GetChannelOrderName( format->image_channel_order ),
                   GetChannelTypeName( format->image_channel_data_type ),
                   (int)get_format_channel_count( format ) );
     else
         log_info( "[%-7s %-24s %d]\n", GetChannelOrderName( format->image_channel_order ),
                  GetChannelTypeName( format->image_channel_data_type ),
                  (int)get_format_channel_count( format ) );
 }


 void print_header( cl_image_format *format, bool err = false )
 {
     if (err) {
         log_error( "[%-7s %-24s %d]\n", GetChannelOrderName( format->image_channel_order ),
                   GetChannelTypeName( format->image_channel_data_type ),
                   (int)get_format_channel_count( format ) );
     } else {
         log_info( "[%-7s %-24s %d]\n", GetChannelOrderName( format->image_channel_order ),
                  GetChannelTypeName( format->image_channel_data_type ),
                  (int)get_format_channel_count( format ) );
     }
 }

 bool find_format( cl_image_format *formatList, unsigned int numFormats, cl_image_format *formatToFind )
 {
     for( unsigned int i = 0; i < numFormats; i++ )
     {
         if( formatList[ i ].image_channel_order == formatToFind->image_channel_order &&
            formatList[ i ].image_channel_data_type == formatToFind->image_channel_data_type )
             return true;
     }
     return false;
 }

 void build_required_image_formats(cl_mem_flags flags,
                                   cl_mem_object_type image_type,
                                   cl_device_id device,
                                   std::vector<cl_image_format>& formatsToSupport)
 {
 	Version version = get_device_cl_version(device);

 	formatsToSupport.clear();

 	// Required embedded formats.
 	static std::vector<cl_image_format> embeddedProfReadOrWriteFormats
 	{
 		{ CL_RGBA, CL_UNORM_INT8 },
 		{ CL_RGBA, CL_UNORM_INT16 },
 		{ CL_RGBA, CL_SIGNED_INT8 },
 		{ CL_RGBA, CL_SIGNED_INT16 },
 		{ CL_RGBA, CL_SIGNED_INT32 },
 		{ CL_RGBA, CL_UNSIGNED_INT8 },
 		{ CL_RGBA, CL_UNSIGNED_INT16 },
 		{ CL_RGBA, CL_UNSIGNED_INT32 },
 		{ CL_RGBA, CL_HALF_FLOAT },
 		{ CL_RGBA, CL_FLOAT },
 	};

 	/*
 		Required full profile formats.
 		This array does not contain any full profile
 		formats that have restrictions on when they
 		are required.
 	*/
 	static std::vector<cl_image_format> fullProfReadOrWriteFormats
 	{
 		{ CL_RGBA, CL_UNORM_INT8 },
 		{ CL_RGBA, CL_UNORM_INT16 },
 		{ CL_RGBA, CL_SIGNED_INT8 },
 		{ CL_RGBA, CL_SIGNED_INT16 },
 		{ CL_RGBA, CL_SIGNED_INT32 },
 		{ CL_RGBA, CL_UNSIGNED_INT8 },
 		{ CL_RGBA, CL_UNSIGNED_INT16 },
 		{ CL_RGBA, CL_UNSIGNED_INT32 },
 		{ CL_RGBA, CL_HALF_FLOAT },
 		{ CL_RGBA, CL_FLOAT },
 		{ CL_BGRA, CL_UNORM_INT8 },
 	};

 	/*
 		Required full profile formats specifically for 2.x.
 		This array does not contain any full profile
 		formats that have restrictions on when they
 		are required.
 	*/
 	static std::vector<cl_image_format> fullProf2XReadOrWriteFormats
 	{
 		{ CL_R, CL_UNORM_INT8 },
 		{ CL_R, CL_UNORM_INT16 },
 		{ CL_R, CL_SNORM_INT8 },
 		{ CL_R, CL_SNORM_INT16 },
 		{ CL_R, CL_SIGNED_INT8 },
 		{ CL_R, CL_SIGNED_INT16 },
 		{ CL_R, CL_SIGNED_INT32 },
 		{ CL_R, CL_UNSIGNED_INT8 },
 		{ CL_R, CL_UNSIGNED_INT16 },
 		{ CL_R, CL_UNSIGNED_INT32 },
 		{ CL_R, CL_HALF_FLOAT },
 		{ CL_R, CL_FLOAT },
 		{ CL_RG, CL_UNORM_INT8 },
 		{ CL_RG, CL_UNORM_INT16 },
 		{ CL_RG, CL_SNORM_INT8 },
 		{ CL_RG, CL_SNORM_INT16 },
 		{ CL_RG, CL_SIGNED_INT8 },
 		{ CL_RG, CL_SIGNED_INT16 },
 		{ CL_RG, CL_SIGNED_INT32 },
 		{ CL_RG, CL_UNSIGNED_INT8 },
 		{ CL_RG, CL_UNSIGNED_INT16 },
 		{ CL_RG, CL_UNSIGNED_INT32 },
 		{ CL_RG, CL_HALF_FLOAT },
 		{ CL_RG, CL_FLOAT },
 		{ CL_RGBA, CL_SNORM_INT8 },
 		{ CL_RGBA, CL_SNORM_INT16 },
 	};

 	/*
 		Required full profile formats for CL_DEPTH
 		(specifically 2.x).
 		There are cases whereby the format isn't required.
 	*/
 	static std::vector<cl_image_format> fullProf2XReadOrWriteDepthFormats
 	{
 		{ CL_DEPTH, CL_UNORM_INT16 },
 		{ CL_DEPTH, CL_FLOAT },
 	};

 	/*
 		Required full profile formats for CL_sRGB
 		(specifically 2.x).
 		There are cases whereby the format isn't required.
 	*/
 	static std::vector<cl_image_format> fullProf2XSRGBFormats
 	{
 		{ CL_sRGBA, CL_UNORM_INT8 },
 	};

 	// Embedded profile
 	if (gIsEmbedded)
 	{
 		copy(embeddedProfReadOrWriteFormats.begin(),
 		     embeddedProfReadOrWriteFormats.end(),
 		     back_inserter(formatsToSupport));
 	}
 	// Full profile
 	else
 	{
 		copy(fullProfReadOrWriteFormats.begin(),
 		     fullProfReadOrWriteFormats.end(),
 		     back_inserter(formatsToSupport));
 	}

 	// Full profile, OpenCL 2.0, 2.1, 2.2
 	if (!gIsEmbedded && version >= Version(2, 0) && version <= Version(2, 2))
 	{
 		copy(fullProf2XReadOrWriteFormats.begin(),
 		     fullProf2XReadOrWriteFormats.end(),
 		     back_inserter(formatsToSupport));

 		// Depth images are only required for 2DArray and 2D images
 		if (image_type == CL_MEM_OBJECT_IMAGE2D_ARRAY || image_type == CL_MEM_OBJECT_IMAGE2D)
 		{
 			copy(fullProf2XReadOrWriteDepthFormats.begin(),
 			     fullProf2XReadOrWriteDepthFormats.end(),
 			     back_inserter(formatsToSupport));
 		}

 		// sRGB is not required for 1DImage Buffers
 		if (image_type != CL_MEM_OBJECT_IMAGE1D_BUFFER)
 		{
 			// sRGB is only required for reading
 			if (flags == CL_MEM_READ_ONLY)
 			{
 				copy(fullProf2XSRGBFormats.begin(),
 				     fullProf2XSRGBFormats.end(),
 				     back_inserter(formatsToSupport));
 			}
 		}
 	}
 }

 bool is_image_format_required(cl_image_format format,
                               cl_mem_flags flags,
                               cl_mem_object_type image_type,
                               cl_device_id device)
 {
 	std::vector<cl_image_format> formatsToSupport;
 	build_required_image_formats(flags, image_type, device, formatsToSupport);

 	for (auto &formatItr: formatsToSupport)
 	{
 		if (formatItr.image_channel_order == format.image_channel_order &&
 		    formatItr.image_channel_data_type == format.image_channel_data_type)
 		{
 			return true;
 		}
 	}

 	return false;
 }

 cl_uint compute_max_mip_levels( size_t width, size_t height, size_t depth)
 {
   cl_uint retMaxMipLevels=0, max_dim = 0;

   max_dim = width;
   max_dim = height > max_dim ? height : max_dim;
   max_dim = depth > max_dim ? depth : max_dim;

   while(max_dim) {
     retMaxMipLevels++;
     max_dim >>= 1;
   }
   return retMaxMipLevels;
 }

 cl_ulong compute_mipmapped_image_size( image_descriptor imageInfo)
 {
   cl_ulong retSize = 0;
   size_t curr_width, curr_height, curr_depth, curr_array_size;
   curr_width = imageInfo.width;
   curr_height = imageInfo.height;
   curr_depth = imageInfo.depth;
   curr_array_size = imageInfo.arraySize;

   for (int i=0; i < (int) imageInfo.num_mip_levels; i++)
   {
     switch ( imageInfo.type )
     {
     case CL_MEM_OBJECT_IMAGE3D :
       retSize += (cl_ulong)curr_width * curr_height * curr_depth * get_pixel_size(imageInfo.format);
       break;
     case CL_MEM_OBJECT_IMAGE2D :
       retSize += (cl_ulong)curr_width * curr_height * get_pixel_size(imageInfo.format);
       break;
     case CL_MEM_OBJECT_IMAGE1D :
       retSize += (cl_ulong)curr_width * get_pixel_size(imageInfo.format);
       break;
     case CL_MEM_OBJECT_IMAGE1D_ARRAY :
       retSize += (cl_ulong)curr_width * curr_array_size * get_pixel_size(imageInfo.format);
       break;
     case CL_MEM_OBJECT_IMAGE2D_ARRAY :
       retSize += (cl_ulong)curr_width * curr_height * curr_array_size * get_pixel_size(imageInfo.format);
       break;
     }

     switch ( imageInfo.type )
     {
     case CL_MEM_OBJECT_IMAGE3D :
       curr_depth = curr_depth >> 1 ? curr_depth >> 1: 1;
     case CL_MEM_OBJECT_IMAGE2D :
     case CL_MEM_OBJECT_IMAGE2D_ARRAY :
       curr_height = curr_height >> 1? curr_height >> 1 : 1;
     case CL_MEM_OBJECT_IMAGE1D :
     case CL_MEM_OBJECT_IMAGE1D_ARRAY :
       curr_width = curr_width >> 1? curr_width >> 1 : 1;
     }
   }

   return retSize;
 }

 size_t compute_mip_level_offset( image_descriptor * imageInfo , size_t lod)
 {
   size_t retOffset = 0;
   size_t width, height,  depth;
   width = imageInfo->width;
   height = imageInfo->height;
   depth = imageInfo->depth;

   for(size_t i=0; i < lod; i++)
   {
     switch(imageInfo->type)
     {
     case CL_MEM_OBJECT_IMAGE2D_ARRAY:
       retOffset += (size_t) width * height * imageInfo->arraySize * get_pixel_size( imageInfo->format );
       break;
     case CL_MEM_OBJECT_IMAGE3D:
       retOffset += (size_t) width * height * depth * get_pixel_size( imageInfo->format );
       break;
     case CL_MEM_OBJECT_IMAGE1D_ARRAY:
       retOffset += (size_t) width * imageInfo->arraySize * get_pixel_size( imageInfo->format );
       break;
     case CL_MEM_OBJECT_IMAGE2D:
       retOffset += (size_t) width * height * get_pixel_size( imageInfo->format );
       break;
     case CL_MEM_OBJECT_IMAGE1D:
       retOffset += (size_t) width * get_pixel_size( imageInfo->format );
       break;
     }

     // Compute next lod dimensions
     switch(imageInfo->type)
     {
     case CL_MEM_OBJECT_IMAGE3D:
       depth = ( depth >> 1 ) ? ( depth >> 1 ) : 1;
     case CL_MEM_OBJECT_IMAGE2D:
     case CL_MEM_OBJECT_IMAGE2D_ARRAY:
       height = ( height >> 1 ) ? ( height >> 1 ) : 1;
     case CL_MEM_OBJECT_IMAGE1D_ARRAY:
     case CL_MEM_OBJECT_IMAGE1D:
       width = ( width >> 1 ) ? ( width >> 1 ) : 1;
     }

   }
   return retOffset;
 }