tests/tests/renderscript/src/android/renderscript/cts/reduce.rs - platform/cts - Git at Google

 /*
  * Copyright (C) 2016 The Android Open Source Project
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
  * You may obtain a copy of the License at
  *
  *      http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */

 #include "shared.rsh"

 float negInf, posInf;

 static half negInfHalf, posInfHalf;

 // At present, no support for global of type half, or for invokable
 // taking an argument of type half.
 static void translate(half *tgt, const short src) {
   for (int i = 0; i < sizeof(half); ++i)
     ((char *)tgt)[i] = ((const char *)&src)[i];
 }
 void setInfsHalf(short forNegInfHalf, short forPosInfHalf) {
   translate(&negInfHalf, forNegInfHalf);
   translate(&posInfHalf, forPosInfHalf);
 }

 /////////////////////////////////////////////////////////////////////////

 #pragma rs reduce(addint) \
   accumulator(aiAccum)

 static void aiAccum(int *accum, int val) { *accum += val; }

 /////////////////////////////////////////////////////////////////////////

 // Finds LOCATION of min and max float values

 #pragma rs reduce(findMinAndMax) \
   initializer(fMMInit) accumulator(fMMAccumulator) \
   combiner(fMMCombiner) outconverter(fMMOutConverter)

 typedef struct {
   float val;
   int idx;
 } IndexedVal;

 typedef struct {
   IndexedVal min, max;
 } MinAndMax;

 static void fMMInit(MinAndMax *accum) {
   accum->min.val = posInf;
   accum->min.idx = -1;
   accum->max.val = negInf;
   accum->max.idx = -1;
 }

 static void fMMAccumulator(MinAndMax *accum, float in, int x) {
   IndexedVal me;
   me.val = in;
   me.idx = x;

   if (me.val <= accum->min.val)
     accum->min = me;
   if (me.val >= accum->max.val)
     accum->max = me;
 }

 static void fMMCombiner(MinAndMax *accum,
                         const MinAndMax *val) {
   if ((accum->min.idx < 0) || (val->min.val < accum->min.val))
     accum->min = val->min;
   if ((accum->max.idx < 0) || (val->max.val > accum->max.val))
     accum->max = val->max;
 }

 static void fMMOutConverter(int2 *result,
                             const MinAndMax *val) {
   result->x = val->min.idx;
   result->y = val->max.idx;
 }

 /////////////////////////////////////////////////////////////////////////

 // finds min and max half values (not their locations)

 // tests half input and half2 result

 // .. reduction form

 #pragma rs reduce(findMinAndMaxHalf) \
   initializer(fMMHalfInit) accumulator(fMMHalfAccumulator) \
   combiner(fMMHalfCombiner) outconverter(fMMHalfOutConverter)

 typedef struct {
   half min, max;
 } MinAndMaxHalf;

 static void fMMHalfInit(MinAndMaxHalf *accum) {
   accum->min = posInfHalf;
   accum->max = negInfHalf;
 }

 static void fMMHalfAccumulator(MinAndMaxHalf *accum, half in) {
   accum->min = fmin(accum->min, in);
   accum->max = fmax(accum->max, in);
 }

 static void fMMHalfCombiner(MinAndMaxHalf *accum,
                             const MinAndMaxHalf *val) {
   accum->min = fmin(accum->min, val->min);
   accum->max = fmax(accum->max, val->max);
 }

 static void fMMHalfOutConverter(half2 *result,
                                 const MinAndMaxHalf *val) {
   result->x = val->min;
   result->y = val->max;
 }

 // .. invokable (non reduction) form (no support for half computations in Java)

 void findMinAndMaxHalf(rs_allocation out, rs_allocation in) {
   half min = posInfHalf, max = negInfHalf;

   const uint32_t len = rsAllocationGetDimX(in);
   for (uint32_t idx = 0; idx < len; ++idx) {
     const half val = rsGetElementAt_half(in, idx);
     min = fmin(min, val);
     max = fmax(max, val);
   }

   half2 result;
   result.x = min;
   result.y = max;
   rsSetElementAt_half2(out, result, 0);
 }

 // tests half input and array of half result;
 //   reuses functions of findMinAndMaxHalf reduction kernel

 #pragma rs reduce(findMinAndMaxHalfIntoArray) \
   initializer(fMMHalfInit) accumulator(fMMHalfAccumulator) \
   combiner(fMMHalfCombiner) outconverter(fMMHalfOutConverterIntoArray)

 static void fMMHalfOutConverterIntoArray(half (*result)[2],
                                          const MinAndMaxHalf *val) {
   (*result)[0] = val->min;
   (*result)[1] = val->max;
 }

 /////////////////////////////////////////////////////////////////////////

 // finds min and max half2 values (not their locations), element-wise:
 //   result[0].x = fmin(input[...].x)
 //   result[0].y = fmin(input[...].y)
 //   result[1].x = fmax(input[...].x)
 //   result[1].y = fmax(input[...].y)

 // tests half2 input and half2[] result

 // .. reduction form

 #pragma rs reduce(findMinAndMaxHalf2) \
   initializer(fMMHalf2Init) accumulator(fMMHalf2Accumulator) \
   combiner(fMMHalf2Combiner) outconverter(fMMHalf2OutConverter)

 typedef struct {
   half2 min, max;
 } MinAndMaxHalf2;

 static void fMMHalf2Init(MinAndMaxHalf2 *accum) {
   accum->min.x = posInfHalf;
   accum->min.y = posInfHalf;
   accum->max.x = negInfHalf;
   accum->max.y = negInfHalf;
 }

 static void fMMHalf2Accumulator(MinAndMaxHalf2 *accum, half2 in) {
   accum->min.x = fmin(accum->min.x, in.x);
   accum->min.y = fmin(accum->min.y, in.y);
   accum->max.x = fmax(accum->max.x, in.x);
   accum->max.y = fmax(accum->max.y, in.y);
 }

 static void fMMHalf2Combiner(MinAndMaxHalf2 *accum,
                             const MinAndMaxHalf2 *val) {
   accum->min.x = fmin(accum->min.x, val->min.x);
   accum->min.y = fmin(accum->min.y, val->min.y);
   accum->max.x = fmax(accum->max.x, val->max.x);
   accum->max.y = fmax(accum->max.y, val->max.y);
 }

 typedef half2 ArrayOf2Half2[2];

 static void fMMHalf2OutConverter(ArrayOf2Half2 *result,
                                 const MinAndMaxHalf2 *val) {
   (*result)[0] = val->min;
   (*result)[1] = val->max;
 }

 // .. invokable (non reduction) form (no support for half computations in Java)

 void findMinAndMaxHalf2(rs_allocation out, rs_allocation in) {
   half2 min = { posInfHalf, posInfHalf }, max = { negInfHalf, negInfHalf };

   const uint32_t len = rsAllocationGetDimX(in);
   for (uint32_t idx = 0; idx < len; ++idx) {
     const half2 val = rsGetElementAt_half2(in, idx);
     min.x = fmin(min.x, val.x);
     min.y = fmin(min.y, val.y);
     max.x = fmax(max.x, val.x);
     max.y = fmax(max.y, val.y);
   }

   rsSetElementAt_half2(out, min, 0);
   rsSetElementAt_half2(out, max, 1);
 }

 /////////////////////////////////////////////////////////////////////////

 // finds min values (not their locations) from matrix input

 // tests matrix input and matrix accumulator

 #pragma rs reduce(findMinMat) \
   initializer(fMinMatInit) accumulator(fMinMatAccumulator) \
   outconverter(fMinMatOutConverter)

 static void fMinMatInit(rs_matrix2x2 *accum) {
   for (int i = 0; i < 2; ++i)
     for (int j = 0; j < 2; ++j)
       rsMatrixSet(accum, i, j, posInf);
 }

 static void fMinMatAccumulator(rs_matrix2x2 *accum, rs_matrix2x2 val) {
   for (int i = 0; i < 2; ++i) {
     for (int j = 0; j < 2; ++j) {
       const float accumElt = rsMatrixGet(accum, i, j);
       const float valElt = rsMatrixGet(&val, i, j);
       if (valElt < accumElt)
         rsMatrixSet(accum, i, j, valElt);
     }
   }
 }

 // reduction does not support matrix result, so use array instead
 static void fMinMatOutConverter(float (*result)[4],  const rs_matrix2x2 *accum) {
   for (int i = 0; i < 4; ++i)
     (*result)[i] = accum->m[i];
 }

 /////////////////////////////////////////////////////////////////////////

 // finds min and max values (not their locations) from matrix input

 // tests matrix input and array of matrix accumulator (0 = min, 1 = max)

 #pragma rs reduce(findMinAndMaxMat) \
   initializer(fMinMaxMatInit) accumulator(fMinMaxMatAccumulator) \
   combiner(fMinMaxMatCombiner) outconverter(fMinMaxMatOutConverter)

 typedef rs_matrix2x2 MatrixPair[2];
 enum MatrixPairEntry { MPE_Min = 0, MPE_Max = 1 };  // indices into MatrixPair

 static void fMinMaxMatInit(MatrixPair *accum) {
   for (int i = 0; i < 2; ++i) {
     for (int j = 0; j < 2; ++j) {
       rsMatrixSet(&(*accum)[MPE_Min], i, j, posInf);
       rsMatrixSet(&(*accum)[MPE_Max], i, j, negInf);
     }
   }
 }

 static void fMinMaxMatAccumulator(MatrixPair *accum, rs_matrix2x2 val) {
   for (int i = 0; i < 2; ++i) {
     for (int j = 0; j < 2; ++j) {
       const float valElt = rsMatrixGet(&val, i, j);

       const float minElt = rsMatrixGet(&(*accum)[MPE_Min], i, j);
       rsMatrixSet(&(*accum)[MPE_Min], i, j, fmin(minElt, valElt));

       const float maxElt = rsMatrixGet(&(*accum)[MPE_Max], i, j);
       rsMatrixSet(&(*accum)[MPE_Max], i, j, fmax(maxElt, valElt));
     }
   }
 }

 static void fMinMaxMatCombiner(MatrixPair *accum, const MatrixPair *other) {
   for (int i = 0; i < 2; ++i) {
     for (int j = 0; j < 2; ++j) {
       const float minElt = rsMatrixGet(&(*accum)[MPE_Min], i, j);
       const float minEltOther = rsMatrixGet(&(*other)[MPE_Min], i, j);
       rsMatrixSet(&(*accum)[MPE_Min], i, j, fmin(minElt, minEltOther));

       const float maxElt = rsMatrixGet(&(*accum)[MPE_Max], i, j);
       const float maxEltOther = rsMatrixGet(&(*other)[MPE_Max], i, j);
       rsMatrixSet(&(*accum)[MPE_Max], i, j, fmax(maxElt, maxEltOther));
     }
   }
 }

 // reduction does not support matrix result, so use array instead
 static void fMinMaxMatOutConverter(float (*result)[8],  const MatrixPair *accum) {
   for (int i = 0; i < 4; ++i) {
     (*result)[i+0] = (*accum)[MPE_Min].m[i];
     (*result)[i+4] = (*accum)[MPE_Max].m[i];
   }
 }

 /////////////////////////////////////////////////////////////////////////

 #pragma rs reduce(fz) \
   initializer(fzInit) \
   accumulator(fzAccum) combiner(fzCombine)

 static void fzInit(int *accumIdx) { *accumIdx = -1; }

 static void fzAccum(int *accumIdx,
                     int inVal, int x /* special arg */) {
   if (inVal==0) *accumIdx = x;
 }

 static void fzCombine(int *accumIdx, const int *accumIdx2) {
   if (*accumIdx2 >= 0) *accumIdx = *accumIdx2;
 }

 /////////////////////////////////////////////////////////////////////////

 #pragma rs reduce(fz2) \
   initializer(fz2Init) \
   accumulator(fz2Accum) combiner(fz2Combine)

 static void fz2Init(int2 *accum) { accum->x = accum->y = -1; }

 static void fz2Accum(int2 *accum,
                      int inVal,
                      int x /* special arg */,
                      int y /* special arg */) {
   if (inVal==0) {
     accum->x = x;
     accum->y = y;
   }
 }

 static void fz2Combine(int2 *accum, const int2 *accum2) {
   if (accum2->x >= 0) *accum = *accum2;
 }

 /////////////////////////////////////////////////////////////////////////

 #pragma rs reduce(fz3) \
   initializer(fz3Init) \
   accumulator(fz3Accum) combiner(fz3Combine)

 static void fz3Init(int3 *accum) { accum->x = accum->y = accum->z = -1; }

 static void fz3Accum(int3 *accum,
                      int inVal,
                      int x /* special arg */,
                      int y /* special arg */,
                      int z /* special arg */) {
   if (inVal==0) {
     accum->x = x;
     accum->y = y;
     accum->z = z;
   }
 }

 static void fz3Combine(int3 *accum, const int3 *accum2) {
   if (accum2->x >= 0) *accum = *accum2;
 }

 /////////////////////////////////////////////////////////////////////////

 #pragma rs reduce(histogram) \
   accumulator(hsgAccum) combiner(hsgCombine)

 #define BUCKETS 256
 typedef uint32_t Histogram[BUCKETS];

 static void hsgAccum(Histogram *h, uchar in) { ++(*h)[in]; }

 static void hsgCombine(Histogram *accum, const Histogram *addend) {
   for (int i = 0; i < BUCKETS; ++i)
     (*accum)[i] += (*addend)[i];
 }

 #pragma rs reduce(mode) \
   accumulator(hsgAccum) combiner(hsgCombine) \
   outconverter(modeOutConvert)

 static void modeOutConvert(int2 *result, const Histogram *h) {
   uint32_t mode = 0;
   for (int i = 1; i < BUCKETS; ++i)
     if ((*h)[i] > (*h)[mode]) mode = i;
   result->x = mode;
   result->y = (*h)[mode];
 }

 /////////////////////////////////////////////////////////////////////////

 // Simple test case where there are two inputs
 #pragma rs reduce(sumxor) accumulator(sxAccum) combiner(sxCombine)

 static void sxAccum(int *accum, int inVal1, int inVal2) { *accum += (inVal1 ^ inVal2); }

 static void sxCombine(int *accum, const int *accum2) { *accum += *accum2; }

 /////////////////////////////////////////////////////////////////////////

 // Test case where inputs are of different types
 #pragma rs reduce(sillysum) accumulator(ssAccum) combiner(ssCombine)

 static void ssAccum(long *accum, char c, float f, int3 i3) {
   *accum += ((((c + (long)ceil(log(f))) + i3.x) + i3.y) + i3.z);
 }

 static void ssCombine(long *accum, const long *accum2) { *accum += *accum2; }

 /////////////////////////////////////////////////////////////////////////

 // Test out-of-range result.

 // When a result is ulong, it can take on values not representable on
 // the Java side, where there are no unsigned integral types and long
 // is the largest integral type -- i.e., all values in the range
 // (MAX_LONG, MAX_ULONG] are not representable in Java.  The reflected
 // result_*.get() methods throw an exception if the result value is
 // out of range.  The globals and reduction kernels below allow a test
 // case on the Java side to describe what kind of result we should
 // produce -- in particular, what to use for an in-range value and an
 // out-of-range value, and where (if anywhere) to put an out-of-range
 // value within the result (which might be scalar, vector, array of
 // scalar, or array of vector).

 // We don't care about the input at all.
 // We use these globals to configure the generation of the result.
 // A kernel puts 2*oorrBadResultHalf in the position (if any) of the result
 // given by oorrBadResult, and oorrGoodResult everywhere else.
 // The oorrBadPos encoding is as follows:
 // - For scalar result, 0 = scalar; anything else = nowhere
 // - For vector result, 0..length(vector)-1 = corresponding vector component
 //     (0 = x, 1 = y, 2 = z, 3 = w); anything else = nowhere
 // - For array of scalar result, 0..length(array)-1 = corresponding array element;
 //     anything else = nowhere
 // - For array of vector result, 0..length(vector)*length(array)-1 = corresponding
 //     vector component C of corresponding array element E; anything else = nowhere
 //     (encoding is C + length(vector)*E)
 ulong oorrGoodResult;     // the value of a good result
 ulong oorrBadResultHalf;  // half the value of a bad result
                           //   ("half" because Java can only set the global from long not from ulong)
 int   oorrBadPos;         // position of bad result

 #define oorrBadResult (2*oorrBadResultHalf)

 static void oorrAccum(int *accum, int val) { }

 #pragma rs reduce(oorrSca) accumulator(oorrAccum) outconverter(oorrScaOut)
 static void oorrScaOut(ulong *out, const int *accum) {
   *out = (oorrBadPos ? oorrGoodResult : oorrBadResult);
 }

 #pragma rs reduce(oorrVec4) accumulator(oorrAccum) outconverter(oorrVec4Out)
 static void oorrVec4Out(ulong4 *out, const int *accum) {
   out->x = (oorrBadPos==0 ? oorrBadResult : oorrGoodResult);
   out->y = (oorrBadPos==1 ? oorrBadResult : oorrGoodResult);
   out->z = (oorrBadPos==2 ? oorrBadResult : oorrGoodResult);
   out->w = (oorrBadPos==3 ? oorrBadResult : oorrGoodResult);
 }

 #pragma rs reduce(oorrArr9) accumulator(oorrAccum) outconverter(oorrArr9Out)
 typedef ulong Arr9[9];
 static void oorrArr9Out(Arr9 *out, const int *accum) {
   for (int i = 0; i < 9; ++i)
     (*out)[i] = (i == oorrBadPos ? oorrBadResult : oorrGoodResult);
 }

 #pragma rs reduce(oorrArr9Vec4) accumulator(oorrAccum) outconverter(oorrArr9Vec4Out)
 typedef ulong4 Arr9Vec4[9];
 static void oorrArr9Vec4Out(Arr9Vec4 *out, const int *accum) {
   const int badIdx = (oorrBadPos >= 0 ? oorrBadPos / 4: -1);
   const int badComp = (oorrBadPos >= 0 ? oorrBadPos % 4: -1);
   for (int i = 0; i < 9; ++i) {
     (*out)[i].x = ((i==badIdx) && (0==badComp)) ? oorrBadResult : oorrGoodResult;
     (*out)[i].y = ((i==badIdx) && (1==badComp)) ? oorrBadResult : oorrGoodResult;
     (*out)[i].z = ((i==badIdx) && (2==badComp)) ? oorrBadResult : oorrGoodResult;
     (*out)[i].w = ((i==badIdx) && (3==badComp)) ? oorrBadResult : oorrGoodResult;
   }
 }
	/*
	* Copyright (C) 2016 The Android Open Source Project
	*
	* Licensed under the Apache License, Version 2.0 (the "License");
	* you may not use this file except in compliance with the License.
	* You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/

	#include "shared.rsh"

	float negInf, posInf;

	static half negInfHalf, posInfHalf;

	// At present, no support for global of type half, or for invokable
	// taking an argument of type half.
	static void translate(half *tgt, const short src) {
	for (int i = 0; i < sizeof(half); ++i)
	((char )tgt)[i] = ((const char )&src)[i];
	}
	void setInfsHalf(short forNegInfHalf, short forPosInfHalf) {
	translate(&negInfHalf, forNegInfHalf);
	translate(&posInfHalf, forPosInfHalf);
	}

	/////////////////////////////////////////////////////////////////////////

	#pragma rs reduce(addint) \
	accumulator(aiAccum)

	static void aiAccum(int accum, int val) { accum += val; }

	/////////////////////////////////////////////////////////////////////////

	// Finds LOCATION of min and max float values

	#pragma rs reduce(findMinAndMax) \
	initializer(fMMInit) accumulator(fMMAccumulator) \
	combiner(fMMCombiner) outconverter(fMMOutConverter)

	typedef struct {
	float val;
	int idx;
	} IndexedVal;

	typedef struct {
	IndexedVal min, max;
	} MinAndMax;

	static void fMMInit(MinAndMax *accum) {
	accum->min.val = posInf;
	accum->min.idx = -1;
	accum->max.val = negInf;
	accum->max.idx = -1;
	}

	static void fMMAccumulator(MinAndMax *accum, float in, int x) {
	IndexedVal me;
	me.val = in;
	me.idx = x;

	if (me.val <= accum->min.val)
	accum->min = me;
	if (me.val >= accum->max.val)
	accum->max = me;
	}

	static void fMMCombiner(MinAndMax *accum,
	const MinAndMax *val) {
	if ((accum->min.idx < 0) \|\| (val->min.val < accum->min.val))
	accum->min = val->min;
	if ((accum->max.idx < 0) \|\| (val->max.val > accum->max.val))
	accum->max = val->max;
	}

	static void fMMOutConverter(int2 *result,
	const MinAndMax *val) {
	result->x = val->min.idx;
	result->y = val->max.idx;
	}

	/////////////////////////////////////////////////////////////////////////

	// finds min and max half values (not their locations)

	// tests half input and half2 result

	// .. reduction form

	#pragma rs reduce(findMinAndMaxHalf) \
	initializer(fMMHalfInit) accumulator(fMMHalfAccumulator) \
	combiner(fMMHalfCombiner) outconverter(fMMHalfOutConverter)

	typedef struct {
	half min, max;
	} MinAndMaxHalf;

	static void fMMHalfInit(MinAndMaxHalf *accum) {
	accum->min = posInfHalf;
	accum->max = negInfHalf;
	}

	static void fMMHalfAccumulator(MinAndMaxHalf *accum, half in) {
	accum->min = fmin(accum->min, in);
	accum->max = fmax(accum->max, in);
	}

	static void fMMHalfCombiner(MinAndMaxHalf *accum,
	const MinAndMaxHalf *val) {
	accum->min = fmin(accum->min, val->min);
	accum->max = fmax(accum->max, val->max);
	}

	static void fMMHalfOutConverter(half2 *result,
	const MinAndMaxHalf *val) {
	result->x = val->min;
	result->y = val->max;
	}

	// .. invokable (non reduction) form (no support for half computations in Java)

	void findMinAndMaxHalf(rs_allocation out, rs_allocation in) {
	half min = posInfHalf, max = negInfHalf;

	const uint32_t len = rsAllocationGetDimX(in);
	for (uint32_t idx = 0; idx < len; ++idx) {
	const half val = rsGetElementAt_half(in, idx);
	min = fmin(min, val);
	max = fmax(max, val);
	}

	half2 result;
	result.x = min;
	result.y = max;
	rsSetElementAt_half2(out, result, 0);
	}

	// tests half input and array of half result;
	// reuses functions of findMinAndMaxHalf reduction kernel

	#pragma rs reduce(findMinAndMaxHalfIntoArray) \
	initializer(fMMHalfInit) accumulator(fMMHalfAccumulator) \
	combiner(fMMHalfCombiner) outconverter(fMMHalfOutConverterIntoArray)

	static void fMMHalfOutConverterIntoArray(half (*result)[2],
	const MinAndMaxHalf *val) {
	(*result)[0] = val->min;
	(*result)[1] = val->max;
	}

	/////////////////////////////////////////////////////////////////////////

	// finds min and max half2 values (not their locations), element-wise:
	// result[0].x = fmin(input[...].x)
	// result[0].y = fmin(input[...].y)
	// result[1].x = fmax(input[...].x)
	// result[1].y = fmax(input[...].y)

	// tests half2 input and half2[] result

	// .. reduction form

	#pragma rs reduce(findMinAndMaxHalf2) \
	initializer(fMMHalf2Init) accumulator(fMMHalf2Accumulator) \
	combiner(fMMHalf2Combiner) outconverter(fMMHalf2OutConverter)

	typedef struct {
	half2 min, max;
	} MinAndMaxHalf2;

	static void fMMHalf2Init(MinAndMaxHalf2 *accum) {
	accum->min.x = posInfHalf;
	accum->min.y = posInfHalf;
	accum->max.x = negInfHalf;
	accum->max.y = negInfHalf;
	}

	static void fMMHalf2Accumulator(MinAndMaxHalf2 *accum, half2 in) {
	accum->min.x = fmin(accum->min.x, in.x);
	accum->min.y = fmin(accum->min.y, in.y);
	accum->max.x = fmax(accum->max.x, in.x);
	accum->max.y = fmax(accum->max.y, in.y);
	}

	static void fMMHalf2Combiner(MinAndMaxHalf2 *accum,
	const MinAndMaxHalf2 *val) {
	accum->min.x = fmin(accum->min.x, val->min.x);
	accum->min.y = fmin(accum->min.y, val->min.y);
	accum->max.x = fmax(accum->max.x, val->max.x);
	accum->max.y = fmax(accum->max.y, val->max.y);
	}

	typedef half2 ArrayOf2Half2[2];

	static void fMMHalf2OutConverter(ArrayOf2Half2 *result,
	const MinAndMaxHalf2 *val) {
	(*result)[0] = val->min;
	(*result)[1] = val->max;
	}

	// .. invokable (non reduction) form (no support for half computations in Java)

	void findMinAndMaxHalf2(rs_allocation out, rs_allocation in) {
	half2 min = { posInfHalf, posInfHalf }, max = { negInfHalf, negInfHalf };

	const uint32_t len = rsAllocationGetDimX(in);
	for (uint32_t idx = 0; idx < len; ++idx) {
	const half2 val = rsGetElementAt_half2(in, idx);
	min.x = fmin(min.x, val.x);
	min.y = fmin(min.y, val.y);
	max.x = fmax(max.x, val.x);
	max.y = fmax(max.y, val.y);
	}

	rsSetElementAt_half2(out, min, 0);
	rsSetElementAt_half2(out, max, 1);
	}

	/////////////////////////////////////////////////////////////////////////

	// finds min values (not their locations) from matrix input

	// tests matrix input and matrix accumulator

	#pragma rs reduce(findMinMat) \
	initializer(fMinMatInit) accumulator(fMinMatAccumulator) \
	outconverter(fMinMatOutConverter)

	static void fMinMatInit(rs_matrix2x2 *accum) {
	for (int i = 0; i < 2; ++i)
	for (int j = 0; j < 2; ++j)
	rsMatrixSet(accum, i, j, posInf);
	}

	static void fMinMatAccumulator(rs_matrix2x2 *accum, rs_matrix2x2 val) {
	for (int i = 0; i < 2; ++i) {
	for (int j = 0; j < 2; ++j) {
	const float accumElt = rsMatrixGet(accum, i, j);
	const float valElt = rsMatrixGet(&val, i, j);
	if (valElt < accumElt)
	rsMatrixSet(accum, i, j, valElt);
	}
	}
	}

	// reduction does not support matrix result, so use array instead
	static void fMinMatOutConverter(float (result)[4], const rs_matrix2x2 accum) {
	for (int i = 0; i < 4; ++i)
	(*result)[i] = accum->m[i];
	}

	/////////////////////////////////////////////////////////////////////////

	// finds min and max values (not their locations) from matrix input

	// tests matrix input and array of matrix accumulator (0 = min, 1 = max)

	#pragma rs reduce(findMinAndMaxMat) \
	initializer(fMinMaxMatInit) accumulator(fMinMaxMatAccumulator) \
	combiner(fMinMaxMatCombiner) outconverter(fMinMaxMatOutConverter)

	typedef rs_matrix2x2 MatrixPair[2];
	enum MatrixPairEntry { MPE_Min = 0, MPE_Max = 1 }; // indices into MatrixPair

	static void fMinMaxMatInit(MatrixPair *accum) {
	for (int i = 0; i < 2; ++i) {
	for (int j = 0; j < 2; ++j) {
	rsMatrixSet(&(*accum)[MPE_Min], i, j, posInf);
	rsMatrixSet(&(*accum)[MPE_Max], i, j, negInf);
	}
	}
	}

	static void fMinMaxMatAccumulator(MatrixPair *accum, rs_matrix2x2 val) {
	for (int i = 0; i < 2; ++i) {
	for (int j = 0; j < 2; ++j) {
	const float valElt = rsMatrixGet(&val, i, j);

	const float minElt = rsMatrixGet(&(*accum)[MPE_Min], i, j);
	rsMatrixSet(&(*accum)[MPE_Min], i, j, fmin(minElt, valElt));

	const float maxElt = rsMatrixGet(&(*accum)[MPE_Max], i, j);
	rsMatrixSet(&(*accum)[MPE_Max], i, j, fmax(maxElt, valElt));
	}
	}
	}

	static void fMinMaxMatCombiner(MatrixPair accum, const MatrixPair other) {
	for (int i = 0; i < 2; ++i) {
	for (int j = 0; j < 2; ++j) {
	const float minElt = rsMatrixGet(&(*accum)[MPE_Min], i, j);
	const float minEltOther = rsMatrixGet(&(*other)[MPE_Min], i, j);
	rsMatrixSet(&(*accum)[MPE_Min], i, j, fmin(minElt, minEltOther));

	const float maxElt = rsMatrixGet(&(*accum)[MPE_Max], i, j);
	const float maxEltOther = rsMatrixGet(&(*other)[MPE_Max], i, j);
	rsMatrixSet(&(*accum)[MPE_Max], i, j, fmax(maxElt, maxEltOther));
	}
	}
	}

	// reduction does not support matrix result, so use array instead
	static void fMinMaxMatOutConverter(float (result)[8], const MatrixPair accum) {
	for (int i = 0; i < 4; ++i) {
	(result)[i+0] = (accum)[MPE_Min].m[i];
	(result)[i+4] = (accum)[MPE_Max].m[i];
	}
	}

	/////////////////////////////////////////////////////////////////////////

	#pragma rs reduce(fz) \
	initializer(fzInit) \
	accumulator(fzAccum) combiner(fzCombine)

	static void fzInit(int accumIdx) { accumIdx = -1; }

	static void fzAccum(int *accumIdx,
	int inVal, int x /* special arg */) {
	if (inVal==0) *accumIdx = x;
	}

	static void fzCombine(int accumIdx, const int accumIdx2) {
	if (accumIdx2 >= 0) accumIdx = *accumIdx2;
	}

	/////////////////////////////////////////////////////////////////////////

	#pragma rs reduce(fz2) \
	initializer(fz2Init) \
	accumulator(fz2Accum) combiner(fz2Combine)

	static void fz2Init(int2 *accum) { accum->x = accum->y = -1; }

	static void fz2Accum(int2 *accum,
	int inVal,
	int x /* special arg */,
	int y /* special arg */) {
	if (inVal==0) {
	accum->x = x;
	accum->y = y;
	}
	}

	static void fz2Combine(int2 accum, const int2 accum2) {
	if (accum2->x >= 0) accum = accum2;
	}

	/////////////////////////////////////////////////////////////////////////

	#pragma rs reduce(fz3) \
	initializer(fz3Init) \
	accumulator(fz3Accum) combiner(fz3Combine)

	static void fz3Init(int3 *accum) { accum->x = accum->y = accum->z = -1; }

	static void fz3Accum(int3 *accum,
	int inVal,
	int x /* special arg */,
	int y /* special arg */,
	int z /* special arg */) {
	if (inVal==0) {
	accum->x = x;
	accum->y = y;
	accum->z = z;
	}
	}

	static void fz3Combine(int3 accum, const int3 accum2) {
	if (accum2->x >= 0) accum = accum2;
	}

	/////////////////////////////////////////////////////////////////////////

	#pragma rs reduce(histogram) \
	accumulator(hsgAccum) combiner(hsgCombine)

	#define BUCKETS 256
	typedef uint32_t Histogram[BUCKETS];

	static void hsgAccum(Histogram h, uchar in) { ++(h)[in]; }

	static void hsgCombine(Histogram accum, const Histogram addend) {
	for (int i = 0; i < BUCKETS; ++i)
	(accum)[i] += (addend)[i];
	}

	#pragma rs reduce(mode) \
	accumulator(hsgAccum) combiner(hsgCombine) \
	outconverter(modeOutConvert)

	static void modeOutConvert(int2 result, const Histogram h) {
	uint32_t mode = 0;
	for (int i = 1; i < BUCKETS; ++i)
	if ((h)[i] > (h)[mode]) mode = i;
	result->x = mode;
	result->y = (*h)[mode];
	}

	/////////////////////////////////////////////////////////////////////////

	// Simple test case where there are two inputs
	#pragma rs reduce(sumxor) accumulator(sxAccum) combiner(sxCombine)

	static void sxAccum(int accum, int inVal1, int inVal2) { accum += (inVal1 ^ inVal2); }

	static void sxCombine(int accum, const int accum2) { accum += accum2; }

	/////////////////////////////////////////////////////////////////////////

	// Test case where inputs are of different types
	#pragma rs reduce(sillysum) accumulator(ssAccum) combiner(ssCombine)

	static void ssAccum(long *accum, char c, float f, int3 i3) {
	*accum += ((((c + (long)ceil(log(f))) + i3.x) + i3.y) + i3.z);
	}

	static void ssCombine(long accum, const long accum2) { accum += accum2; }

	/////////////////////////////////////////////////////////////////////////

	// Test out-of-range result.

	// When a result is ulong, it can take on values not representable on
	// the Java side, where there are no unsigned integral types and long
	// is the largest integral type -- i.e., all values in the range
	// (MAX_LONG, MAX_ULONG] are not representable in Java. The reflected
	// result_*.get() methods throw an exception if the result value is
	// out of range. The globals and reduction kernels below allow a test
	// case on the Java side to describe what kind of result we should
	// produce -- in particular, what to use for an in-range value and an
	// out-of-range value, and where (if anywhere) to put an out-of-range
	// value within the result (which might be scalar, vector, array of
	// scalar, or array of vector).

	// We don't care about the input at all.
	// We use these globals to configure the generation of the result.
	// A kernel puts 2*oorrBadResultHalf in the position (if any) of the result
	// given by oorrBadResult, and oorrGoodResult everywhere else.
	// The oorrBadPos encoding is as follows:
	// - For scalar result, 0 = scalar; anything else = nowhere
	// - For vector result, 0..length(vector)-1 = corresponding vector component
	// (0 = x, 1 = y, 2 = z, 3 = w); anything else = nowhere
	// - For array of scalar result, 0..length(array)-1 = corresponding array element;
	// anything else = nowhere
	// - For array of vector result, 0..length(vector)*length(array)-1 = corresponding
	// vector component C of corresponding array element E; anything else = nowhere
	// (encoding is C + length(vector)*E)
	ulong oorrGoodResult; // the value of a good result
	ulong oorrBadResultHalf; // half the value of a bad result
	// ("half" because Java can only set the global from long not from ulong)
	int oorrBadPos; // position of bad result

	#define oorrBadResult (2*oorrBadResultHalf)

	static void oorrAccum(int *accum, int val) { }

	#pragma rs reduce(oorrSca) accumulator(oorrAccum) outconverter(oorrScaOut)
	static void oorrScaOut(ulong out, const int accum) {
	*out = (oorrBadPos ? oorrGoodResult : oorrBadResult);
	}

	#pragma rs reduce(oorrVec4) accumulator(oorrAccum) outconverter(oorrVec4Out)
	static void oorrVec4Out(ulong4 out, const int accum) {
	out->x = (oorrBadPos==0 ? oorrBadResult : oorrGoodResult);
	out->y = (oorrBadPos==1 ? oorrBadResult : oorrGoodResult);
	out->z = (oorrBadPos==2 ? oorrBadResult : oorrGoodResult);
	out->w = (oorrBadPos==3 ? oorrBadResult : oorrGoodResult);
	}

	#pragma rs reduce(oorrArr9) accumulator(oorrAccum) outconverter(oorrArr9Out)
	typedef ulong Arr9[9];
	static void oorrArr9Out(Arr9 out, const int accum) {
	for (int i = 0; i < 9; ++i)
	(*out)[i] = (i == oorrBadPos ? oorrBadResult : oorrGoodResult);
	}

	#pragma rs reduce(oorrArr9Vec4) accumulator(oorrAccum) outconverter(oorrArr9Vec4Out)
	typedef ulong4 Arr9Vec4[9];
	static void oorrArr9Vec4Out(Arr9Vec4 out, const int accum) {
	const int badIdx = (oorrBadPos >= 0 ? oorrBadPos / 4: -1);
	const int badComp = (oorrBadPos >= 0 ? oorrBadPos % 4: -1);
	for (int i = 0; i < 9; ++i) {
	(*out)[i].x = ((i==badIdx) && (0==badComp)) ? oorrBadResult : oorrGoodResult;
	(*out)[i].y = ((i==badIdx) && (1==badComp)) ? oorrBadResult : oorrGoodResult;
	(*out)[i].z = ((i==badIdx) && (2==badComp)) ? oorrBadResult : oorrGoodResult;
	(*out)[i].w = ((i==badIdx) && (3==badComp)) ? oorrBadResult : oorrGoodResult;
	}
	}