| /* |
| * Copyright (C) 2016 The Android Open Source Project |
| * |
| * Licensed under the Apache License, Version 2.0 (the "License"); |
| * you may not use this file except in compliance with the License. |
| * You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| #include "shared.rsh" |
| |
| float negInf, posInf; |
| |
| static half negInfHalf, posInfHalf; |
| |
| // At present, no support for global of type half, or for invokable |
| // taking an argument of type half. |
| static void translate(half *tgt, const short src) { |
| for (int i = 0; i < sizeof(half); ++i) |
| ((char *)tgt)[i] = ((const char *)&src)[i]; |
| } |
| void setInfsHalf(short forNegInfHalf, short forPosInfHalf) { |
| translate(&negInfHalf, forNegInfHalf); |
| translate(&posInfHalf, forPosInfHalf); |
| } |
| |
| ///////////////////////////////////////////////////////////////////////// |
| |
#pragma rs reduce(addint) \
  accumulator(aiAccum)

// Accumulator for addint: adds val to the running total in *accum.
static void aiAccum(int *accum, int val) {
  const int total = *accum + val;
  *accum = total;
}
| |
| ///////////////////////////////////////////////////////////////////////// |
| |
| // Finds LOCATION of min and max float values |
| |
#pragma rs reduce(findMinAndMax) \
  initializer(fMMInit) accumulator(fMMAccumulator) \
  combiner(fMMCombiner) outconverter(fMMOutConverter)

// A float value paired with the index at which it was seen.
typedef struct {
  float val;  // the value itself
  int idx;    // its index; fMMInit stores -1 here
} IndexedVal;

// Accumulator data for findMinAndMax: the smallest and largest entries so far.
typedef struct {
  IndexedVal min;
  IndexedVal max;
} MinAndMax;
| |
| static void fMMInit(MinAndMax *accum) { |
| accum->min.val = posInf; |
| accum->min.idx = -1; |
| accum->max.val = negInf; |
| accum->max.idx = -1; |
| } |
| |
| static void fMMAccumulator(MinAndMax *accum, float in, int x) { |
| IndexedVal me; |
| me.val = in; |
| me.idx = x; |
| |
| if (me.val <= accum->min.val) |
| accum->min = me; |
| if (me.val >= accum->max.val) |
| accum->max = me; |
| } |
| |
| static void fMMCombiner(MinAndMax *accum, |
| const MinAndMax *val) { |
| if ((accum->min.idx < 0) || (val->min.val < accum->min.val)) |
| accum->min = val->min; |
| if ((accum->max.idx < 0) || (val->max.val > accum->max.val)) |
| accum->max = val->max; |
| } |
| |
| static void fMMOutConverter(int2 *result, |
| const MinAndMax *val) { |
| result->x = val->min.idx; |
| result->y = val->max.idx; |
| } |
| |
| ///////////////////////////////////////////////////////////////////////// |
| |
| // finds min and max half values (not their locations) |
| |
| // tests half input and half2 result |
| |
| // .. reduction form |
| |
| #pragma rs reduce(findMinAndMaxHalf) \ |
| initializer(fMMHalfInit) accumulator(fMMHalfAccumulator) \ |
| combiner(fMMHalfCombiner) outconverter(fMMHalfOutConverter) |
| |
| typedef struct { |
| half min, max; |
| } MinAndMaxHalf; |
| |
| static void fMMHalfInit(MinAndMaxHalf *accum) { |
| accum->min = posInfHalf; |
| accum->max = negInfHalf; |
| } |
| |
| static void fMMHalfAccumulator(MinAndMaxHalf *accum, half in) { |
| accum->min = fmin(accum->min, in); |
| accum->max = fmax(accum->max, in); |
| } |
| |
| static void fMMHalfCombiner(MinAndMaxHalf *accum, |
| const MinAndMaxHalf *val) { |
| accum->min = fmin(accum->min, val->min); |
| accum->max = fmax(accum->max, val->max); |
| } |
| |
| static void fMMHalfOutConverter(half2 *result, |
| const MinAndMaxHalf *val) { |
| result->x = val->min; |
| result->y = val->max; |
| } |
| |
| // .. invokable (non reduction) form (no support for half computations in Java) |
| |
| void findMinAndMaxHalf(rs_allocation out, rs_allocation in) { |
| half min = posInfHalf, max = negInfHalf; |
| |
| const uint32_t len = rsAllocationGetDimX(in); |
| for (uint32_t idx = 0; idx < len; ++idx) { |
| const half val = rsGetElementAt_half(in, idx); |
| min = fmin(min, val); |
| max = fmax(max, val); |
| } |
| |
| half2 result; |
| result.x = min; |
| result.y = max; |
| rsSetElementAt_half2(out, result, 0); |
| } |
| |
| // tests half input and array of half result; |
| // reuses functions of findMinAndMaxHalf reduction kernel |
| |
| #pragma rs reduce(findMinAndMaxHalfIntoArray) \ |
| initializer(fMMHalfInit) accumulator(fMMHalfAccumulator) \ |
| combiner(fMMHalfCombiner) outconverter(fMMHalfOutConverterIntoArray) |
| |
| static void fMMHalfOutConverterIntoArray(half (*result)[2], |
| const MinAndMaxHalf *val) { |
| (*result)[0] = val->min; |
| (*result)[1] = val->max; |
| } |
| |
| ///////////////////////////////////////////////////////////////////////// |
| |
| // finds min and max half2 values (not their locations), element-wise: |
| // result[0].x = fmin(input[...].x) |
| // result[0].y = fmin(input[...].y) |
| // result[1].x = fmax(input[...].x) |
| // result[1].y = fmax(input[...].y) |
| |
| // tests half2 input and half2[] result |
| |
| // .. reduction form |
| |
| #pragma rs reduce(findMinAndMaxHalf2) \ |
| initializer(fMMHalf2Init) accumulator(fMMHalf2Accumulator) \ |
| combiner(fMMHalf2Combiner) outconverter(fMMHalf2OutConverter) |
| |
| typedef struct { |
| half2 min, max; |
| } MinAndMaxHalf2; |
| |
| static void fMMHalf2Init(MinAndMaxHalf2 *accum) { |
| accum->min.x = posInfHalf; |
| accum->min.y = posInfHalf; |
| accum->max.x = negInfHalf; |
| accum->max.y = negInfHalf; |
| } |
| |
| static void fMMHalf2Accumulator(MinAndMaxHalf2 *accum, half2 in) { |
| accum->min.x = fmin(accum->min.x, in.x); |
| accum->min.y = fmin(accum->min.y, in.y); |
| accum->max.x = fmax(accum->max.x, in.x); |
| accum->max.y = fmax(accum->max.y, in.y); |
| } |
| |
| static void fMMHalf2Combiner(MinAndMaxHalf2 *accum, |
| const MinAndMaxHalf2 *val) { |
| accum->min.x = fmin(accum->min.x, val->min.x); |
| accum->min.y = fmin(accum->min.y, val->min.y); |
| accum->max.x = fmax(accum->max.x, val->max.x); |
| accum->max.y = fmax(accum->max.y, val->max.y); |
| } |
| |
| typedef half2 ArrayOf2Half2[2]; |
| |
| static void fMMHalf2OutConverter(ArrayOf2Half2 *result, |
| const MinAndMaxHalf2 *val) { |
| (*result)[0] = val->min; |
| (*result)[1] = val->max; |
| } |
| |
| // .. invokable (non reduction) form (no support for half computations in Java) |
| |
| void findMinAndMaxHalf2(rs_allocation out, rs_allocation in) { |
| half2 min = { posInfHalf, posInfHalf }, max = { negInfHalf, negInfHalf }; |
| |
| const uint32_t len = rsAllocationGetDimX(in); |
| for (uint32_t idx = 0; idx < len; ++idx) { |
| const half2 val = rsGetElementAt_half2(in, idx); |
| min.x = fmin(min.x, val.x); |
| min.y = fmin(min.y, val.y); |
| max.x = fmax(max.x, val.x); |
| max.y = fmax(max.y, val.y); |
| } |
| |
| rsSetElementAt_half2(out, min, 0); |
| rsSetElementAt_half2(out, max, 1); |
| } |
| |
| ///////////////////////////////////////////////////////////////////////// |
| |
| // finds min values (not their locations) from matrix input |
| |
| // tests matrix input and matrix accumulator |
| |
| #pragma rs reduce(findMinMat) \ |
| initializer(fMinMatInit) accumulator(fMinMatAccumulator) \ |
| outconverter(fMinMatOutConverter) |
| |
| static void fMinMatInit(rs_matrix2x2 *accum) { |
| for (int i = 0; i < 2; ++i) |
| for (int j = 0; j < 2; ++j) |
| rsMatrixSet(accum, i, j, posInf); |
| } |
| |
| static void fMinMatAccumulator(rs_matrix2x2 *accum, rs_matrix2x2 val) { |
| for (int i = 0; i < 2; ++i) { |
| for (int j = 0; j < 2; ++j) { |
| const float accumElt = rsMatrixGet(accum, i, j); |
| const float valElt = rsMatrixGet(&val, i, j); |
| if (valElt < accumElt) |
| rsMatrixSet(accum, i, j, valElt); |
| } |
| } |
| } |
| |
| // reduction does not support matrix result, so use array instead |
| static void fMinMatOutConverter(float (*result)[4], const rs_matrix2x2 *accum) { |
| for (int i = 0; i < 4; ++i) |
| (*result)[i] = accum->m[i]; |
| } |
| |
| ///////////////////////////////////////////////////////////////////////// |
| |
| // finds min and max values (not their locations) from matrix input |
| |
| // tests matrix input and array of matrix accumulator (0 = min, 1 = max) |
| |
| #pragma rs reduce(findMinAndMaxMat) \ |
| initializer(fMinMaxMatInit) accumulator(fMinMaxMatAccumulator) \ |
| combiner(fMinMaxMatCombiner) outconverter(fMinMaxMatOutConverter) |
| |
| typedef rs_matrix2x2 MatrixPair[2]; |
| enum MatrixPairEntry { MPE_Min = 0, MPE_Max = 1 }; // indices into MatrixPair |
| |
| static void fMinMaxMatInit(MatrixPair *accum) { |
| for (int i = 0; i < 2; ++i) { |
| for (int j = 0; j < 2; ++j) { |
| rsMatrixSet(&(*accum)[MPE_Min], i, j, posInf); |
| rsMatrixSet(&(*accum)[MPE_Max], i, j, negInf); |
| } |
| } |
| } |
| |
| static void fMinMaxMatAccumulator(MatrixPair *accum, rs_matrix2x2 val) { |
| for (int i = 0; i < 2; ++i) { |
| for (int j = 0; j < 2; ++j) { |
| const float valElt = rsMatrixGet(&val, i, j); |
| |
| const float minElt = rsMatrixGet(&(*accum)[MPE_Min], i, j); |
| rsMatrixSet(&(*accum)[MPE_Min], i, j, fmin(minElt, valElt)); |
| |
| const float maxElt = rsMatrixGet(&(*accum)[MPE_Max], i, j); |
| rsMatrixSet(&(*accum)[MPE_Max], i, j, fmax(maxElt, valElt)); |
| } |
| } |
| } |
| |
| static void fMinMaxMatCombiner(MatrixPair *accum, const MatrixPair *other) { |
| for (int i = 0; i < 2; ++i) { |
| for (int j = 0; j < 2; ++j) { |
| const float minElt = rsMatrixGet(&(*accum)[MPE_Min], i, j); |
| const float minEltOther = rsMatrixGet(&(*other)[MPE_Min], i, j); |
| rsMatrixSet(&(*accum)[MPE_Min], i, j, fmin(minElt, minEltOther)); |
| |
| const float maxElt = rsMatrixGet(&(*accum)[MPE_Max], i, j); |
| const float maxEltOther = rsMatrixGet(&(*other)[MPE_Max], i, j); |
| rsMatrixSet(&(*accum)[MPE_Max], i, j, fmax(maxElt, maxEltOther)); |
| } |
| } |
| } |
| |
| // reduction does not support matrix result, so use array instead |
| static void fMinMaxMatOutConverter(float (*result)[8], const MatrixPair *accum) { |
| for (int i = 0; i < 4; ++i) { |
| (*result)[i+0] = (*accum)[MPE_Min].m[i]; |
| (*result)[i+4] = (*accum)[MPE_Max].m[i]; |
| } |
| } |
| |
| ///////////////////////////////////////////////////////////////////////// |
| |
#pragma rs reduce(fz) \
  initializer(fzInit) \
  accumulator(fzAccum) combiner(fzCombine)

// Initializer for fz: starts the accumulated index at -1.
static void fzInit(int *accumIdx) {
  accumIdx[0] = -1;
}
| |
// Accumulator for fz: records the coordinate x whenever the input value is
// zero; non-zero inputs leave the accumulator untouched.
static void fzAccum(int *accumIdx,
                    int inVal, int x /* special arg */) {
  if (inVal != 0) {
    return;
  }
  *accumIdx = x;
}
| |
// Combiner for fz: adopts the other accumulator's index when it is
// non-negative; otherwise keeps the current one.
static void fzCombine(int *accumIdx, const int *accumIdx2) {
  const int other = *accumIdx2;
  if (other < 0) {
    return;
  }
  *accumIdx = other;
}
| |
| ///////////////////////////////////////////////////////////////////////// |
| |
| #pragma rs reduce(fz2) \ |
| initializer(fz2Init) \ |
| accumulator(fz2Accum) combiner(fz2Combine) |
| |
| static void fz2Init(int2 *accum) { accum->x = accum->y = -1; } |
| |
| static void fz2Accum(int2 *accum, |
| int inVal, |
| int x /* special arg */, |
| int y /* special arg */) { |
| if (inVal==0) { |
| accum->x = x; |
| accum->y = y; |
| } |
| } |
| |
| static void fz2Combine(int2 *accum, const int2 *accum2) { |
| if (accum2->x >= 0) *accum = *accum2; |
| } |
| |
| ///////////////////////////////////////////////////////////////////////// |
| |
| #pragma rs reduce(fz3) \ |
| initializer(fz3Init) \ |
| accumulator(fz3Accum) combiner(fz3Combine) |
| |
| static void fz3Init(int3 *accum) { accum->x = accum->y = accum->z = -1; } |
| |
| static void fz3Accum(int3 *accum, |
| int inVal, |
| int x /* special arg */, |
| int y /* special arg */, |
| int z /* special arg */) { |
| if (inVal==0) { |
| accum->x = x; |
| accum->y = y; |
| accum->z = z; |
| } |
| } |
| |
| static void fz3Combine(int3 *accum, const int3 *accum2) { |
| if (accum2->x >= 0) *accum = *accum2; |
| } |
| |
| ///////////////////////////////////////////////////////////////////////// |
| |
| #pragma rs reduce(histogram) \ |
| accumulator(hsgAccum) combiner(hsgCombine) |
| |
| #define BUCKETS 256 |
| typedef uint32_t Histogram[BUCKETS]; |
| |
| static void hsgAccum(Histogram *h, uchar in) { ++(*h)[in]; } |
| |
| static void hsgCombine(Histogram *accum, const Histogram *addend) { |
| for (int i = 0; i < BUCKETS; ++i) |
| (*accum)[i] += (*addend)[i]; |
| } |
| |
| #pragma rs reduce(mode) \ |
| accumulator(hsgAccum) combiner(hsgCombine) \ |
| outconverter(modeOutConvert) |
| |
| static void modeOutConvert(int2 *result, const Histogram *h) { |
| uint32_t mode = 0; |
| for (int i = 1; i < BUCKETS; ++i) |
| if ((*h)[i] > (*h)[mode]) mode = i; |
| result->x = mode; |
| result->y = (*h)[mode]; |
| } |
| |
| ///////////////////////////////////////////////////////////////////////// |
| |
// Simple test case with two inputs.
#pragma rs reduce(sumxor) accumulator(sxAccum) combiner(sxCombine)

// Accumulator: adds the bitwise XOR of the two inputs to the running sum.
static void sxAccum(int *accum, int inVal1, int inVal2) {
  const int mixed = inVal1 ^ inVal2;
  *accum += mixed;
}
| |
// Combiner for sumxor: adds the other partial sum to *accum.
static void sxCombine(int *accum, const int *accum2) {
  const int partial = *accum2;
  *accum += partial;
}
| |
| ///////////////////////////////////////////////////////////////////////// |
| |
| // Test case where inputs are of different types |
| #pragma rs reduce(sillysum) accumulator(ssAccum) combiner(ssCombine) |
| |
| static void ssAccum(long *accum, char c, float f, int3 i3) { |
| *accum += ((((c + (long)ceil(log(f))) + i3.x) + i3.y) + i3.z); |
| } |
| |
// Combiner for sillysum: adds the other partial sum to *accum.
static void ssCombine(long *accum, const long *accum2) {
  const long partial = *accum2;
  *accum += partial;
}
| |
| ///////////////////////////////////////////////////////////////////////// |
| |
| // Test out-of-range result. |
| |
| // When a result is ulong, it can take on values not representable on |
| // the Java side, where there are no unsigned integral types and long |
| // is the largest integral type -- i.e., all values in the range |
| // (MAX_LONG, MAX_ULONG] are not representable in Java. The reflected |
| // result_*.get() methods throw an exception if the result value is |
| // out of range. The globals and reduction kernels below allow a test |
| // case on the Java side to describe what kind of result we should |
| // produce -- in particular, what to use for an in-range value and an |
| // out-of-range value, and where (if anywhere) to put an out-of-range |
| // value within the result (which might be scalar, vector, array of |
| // scalar, or array of vector). |
| |
| // We don't care about the input at all. |
| // We use these globals to configure the generation of the result. |
// A kernel puts 2*oorrBadResultHalf in the position (if any) of the result
// given by oorrBadPos, and oorrGoodResult everywhere else.
| // The oorrBadPos encoding is as follows: |
| // - For scalar result, 0 = scalar; anything else = nowhere |
| // - For vector result, 0..length(vector)-1 = corresponding vector component |
| // (0 = x, 1 = y, 2 = z, 3 = w); anything else = nowhere |
| // - For array of scalar result, 0..length(array)-1 = corresponding array element; |
| // anything else = nowhere |
| // - For array of vector result, 0..length(vector)*length(array)-1 = corresponding |
| // vector component C of corresponding array element E; anything else = nowhere |
| // (encoding is C + length(vector)*E) |
// Configuration read by the oorr* outconverters below.
ulong oorrGoodResult; // the value of a good result
ulong oorrBadResultHalf; // half the value of a bad result
// ("half" because Java can only set the global from long not from ulong)
int oorrBadPos; // position of bad result

// The bad value itself: twice oorrBadResultHalf, recomputed at each use.
#define oorrBadResult (2*oorrBadResultHalf)
| |
// Accumulator shared by all oorr* kernels. It deliberately does nothing:
// these kernels ignore their input, and the outconverter alone builds the
// result.
static void oorrAccum(int *accum, int val) {
  (void)accum;
  (void)val;
}
| |
| #pragma rs reduce(oorrSca) accumulator(oorrAccum) outconverter(oorrScaOut) |
| static void oorrScaOut(ulong *out, const int *accum) { |
| *out = (oorrBadPos ? oorrGoodResult : oorrBadResult); |
| } |
| |
| #pragma rs reduce(oorrVec4) accumulator(oorrAccum) outconverter(oorrVec4Out) |
| static void oorrVec4Out(ulong4 *out, const int *accum) { |
| out->x = (oorrBadPos==0 ? oorrBadResult : oorrGoodResult); |
| out->y = (oorrBadPos==1 ? oorrBadResult : oorrGoodResult); |
| out->z = (oorrBadPos==2 ? oorrBadResult : oorrGoodResult); |
| out->w = (oorrBadPos==3 ? oorrBadResult : oorrGoodResult); |
| } |
| |
| #pragma rs reduce(oorrArr9) accumulator(oorrAccum) outconverter(oorrArr9Out) |
| typedef ulong Arr9[9]; |
| static void oorrArr9Out(Arr9 *out, const int *accum) { |
| for (int i = 0; i < 9; ++i) |
| (*out)[i] = (i == oorrBadPos ? oorrBadResult : oorrGoodResult); |
| } |
| |
| #pragma rs reduce(oorrArr9Vec4) accumulator(oorrAccum) outconverter(oorrArr9Vec4Out) |
| typedef ulong4 Arr9Vec4[9]; |
| static void oorrArr9Vec4Out(Arr9Vec4 *out, const int *accum) { |
| const int badIdx = (oorrBadPos >= 0 ? oorrBadPos / 4: -1); |
| const int badComp = (oorrBadPos >= 0 ? oorrBadPos % 4: -1); |
| for (int i = 0; i < 9; ++i) { |
| (*out)[i].x = ((i==badIdx) && (0==badComp)) ? oorrBadResult : oorrGoodResult; |
| (*out)[i].y = ((i==badIdx) && (1==badComp)) ? oorrBadResult : oorrGoodResult; |
| (*out)[i].z = ((i==badIdx) && (2==badComp)) ? oorrBadResult : oorrGoodResult; |
| (*out)[i].w = ((i==badIdx) && (3==badComp)) ? oorrBadResult : oorrGoodResult; |
| } |
| } |