/*
* Copyright (C) 2009 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
//#define LOG_NDEBUG 0
#define LOG_TAG "ColorConverter"
#include <android-base/macros.h>
#include <utils/Log.h>
#include <media/stagefright/foundation/ADebug.h>
#include <media/stagefright/foundation/ALooper.h>
#include <media/stagefright/foundation/ColorUtils.h>
#include <media/stagefright/ColorConverter.h>
#include <media/stagefright/MediaCodecConstants.h>
#include <media/stagefright/MediaErrors.h>
#include "libyuv/convert_from.h"
#include "libyuv/convert_argb.h"
#include "libyuv/planar_functions.h"
#include "libyuv/video_common.h"
#include <functional>
#include <sys/time.h>
#define USE_LIBYUV
#define PERF_PROFILING 0
#if defined(__aarch64__) || defined(__ARM_NEON__)
#define USE_NEON_Y410 1
#else
#define USE_NEON_Y410 0
#endif
#if USE_NEON_Y410
#include <arm_neon.h>
#endif
namespace android {
static bool isRGB(OMX_COLOR_FORMATTYPE colorFormat) {
return colorFormat == OMX_COLOR_Format16bitRGB565
|| colorFormat == OMX_COLOR_Format32BitRGBA8888
|| colorFormat == OMX_COLOR_Format32bitBGRA8888
|| colorFormat == COLOR_Format32bitABGR2101010;
}
bool ColorConverter::ColorSpace::isBt2020() const {
return (mStandard == ColorUtils::kColorStandardBT2020);
}
bool ColorConverter::ColorSpace::isH420() const {
return (mStandard == ColorUtils::kColorStandardBT709)
&& (mRange == ColorUtils::kColorRangeLimited);
}
// the matrix coefficients are the same for both 601.625 and 601.525 standards
bool ColorConverter::ColorSpace::isI420() const {
return ((mStandard == ColorUtils::kColorStandardBT601_625)
|| (mStandard == ColorUtils::kColorStandardBT601_525))
&& (mRange == ColorUtils::kColorRangeLimited);
}
bool ColorConverter::ColorSpace::isJ420() const {
return ((mStandard == ColorUtils::kColorStandardBT601_625)
|| (mStandard == ColorUtils::kColorStandardBT601_525))
&& (mRange == ColorUtils::kColorRangeFull);
}
/**
* This struct approximates the standard YUV to RGB conversions by scaling the matrix
* coefficients to multiples of 1/256 (dividing by 256 is a simple right shift). The chosen value
* of 256 is somewhat arbitrary and does not depend on the bit-depth, but it does limit the
* precision of the matrix coefficients (KR & KB).
*
* The maximum color error after clipping from using 256 is a distance of:
* 0.4 (8-bit) / 1.4 (10-bit) for greens in BT.601
* 0.5 (8-bit) / 1.9 (10-bit) for cyans in BT.709, and
* 0.3 (8-bit) / 1.3 (10-bit) for violets in BT.2020 (it is 0.4 for 10-bit BT.2020 limited)
*
* Note for reference: libyuv is using a divisor of 64 instead of 256 to ensure no overflow in
* 16-bit math. The maximum color error for libyuv is 3.5 / 14.
*
* The clamping is done using a lookup vector where negative indices are mapped to 0
* and indices > 255 are mapped to 255. (For 10-bit these are clamped to 0 to 1023)
*
* The matrices are assumed to be of the following format (note the sign on the 2nd row):
*
* [ R ]   [ _y    0     _r_v ]   [ Y - C16  ]
* [ G ] = [ _y  -_g_u  -_g_v ] * [ U - C128 ]
* [ B ]   [ _y   _b_u    0   ]   [ V - C128 ]
*
* C16 is 1 << (bitdepth - 4) for limited range, and 0 for full range
* C128 is 1 << (bitdepth - 1)
* C255 is (1 << bitdepth) - 1
*
* The min and max values from these equations determine the clip range needed for clamping:
*
* min = - (_y * C16 + max((_g_u + _g_v) * (C255-C128), max(_r_v, _b_u) * C128)) / 256
* max = (_y * (C255 - C16) + max((_g_u + _g_v) * C128, max(_r_v, _b_u) * (C255-C128)) + 128) / 256
*/
struct ColorConverter::Coeffs {
int32_t _y;
int32_t _r_v;
int32_t _g_u;
int32_t _g_v;
int32_t _b_u;
};
/*
Color conversion rules are dictated by ISO (e.g. ISO/IEC 23008-2)
Limited range means Y is in [16, 235], U and V are in [16, 240] corresponding to [-0.5 to 0.5].
Full range means Y is in [0, 255], U and V are in [0.5, 255.5] corresponding to [-0.5 to 0.5].
RGB is always in full range ([0, 255])
The color primaries determine the KR and KB values:
For full range (assuming 8-bits) ISO defines:
Y       =  KR * R  +  (1-KR-KB) * G  +  KB * B
U - 128 =  -KR/(2*(1-KB)) * R  -  (1-KR-KB)/(2*(1-KB)) * G  +  0.5 * B
V - 128 =  0.5 * R  -  (1-KR-KB)/(2*(1-KR)) * G  -  KB/(2*(1-KR)) * B
(the math is rounded, 128 is (1 << (bitdepth - 1)) )
From this
R = Y  +  2*(1-KR) * (V - 128)
G = Y  +  2*KB*(KB-1)/(1-KR-KB) * (U - 128)  +  2*KR*(KR-1)/(1-KR-KB) * (V - 128)
B = Y  +  2*(1-KB) * (U - 128)
For limited range, this becomes (Y - 16 is scaled by 255/219; U - 128 and V - 128 by 255/224)
R = 255/219 * (Y - 16)  +  2*(1-KR) * 255/224 * (V - 128)
G = 255/219 * (Y - 16)  +  2*KB*(KB-1)/(1-KR-KB) * 255/224 * (U - 128)
                        +  2*KR*(KR-1)/(1-KR-KB) * 255/224 * (V - 128)
B = 255/219 * (Y - 16)  +  2*(1-KB) * 255/224 * (U - 128)
( For non-8-bit, 16 is (1 << (bitdepth - 4)), 128 is (1 << (bitdepth - 1)),
255 is ((1 << bitdepth) - 1), 219 is (219 << (bitdepth - 8)) and
224 is (224 << (bitdepth - 8)), so the matrix coefficients slightly change. )
*/
namespace {
/**
* BT.601: K_R = 0.299; K_B = 0.114
*
* clip range 8-bit: [-277, 535], 10-bit: [-1111, 2155]
*/
const struct ColorConverter::Coeffs BT601_FULL = { 256, 359, 88, 183, 454 };
const struct ColorConverter::Coeffs BT601_LIMITED = { 298, 409, 100, 208, 516 };
const struct ColorConverter::Coeffs BT601_LTD_10BIT = { 299, 410, 101, 209, 518 };
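// Illustrative derivation (not used by the code): plugging KR = 0.299, KB = 0.114 into the
// inverse matrix in the comment above and scaling by 256 reproduces BT601_FULL:
//   _y   = 256
//   _r_v = 256 * 2*(1-KR)              ~= 358.9 -> 359
//   _g_u = 256 * 2*KB*(1-KB)/(1-KR-KB) ~=  88.1 ->  88
//   _g_v = 256 * 2*KR*(1-KR)/(1-KR-KB) ~= 182.8 -> 183
//   _b_u = 256 * 2*(1-KB)              ~= 453.6 -> 454
// The limited-range rows additionally scale _y by 255/219 (1023/876 for 10-bit) and the chroma
// terms by 255/224 (1023/896), e.g. _y = 256*255/219 ~= 298 (8-bit) and 256*1023/876 ~= 299 (10-bit).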
/**
* BT.709: K_R = 0.2126; K_B = 0.0722
*
* clip range 8-bit: [-289, 547], 10-bit: [-1159, 2202]
*/
const struct ColorConverter::Coeffs BT709_FULL = { 256, 403, 48, 120, 475 };
const struct ColorConverter::Coeffs BT709_LIMITED = { 298, 459, 55, 136, 541 };
const struct ColorConverter::Coeffs BT709_LTD_10BIT = { 299, 460, 55, 137, 542 };
/**
* BT.2020: K_R = 0.2627; K_B = 0.0593
*
* clip range 8-bit: [-294, 552], 10-bit: [-1175, 2218]
*
* This is the largest clip range.
*/
const struct ColorConverter::Coeffs BT2020_FULL = { 256, 377, 42, 146, 482 };
const struct ColorConverter::Coeffs BT2020_LIMITED = { 298, 430, 48, 167, 548 };
const struct ColorConverter::Coeffs BT2020_LTD_10BIT = { 299, 431, 48, 167, 550 };
constexpr int CLIP_RANGE_MIN_8BIT = -294;
constexpr int CLIP_RANGE_MAX_8BIT = 552;
constexpr int CLIP_RANGE_MIN_10BIT = -1175;
constexpr int CLIP_RANGE_MAX_10BIT = 2218;
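// Illustrative check of the largest (BT.2020 limited, 10-bit) case using the min/max formulas
// from the Coeffs comment above, with C16 = 64, C128 = 512, C255 = 1023:
//   min = -(299*64 + max((48+167)*511, 550*512)) / 256       = -300736/256 ~= -1175
//   max = (299*959 + max((48+167)*512, 550*511) + 128) / 256 =  567919/256 ~=  2218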
}
ColorConverter::ColorConverter(
OMX_COLOR_FORMATTYPE from, OMX_COLOR_FORMATTYPE to)
: mSrcFormat(from),
mDstFormat(to),
mSrcColorSpace({0, 0, 0}),
mClip(NULL),
mClip10Bit(NULL) {
}
ColorConverter::~ColorConverter() {
delete[] mClip;
mClip = NULL;
delete[] mClip10Bit;
mClip10Bit = NULL;
}
bool ColorConverter::isValid() const {
switch ((int32_t)mSrcFormat) {
case OMX_COLOR_FormatYUV420Planar16:
if (mDstFormat == OMX_COLOR_FormatYUV444Y410) {
return true;
}
FALLTHROUGH_INTENDED;
case OMX_COLOR_FormatYUV420Planar:
return mDstFormat == OMX_COLOR_Format16bitRGB565
|| mDstFormat == OMX_COLOR_Format32BitRGBA8888
|| mDstFormat == OMX_COLOR_Format32bitBGRA8888;
case OMX_COLOR_FormatCbYCrY:
case OMX_QCOM_COLOR_FormatYVU420SemiPlanar:
case OMX_TI_COLOR_FormatYUV420PackedSemiPlanar:
return mDstFormat == OMX_COLOR_Format16bitRGB565;
case OMX_COLOR_FormatYUV420SemiPlanar:
#ifdef USE_LIBYUV
return mDstFormat == OMX_COLOR_Format16bitRGB565
|| mDstFormat == OMX_COLOR_Format32BitRGBA8888
|| mDstFormat == OMX_COLOR_Format32bitBGRA8888;
#else
return mDstFormat == OMX_COLOR_Format16bitRGB565;
#endif
case COLOR_FormatYUVP010:
return mDstFormat == COLOR_Format32bitABGR2101010;
default:
return false;
}
}
bool ColorConverter::isDstRGB() const {
return isRGB(mDstFormat);
}
void ColorConverter::setSrcColorSpace(
uint32_t standard, uint32_t range, uint32_t transfer) {
if (isRGB(mSrcFormat)) {
ALOGW("Can't set color space on RGB source");
return;
}
mSrcColorSpace.mStandard = standard;
mSrcColorSpace.mRange = range;
mSrcColorSpace.mTransfer = transfer;
}
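// A rough usage sketch (caller-side names such as srcBits/dstBits/transfer are illustrative and
// not part of this API):
//
//   ColorConverter converter(OMX_COLOR_FormatYUV420Planar, OMX_COLOR_Format32BitRGBA8888);
//   converter.setSrcColorSpace(ColorUtils::kColorStandardBT709,
//                              ColorUtils::kColorRangeLimited, transfer);
//   if (converter.isValid()) {
//       converter.convert(srcBits, width, height, srcStride,
//                         0, 0, width - 1, height - 1,   // inclusive source crop
//                         dstBits, width, height, dstStride,
//                         0, 0, width - 1, height - 1);  // inclusive destination crop
//   }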
/*
* If stride is non-zero, the client's stride will be used. For planar
* or semi-planar YUV formats, the stride must be an even number.
* If stride is zero, it will be calculated based on width and bpp
* of the format, assuming no padding on the right edge.
*/
ColorConverter::BitmapParams::BitmapParams(
void *bits,
size_t width, size_t height, size_t stride,
size_t cropLeft, size_t cropTop,
size_t cropRight, size_t cropBottom,
OMX_COLOR_FORMATTYPE colorFormat)
: mBits(bits),
mColorFormat(colorFormat),
mWidth(width),
mHeight(height),
mCropLeft(cropLeft),
mCropTop(cropTop),
mCropRight(cropRight),
mCropBottom(cropBottom) {
switch((int32_t)mColorFormat) {
case OMX_COLOR_Format16bitRGB565:
case OMX_COLOR_FormatYUV420Planar16:
case COLOR_FormatYUVP010:
case OMX_COLOR_FormatCbYCrY:
mBpp = 2;
mStride = 2 * mWidth;
break;
case OMX_COLOR_Format32bitBGRA8888:
case OMX_COLOR_Format32BitRGBA8888:
case COLOR_Format32bitABGR2101010:
case OMX_COLOR_FormatYUV444Y410:
mBpp = 4;
mStride = 4 * mWidth;
break;
case OMX_COLOR_FormatYUV420Planar:
case OMX_QCOM_COLOR_FormatYVU420SemiPlanar:
case OMX_COLOR_FormatYUV420SemiPlanar:
case OMX_TI_COLOR_FormatYUV420PackedSemiPlanar:
mBpp = 1;
mStride = mWidth;
break;
default:
ALOGE("Unsupported color format %d", mColorFormat);
mBpp = 1;
mStride = mWidth;
break;
}
// use client's stride if it's specified.
if (stride != 0) {
mStride = stride;
}
}
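// Note: the crop coordinates are inclusive on both ends, e.g. cropLeft = 0 and cropRight = 1919
// describe a 1920-pixel-wide crop (see cropWidth()/cropHeight() below).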
size_t ColorConverter::BitmapParams::cropWidth() const {
return mCropRight - mCropLeft + 1;
}
size_t ColorConverter::BitmapParams::cropHeight() const {
return mCropBottom - mCropTop + 1;
}
status_t ColorConverter::convert(
const void *srcBits,
size_t srcWidth, size_t srcHeight, size_t srcStride,
size_t srcCropLeft, size_t srcCropTop,
size_t srcCropRight, size_t srcCropBottom,
void *dstBits,
size_t dstWidth, size_t dstHeight, size_t dstStride,
size_t dstCropLeft, size_t dstCropTop,
size_t dstCropRight, size_t dstCropBottom) {
BitmapParams src(
const_cast<void *>(srcBits),
srcWidth, srcHeight, srcStride,
srcCropLeft, srcCropTop, srcCropRight, srcCropBottom, mSrcFormat);
BitmapParams dst(
dstBits,
dstWidth, dstHeight, dstStride,
dstCropLeft, dstCropTop, dstCropRight, dstCropBottom, mDstFormat);
if (!((src.mCropLeft & 1) == 0
&& src.cropWidth() == dst.cropWidth()
&& src.cropHeight() == dst.cropHeight())) {
return ERROR_UNSUPPORTED;
}
status_t err;
switch ((int32_t)mSrcFormat) {
case OMX_COLOR_FormatYUV420Planar:
#ifdef USE_LIBYUV
err = convertYUV420PlanarUseLibYUV(src, dst);
#else
err = convertYUV420Planar(src, dst);
#endif
break;
case OMX_COLOR_FormatYUV420Planar16:
{
#if PERF_PROFILING
int64_t startTimeUs = ALooper::GetNowUs();
#endif
err = convertYUV420Planar16(src, dst);
#if PERF_PROFILING
int64_t endTimeUs = ALooper::GetNowUs();
ALOGD("convertYUV420Planar16 took %lld us", (long long) (endTimeUs - startTimeUs));
#endif
break;
}
case COLOR_FormatYUVP010:
{
#if PERF_PROFILING
int64_t startTimeUs = ALooper::GetNowUs();
#endif
err = convertYUVP010(src, dst);
#if PERF_PROFILING
int64_t endTimeUs = ALooper::GetNowUs();
ALOGD("convertYUVP010 took %lld us", (long long) (endTimeUs - startTimeUs));
#endif
break;
}
case OMX_COLOR_FormatCbYCrY:
err = convertCbYCrY(src, dst);
break;
case OMX_QCOM_COLOR_FormatYVU420SemiPlanar:
err = convertQCOMYUV420SemiPlanar(src, dst);
break;
case OMX_COLOR_FormatYUV420SemiPlanar:
#ifdef USE_LIBYUV
err = convertYUV420SemiPlanarUseLibYUV(src, dst);
#else
err = convertYUV420SemiPlanar(src, dst);
#endif
break;
case OMX_TI_COLOR_FormatYUV420PackedSemiPlanar:
err = convertTIYUV420PackedSemiPlanar(src, dst);
break;
default:
{
CHECK(!"Should not be here. Unknown color conversion.");
break;
}
}
return err;
}
const struct ColorConverter::Coeffs *ColorConverter::getMatrix() const {
const bool isFullRange = mSrcColorSpace.mRange == ColorUtils::kColorRangeFull;
const bool is10Bit = (mSrcFormat == COLOR_FormatYUVP010
|| mSrcFormat == OMX_COLOR_FormatYUV420Planar16);
switch (mSrcColorSpace.mStandard) {
case ColorUtils::kColorStandardBT601_525:
case ColorUtils::kColorStandardBT601_625:
return (isFullRange ? &BT601_FULL :
is10Bit ? &BT601_LTD_10BIT : &BT601_LIMITED);
case ColorUtils::kColorStandardBT709:
return (isFullRange ? &BT709_FULL :
is10Bit ? &BT709_LTD_10BIT : &BT709_LIMITED);
case ColorUtils::kColorStandardBT2020:
return (isFullRange ? &BT2020_FULL :
is10Bit ? &BT2020_LTD_10BIT : &BT2020_LIMITED);
default:
// for now use the default matrices for unhandled color spaces
// TODO: fail?
// return nullptr;
[[fallthrough]];
case ColorUtils::kColorStandardUnspecified:
return is10Bit ? &BT2020_LTD_10BIT : &BT601_LIMITED;
}
}
status_t ColorConverter::convertCbYCrY(
const BitmapParams &src, const BitmapParams &dst) {
// XXX Untested
const struct Coeffs *matrix = getMatrix();
if (!matrix) {
return ERROR_UNSUPPORTED;
}
signed _b_u = matrix->_b_u;
signed _neg_g_u = -matrix->_g_u;
signed _neg_g_v = -matrix->_g_v;
signed _r_v = matrix->_r_v;
signed _y = matrix->_y;
signed _c16 = mSrcColorSpace.mRange == ColorUtils::kColorRangeLimited ? 16 : 0;
uint8_t *kAdjustedClip = initClip();
uint16_t *dst_ptr = (uint16_t *)dst.mBits
+ dst.mCropTop * dst.mWidth + dst.mCropLeft;
const uint8_t *src_ptr = (const uint8_t *)src.mBits
+ (src.mCropTop * src.mWidth + src.mCropLeft) * 2;
for (size_t y = 0; y < src.cropHeight(); ++y) {
for (size_t x = 0; x < src.cropWidth(); x += 2) {
signed y1 = (signed)src_ptr[2 * x + 1] - _c16;
signed y2 = (signed)src_ptr[2 * x + 3] - _c16;
signed u = (signed)src_ptr[2 * x] - 128;
signed v = (signed)src_ptr[2 * x + 2] - 128;
signed u_b = u * _b_u;
signed u_g = u * _neg_g_u;
signed v_g = v * _neg_g_v;
signed v_r = v * _r_v;
signed tmp1 = y1 * _y + 128;
signed b1 = (tmp1 + u_b) / 256;
signed g1 = (tmp1 + v_g + u_g) / 256;
signed r1 = (tmp1 + v_r) / 256;
signed tmp2 = y2 * _y + 128;
signed b2 = (tmp2 + u_b) / 256;
signed g2 = (tmp2 + v_g + u_g) / 256;
signed r2 = (tmp2 + v_r) / 256;
uint32_t rgb1 =
((kAdjustedClip[r1] >> 3) << 11)
| ((kAdjustedClip[g1] >> 2) << 5)
| (kAdjustedClip[b1] >> 3);
uint32_t rgb2 =
((kAdjustedClip[r2] >> 3) << 11)
| ((kAdjustedClip[g2] >> 2) << 5)
| (kAdjustedClip[b2] >> 3);
if (x + 1 < src.cropWidth()) {
*(uint32_t *)(&dst_ptr[x]) = (rgb2 << 16) | rgb1;
} else {
dst_ptr[x] = rgb1;
}
}
src_ptr += src.mWidth * 2;
dst_ptr += dst.mWidth;
}
return OK;
}
/*
libyuv supports the following color spaces:
I420: BT.601 limited range
J420: BT.601 full range (jpeg)
H420: BT.709 limited range
*/
#define DECLARE_YUV2RGBFUNC(func, rgb) int (*func)( \
const uint8_t*, int, const uint8_t*, int, \
const uint8_t*, int, uint8_t*, int, int, int) \
= mSrcColorSpace.isH420() ? libyuv::H420To##rgb \
: mSrcColorSpace.isJ420() ? libyuv::J420To##rgb \
: libyuv::I420To##rgb
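// For example, DECLARE_YUV2RGBFUNC(func, ARGB) declares a local function pointer "func" that
// resolves to libyuv::H420ToARGB, libyuv::J420ToARGB or libyuv::I420ToARGB depending on the
// source color space, so each case below only has to name the destination format once.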
status_t ColorConverter::convertYUV420PlanarUseLibYUV(
const BitmapParams &src, const BitmapParams &dst) {
// Fall back to our conversion if libyuv does not support the color space.
// I420 (BT.601 limited) is the default, so don't fall back if we end up using it anyway.
if (!mSrcColorSpace.isH420() && !mSrcColorSpace.isJ420()
// && !mSrcColorSpace.isI420() /* same as line below */
&& getMatrix() != &BT601_LIMITED) {
return convertYUV420Planar(src, dst);
}
uint8_t *dst_ptr = (uint8_t *)dst.mBits
+ dst.mCropTop * dst.mStride + dst.mCropLeft * dst.mBpp;
const uint8_t *src_y =
(const uint8_t *)src.mBits + src.mCropTop * src.mStride + src.mCropLeft;
const uint8_t *src_u =
(const uint8_t *)src.mBits + src.mStride * src.mHeight
+ (src.mCropTop / 2) * (src.mStride / 2) + (src.mCropLeft / 2);
const uint8_t *src_v =
src_u + (src.mStride / 2) * (src.mHeight / 2);
switch (mDstFormat) {
case OMX_COLOR_Format16bitRGB565:
{
DECLARE_YUV2RGBFUNC(func, RGB565);
(*func)(src_y, src.mStride, src_u, src.mStride / 2, src_v, src.mStride / 2,
(uint8_t *)dst_ptr, dst.mStride, src.cropWidth(), src.cropHeight());
break;
}
case OMX_COLOR_Format32BitRGBA8888:
{
DECLARE_YUV2RGBFUNC(func, ABGR);
(*func)(src_y, src.mStride, src_u, src.mStride / 2, src_v, src.mStride / 2,
(uint8_t *)dst_ptr, dst.mStride, src.cropWidth(), src.cropHeight());
break;
}
case OMX_COLOR_Format32bitBGRA8888:
{
DECLARE_YUV2RGBFUNC(func, ARGB);
(*func)(src_y, src.mStride, src_u, src.mStride / 2, src_v, src.mStride / 2,
(uint8_t *)dst_ptr, dst.mStride, src.cropWidth(), src.cropHeight());
break;
}
default:
return ERROR_UNSUPPORTED;
}
return OK;
}
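// For the semi-planar (NV12) path below, the source is a full-height Y plane followed by a
// half-height plane of interleaved U/V samples, so the chroma offset math uses the full stride
// (not stride / 2) and libyuv receives a single interleaved chroma plane.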
status_t ColorConverter::convertYUV420SemiPlanarUseLibYUV(
const BitmapParams &src, const BitmapParams &dst) {
// Fall back to our conversion if libyuv does not support the color space.
// libyuv only supports BT.601 limited range NV12. Don't fall back if we end up using it anyway.
if (// !mSrcColorSpace.isI420() && /* same as below */
getMatrix() != &BT601_LIMITED) {
return convertYUV420SemiPlanar(src, dst);
}
uint8_t *dst_ptr = (uint8_t *)dst.mBits
+ dst.mCropTop * dst.mStride + dst.mCropLeft * dst.mBpp;
const uint8_t *src_y =
(const uint8_t *)src.mBits + src.mCropTop * src.mStride + src.mCropLeft;
const uint8_t *src_u =
(const uint8_t *)src.mBits + src.mStride * src.mHeight
+ (src.mCropTop / 2) * src.mStride + src.mCropLeft;
switch (mDstFormat) {
case OMX_COLOR_Format16bitRGB565:
libyuv::NV12ToRGB565(src_y, src.mStride, src_u, src.mStride, (uint8_t *)dst_ptr,
dst.mStride, src.cropWidth(), src.cropHeight());
break;
case OMX_COLOR_Format32bitBGRA8888:
libyuv::NV12ToARGB(src_y, src.mStride, src_u, src.mStride, (uint8_t *)dst_ptr,
dst.mStride, src.cropWidth(), src.cropHeight());
break;
case OMX_COLOR_Format32BitRGBA8888:
libyuv::NV12ToABGR(src_y, src.mStride, src_u, src.mStride, (uint8_t *)dst_ptr,
dst.mStride, src.cropWidth(), src.cropHeight());
break;
default:
return ERROR_UNSUPPORTED;
}
return OK;
}
std::function<void (void *, void *, void *, size_t,
signed *, signed *, signed *, signed *)>
getReadFromSrc(OMX_COLOR_FORMATTYPE srcFormat) {
switch(srcFormat) {
case OMX_COLOR_FormatYUV420Planar:
return [](void *src_y, void *src_u, void *src_v, size_t x,
signed *y1, signed *y2, signed *u, signed *v) {
*y1 = ((uint8_t*)src_y)[x];
*y2 = ((uint8_t*)src_y)[x + 1];
*u = ((uint8_t*)src_u)[x / 2] - 128;
*v = ((uint8_t*)src_v)[x / 2] - 128;
};
case OMX_COLOR_FormatYUV420Planar16:
return [](void *src_y, void *src_u, void *src_v, size_t x,
signed *y1, signed *y2, signed *u, signed *v) {
*y1 = (signed)(((uint16_t*)src_y)[x] >> 2);
*y2 = (signed)(((uint16_t*)src_y)[x + 1] >> 2);
*u = (signed)(((uint16_t*)src_u)[x / 2] >> 2) - 128;
*v = (signed)(((uint16_t*)src_v)[x / 2] >> 2) - 128;
};
default:
TRESPASS();
}
return nullptr;
}
// TRICKY: this method only supports RGBA_1010102 output for 10-bit sources, and all other outputs
// for 8-bit sources as the type of kAdjustedClip is hardcoded based on output, not input.
std::function<void (void *, bool, signed, signed, signed, signed, signed, signed)>
getWriteToDst(OMX_COLOR_FORMATTYPE dstFormat, void *kAdjustedClip) {
switch ((int)dstFormat) {
case OMX_COLOR_Format16bitRGB565:
{
return [kAdjustedClip](void *dst_ptr, bool uncropped,
signed r1, signed g1, signed b1,
signed r2, signed g2, signed b2) {
uint32_t rgb1 =
((((uint8_t *)kAdjustedClip)[r1] >> 3) << 11)
| ((((uint8_t *)kAdjustedClip)[g1] >> 2) << 5)
| (((uint8_t *)kAdjustedClip)[b1] >> 3);
if (uncropped) {
uint32_t rgb2 =
((((uint8_t *)kAdjustedClip)[r2] >> 3) << 11)
| ((((uint8_t *)kAdjustedClip)[g2] >> 2) << 5)
| (((uint8_t *)kAdjustedClip)[b2] >> 3);
*(uint32_t *)dst_ptr = (rgb2 << 16) | rgb1;
} else {
*(uint16_t *)dst_ptr = rgb1;
}
};
}
case OMX_COLOR_Format32BitRGBA8888:
{
return [kAdjustedClip](void *dst_ptr, bool uncropped,
signed r1, signed g1, signed b1,
signed r2, signed g2, signed b2) {
((uint32_t *)dst_ptr)[0] =
(((uint8_t *)kAdjustedClip)[r1])
| (((uint8_t *)kAdjustedClip)[g1] << 8)
| (((uint8_t *)kAdjustedClip)[b1] << 16)
| (0xFF << 24);
if (uncropped) {
((uint32_t *)dst_ptr)[1] =
(((uint8_t *)kAdjustedClip)[r2])
| (((uint8_t *)kAdjustedClip)[g2] << 8)
| (((uint8_t *)kAdjustedClip)[b2] << 16)
| (0xFF << 24);
}
};
}
case OMX_COLOR_Format32bitBGRA8888:
{
return [kAdjustedClip](void *dst_ptr, bool uncropped,
signed r1, signed g1, signed b1,
signed r2, signed g2, signed b2) {
((uint32_t *)dst_ptr)[0] =
(((uint8_t *)kAdjustedClip)[b1])
| (((uint8_t *)kAdjustedClip)[g1] << 8)
| (((uint8_t *)kAdjustedClip)[r1] << 16)
| (0xFF << 24);
if (uncropped) {
((uint32_t *)dst_ptr)[1] =
(((uint8_t *)kAdjustedClip)[b2])
| (((uint8_t *)kAdjustedClip)[g2] << 8)
| (((uint8_t *)kAdjustedClip)[r2] << 16)
| (0xFF << 24);
}
};
}
case COLOR_Format32bitABGR2101010:
{
return [kAdjustedClip](void *dst_ptr, bool uncropped,
signed r1, signed g1, signed b1,
signed r2, signed g2, signed b2) {
((uint32_t *)dst_ptr)[0] =
(((uint16_t *)kAdjustedClip)[r1])
| (((uint16_t *)kAdjustedClip)[g1] << 10)
| (((uint16_t *)kAdjustedClip)[b1] << 20)
| (3 << 30);
if (uncropped) {
((uint32_t *)dst_ptr)[1] =
(((uint16_t *)kAdjustedClip)[r2])
| (((uint16_t *)kAdjustedClip)[g2] << 10)
| (((uint16_t *)kAdjustedClip)[b2] << 20)
| (3 << 30);
}
};
}
default:
TRESPASS();
}
return nullptr;
}
status_t ColorConverter::convertYUV420Planar(
const BitmapParams &src, const BitmapParams &dst) {
const struct Coeffs *matrix = getMatrix();
if (!matrix) {
return ERROR_UNSUPPORTED;
}
signed _b_u = matrix->_b_u;
signed _neg_g_u = -matrix->_g_u;
signed _neg_g_v = -matrix->_g_v;
signed _r_v = matrix->_r_v;
signed _y = matrix->_y;
signed _c16 = mSrcColorSpace.mRange == ColorUtils::kColorRangeLimited ? 16 : 0;
uint8_t *kAdjustedClip = initClip();
auto readFromSrc = getReadFromSrc(mSrcFormat);
auto writeToDst = getWriteToDst(mDstFormat, (void *)kAdjustedClip);
uint8_t *dst_ptr = (uint8_t *)dst.mBits
+ dst.mCropTop * dst.mStride + dst.mCropLeft * dst.mBpp;
uint8_t *src_y = (uint8_t *)src.mBits
+ src.mCropTop * src.mStride + src.mCropLeft * src.mBpp;
uint8_t *src_u = (uint8_t *)src.mBits + src.mStride * src.mHeight
+ (src.mCropTop / 2) * (src.mStride / 2) + src.mCropLeft / 2 * src.mBpp;
uint8_t *src_v = src_u + (src.mStride / 2) * (src.mHeight / 2);
for (size_t y = 0; y < src.cropHeight(); ++y) {
for (size_t x = 0; x < src.cropWidth(); x += 2) {
signed y1, y2, u, v;
readFromSrc(src_y, src_u, src_v, x, &y1, &y2, &u, &v);
signed u_b = u * _b_u;
signed u_g = u * _neg_g_u;
signed v_g = v * _neg_g_v;
signed v_r = v * _r_v;
signed tmp1 = (y1 - _c16) * _y + 128;
signed b1 = (tmp1 + u_b) / 256;
signed g1 = (tmp1 + v_g + u_g) / 256;
signed r1 = (tmp1 + v_r) / 256;
signed tmp2 = (y2 - _c16) * _y + 128;
signed b2 = (tmp2 + u_b) / 256;
signed g2 = (tmp2 + v_g + u_g) / 256;
signed r2 = (tmp2 + v_r) / 256;
bool uncropped = x + 1 < src.cropWidth();
writeToDst(dst_ptr + x * dst.mBpp, uncropped, r1, g1, b1, r2, g2, b2);
}
src_y += src.mStride;
if (y & 1) {
src_u += src.mStride / 2;
src_v += src.mStride / 2;
}
dst_ptr += dst.mStride;
}
return OK;
}
status_t ColorConverter::convertYUV420Planar16(
const BitmapParams &src, const BitmapParams &dst) {
if (mDstFormat == OMX_COLOR_FormatYUV444Y410) {
return convertYUV420Planar16ToY410(src, dst);
}
return convertYUV420Planar(src, dst);
}
status_t ColorConverter::convertYUVP010(
const BitmapParams &src, const BitmapParams &dst) {
if (mDstFormat == COLOR_Format32bitABGR2101010) {
return convertYUVP010ToRGBA1010102(src, dst);
}
return ERROR_UNSUPPORTED;
}
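// P010 keeps each 10-bit sample in the upper bits of a 16-bit word (the low 6 bits are unused),
// hence the ">> 6" below; OMX_COLOR_FormatYUV420Planar16, by contrast, keeps the sample in the
// low 10 bits (see the ">> 2" reduction to 8 bits in getReadFromSrc above).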
status_t ColorConverter::convertYUVP010ToRGBA1010102(
const BitmapParams &src, const BitmapParams &dst) {
const struct Coeffs *matrix = getMatrix();
if (!matrix) {
return ERROR_UNSUPPORTED;
}
signed _b_u = matrix->_b_u;
signed _neg_g_u = -matrix->_g_u;
signed _neg_g_v = -matrix->_g_v;
signed _r_v = matrix->_r_v;
signed _y = matrix->_y;
signed _c16 = mSrcColorSpace.mRange == ColorUtils::kColorRangeLimited ? 64 : 0;
uint16_t *kAdjustedClip10bit = initClip10Bit();
// auto readFromSrc = getReadFromSrc(mSrcFormat);
auto writeToDst = getWriteToDst(mDstFormat, (void *)kAdjustedClip10bit);
uint8_t *dst_ptr = (uint8_t *)dst.mBits
+ dst.mCropTop * dst.mStride + dst.mCropLeft * dst.mBpp;
uint16_t *src_y = (uint16_t *)((uint8_t *)src.mBits
+ src.mCropTop * src.mStride + src.mCropLeft * src.mBpp);
uint16_t *src_uv = (uint16_t *)((uint8_t *)src.mBits
+ src.mStride * src.mHeight
+ (src.mCropTop / 2) * src.mStride + src.mCropLeft * src.mBpp);
for (size_t y = 0; y < src.cropHeight(); ++y) {
for (size_t x = 0; x < src.cropWidth(); x += 2) {
signed y1, y2, u, v;
y1 = (src_y[x] >> 6) - _c16;
y2 = (src_y[x + 1] >> 6) - _c16;
u = int(src_uv[x] >> 6) - 512;
v = int(src_uv[x + 1] >> 6) - 512;
signed u_b = u * _b_u;
signed u_g = u * _neg_g_u;
signed v_g = v * _neg_g_v;
signed v_r = v * _r_v;
signed tmp1 = y1 * _y + 128;
signed b1 = (tmp1 + u_b) / 256;
signed g1 = (tmp1 + v_g + u_g) / 256;
signed r1 = (tmp1 + v_r) / 256;
signed tmp2 = y2 * _y + 128;
signed b2 = (tmp2 + u_b) / 256;
signed g2 = (tmp2 + v_g + u_g) / 256;
signed r2 = (tmp2 + v_r) / 256;
bool uncropped = x + 1 < src.cropWidth();
writeToDst(dst_ptr + x * dst.mBpp, uncropped, r1, g1, b1, r2, g2, b2);
}
src_y += src.mStride / 2;
if (y & 1) {
src_uv += src.mStride / 2;
}
dst_ptr += dst.mStride;
}
return OK;
}
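// Both convertYUV420Planar16ToY410 implementations below pack each output pixel as a 32-bit
// Y410 word with U in bits [9:0], Y in bits [19:10] and V in bits [29:20]; each chroma pair is
// reused for two horizontally adjacent luma samples rather than interpolated.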
#if !USE_NEON_Y410
status_t ColorConverter::convertYUV420Planar16ToY410(
const BitmapParams &src, const BitmapParams &dst) {
uint8_t *dst_ptr = (uint8_t *)dst.mBits
+ dst.mCropTop * dst.mStride + dst.mCropLeft * dst.mBpp;
const uint8_t *src_y =
(const uint8_t *)src.mBits + src.mCropTop * src.mStride + src.mCropLeft * src.mBpp;
const uint8_t *src_u =
(const uint8_t *)src.mBits + src.mStride * src.mHeight
+ (src.mCropTop / 2) * (src.mStride / 2) + (src.mCropLeft / 2) * src.mBpp;
const uint8_t *src_v =
src_u + (src.mStride / 2) * (src.mHeight / 2);
// Converting two lines at a time, slightly faster
for (size_t y = 0; y < src.cropHeight(); y += 2) {
uint32_t *dst_top = (uint32_t *) dst_ptr;
uint32_t *dst_bot = (uint32_t *) (dst_ptr + dst.mStride);
uint16_t *ptr_ytop = (uint16_t*) src_y;
uint16_t *ptr_ybot = (uint16_t*) (src_y + src.mStride);
uint16_t *ptr_u = (uint16_t*) src_u;
uint16_t *ptr_v = (uint16_t*) src_v;
uint32_t u01, v01, y01, y23, y45, y67, uv0, uv1;
size_t x = 0;
// x % 4 is always 0 so x + 3 will never overflow.
for (; x + 3 < src.cropWidth(); x += 4) {
u01 = *((uint32_t*)ptr_u); ptr_u += 2;
v01 = *((uint32_t*)ptr_v); ptr_v += 2;
y01 = *((uint32_t*)ptr_ytop); ptr_ytop += 2;
y23 = *((uint32_t*)ptr_ytop); ptr_ytop += 2;
y45 = *((uint32_t*)ptr_ybot); ptr_ybot += 2;
y67 = *((uint32_t*)ptr_ybot); ptr_ybot += 2;
uv0 = (u01 & 0x3FF) | ((v01 & 0x3FF) << 20);
uv1 = (u01 >> 16) | ((v01 >> 16) << 20);
*dst_top++ = ((y01 & 0x3FF) << 10) | uv0;
*dst_top++ = ((y01 >> 16) << 10) | uv0;
*dst_top++ = ((y23 & 0x3FF) << 10) | uv1;
*dst_top++ = ((y23 >> 16) << 10) | uv1;
*dst_bot++ = ((y45 & 0x3FF) << 10) | uv0;
*dst_bot++ = ((y45 >> 16) << 10) | uv0;
*dst_bot++ = ((y67 & 0x3FF) << 10) | uv1;
*dst_bot++ = ((y67 >> 16) << 10) | uv1;
}
// There should be at most 2 more pixels to process. Note that we don't
// need to handle an odd trailing pixel as the buffer width is always even.
if (x < src.cropWidth()) {
u01 = *ptr_u;
v01 = *ptr_v;
y01 = *((uint32_t*)ptr_ytop);
y45 = *((uint32_t*)ptr_ybot);
uv0 = (u01 & 0x3FF) | ((v01 & 0x3FF) << 20);
*dst_top++ = ((y01 & 0x3FF) << 10) | uv0;
*dst_top++ = ((y01 >> 16) << 10) | uv0;
*dst_bot++ = ((y45 & 0x3FF) << 10) | uv0;
*dst_bot++ = ((y45 >> 16) << 10) | uv0;
}
src_y += src.mStride * 2;
src_u += src.mStride / 2;
src_v += src.mStride / 2;
dst_ptr += dst.mStride * 2;
}
return OK;
}
#else
status_t ColorConverter::convertYUV420Planar16ToY410(
const BitmapParams &src, const BitmapParams &dst) {
uint8_t *out = (uint8_t *)dst.mBits
+ dst.mCropTop * dst.mStride + dst.mCropLeft * dst.mBpp;
const uint8_t *src_y =
(const uint8_t *)src.mBits + src.mCropTop * src.mStride + src.mCropLeft * src.mBpp;
const uint8_t *src_u =
(const uint8_t *)src.mBits + src.mStride * src.mHeight
+ (src.mCropTop / 2) * (src.mStride / 2) + (src.mCropLeft / 2) * src.mBpp;
const uint8_t *src_v =
src_u + (src.mStride / 2) * (src.mHeight / 2);
for (size_t y = 0; y < src.cropHeight(); y++) {
uint16_t *ptr_y = (uint16_t*) src_y;
uint16_t *ptr_u = (uint16_t*) src_u;
uint16_t *ptr_v = (uint16_t*) src_v;
uint32_t *ptr_out = (uint32_t *) out;
// Process 16 pixels at a time.
uint32_t *ptr_limit = ptr_out + (src.cropWidth() & ~15);
while (ptr_out < ptr_limit) {
uint16x4_t u0123 = vld1_u16(ptr_u); ptr_u += 4;
uint16x4_t u4567 = vld1_u16(ptr_u); ptr_u += 4;
uint16x4_t v0123 = vld1_u16(ptr_v); ptr_v += 4;
uint16x4_t v4567 = vld1_u16(ptr_v); ptr_v += 4;
uint16x4_t y0123 = vld1_u16(ptr_y); ptr_y += 4;
uint16x4_t y4567 = vld1_u16(ptr_y); ptr_y += 4;
uint16x4_t y89ab = vld1_u16(ptr_y); ptr_y += 4;
uint16x4_t ycdef = vld1_u16(ptr_y); ptr_y += 4;
uint32x2_t uvtempl;
uint32x4_t uvtempq;
uvtempq = vaddw_u16(vshll_n_u16(v0123, 20), u0123);
uvtempl = vget_low_u32(uvtempq);
uint32x4_t uv0011 = vreinterpretq_u32_u64(
vaddw_u32(vshll_n_u32(uvtempl, 32), uvtempl));
uvtempl = vget_high_u32(uvtempq);
uint32x4_t uv2233 = vreinterpretq_u32_u64(
vaddw_u32(vshll_n_u32(uvtempl, 32), uvtempl));
uvtempq = vaddw_u16(vshll_n_u16(v4567, 20), u4567);
uvtempl = vget_low_u32(uvtempq);
uint32x4_t uv4455 = vreinterpretq_u32_u64(
vaddw_u32(vshll_n_u32(uvtempl, 32), uvtempl));
uvtempl = vget_high_u32(uvtempq);
uint32x4_t uv6677 = vreinterpretq_u32_u64(
vaddw_u32(vshll_n_u32(uvtempl, 32), uvtempl));
uint32x4_t dsttemp;
dsttemp = vorrq_u32(uv0011, vshll_n_u16(y0123, 10));
vst1q_u32(ptr_out, dsttemp); ptr_out += 4;
dsttemp = vorrq_u32(uv2233, vshll_n_u16(y4567, 10));
vst1q_u32(ptr_out, dsttemp); ptr_out += 4;
dsttemp = vorrq_u32(uv4455, vshll_n_u16(y89ab, 10));
vst1q_u32(ptr_out, dsttemp); ptr_out += 4;
dsttemp = vorrq_u32(uv6677, vshll_n_u16(ycdef, 10));
vst1q_u32(ptr_out, dsttemp); ptr_out += 4;
}
src_y += src.mStride;
if (y & 1) {
src_u += src.mStride / 2;
src_v += src.mStride / 2;
}
out += dst.mStride;
}
// Process the left-overs outside the loop, two pixels at a time. Note that we don't
// need to handle an odd trailing pixel as the buffer width is always even.
if (src.cropWidth() & 15) {
size_t xstart = (src.cropWidth() & ~15);
uint8_t *out = (uint8_t *)dst.mBits + dst.mCropTop * dst.mStride
+ (dst.mCropLeft + xstart) * dst.mBpp;
const uint8_t *src_y = (const uint8_t *)src.mBits + src.mCropTop * src.mStride
+ (src.mCropLeft + xstart) * src.mBpp;
const uint8_t *src_u = (const uint8_t *)src.mBits + src.mStride * src.mHeight
+ (src.mCropTop / 2) * (src.mStride / 2)
+ ((src.mCropLeft + xstart) / 2) * src.mBpp;
const uint8_t *src_v = src_u + (src.mStride / 2) * (src.mHeight / 2);
for (size_t y = 0; y < src.cropHeight(); y++) {
uint16_t *ptr_y = (uint16_t*) src_y;
uint16_t *ptr_u = (uint16_t*) src_u;
uint16_t *ptr_v = (uint16_t*) src_v;
uint32_t *ptr_out = (uint32_t *) out;
for (size_t x = xstart; x < src.cropWidth(); x += 2) {
uint16_t u = *ptr_u++;
uint16_t v = *ptr_v++;
uint32_t y01 = *((uint32_t*)ptr_y); ptr_y += 2;
uint32_t uv = u | (((uint32_t)v) << 20);
*ptr_out++ = ((y01 & 0x3FF) << 10) | uv;
*ptr_out++ = ((y01 >> 16) << 10) | uv;
}
src_y += src.mStride;
if (y & 1) {
src_u += src.mStride / 2;
src_v += src.mStride / 2;
}
out += dst.mStride;
}
}
return OK;
}
#endif // USE_NEON_Y410
status_t ColorConverter::convertQCOMYUV420SemiPlanar(
const BitmapParams &src, const BitmapParams &dst) {
const uint8_t *src_y =
(const uint8_t *)src.mBits + src.mCropTop * src.mWidth + src.mCropLeft;
const uint8_t *src_u =
(const uint8_t *)src_y + src.mWidth * src.mHeight
+ src.mCropTop * src.mWidth + src.mCropLeft;
/* QCOMYUV420SemiPlanar is NV21, while MediaCodec uses NV12 */
return convertYUV420SemiPlanarBase(
src, dst, src_y, src_u, src.mWidth /* row_inc */, true /* isNV21 */);
}
status_t ColorConverter::convertTIYUV420PackedSemiPlanar(
const BitmapParams &src, const BitmapParams &dst) {
const uint8_t *src_y =
(const uint8_t *)src.mBits + src.mCropTop * src.mWidth + src.mCropLeft;
const uint8_t *src_u =
(const uint8_t *)src_y + src.mWidth * (src.mHeight - src.mCropTop / 2);
return convertYUV420SemiPlanarBase(
src, dst, src_y, src_u, src.mWidth /* row_inc */);
}
status_t ColorConverter::convertYUV420SemiPlanar(
const BitmapParams &src, const BitmapParams &dst) {
const uint8_t *src_y =
(const uint8_t *)src.mBits + src.mCropTop * src.mStride + src.mCropLeft;
const uint8_t *src_u =
(const uint8_t *)src.mBits + src.mHeight * src.mStride +
(src.mCropTop / 2) * src.mStride + src.mCropLeft;
return convertYUV420SemiPlanarBase(
src, dst, src_y, src_u, src.mStride /* row_inc */);
}
status_t ColorConverter::convertYUV420SemiPlanarBase(
const BitmapParams &src, const BitmapParams &dst,
const uint8_t *src_y, const uint8_t *src_u, size_t row_inc, bool isNV21) {
const struct Coeffs *matrix = getMatrix();
if (!matrix) {
return ERROR_UNSUPPORTED;
}
signed _b_u = matrix->_b_u;
signed _neg_g_u = -matrix->_g_u;
signed _neg_g_v = -matrix->_g_v;
signed _r_v = matrix->_r_v;
signed _y = matrix->_y;
signed _c16 = mSrcColorSpace.mRange == ColorUtils::kColorRangeLimited ? 16 : 0;
uint8_t *kAdjustedClip = initClip();
uint16_t *dst_ptr = (uint16_t *)((uint8_t *)
dst.mBits + dst.mCropTop * dst.mStride + dst.mCropLeft * dst.mBpp);
for (size_t y = 0; y < src.cropHeight(); ++y) {
for (size_t x = 0; x < src.cropWidth(); x += 2) {
signed y1 = (signed)src_y[x] - _c16;
signed y2 = (signed)src_y[x + 1] - _c16;
signed u = (signed)src_u[(x & ~1) + isNV21] - 128;
signed v = (signed)src_u[(x & ~1) + !isNV21] - 128;
signed u_b = u * _b_u;
signed u_g = u * _neg_g_u;
signed v_g = v * _neg_g_v;
signed v_r = v * _r_v;
signed tmp1 = y1 * _y + 128;
signed b1 = (tmp1 + u_b) / 256;
signed g1 = (tmp1 + v_g + u_g) / 256;
signed r1 = (tmp1 + v_r) / 256;
signed tmp2 = y2 * _y + 128;
signed b2 = (tmp2 + u_b) / 256;
signed g2 = (tmp2 + v_g + u_g) / 256;
signed r2 = (tmp2 + v_r) / 256;
uint32_t rgb1 =
((kAdjustedClip[r1] >> 3) << 11)
| ((kAdjustedClip[g1] >> 2) << 5)
| (kAdjustedClip[b1] >> 3);
uint32_t rgb2 =
((kAdjustedClip[r2] >> 3) << 11)
| ((kAdjustedClip[g2] >> 2) << 5)
| (kAdjustedClip[b2] >> 3);
if (x + 1 < src.cropWidth()) {
*(uint32_t *)(&dst_ptr[x]) = (rgb2 << 16) | rgb1;
} else {
dst_ptr[x] = rgb1;
}
}
src_y += row_inc;
if (y & 1) {
src_u += row_inc;
}
dst_ptr = (uint16_t*)((uint8_t*)dst_ptr + dst.mStride);
}
return OK;
}
uint8_t *ColorConverter::initClip() {
if (mClip == NULL) {
mClip = new uint8_t[CLIP_RANGE_MAX_8BIT - CLIP_RANGE_MIN_8BIT + 1];
for (signed i = CLIP_RANGE_MIN_8BIT; i <= CLIP_RANGE_MAX_8BIT; ++i) {
mClip[i - CLIP_RANGE_MIN_8BIT] = (i < 0) ? 0 : (i > 255) ? 255 : (uint8_t)i;
}
}
return &mClip[-CLIP_RANGE_MIN_8BIT];
}
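// Illustration: initClip() returns a pointer into the middle of the table, so out-of-range
// component values index the saturated ends, e.g. kAdjustedClip[-20] == 0 and
// kAdjustedClip[300] == 255, while in-range values map to themselves (kAdjustedClip[77] == 77).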
uint16_t *ColorConverter::initClip10Bit() {
if (mClip10Bit == NULL) {
mClip10Bit = new uint16_t[CLIP_RANGE_MAX_10BIT - CLIP_RANGE_MIN_10BIT + 1];
for (signed i = CLIP_RANGE_MIN_10BIT; i <= CLIP_RANGE_MAX_10BIT; ++i) {
mClip10Bit[i - CLIP_RANGE_MIN_10BIT] = (i < 0) ? 0 : (i > 1023) ? 1023 : (uint16_t)i;
}
}
return &mClip10Bit[-CLIP_RANGE_MIN_10BIT];
}
} // namespace android