Merge "Snap for 10763390 from 08f747f785e3032936ed9357560bc0ad39658287 to androidx-benchmark-release" into androidx-benchmark-release
diff --git a/Android.bp b/Android.bp
index 8a94da2..2dd8561 100644
--- a/Android.bp
+++ b/Android.bp
@@ -72,7 +72,6 @@
"source/scale_neon.cc",
"source/scale_neon64.cc",
"source/scale_rgb.cc",
- "source/scale_rvv.cc",
"source/scale_uv.cc",
"source/video_common.cc",
],
@@ -83,7 +82,6 @@
"-Wno-unused-parameter",
"-fexceptions",
"-DHAVE_JPEG",
- "-DLIBYUV_UNLIMITED_DATA",
],
arch: {
diff --git a/BUILD b/BUILD
new file mode 100644
index 0000000..3145e36
--- /dev/null
+++ b/BUILD
@@ -0,0 +1,14 @@
+# Copyright 2011 Google Inc. All Rights Reserved.
+#
+# Description:
+# The libyuv package provides an implementation of YUV image conversion and
+# scaling.
+#
+# This library is used by Talk Video and WebRTC.
+#
+
+licenses(['notice']) # 3-clause BSD
+
+exports_files(['LICENSE'])
+
+package(default_visibility = ['//visibility:public'])
diff --git a/BUILD.gn b/BUILD.gn
index df019b8..adaae9d 100644
--- a/BUILD.gn
+++ b/BUILD.gn
@@ -36,12 +36,6 @@
if (libyuv_disable_rvv) {
defines += [ "LIBYUV_DISABLE_RVV" ]
}
- if (!libyuv_use_lsx) {
- defines += [ "LIBYUV_DISABLE_LSX" ]
- }
- if (!libyuv_use_lasx) {
- defines += [ "LIBYUV_DISABLE_LASX" ]
- }
}
# This target is built when no specific target is specified on the command line.
@@ -80,14 +74,6 @@
deps += [ ":libyuv_msa" ]
}
- if (libyuv_use_lsx) {
- deps += [ ":libyuv_lsx"]
- }
-
- if (libyuv_use_lasx) {
- deps += [ ":libyuv_lasx"]
- }
-
if (!is_ios && !libyuv_disable_jpeg) {
# Make sure that clients of libyuv link with libjpeg. This can't go in
# libyuv_internal because in Windows x64 builds that will generate a clang
@@ -156,7 +142,6 @@
"source/scale_common.cc",
"source/scale_gcc.cc",
"source/scale_rgb.cc",
- "source/scale_rvv.cc",
"source/scale_uv.cc",
"source/scale_win.cc",
"source/video_common.cc",
@@ -250,44 +235,6 @@
}
}
-if (libyuv_use_lsx) {
- static_library("libyuv_lsx") {
- sources = [
- # LSX Source Files
- "source/row_lsx.cc",
- "source/rotate_lsx.cc",
- "source/scale_lsx.cc",
- ]
-
- cflags_cc = [
- "-mlsx",
- "-Wno-c++11-narrowing",
- ]
-
- deps = [ ":libyuv_internal" ]
-
- public_configs = [ ":libyuv_config" ]
- }
-}
-
-if (libyuv_use_lasx) {
- static_library("libyuv_lasx") {
- sources = [
- # LASX Source Files
- "source/row_lasx.cc",
- ]
-
- cflags_cc = [
- "-mlasx",
- "-Wno-c++11-narrowing",
- ]
-
- deps = [ ":libyuv_internal" ]
-
- public_configs = [ ":libyuv_config" ]
- }
-}
-
if (libyuv_include_tests) {
config("libyuv_unittest_warnings_config") {
if (!is_win) {
diff --git a/CM_linux_packages.cmake b/CM_linux_packages.cmake
index a073edf..5f676f8 100644
--- a/CM_linux_packages.cmake
+++ b/CM_linux_packages.cmake
@@ -8,7 +8,7 @@
SET ( YUV_VER_MINOR 0 )
SET ( YUV_VER_PATCH ${YUV_VERSION_NUMBER} )
SET ( YUV_VERSION ${YUV_VER_MAJOR}.${YUV_VER_MINOR}.${YUV_VER_PATCH} )
-MESSAGE ( VERBOSE "Building ver.: ${YUV_VERSION}" )
+MESSAGE ( "Building ver.: ${YUV_VERSION}" )
# is this a 32-bit or 64-bit build?
IF ( CMAKE_SIZEOF_VOID_P EQUAL 8 )
@@ -45,7 +45,7 @@
SET ( YUV_SYSTEM_NAME "amd-${YUV_BIT_SIZE}" )
ENDIF ()
ENDIF ()
-MESSAGE ( VERBOSE "Packaging for: ${YUV_SYSTEM_NAME}" )
+MESSAGE ( "Packaging for: ${YUV_SYSTEM_NAME}" )
# define all the variables needed by CPack to create .deb and .rpm packages
SET ( CPACK_PACKAGE_VENDOR "Frank Barchard" )
diff --git a/DEPS b/DEPS
index b6a7bc7..a7bec8d 100644
--- a/DEPS
+++ b/DEPS
@@ -5,20 +5,20 @@
vars = {
'chromium_git': 'https://chromium.googlesource.com',
- 'chromium_revision': 'af3d01376bec75a68f90160bfd38057d60510a2b',
- 'gn_version': 'git_revision:fae280eabe5d31accc53100137459ece19a7a295',
+ 'chromium_revision': 'd1501576384de23ddf8d8815ee7c95be2f708de5',
+ 'gn_version': 'git_revision:e3978de3e8dafb50a2b11efa784e08699a43faf8',
# ninja CIPD package version.
# https://chrome-infra-packages.appspot.com/p/infra/3pp/tools/ninja
'ninja_version': 'version:2@1.11.1.chromium.6',
# reclient CIPD package version
- 'reclient_version': 're_client_version:0.110.0.43ec6b1-gomaip',
+ 'reclient_version': 're_client_version:0.107.1.0b39c4c-gomaip',
# Keep the Chromium default of generating location tags.
'generate_location_tags': True,
# By default, download the fuchsia sdk from the public sdk directory.
- 'fuchsia_sdk_cipd_prefix': 'fuchsia/sdk/core/',
- 'fuchsia_version': 'version:13.20230714.0.1',
+ 'fuchsia_sdk_cipd_prefix': 'fuchsia/sdk/gn/',
+ 'fuchsia_version': 'version:12.20230530.1.1',
# By default, download the fuchsia images from the fuchsia GCS bucket.
'fuchsia_images_bucket': 'fuchsia',
'checkout_fuchsia': False,
@@ -31,13 +31,13 @@
deps = {
'src/build':
- Var('chromium_git') + '/chromium/src/build' + '@' + '860dae780c100c2d001dc6ee16625b17bc84c10f',
+ Var('chromium_git') + '/chromium/src/build' + '@' + 'd0c2b4cf4fdd43866e066fb6722099aa8bf4ce79',
'src/buildtools':
- Var('chromium_git') + '/chromium/src/buildtools' + '@' + 'ca163845c76db63454f99436f6cd2bf03739dc24',
+ Var('chromium_git') + '/chromium/src/buildtools' + '@' + 'edbefcee3d2cc45cdb0c60c2b01b673f8ba728bc',
'src/testing':
- Var('chromium_git') + '/chromium/src/testing' + '@' + '184b068a94f24ddf0b4299d48062779e1fc1950e',
+ Var('chromium_git') + '/chromium/src/testing' + '@' + 'a13817e1ea0255a375d13aeb3bb2527bd528495b',
'src/third_party':
- Var('chromium_git') + '/chromium/src/third_party' + '@' + '2dc4b18abd1003ce7b1eda509dc96f12d49a9667',
+ Var('chromium_git') + '/chromium/src/third_party' + '@' + '824e26c9fcbd00fccf6cdb712f8f127aae133042',
'src/buildtools/linux64': {
'packages': [
@@ -82,19 +82,17 @@
'dep_type': 'cipd',
},
- # TODO(chromium:1458042): Remove these paths, when chromium builds files
- # have moved to third_party/lib*/src paths.
+ 'src/buildtools/clang_format/script':
+ Var('chromium_git') + '/external/github.com/llvm/llvm-project/clang/tools/clang-format.git' + '@' + 'f97059df7f8b205064625cdb5f97b56668a125ef',
'src/buildtools/third_party/libc++/trunk':
- Var('chromium_git') + '/external/github.com/llvm/llvm-project/libcxx.git' + '@' + '84fb809dd6dae36d556dc0bb702c6cc2ce9d4b80',
+ Var('chromium_git') + '/external/github.com/llvm/llvm-project/libcxx.git' + '@' + 'f8279b01085b800724f5c5629dc365b9f040dc53',
'src/buildtools/third_party/libc++abi/trunk':
- Var('chromium_git') + '/external/github.com/llvm/llvm-project/libcxxabi.git' + '@' + 'd4760c0af99ccc9bce077960d5ddde4d66146c05',
+ Var('chromium_git') + '/external/github.com/llvm/llvm-project/libcxxabi.git' + '@' + '5c8dbff7a4911fe1e0af0bc1628891e4187a3c90',
'src/buildtools/third_party/libunwind/trunk':
- Var('chromium_git') + '/external/github.com/llvm/llvm-project/libunwind.git' + '@' + '6c0013015be8a2be9de4b1e54cdc9d576b1d0729',
+ Var('chromium_git') + '/external/github.com/llvm/llvm-project/libunwind.git' + '@' + 'cd144ced35285edaa064a91561969e5b22c219b1',
'src/third_party/catapult':
- Var('chromium_git') + '/catapult.git' + '@' + 'fa05d995e152efdae488a2aeba397cd609fdbc9d',
- 'src/third_party/clang-format/script':
- Var('chromium_git') + '/external/github.com/llvm/llvm-project/clang/tools/clang-format.git' + '@' + 'f97059df7f8b205064625cdb5f97b56668a125ef',
+ Var('chromium_git') + '/catapult.git' + '@' + '9f3ef9c2eae9b1adabde88efe5dcc438ba76e205',
'src/third_party/colorama/src':
Var('chromium_git') + '/external/colorama.git' + '@' + '3de9f013df4b470069d03d250224062e8cf15c49',
'src/third_party/cpu_features/src': {
@@ -102,29 +100,19 @@
'condition': 'checkout_android',
},
'src/third_party/depot_tools':
- Var('chromium_git') + '/chromium/tools/depot_tools.git' + '@' + 'd3e43dd4319ba169c0aaf44547eecf861f2fe5da',
+ Var('chromium_git') + '/chromium/tools/depot_tools.git' + '@' + '05ab73be51774f098eb580eda6e96a49e1010b1b',
'src/third_party/freetype/src':
- Var('chromium_git') + '/chromium/src/third_party/freetype2.git' + '@' + '9e3c5d7e183c1a8d5ed8868d7d28ef18d3ec9ec8',
- 'third_party/fuchsia-gn-sdk': {
- 'url': Var('chromium_git') + '/chromium/src/third_party/fuchsia-gn-sdk.git' + '@' + '0d6902558d92fe3d49ba9a8f638ddea829be595b',
- 'condition': 'checkout_fuchsia',
- },
+ Var('chromium_git') + '/chromium/src/third_party/freetype2.git' + '@' + '80a507a6b8e3d2906ad2c8ba69329bd2fb2a85ef',
'src/third_party/googletest/src':
Var('chromium_git') + '/external/github.com/google/googletest.git' + '@' + 'af29db7ec28d6df1c7f0f745186884091e602e07',
'src/third_party/harfbuzz-ng/src':
- Var('chromium_git') + '/external/github.com/harfbuzz/harfbuzz.git' + '@' + 'db700b5670d9475cc8ed4880cc9447b232c5e432',
- 'src/third_party/libc++/src':
- Var('chromium_git') + '/external/github.com/llvm/llvm-project/libcxx.git' + '@' + '84fb809dd6dae36d556dc0bb702c6cc2ce9d4b80',
- 'src/third_party/libc++abi/src':
- Var('chromium_git') + '/external/github.com/llvm/llvm-project/libcxxabi.git' + '@' + '8d21803b9076b16d46c32e2f10da191ee758520c',
- 'src/third_party/libunwind/src':
- Var('chromium_git') + '/external/github.com/llvm/llvm-project/libunwind.git' + '@' + 'f1c687e0aaf0d70b9a53a150e9be5cb63af9215f',
+ Var('chromium_git') + '/external/github.com/harfbuzz/harfbuzz.git' + '@' + '8df5cdbcda495a582e72a7e2ce35d6106401edce',
'src/third_party/libjpeg_turbo':
- Var('chromium_git') + '/chromium/deps/libjpeg_turbo.git' + '@' + '30bdb85e302ecfc52593636b2f44af438e05e784',
+ Var('chromium_git') + '/chromium/deps/libjpeg_turbo.git' + '@' + 'aa4075f116e4312537d0d3e9dbd5e31096539f94',
'src/third_party/nasm':
Var('chromium_git') + '/chromium/deps/nasm.git' + '@' + '7fc833e889d1afda72c06220e5bed8fb43b2e5ce',
'src/tools':
- Var('chromium_git') + '/chromium/src/tools' + '@' + 'a76c0dbb64c603a0d45e0c6dfae3a351b6e1adf1',
+ Var('chromium_git') + '/chromium/src/tools' + '@' + '916dfffd61cbf61075c47d7b480425d7de1483fd',
# libyuv-only dependencies (not present in Chromium).
'src/third_party/gtest-parallel':
@@ -151,7 +139,7 @@
'packages': [
{
'package': 'chromium/third_party/kotlin_stdlib',
- 'version': 'Z1gsqhL967kFQecxKrRwXHbl-vwQjpv0l7PMUZ0EVO8C',
+ 'version': 'z4_AYYz2Tw5GKikuiDLTuxxf0NJVGLkC3CVcyiIpc-gC',
},
],
'condition': 'checkout_android',
@@ -161,7 +149,7 @@
'packages': [
{
'package': 'chromium/third_party/kotlinc',
- 'version': 'Rr02Gf2EkaeSs3EhSUHhPqDHSd1AzimrM6cRYUJCPjQC',
+ 'version': 'J3BAlA7yf4corBopDhlwuT9W4jR1Z9R55KD3BUTVldQC',
},
],
'condition': 'checkout_android',
@@ -169,9 +157,9 @@
},
'src/third_party/boringssl/src':
- 'https://boringssl.googlesource.com/boringssl.git' + '@' + '20a06474c0b4a16779311bfe98ba69dc2402101d',
+ 'https://boringssl.googlesource.com/boringssl.git' + '@' + 'dd5219451c3ce26221762a15d867edf43b463bb2',
'src/base': {
- 'url': Var('chromium_git') + '/chromium/src/base' + '@' + 'd407b7061bce341bb6e11b539ea86c46c949ac4c',
+ 'url': Var('chromium_git') + '/chromium/src/base' + '@' + 'b4c5ce6cb1a7c90de3fdddc80ed439fe87eab443',
'condition': 'checkout_android',
},
'src/third_party/bazel': {
@@ -194,22 +182,16 @@
'condition': 'checkout_android',
'dep_type': 'cipd',
},
- 'src/third_party/android_toolchain': {
- 'packages': [
- {
- 'package': 'chromium/third_party/android_toolchain/android_toolchain',
- 'version': 'R_8suM8m0oHbZ1awdxGXvKEFpAOETscbfZxkkMthyk8C',
- },
- ],
- 'condition': 'checkout_android',
- 'dep_type': 'cipd',
+ 'src/third_party/android_ndk': {
+ 'url': Var('chromium_git') + '/android_ndk.git' + '@' + '310956bd122ec2b96049f8d7398de6b717f3452e',
+ 'condition': 'checkout_android',
},
'src/third_party/androidx': {
'packages': [
{
'package': 'chromium/third_party/androidx',
- 'version': 'y7rF_rx56mD3FGhMiqnlbQ6HOqHJ95xUFNX1m-_a988C',
+ 'version': 'Wr5b9WJiFAzJcmjmvQIePIxk5IgpDl62kaGY_SiLxJEC',
},
],
'condition': 'checkout_android',
@@ -229,8 +211,8 @@
'src/third_party/android_sdk/public': {
'packages': [
{
- 'package': 'chromium/third_party/android_sdk/public/build-tools/34.0.0',
- 'version': 'YK9Rzw3fDzMHVzatNN6VlyoD_81amLZpN1AbmkdOd6AC',
+ 'package': 'chromium/third_party/android_sdk/public/build-tools/33.0.0',
+ 'version': '-VRKr36Uw8L_iFqqo9nevIBgNMggND5iWxjidyjnCgsC',
},
{
'package': 'chromium/third_party/android_sdk/public/emulator',
@@ -242,11 +224,11 @@
},
{
'package': 'chromium/third_party/android_sdk/public/platform-tools',
- 'version': 'HWVsGs2HCKgSVv41FsOcsfJbNcB0UFiNrF6Tc4yRArYC',
+ 'version': 'RSI3iwryh7URLGRgJHsCvUxj092woTPnKt4pwFcJ6L8C',
},
{
- 'package': 'chromium/third_party/android_sdk/public/platforms/android-34',
- 'version': 'u-bhWbTME6u-DjypTgr3ZikCyeAeU6txkR9ET6Uudc8C',
+ 'package': 'chromium/third_party/android_sdk/public/platforms/android-33',
+ 'version': 'eo5KvW6UVor92LwZai8Zulc624BQZoCu-yn7wa1z_YcC',
},
{
'package': 'chromium/third_party/android_sdk/public/platforms/android-tiramisuprivacysandbox',
@@ -348,7 +330,7 @@
},
'src/third_party/icu': {
- 'url': Var('chromium_git') + '/chromium/deps/icu.git' + '@' + 'e8c3bc9ea97d4423ad0515e5f1c064f486dae8b1',
+ 'url': Var('chromium_git') + '/chromium/deps/icu.git' + '@' + 'a2961dc659b4ae847a9c6120718cc2517ee57d9e',
},
'src/third_party/icu4j': {
'packages': [
@@ -374,7 +356,7 @@
'packages': [
{
'package': 'chromium/third_party/jdk',
- 'version': 'IivIDwNBf73mf7UwCOBceRUuDdtizMCgSOQDfUGHArsC',
+ 'version': '2Of9Pe_OdO4xoAATuiLDiMVNebKTNO3WrwJGqil4RosC',
},
],
'condition': 'checkout_android',
@@ -429,7 +411,7 @@
'packages': [
{
'package': 'chromium/third_party/r8',
- 'version': 'O1BBWiBTIeNUcraX8STMtQXVaCleu6SJJjWCcnfhPLkC',
+ 'version': '4Oq32DG2vuDh7Frxj6tH5xyi77sVgBWpvvl4hwvZRR4C',
},
],
'condition': 'checkout_android',
@@ -442,7 +424,7 @@
'packages': [
{
'package': 'chromium/third_party/r8',
- 'version': 'vw5kLlW3-suSlCKSO9OQpFWpR8oDnvQ8k1RgKNUapQYC',
+ 'version': 'PwglNZFRNPkBBXdnY9NfrZFk2ULWDTRxhV9rl2kvkpUC',
},
],
'condition': 'checkout_android',
@@ -459,7 +441,7 @@
'dep_type': 'cipd',
},
'src/third_party/requests/src': {
- 'url': Var('chromium_git') + '/external/github.com/kennethreitz/requests.git' + '@' + 'c7e0fc087ceeadb8b4c84a0953a422c474093d6d',
+ 'url': Var('chromium_git') + '/external/github.com/kennethreitz/requests.git' + '@' + 'refs/tags/v2.23.0',
'condition': 'checkout_android',
},
'src/third_party/robolectric': {
@@ -486,7 +468,7 @@
'packages': [
{
'package': 'chromium/third_party/turbine',
- 'version': '2I2Nz480QsuCxpQ1lMfbigX8l5HAhX3_ykWU4TKRGo4C',
+ 'version': 'Foa7uRpVoKr4YoayCKc9EERkjpmGOE3DAUTWFLL7gKEC',
},
],
'condition': 'checkout_android',
@@ -499,7 +481,7 @@
# iOS deps:
'src/ios': {
- 'url': Var('chromium_git') + '/chromium/src/ios' + '@' + 'ddd58e86cf4ebdc0db60a5d0f3c323de49bb295c',
+ 'url': Var('chromium_git') + '/chromium/src/ios' + '@' + '241921896b64f85de9a32d461462913cbff4baeb',
'condition': 'checkout_ios'
},
@@ -1698,7 +1680,7 @@
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/com_squareup_okio_okio_jvm',
- 'version': 'version:2@3.3.0.cr1',
+ 'version': 'version:2@3.0.0.cr1',
},
],
'condition': 'checkout_android',
@@ -1709,7 +1691,7 @@
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/com_squareup_wire_wire_runtime_jvm',
- 'version': 'version:2@4.7.0.cr1',
+ 'version': 'version:2@4.5.1.cr1',
},
],
'condition': 'checkout_android',
@@ -1841,7 +1823,7 @@
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/net_bytebuddy_byte_buddy',
- 'version': 'version:2@1.14.5.cr1',
+ 'version': 'version:2@1.14.4.cr1',
},
],
'condition': 'checkout_android',
@@ -1852,7 +1834,7 @@
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/net_bytebuddy_byte_buddy_agent',
- 'version': 'version:2@1.14.5.cr1',
+ 'version': 'version:2@1.14.4.cr1',
},
],
'condition': 'checkout_android',
@@ -2061,7 +2043,7 @@
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/org_mockito_mockito_android',
- 'version': 'version:2@5.4.0.cr1',
+ 'version': 'version:2@5.3.1.cr1',
},
],
'condition': 'checkout_android',
@@ -2072,7 +2054,7 @@
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/org_mockito_mockito_core',
- 'version': 'version:2@5.4.0.cr1',
+ 'version': 'version:2@5.3.1.cr1',
},
],
'condition': 'checkout_android',
@@ -2083,7 +2065,7 @@
'packages': [
{
'package': 'chromium/third_party/android_deps/libs/org_mockito_mockito_subclass',
- 'version': 'version:2@5.4.0.cr1',
+ 'version': 'version:2@5.3.1.cr1',
},
],
'condition': 'checkout_android',
diff --git a/METADATA b/METADATA
index 7ceb7be..5508de2 100644
--- a/METADATA
+++ b/METADATA
@@ -1,19 +1,14 @@
-# This project was upgraded with external_updater.
-# Usage: tools/external_updater/updater.sh update libyuv
-# For more info, check https://cs.android.com/android/platform/superproject/+/main:tools/external_updater/README.md
-
name: "libyuv"
-description: "libyuv is an open source project that includes YUV scaling and conversion functionality."
+description:
+ "libyuv is an open source project that includes YUV scaling and conversion "
+ "functionality."
+
third_party {
url {
type: GIT
value: "https://chromium.googlesource.com/libyuv/libyuv/"
}
- version: "f0921806a293e3e008e6325a51d4ea760c39d2c1"
+ version: "2a6cb7431939faba1b40d3f08883847f0cf63572"
+ last_upgrade_date { year: 2023 month: 6 day: 1 }
license_type: NOTICE
- last_upgrade_date {
- year: 2023
- month: 9
- day: 5
- }
}
diff --git a/OWNERS b/OWNERS
index f11a7bf..e3ce5f2 100644
--- a/OWNERS
+++ b/OWNERS
@@ -1,11 +1 @@
-mbonadei@chromium.org
-fbarchard@chromium.org
-magjed@chromium.org
-wtc@google.com
-jansson@google.com
-
-per-file *.gn=mbonadei@chromium.org,jansson@google.com
-per-file .gitignore=*
-per-file AUTHORS=*
-per-file DEPS=*
-per-file PRESUBMIT.py=mbonadei@chromium.org,jansson@google.com
+include platform/system/core:/janitors/OWNERS
\ No newline at end of file
diff --git a/OWNERS.android b/OWNERS.android
deleted file mode 100644
index 7529cb9..0000000
--- a/OWNERS.android
+++ /dev/null
@@ -1 +0,0 @@
-include platform/system/core:/janitors/OWNERS
diff --git a/README.chromium b/README.chromium
index c68be17..880191e 100644
--- a/README.chromium
+++ b/README.chromium
@@ -1,9 +1,8 @@
Name: libyuv
-URL: https://chromium.googlesource.com/libyuv/libyuv/
-Version: 1875
+URL: http://code.google.com/p/libyuv/
+Version: 1871
License: BSD
License File: LICENSE
-Shipped: yes
Description:
libyuv is an open source project that includes YUV conversion and scaling functionality.
diff --git a/README.version b/README.version
new file mode 100644
index 0000000..6eb9dc8
--- /dev/null
+++ b/README.version
@@ -0,0 +1,8 @@
+Version: r1871
+BugComponent: 42195
+Owner: lajos
+Local Modifications:
+ * Remove files/Android.mk (it interferes with the Android build system).
+ * Remove OWNERS files within files/ and all the subdirectories (except for
+ files/fuzz). Having these files breaks repo presubmit hooks since they
+ contain non @google.com email addresses.
diff --git a/UPDATING b/UPDATING
new file mode 100644
index 0000000..2679284
--- /dev/null
+++ b/UPDATING
@@ -0,0 +1,36 @@
+To sync the libyuv checkout to an upstream revision, do the following:
+
+These commands are known to work from the external/libyuv directory of the
+Android tree's checkout.
+
+Step 1: Remove the files/ subdirectory.
+
+$ rm -rf files
+
+Step 2: Clone the libyuv repository from upstream.
+
+$ git clone https://chromium.googlesource.com/libyuv/libyuv files
+
+Step 3 (optional): Checkout a specific commit/tag.
+
+$ cd files
+$ git checkout <commit_or_tag>
+$ cd ..
+
+Step 4: Remove files that aren't necessary (Android.mk, .git and OWNERS).
+
+$ rm files/Android.mk
+$ rm -rf files/.git
+$ find files/ -name "OWNERS" | xargs rm
+
+Step 5: Update the version and last_upgrade_date fields in the METADATA file.
+
+Step 6: Update README.version with the version (can be found in
+ files/include/libyuv/version.h)
+
+Step 7: If any local modifications are made, update README.version and
+ this file with updated instructions.
+
+Step 8: Ensure that libyuv builds and camera and media related CTS tests are
+ passing. If there are any linker errors about missing symbols, try
+ updating frameworks/av/media/libstagefright/export.lds.
diff --git a/docs/deprecated_builds.md b/docs/deprecated_builds.md
index 8edefd7..ba42966 100644
--- a/docs/deprecated_builds.md
+++ b/docs/deprecated_builds.md
@@ -165,11 +165,11 @@
arm32 disassembly:
- llvm-objdump -d out/Release/obj/source/libyuv.row_neon.o
+ third_party/android_ndk/toolchains/arm-linux-androideabi-4.9/prebuilt/linux-x86_64/bin/arm-linux-androideabi-objdump -d out/Release/obj/source/libyuv.row_neon.o
arm64 disassembly:
- llvm-objdump -d out/Release/obj/source/libyuv.row_neon64.o
+ third_party/android_ndk/toolchains/aarch64-linux-android-4.9/prebuilt/linux-x86_64/bin/aarch64-linux-android-objdump -d out/Release/obj/source/libyuv.row_neon64.o
Running tests:
diff --git a/docs/getting_started.md b/docs/getting_started.md
index f2f71b8..b19f000 100644
--- a/docs/getting_started.md
+++ b/docs/getting_started.md
@@ -139,11 +139,11 @@
arm disassembly:
- llvm-objdump -d ./out/Release/obj/libyuv/row_common.o >row_common.txt
+ third_party/android_ndk/toolchains/aarch64-linux-android-4.9/prebuilt/linux-x86_64/bin/aarch64-linux-android-objdump -d ./out/Release/obj/libyuv/row_common.o >row_common.txt
- llvm-objdump -d ./out/Release/obj/libyuv_neon/row_neon.o >row_neon.txt
+ third_party/android_ndk/toolchains/aarch64-linux-android-4.9/prebuilt/linux-x86_64/bin/aarch64-linux-android-objdump -d ./out/Release/obj/libyuv_neon/row_neon.o >row_neon.txt
- llvm-objdump -d ./out/Release/obj/libyuv_neon/row_neon64.o >row_neon64.txt
+ third_party/android_ndk/toolchains/aarch64-linux-android-4.9/prebuilt/linux-x86_64/bin/aarch64-linux-android-objdump -d ./out/Release/obj/libyuv_neon/row_neon64.o >row_neon64.txt
Caveat: Disassembly may require optimize_max be disabled in BUILD.gn
@@ -238,18 +238,6 @@
-DUSE_RVV=ON .
cmake --build out/Release/
-#### Customized Compiler Flags
-
-Customized compiler flags are supported by `-DRISCV_COMPILER_FLAGS="xxx"`.
-If `-DRISCV_COMPILER_FLAGS="xxx"` is manually assigned, other compile flags(e.g disable -march=xxx) will not be appended.
-
-Example:
-
- cmake -B out/Release/ -DUNIT_TEST=ON \
- -DCMAKE_BUILD_TYPE=Release \
- -DCMAKE_TOOLCHAIN_FILE="./riscv_script/riscv-clang.cmake" \
- -DRISCV_COMPILER_FLAGS="-mcpu=sifive-x280" \
- .
### Run on QEMU
diff --git a/include/libyuv/compare_row.h b/include/libyuv/compare_row.h
index 8293c91..d8e82d7 100644
--- a/include/libyuv/compare_row.h
+++ b/include/libyuv/compare_row.h
@@ -28,10 +28,7 @@
#endif
// MemorySanitizer does not support assembly code yet. http://crbug.com/344505
#if defined(__has_feature)
-#if __has_feature(memory_sanitizer) && !defined(LIBYUV_DISABLE_NEON)
-#define LIBYUV_DISABLE_NEON
-#endif
-#if __has_feature(memory_sanitizer) && !defined(LIBYUV_DISABLE_X86)
+#if __has_feature(memory_sanitizer)
#define LIBYUV_DISABLE_X86
#endif
#endif
diff --git a/include/libyuv/planar_functions.h b/include/libyuv/planar_functions.h
index f934472..154f2f2 100644
--- a/include/libyuv/planar_functions.h
+++ b/include/libyuv/planar_functions.h
@@ -30,10 +30,7 @@
#endif
// MemorySanitizer does not support assembly code yet. http://crbug.com/344505
#if defined(__has_feature)
-#if __has_feature(memory_sanitizer) && !defined(LIBYUV_DISABLE_NEON)
-#define LIBYUV_DISABLE_NEON
-#endif
-#if __has_feature(memory_sanitizer) && !defined(LIBYUV_DISABLE_X86)
+#if __has_feature(memory_sanitizer)
#define LIBYUV_DISABLE_X86
#endif
#endif
@@ -830,6 +827,15 @@
int width,
int height);
+typedef void (*ARGBBlendRow)(const uint8_t* src_argb0,
+ const uint8_t* src_argb1,
+ uint8_t* dst_argb,
+ int width);
+
+// Get function to Alpha Blend ARGB pixels and store to destination.
+LIBYUV_API
+ARGBBlendRow GetARGBBlend();
+
// Alpha Blend ARGB images and store to destination.
// Source is pre-multiplied by alpha using ARGBAttenuate.
// Alpha of destination is set to 255.
diff --git a/include/libyuv/rotate_row.h b/include/libyuv/rotate_row.h
index 3e6a2fe..2dd8c03 100644
--- a/include/libyuv/rotate_row.h
+++ b/include/libyuv/rotate_row.h
@@ -28,10 +28,7 @@
#endif
// MemorySanitizer does not support assembly code yet. http://crbug.com/344505
#if defined(__has_feature)
-#if __has_feature(memory_sanitizer) && !defined(LIBYUV_DISABLE_NEON)
-#define LIBYUV_DISABLE_NEON
-#endif
-#if __has_feature(memory_sanitizer) && !defined(LIBYUV_DISABLE_X86)
+#if __has_feature(memory_sanitizer)
#define LIBYUV_DISABLE_X86
#endif
#endif
diff --git a/include/libyuv/row.h b/include/libyuv/row.h
index 0455b4c..5b244d7 100644
--- a/include/libyuv/row.h
+++ b/include/libyuv/row.h
@@ -31,10 +31,7 @@
#endif
// MemorySanitizer does not support assembly code yet. http://crbug.com/344505
#if defined(__has_feature)
-#if __has_feature(memory_sanitizer) && !defined(LIBYUV_DISABLE_NEON)
-#define LIBYUV_DISABLE_NEON
-#endif
-#if __has_feature(memory_sanitizer) && !defined(LIBYUV_DISABLE_X86)
+#if __has_feature(memory_sanitizer)
#define LIBYUV_DISABLE_X86
#endif
#endif
@@ -164,6 +161,7 @@
#define HAS_ARGBSEPIAROW_SSSE3
#define HAS_ARGBSHADEROW_SSE2
#define HAS_ARGBSUBTRACTROW_SSE2
+#define HAS_ARGBUNATTENUATEROW_SSE2
#define HAS_BLENDPLANEROW_SSSE3
#define HAS_COMPUTECUMULATIVESUMROW_SSE2
#define HAS_CUMULATIVESUMTOAVERAGEROW_SSE2
@@ -173,6 +171,9 @@
#define HAS_SOBELXROW_SSE2
#define HAS_SOBELXYROW_SSE2
#define HAS_SOBELYROW_SSE2
+#if !defined(LIBYUV_BIT_EXACT)
+#define HAS_ARGBATTENUATEROW_SSSE3
+#endif
// The following functions fail on gcc/clang 32 bit with fpic and framepointer.
// caveat: clangcl uses row_win.cc which works.
@@ -240,7 +241,11 @@
#define HAS_ARGBADDROW_AVX2
#define HAS_ARGBMULTIPLYROW_AVX2
#define HAS_ARGBSUBTRACTROW_AVX2
+#define HAS_ARGBUNATTENUATEROW_AVX2
#define HAS_BLENDPLANEROW_AVX2
+#if !defined(LIBYUV_BIT_EXACT)
+#define HAS_ARGBATTENUATEROW_AVX2
+#endif
#if defined(__x86_64__) || !defined(__pic__) || defined(__clang__) || \
defined(_MSC_VER)
@@ -280,15 +285,14 @@
#define HAS_ABGRTOAR30ROW_SSSE3
#define HAS_ABGRTOYJROW_SSSE3
#define HAS_AR64TOARGBROW_SSSE3
-#define HAS_ARGBATTENUATEROW_SSSE3
#define HAS_ARGBTOAB64ROW_SSSE3
#define HAS_ARGBTOAR30ROW_SSSE3
#define HAS_ARGBTOAR64ROW_SSSE3
-#define HAS_ARGBUNATTENUATEROW_SSE2
#define HAS_CONVERT16TO8ROW_SSSE3
#define HAS_CONVERT8TO16ROW_SSE2
-#define HAS_DETILEROW_16_SSE2
#define HAS_DETILEROW_SSE2
+#define HAS_DETILEROW_16_SSE2
+#define HAS_DETILEROW_16_AVX
#define HAS_DETILESPLITUVROW_SSSE3
#define HAS_DETILETOYUY2_SSE2
#define HAS_HALFMERGEUVROW_SSSE3
@@ -341,16 +345,13 @@
#define HAS_ABGRTOYJROW_AVX2
#define HAS_ABGRTOYROW_AVX2
#define HAS_AR64TOARGBROW_AVX2
-#define HAS_ARGBATTENUATEROW_AVX2
#define HAS_ARGBTOAB64ROW_AVX2
#define HAS_ARGBTOAR30ROW_AVX2
#define HAS_ARGBTOAR64ROW_AVX2
#define HAS_ARGBTORAWROW_AVX2
#define HAS_ARGBTORGB24ROW_AVX2
-#define HAS_ARGBUNATTENUATEROW_AVX2
#define HAS_CONVERT16TO8ROW_AVX2
#define HAS_CONVERT8TO16ROW_AVX2
-#define HAS_DETILEROW_16_AVX
#define HAS_DIVIDEROW_16_AVX2
#define HAS_HALFMERGEUVROW_AVX2
#define HAS_I210TOAR30ROW_AVX2
@@ -794,25 +795,19 @@
#endif
#if !defined(LIBYUV_DISABLE_RVV) && defined(__riscv_vector)
-#define HAS_COPYROW_RVV
-#if __riscv_v_intrinsic == 11000
#define HAS_AB64TOARGBROW_RVV
-#define HAS_ABGRTOYJROW_RVV
-#define HAS_ABGRTOYROW_RVV
#define HAS_AR64TOARGBROW_RVV
#define HAS_ARGBATTENUATEROW_RVV
-#define HAS_ARGBBLENDROW_RVV
-#define HAS_ARGBCOPYYTOALPHAROW_RVV
-#define HAS_ARGBEXTRACTALPHAROW_RVV
#define HAS_ARGBTOAB64ROW_RVV
#define HAS_ARGBTOAR64ROW_RVV
#define HAS_ARGBTORAWROW_RVV
#define HAS_ARGBTORGB24ROW_RVV
-#define HAS_ARGBTOYJROW_RVV
-#define HAS_ARGBTOYMATRIXROW_RVV
#define HAS_ARGBTOYROW_RVV
+#define HAS_ARGBTOYJROW_RVV
+#define HAS_ABGRTOYROW_RVV
+#define HAS_ABGRTOYJROW_RVV
#define HAS_BGRATOYROW_RVV
-#define HAS_BLENDPLANEROW_RVV
+#define HAS_COPYROW_RVV
#define HAS_I400TOARGBROW_RVV
#define HAS_I422ALPHATOARGBROW_RVV
#define HAS_I422TOARGBROW_RVV
@@ -827,10 +822,10 @@
#define HAS_MERGERGBROW_RVV
#define HAS_MERGEUVROW_RVV
#define HAS_MERGEXRGBROW_RVV
-#define HAS_NV12TOARGBROW_RVV
-#define HAS_NV12TORGB24ROW_RVV
-#define HAS_NV21TOARGBROW_RVV
-#define HAS_NV21TORGB24ROW_RVV
+#define HAS_SPLITARGBROW_RVV
+#define HAS_SPLITRGBROW_RVV
+#define HAS_SPLITUVROW_RVV
+#define HAS_SPLITXRGBROW_RVV
#define HAS_RAWTOARGBROW_RVV
#define HAS_RAWTORGB24ROW_RVV
#define HAS_RAWTORGBAROW_RVV
@@ -839,15 +834,8 @@
#define HAS_RGB24TOARGBROW_RVV
#define HAS_RGB24TOYJROW_RVV
#define HAS_RGB24TOYROW_RVV
-#define HAS_RGBATOYJROW_RVV
-#define HAS_RGBATOYMATRIXROW_RVV
#define HAS_RGBATOYROW_RVV
-#define HAS_RGBTOYMATRIXROW_RVV
-#define HAS_SPLITARGBROW_RVV
-#define HAS_SPLITRGBROW_RVV
-#define HAS_SPLITUVROW_RVV
-#define HAS_SPLITXRGBROW_RVV
-#endif
+#define HAS_RGBATOYJROW_RVV
#endif
#if defined(_MSC_VER) && !defined(__CLR_VER) && !defined(__clang__)
@@ -1363,26 +1351,6 @@
uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width);
-void NV12ToARGBRow_RVV(const uint8_t* src_y,
- const uint8_t* src_uv,
- uint8_t* dst_argb,
- const struct YuvConstants* yuvconstants,
- int width);
-void NV21ToARGBRow_RVV(const uint8_t* src_y,
- const uint8_t* src_vu,
- uint8_t* dst_argb,
- const struct YuvConstants* yuvconstants,
- int width);
-void NV12ToRGB24Row_RVV(const uint8_t* src_y,
- const uint8_t* src_uv,
- uint8_t* dst_rgb24,
- const struct YuvConstants* yuvconstants,
- int width);
-void NV21ToRGB24Row_RVV(const uint8_t* src_y,
- const uint8_t* src_vu,
- uint8_t* dst_rgb24,
- const struct YuvConstants* yuvconstants,
- int width);
void ARGBToYRow_AVX2(const uint8_t* src_argb, uint8_t* dst_y, int width);
void ARGBToYRow_Any_AVX2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
@@ -3113,9 +3081,6 @@
void ARGBExtractAlphaRow_LSX(const uint8_t* src_argb,
uint8_t* dst_a,
int width);
-void ARGBExtractAlphaRow_RVV(const uint8_t* src_argb,
- uint8_t* dst_a,
- int width);
void ARGBExtractAlphaRow_Any_SSE2(const uint8_t* src_ptr,
uint8_t* dst_ptr,
int width);
@@ -3135,7 +3100,6 @@
void ARGBCopyYToAlphaRow_C(const uint8_t* src, uint8_t* dst, int width);
void ARGBCopyYToAlphaRow_SSE2(const uint8_t* src, uint8_t* dst, int width);
void ARGBCopyYToAlphaRow_AVX2(const uint8_t* src, uint8_t* dst, int width);
-void ARGBCopyYToAlphaRow_RVV(const uint8_t* src, uint8_t* dst, int width);
void ARGBCopyYToAlphaRow_Any_SSE2(const uint8_t* src_ptr,
uint8_t* dst_ptr,
int width);
@@ -4551,10 +4515,6 @@
const uint8_t* src_argb1,
uint8_t* dst_argb,
int width);
-void ARGBBlendRow_RVV(const uint8_t* src_argb0,
- const uint8_t* src_argb1,
- uint8_t* dst_argb,
- int width);
void ARGBBlendRow_C(const uint8_t* src_argb,
const uint8_t* src_argb1,
uint8_t* dst_argb,
@@ -4581,11 +4541,6 @@
const uint8_t* v_buf,
uint8_t* dst_ptr,
int width);
-void BlendPlaneRow_RVV(const uint8_t* src0,
- const uint8_t* src1,
- const uint8_t* alpha,
- uint8_t* dst,
- int width);
void BlendPlaneRow_C(const uint8_t* src0,
const uint8_t* src1,
const uint8_t* alpha,
@@ -6225,19 +6180,7 @@
float* dst_ptr,
float param,
int width);
-// Convert FP16 Half Floats to FP32 Floats
-void ConvertFP16ToFP32Row_NEON(const uint16_t* src, // fp16
- float* dst,
- int width);
-// Convert a column of FP16 Half Floats to a row of FP32 Floats
-void ConvertFP16ToFP32Column_NEON(const uint16_t* src, // fp16
- int src_stride, // stride in elements
- float* dst,
- int width);
-// Convert FP32 Floats to FP16 Half Floats
-void ConvertFP32ToFP16Row_NEON(const float* src,
- uint16_t* dst, // fp16
- int width);
+
void ARGBLumaColorTableRow_C(const uint8_t* src_argb,
uint8_t* dst_argb,
int width,
diff --git a/include/libyuv/scale_row.h b/include/libyuv/scale_row.h
index c015d77..a7957c3 100644
--- a/include/libyuv/scale_row.h
+++ b/include/libyuv/scale_row.h
@@ -29,10 +29,7 @@
#endif
// MemorySanitizer does not support assembly code yet. http://crbug.com/344505
#if defined(__has_feature)
-#if __has_feature(memory_sanitizer) && !defined(LIBYUV_DISABLE_NEON)
-#define LIBYUV_DISABLE_NEON
-#endif
-#if __has_feature(memory_sanitizer) && !defined(LIBYUV_DISABLE_X86)
+#if __has_feature(memory_sanitizer)
#define LIBYUV_DISABLE_X86
#endif
#endif
@@ -178,36 +175,6 @@
#define HAS_SCALEROWDOWN34_LSX
#endif
-#if !defined(LIBYUV_DISABLE_RVV) && defined(__riscv_vector)
-#define HAS_SCALEADDROW_RVV
-#define HAS_SCALEUVROWDOWN4_RVV
-#define HAS_SCALEUVROWDOWNEVEN_RVV
-#if __riscv_v_intrinsic == 11000
-#define HAS_SCALEARGBROWDOWN2_RVV
-#define HAS_SCALEARGBROWDOWN2BOX_RVV
-#define HAS_SCALEARGBROWDOWN2LINEAR_RVV
-#define HAS_SCALEARGBROWDOWNEVENBOX_RVV
-#define HAS_SCALEROWDOWN2_RVV
-#define HAS_SCALEROWDOWN2BOX_RVV
-#define HAS_SCALEROWDOWN2LINEAR_RVV
-#define HAS_SCALEROWDOWN34_0_BOX_RVV
-#define HAS_SCALEROWDOWN34_1_BOX_RVV
-#define HAS_SCALEROWDOWN34_RVV
-#define HAS_SCALEROWDOWN38_2_BOX_RVV
-#define HAS_SCALEROWDOWN38_3_BOX_RVV
-#define HAS_SCALEROWDOWN38_RVV
-#define HAS_SCALEROWDOWN4_RVV
-#define HAS_SCALEROWDOWN4BOX_RVV
-#define HAS_SCALEROWUP2_BILINEAR_RVV
-#define HAS_SCALEROWUP2_LINEAR_RVV
-#define HAS_SCALEUVROWDOWN2_RVV
-#define HAS_SCALEUVROWDOWN2BOX_RVV
-#define HAS_SCALEUVROWDOWN2LINEAR_RVV
-#define HAS_SCALEUVROWUP2_BILINEAR_RVV
-#define HAS_SCALEUVROWUP2_LINEAR_RVV
-#endif
-#endif
-
// Scale ARGB vertically with bilinear interpolation.
void ScalePlaneVertical(int src_height,
int dst_width,
@@ -982,18 +949,6 @@
ptrdiff_t src_stride,
uint8_t* dst,
int dst_width);
-void ScaleARGBRowDown2_RVV(const uint8_t* src_argb,
- ptrdiff_t src_stride,
- uint8_t* dst_argb,
- int dst_width);
-void ScaleARGBRowDown2Linear_RVV(const uint8_t* src_argb,
- ptrdiff_t src_stride,
- uint8_t* dst_argb,
- int dst_width);
-void ScaleARGBRowDown2Box_RVV(const uint8_t* src_argb,
- ptrdiff_t src_stride,
- uint8_t* dst_argb,
- int dst_width);
void ScaleARGBRowDown2_MSA(const uint8_t* src_argb,
ptrdiff_t src_stride,
uint8_t* dst_argb,
@@ -1106,16 +1061,6 @@
int src_stepx,
uint8_t* dst_argb,
int dst_width);
-void ScaleARGBRowDownEven_RVV(const uint8_t* src_argb,
- ptrdiff_t src_stride,
- int32_t src_stepx,
- uint8_t* dst_argb,
- int dst_width);
-void ScaleARGBRowDownEvenBox_RVV(const uint8_t* src_argb,
- ptrdiff_t src_stride,
- int src_stepx,
- uint8_t* dst_argb,
- int dst_width);
void ScaleARGBRowDownEven_Any_SSE2(const uint8_t* src_ptr,
ptrdiff_t src_stride,
int src_stepx,
@@ -1198,18 +1143,6 @@
ptrdiff_t src_stride,
uint8_t* dst_uv,
int dst_width);
-void ScaleUVRowDown2_RVV(const uint8_t* src_ptr,
- ptrdiff_t src_stride,
- uint8_t* dst_uv,
- int dst_width);
-void ScaleUVRowDown2Linear_RVV(const uint8_t* src_ptr,
- ptrdiff_t src_stride,
- uint8_t* dst_uv,
- int dst_width);
-void ScaleUVRowDown2Box_RVV(const uint8_t* src_ptr,
- ptrdiff_t src_stride,
- uint8_t* dst,
- int dst_width);
void ScaleUVRowDown2_Any_SSSE3(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst_ptr,
@@ -1270,16 +1203,6 @@
int src_stepx,
uint8_t* dst_uv,
int dst_width);
-void ScaleUVRowDown4_RVV(const uint8_t* src_ptr,
- ptrdiff_t src_stride,
- int32_t src_stepx,
- uint8_t* dst_uv,
- int dst_width);
-void ScaleUVRowDownEven_RVV(const uint8_t* src_ptr,
- ptrdiff_t src_stride,
- int32_t src_stepx,
- uint8_t* dst_uv,
- int dst_width);
void ScaleUVRowDownEven_MSA(const uint8_t* src_ptr,
ptrdiff_t src_stride,
int32_t src_stepx,
@@ -1369,14 +1292,6 @@
uint8_t* dst_ptr,
ptrdiff_t dst_stride,
int dst_width);
-void ScaleUVRowUp2_Linear_RVV(const uint8_t* src_ptr,
- uint8_t* dst_ptr,
- int dst_width);
-void ScaleUVRowUp2_Bilinear_RVV(const uint8_t* src_ptr,
- ptrdiff_t src_stride,
- uint8_t* dst_ptr,
- ptrdiff_t dst_stride,
- int dst_width);
void ScaleUVRowUp2_Linear_16_SSE41(const uint16_t* src_ptr,
uint16_t* dst_ptr,
int dst_width);
@@ -1829,61 +1744,6 @@
uint8_t* dst_ptr,
int dst_width);
-void ScaleAddRow_RVV(const uint8_t* src_ptr, uint16_t* dst_ptr, int src_width);
-void ScaleRowDown2_RVV(const uint8_t* src_ptr,
- ptrdiff_t src_stride,
- uint8_t* dst,
- int dst_width);
-void ScaleRowDown2Linear_RVV(const uint8_t* src_ptr,
- ptrdiff_t src_stride,
- uint8_t* dst,
- int dst_width);
-void ScaleRowDown2Box_RVV(const uint8_t* src_ptr,
- ptrdiff_t src_stride,
- uint8_t* dst,
- int dst_width);
-
-void ScaleRowDown4_RVV(const uint8_t* src_ptr,
- ptrdiff_t src_stride,
- uint8_t* dst_ptr,
- int dst_width);
-void ScaleRowDown4Box_RVV(const uint8_t* src_ptr,
- ptrdiff_t src_stride,
- uint8_t* dst_ptr,
- int dst_width);
-void ScaleRowDown34_RVV(const uint8_t* src_ptr,
- ptrdiff_t src_stride,
- uint8_t* dst_ptr,
- int dst_width);
-void ScaleRowDown34_0_Box_RVV(const uint8_t* src_ptr,
- ptrdiff_t src_stride,
- uint8_t* dst_ptr,
- int dst_width);
-void ScaleRowDown34_1_Box_RVV(const uint8_t* src_ptr,
- ptrdiff_t src_stride,
- uint8_t* dst_ptr,
- int dst_width);
-void ScaleRowDown38_RVV(const uint8_t* src_ptr,
- ptrdiff_t src_stride,
- uint8_t* dst,
- int dst_width);
-void ScaleRowDown38_3_Box_RVV(const uint8_t* src_ptr,
- ptrdiff_t src_stride,
- uint8_t* dst_ptr,
- int dst_width);
-void ScaleRowDown38_2_Box_RVV(const uint8_t* src_ptr,
- ptrdiff_t src_stride,
- uint8_t* dst_ptr,
- int dst_width);
-
-void ScaleRowUp2_Linear_RVV(const uint8_t* src_ptr,
- uint8_t* dst_ptr,
- int dst_width);
-void ScaleRowUp2_Bilinear_RVV(const uint8_t* src_ptr,
- ptrdiff_t src_stride,
- uint8_t* dst_ptr,
- ptrdiff_t dst_stride,
- int dst_width);
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv
diff --git a/include/libyuv/version.h b/include/libyuv/version.h
index d45ef09..b6623db 100644
--- a/include/libyuv/version.h
+++ b/include/libyuv/version.h
@@ -11,6 +11,6 @@
#ifndef INCLUDE_LIBYUV_VERSION_H_
#define INCLUDE_LIBYUV_VERSION_H_
-#define LIBYUV_VERSION 1875
+#define LIBYUV_VERSION 1871
#endif // INCLUDE_LIBYUV_VERSION_H_
diff --git a/infra/config/OWNERS b/infra/config/OWNERS
deleted file mode 100644
index 2c4f90a..0000000
--- a/infra/config/OWNERS
+++ /dev/null
@@ -1,3 +0,0 @@
-fbarchard@chromium.org
-mbonadei@chromium.org
-jansson@google.com
diff --git a/infra/config/cr-buildbucket.cfg b/infra/config/cr-buildbucket.cfg
index 7415851..be9d1d2 100644
--- a/infra/config/cr-buildbucket.cfg
+++ b/infra/config/cr-buildbucket.cfg
@@ -29,6 +29,11 @@
}
properties:
'{'
+ ' "$build/goma": {'
+ ' "enable_ats": true,'
+ ' "server_host": "goma.chromium.org",'
+ ' "use_luci_auth": true'
+ ' },'
' "$build/reclient": {'
' "instance": "rbe-webrtc-trusted",'
' "metrics_project": "chromium-reclient-metrics"'
@@ -59,6 +64,11 @@
}
properties:
'{'
+ ' "$build/goma": {'
+ ' "enable_ats": true,'
+ ' "server_host": "goma.chromium.org",'
+ ' "use_luci_auth": true'
+ ' },'
' "$build/reclient": {'
' "instance": "rbe-webrtc-trusted",'
' "metrics_project": "chromium-reclient-metrics"'
@@ -89,6 +99,11 @@
}
properties:
'{'
+ ' "$build/goma": {'
+ ' "enable_ats": true,'
+ ' "server_host": "goma.chromium.org",'
+ ' "use_luci_auth": true'
+ ' },'
' "$build/reclient": {'
' "instance": "rbe-webrtc-trusted",'
' "metrics_project": "chromium-reclient-metrics"'
@@ -117,6 +132,10 @@
}
properties:
'{'
+ ' "$build/goma": {'
+ ' "server_host": "goma.chromium.org",'
+ ' "use_luci_auth": true'
+ ' },'
' "$build/reclient": {'
' "instance": "rbe-webrtc-trusted",'
' "metrics_project": "chromium-reclient-metrics"'
@@ -145,6 +164,10 @@
}
properties:
'{'
+ ' "$build/goma": {'
+ ' "server_host": "goma.chromium.org",'
+ ' "use_luci_auth": true'
+ ' },'
' "$build/reclient": {'
' "instance": "rbe-webrtc-trusted",'
' "metrics_project": "chromium-reclient-metrics"'
@@ -173,6 +196,10 @@
}
properties:
'{'
+ ' "$build/goma": {'
+ ' "server_host": "goma.chromium.org",'
+ ' "use_luci_auth": true'
+ ' },'
' "$build/reclient": {'
' "instance": "rbe-webrtc-trusted",'
' "metrics_project": "chromium-reclient-metrics"'
@@ -203,6 +230,11 @@
}
properties:
'{'
+ ' "$build/goma": {'
+ ' "enable_ats": true,'
+ ' "server_host": "goma.chromium.org",'
+ ' "use_luci_auth": true'
+ ' },'
' "$build/reclient": {'
' "instance": "rbe-webrtc-trusted",'
' "metrics_project": "chromium-reclient-metrics"'
@@ -233,6 +265,11 @@
}
properties:
'{'
+ ' "$build/goma": {'
+ ' "enable_ats": true,'
+ ' "server_host": "goma.chromium.org",'
+ ' "use_luci_auth": true'
+ ' },'
' "$build/reclient": {'
' "instance": "rbe-webrtc-trusted",'
' "metrics_project": "chromium-reclient-metrics"'
@@ -263,6 +300,11 @@
}
properties:
'{'
+ ' "$build/goma": {'
+ ' "enable_ats": true,'
+ ' "server_host": "goma.chromium.org",'
+ ' "use_luci_auth": true'
+ ' },'
' "$build/reclient": {'
' "instance": "rbe-webrtc-trusted",'
' "metrics_project": "chromium-reclient-metrics"'
@@ -293,6 +335,11 @@
}
properties:
'{'
+ ' "$build/goma": {'
+ ' "enable_ats": true,'
+ ' "server_host": "goma.chromium.org",'
+ ' "use_luci_auth": true'
+ ' },'
' "$build/reclient": {'
' "instance": "rbe-webrtc-trusted",'
' "metrics_project": "chromium-reclient-metrics"'
@@ -323,6 +370,11 @@
}
properties:
'{'
+ ' "$build/goma": {'
+ ' "enable_ats": true,'
+ ' "server_host": "goma.chromium.org",'
+ ' "use_luci_auth": true'
+ ' },'
' "$build/reclient": {'
' "instance": "rbe-webrtc-trusted",'
' "metrics_project": "chromium-reclient-metrics"'
@@ -353,6 +405,11 @@
}
properties:
'{'
+ ' "$build/goma": {'
+ ' "enable_ats": true,'
+ ' "server_host": "goma.chromium.org",'
+ ' "use_luci_auth": true'
+ ' },'
' "$build/reclient": {'
' "instance": "rbe-webrtc-trusted",'
' "metrics_project": "chromium-reclient-metrics"'
@@ -383,6 +440,11 @@
}
properties:
'{'
+ ' "$build/goma": {'
+ ' "enable_ats": true,'
+ ' "server_host": "goma.chromium.org",'
+ ' "use_luci_auth": true'
+ ' },'
' "$build/reclient": {'
' "instance": "rbe-webrtc-trusted",'
' "metrics_project": "chromium-reclient-metrics"'
@@ -413,6 +475,11 @@
}
properties:
'{'
+ ' "$build/goma": {'
+ ' "enable_ats": true,'
+ ' "server_host": "goma.chromium.org",'
+ ' "use_luci_auth": true'
+ ' },'
' "$build/reclient": {'
' "instance": "rbe-webrtc-trusted",'
' "metrics_project": "chromium-reclient-metrics"'
@@ -443,6 +510,11 @@
}
properties:
'{'
+ ' "$build/goma": {'
+ ' "enable_ats": true,'
+ ' "server_host": "goma.chromium.org",'
+ ' "use_luci_auth": true'
+ ' },'
' "$build/reclient": {'
' "instance": "rbe-webrtc-trusted",'
' "metrics_project": "chromium-reclient-metrics"'
@@ -473,6 +545,11 @@
}
properties:
'{'
+ ' "$build/goma": {'
+ ' "enable_ats": true,'
+ ' "server_host": "goma.chromium.org",'
+ ' "use_luci_auth": true'
+ ' },'
' "$build/reclient": {'
' "instance": "rbe-webrtc-trusted",'
' "metrics_project": "chromium-reclient-metrics"'
@@ -503,6 +580,11 @@
}
properties:
'{'
+ ' "$build/goma": {'
+ ' "enable_ats": true,'
+ ' "server_host": "goma.chromium.org",'
+ ' "use_luci_auth": true'
+ ' },'
' "$build/reclient": {'
' "instance": "rbe-webrtc-trusted",'
' "metrics_project": "chromium-reclient-metrics"'
@@ -532,6 +614,10 @@
}
properties:
'{'
+ ' "$build/goma": {'
+ ' "server_host": "goma.chromium.org",'
+ ' "use_luci_auth": true'
+ ' },'
' "$build/reclient": {'
' "instance": "rbe-webrtc-trusted",'
' "metrics_project": "chromium-reclient-metrics"'
@@ -561,6 +647,10 @@
}
properties:
'{'
+ ' "$build/goma": {'
+ ' "server_host": "goma.chromium.org",'
+ ' "use_luci_auth": true'
+ ' },'
' "$build/reclient": {'
' "instance": "rbe-webrtc-trusted",'
' "metrics_project": "chromium-reclient-metrics"'
@@ -590,6 +680,10 @@
}
properties:
'{'
+ ' "$build/goma": {'
+ ' "server_host": "goma.chromium.org",'
+ ' "use_luci_auth": true'
+ ' },'
' "$build/reclient": {'
' "instance": "rbe-webrtc-trusted",'
' "metrics_project": "chromium-reclient-metrics"'
@@ -620,6 +714,11 @@
}
properties:
'{'
+ ' "$build/goma": {'
+ ' "enable_ats": true,'
+ ' "server_host": "goma.chromium.org",'
+ ' "use_luci_auth": true'
+ ' },'
' "$build/reclient": {'
' "instance": "rbe-webrtc-trusted",'
' "metrics_project": "chromium-reclient-metrics"'
@@ -650,6 +749,11 @@
}
properties:
'{'
+ ' "$build/goma": {'
+ ' "enable_ats": true,'
+ ' "server_host": "goma.chromium.org",'
+ ' "use_luci_auth": true'
+ ' },'
' "$build/reclient": {'
' "instance": "rbe-webrtc-trusted",'
' "metrics_project": "chromium-reclient-metrics"'
@@ -680,6 +784,11 @@
}
properties:
'{'
+ ' "$build/goma": {'
+ ' "enable_ats": true,'
+ ' "server_host": "goma.chromium.org",'
+ ' "use_luci_auth": true'
+ ' },'
' "$build/reclient": {'
' "instance": "rbe-webrtc-trusted",'
' "metrics_project": "chromium-reclient-metrics"'
@@ -710,6 +819,11 @@
}
properties:
'{'
+ ' "$build/goma": {'
+ ' "enable_ats": true,'
+ ' "server_host": "goma.chromium.org",'
+ ' "use_luci_auth": true'
+ ' },'
' "$build/reclient": {'
' "instance": "rbe-webrtc-trusted",'
' "metrics_project": "chromium-reclient-metrics"'
@@ -740,6 +854,11 @@
}
properties:
'{'
+ ' "$build/goma": {'
+ ' "enable_ats": true,'
+ ' "server_host": "goma.chromium.org",'
+ ' "use_luci_auth": true'
+ ' },'
' "$build/reclient": {'
' "instance": "rbe-webrtc-trusted",'
' "metrics_project": "chromium-reclient-metrics"'
@@ -770,6 +889,11 @@
}
properties:
'{'
+ ' "$build/goma": {'
+ ' "enable_ats": true,'
+ ' "server_host": "goma.chromium.org",'
+ ' "use_luci_auth": true'
+ ' },'
' "$build/reclient": {'
' "instance": "rbe-webrtc-trusted",'
' "metrics_project": "chromium-reclient-metrics"'
@@ -800,6 +924,11 @@
}
properties:
'{'
+ ' "$build/goma": {'
+ ' "enable_ats": true,'
+ ' "server_host": "goma.chromium.org",'
+ ' "use_luci_auth": true'
+ ' },'
' "$build/reclient": {'
' "instance": "rbe-webrtc-trusted",'
' "metrics_project": "chromium-reclient-metrics"'
@@ -830,6 +959,11 @@
}
properties:
'{'
+ ' "$build/goma": {'
+ ' "enable_ats": true,'
+ ' "server_host": "goma.chromium.org",'
+ ' "use_luci_auth": true'
+ ' },'
' "$build/reclient": {'
' "instance": "rbe-webrtc-trusted",'
' "metrics_project": "chromium-reclient-metrics"'
@@ -859,6 +993,10 @@
}
properties:
'{'
+ ' "$build/goma": {'
+ ' "server_host": "goma.chromium.org",'
+ ' "use_luci_auth": true'
+ ' },'
' "$build/reclient": {'
' "instance": "rbe-webrtc-trusted",'
' "metrics_project": "chromium-reclient-metrics"'
@@ -888,6 +1026,10 @@
}
properties:
'{'
+ ' "$build/goma": {'
+ ' "server_host": "goma.chromium.org",'
+ ' "use_luci_auth": true'
+ ' },'
' "$build/reclient": {'
' "instance": "rbe-webrtc-trusted",'
' "metrics_project": "chromium-reclient-metrics"'
@@ -972,6 +1114,10 @@
}
properties:
'{'
+ ' "$build/goma": {'
+ ' "server_host": "goma.chromium.org",'
+ ' "use_luci_auth": true'
+ ' },'
' "$build/reclient": {'
' "instance": "rbe-webrtc-untrusted",'
' "metrics_project": "chromium-reclient-metrics"'
@@ -1000,6 +1146,10 @@
}
properties:
'{'
+ ' "$build/goma": {'
+ ' "server_host": "goma.chromium.org",'
+ ' "use_luci_auth": true'
+ ' },'
' "$build/reclient": {'
' "instance": "rbe-webrtc-untrusted",'
' "metrics_project": "chromium-reclient-metrics"'
@@ -1028,6 +1178,10 @@
}
properties:
'{'
+ ' "$build/goma": {'
+ ' "server_host": "goma.chromium.org",'
+ ' "use_luci_auth": true'
+ ' },'
' "$build/reclient": {'
' "instance": "rbe-webrtc-untrusted",'
' "metrics_project": "chromium-reclient-metrics"'
@@ -1058,6 +1212,11 @@
}
properties:
'{'
+ ' "$build/goma": {'
+ ' "enable_ats": true,'
+ ' "server_host": "goma.chromium.org",'
+ ' "use_luci_auth": true'
+ ' },'
' "$build/reclient": {'
' "instance": "rbe-webrtc-untrusted",'
' "metrics_project": "chromium-reclient-metrics"'
@@ -1088,6 +1247,11 @@
}
properties:
'{'
+ ' "$build/goma": {'
+ ' "enable_ats": true,'
+ ' "server_host": "goma.chromium.org",'
+ ' "use_luci_auth": true'
+ ' },'
' "$build/reclient": {'
' "instance": "rbe-webrtc-untrusted",'
' "metrics_project": "chromium-reclient-metrics"'
@@ -1117,6 +1281,10 @@
}
properties:
'{'
+ ' "$build/goma": {'
+ ' "server_host": "goma.chromium.org",'
+ ' "use_luci_auth": true'
+ ' },'
' "$build/reclient": {'
' "instance": "rbe-webrtc-untrusted",'
' "metrics_project": "chromium-reclient-metrics"'
@@ -1146,6 +1314,10 @@
}
properties:
'{'
+ ' "$build/goma": {'
+ ' "server_host": "goma.chromium.org",'
+ ' "use_luci_auth": true'
+ ' },'
' "$build/reclient": {'
' "instance": "rbe-webrtc-untrusted",'
' "metrics_project": "chromium-reclient-metrics"'
@@ -1176,6 +1348,11 @@
}
properties:
'{'
+ ' "$build/goma": {'
+ ' "enable_ats": true,'
+ ' "server_host": "goma.chromium.org",'
+ ' "use_luci_auth": true'
+ ' },'
' "$build/reclient": {'
' "instance": "rbe-webrtc-untrusted",'
' "metrics_project": "chromium-reclient-metrics"'
@@ -1206,6 +1383,11 @@
}
properties:
'{'
+ ' "$build/goma": {'
+ ' "enable_ats": true,'
+ ' "server_host": "goma.chromium.org",'
+ ' "use_luci_auth": true'
+ ' },'
' "$build/reclient": {'
' "instance": "rbe-webrtc-untrusted",'
' "metrics_project": "chromium-reclient-metrics"'
@@ -1236,6 +1418,11 @@
}
properties:
'{'
+ ' "$build/goma": {'
+ ' "enable_ats": true,'
+ ' "server_host": "goma.chromium.org",'
+ ' "use_luci_auth": true'
+ ' },'
' "$build/reclient": {'
' "instance": "rbe-webrtc-untrusted",'
' "metrics_project": "chromium-reclient-metrics"'
@@ -1266,6 +1453,11 @@
}
properties:
'{'
+ ' "$build/goma": {'
+ ' "enable_ats": true,'
+ ' "server_host": "goma.chromium.org",'
+ ' "use_luci_auth": true'
+ ' },'
' "$build/reclient": {'
' "instance": "rbe-webrtc-untrusted",'
' "metrics_project": "chromium-reclient-metrics"'
@@ -1296,6 +1488,11 @@
}
properties:
'{'
+ ' "$build/goma": {'
+ ' "enable_ats": true,'
+ ' "server_host": "goma.chromium.org",'
+ ' "use_luci_auth": true'
+ ' },'
' "$build/reclient": {'
' "instance": "rbe-webrtc-untrusted",'
' "metrics_project": "chromium-reclient-metrics"'
@@ -1326,6 +1523,11 @@
}
properties:
'{'
+ ' "$build/goma": {'
+ ' "enable_ats": true,'
+ ' "server_host": "goma.chromium.org",'
+ ' "use_luci_auth": true'
+ ' },'
' "$build/reclient": {'
' "instance": "rbe-webrtc-untrusted",'
' "metrics_project": "chromium-reclient-metrics"'
@@ -1356,6 +1558,11 @@
}
properties:
'{'
+ ' "$build/goma": {'
+ ' "enable_ats": true,'
+ ' "server_host": "goma.chromium.org",'
+ ' "use_luci_auth": true'
+ ' },'
' "$build/reclient": {'
' "instance": "rbe-webrtc-untrusted",'
' "metrics_project": "chromium-reclient-metrics"'
@@ -1386,6 +1593,11 @@
}
properties:
'{'
+ ' "$build/goma": {'
+ ' "enable_ats": true,'
+ ' "server_host": "goma.chromium.org",'
+ ' "use_luci_auth": true'
+ ' },'
' "$build/reclient": {'
' "instance": "rbe-webrtc-untrusted",'
' "metrics_project": "chromium-reclient-metrics"'
@@ -1415,6 +1627,10 @@
}
properties:
'{'
+ ' "$build/goma": {'
+ ' "server_host": "goma.chromium.org",'
+ ' "use_luci_auth": true'
+ ' },'
' "$build/reclient": {'
' "instance": "rbe-webrtc-untrusted",'
' "metrics_project": "chromium-reclient-metrics"'
@@ -1444,6 +1660,10 @@
}
properties:
'{'
+ ' "$build/goma": {'
+ ' "server_host": "goma.chromium.org",'
+ ' "use_luci_auth": true'
+ ' },'
' "$build/reclient": {'
' "instance": "rbe-webrtc-untrusted",'
' "metrics_project": "chromium-reclient-metrics"'
@@ -1473,6 +1693,10 @@
}
properties:
'{'
+ ' "$build/goma": {'
+ ' "server_host": "goma.chromium.org",'
+ ' "use_luci_auth": true'
+ ' },'
' "$build/reclient": {'
' "instance": "rbe-webrtc-untrusted",'
' "metrics_project": "chromium-reclient-metrics"'
@@ -1503,6 +1727,11 @@
}
properties:
'{'
+ ' "$build/goma": {'
+ ' "enable_ats": true,'
+ ' "server_host": "goma.chromium.org",'
+ ' "use_luci_auth": true'
+ ' },'
' "$build/reclient": {'
' "instance": "rbe-webrtc-untrusted",'
' "metrics_project": "chromium-reclient-metrics"'
@@ -1535,6 +1764,11 @@
}
properties:
'{'
+ ' "$build/goma": {'
+ ' "enable_ats": false,'
+ ' "server_host": "goma.chromium.org",'
+ ' "use_luci_auth": true'
+ ' },'
' "$build/reclient": {'
' "instance": "rbe-webrtc-untrusted",'
' "metrics_project": "chromium-reclient-metrics"'
@@ -1565,6 +1799,11 @@
}
properties:
'{'
+ ' "$build/goma": {'
+ ' "enable_ats": false,'
+ ' "server_host": "goma.chromium.org",'
+ ' "use_luci_auth": true'
+ ' },'
' "$build/reclient": {'
' "instance": "rbe-webrtc-untrusted",'
' "metrics_project": "chromium-reclient-metrics"'
@@ -1595,6 +1834,11 @@
}
properties:
'{'
+ ' "$build/goma": {'
+ ' "enable_ats": false,'
+ ' "server_host": "goma.chromium.org",'
+ ' "use_luci_auth": true'
+ ' },'
' "$build/reclient": {'
' "instance": "rbe-webrtc-untrusted",'
' "metrics_project": "chromium-reclient-metrics"'
@@ -1625,6 +1869,11 @@
}
properties:
'{'
+ ' "$build/goma": {'
+ ' "enable_ats": false,'
+ ' "server_host": "goma.chromium.org",'
+ ' "use_luci_auth": true'
+ ' },'
' "$build/reclient": {'
' "instance": "rbe-webrtc-untrusted",'
' "metrics_project": "chromium-reclient-metrics"'
@@ -1655,6 +1904,11 @@
}
properties:
'{'
+ ' "$build/goma": {'
+ ' "enable_ats": false,'
+ ' "server_host": "goma.chromium.org",'
+ ' "use_luci_auth": true'
+ ' },'
' "$build/reclient": {'
' "instance": "rbe-webrtc-untrusted",'
' "metrics_project": "chromium-reclient-metrics"'
@@ -1685,6 +1939,11 @@
}
properties:
'{'
+ ' "$build/goma": {'
+ ' "enable_ats": false,'
+ ' "server_host": "goma.chromium.org",'
+ ' "use_luci_auth": true'
+ ' },'
' "$build/reclient": {'
' "instance": "rbe-webrtc-untrusted",'
' "metrics_project": "chromium-reclient-metrics"'
diff --git a/infra/config/main.star b/infra/config/main.star
index e83afe4..7490a59 100755
--- a/infra/config/main.star
+++ b/infra/config/main.star
@@ -8,6 +8,24 @@
LIBYUV_GIT = "https://chromium.googlesource.com/libyuv/libyuv"
LIBYUV_GERRIT = "https://chromium-review.googlesource.com/libyuv/libyuv"
+GOMA_BACKEND_RBE_PROD = {
+ "server_host": "goma.chromium.org",
+ "use_luci_auth": True,
+}
+
+GOMA_BACKEND_RBE_ATS_PROD = {
+ "server_host": "goma.chromium.org",
+ "use_luci_auth": True,
+ "enable_ats": True,
+}
+
+# Disable ATS on Windows CQ/try.
+GOMA_BACKEND_RBE_NO_ATS_PROD = {
+ "server_host": "goma.chromium.org",
+ "use_luci_auth": True,
+ "enable_ats": False,
+}
+
RECLIENT_CI = {
"instance": "rbe-webrtc-trusted",
"metrics_project": "chromium-reclient-metrics",
@@ -62,7 +80,7 @@
],
bindings = [
luci.binding(
- roles = "role/swarming.taskTriggerer", # for LED tasks.
+ roles = "role/swarming.taskTriggerer", # for LED tasks.
groups = "project-libyuv-admins",
),
luci.binding(
@@ -200,6 +218,19 @@
return {"os": "Ubuntu-18.04", "cores": "8", "cpu": "x86-64"}
return {}
+def get_os_properties(os, try_builder = False):
+ if os == "android":
+ return {"$build/goma": GOMA_BACKEND_RBE_PROD}
+ elif os in ("ios", "mac"):
+ return {"$build/goma": GOMA_BACKEND_RBE_PROD}
+ elif os == "win" and try_builder:
+ return {"$build/goma": GOMA_BACKEND_RBE_NO_ATS_PROD}
+ elif os == "win":
+ return {"$build/goma": GOMA_BACKEND_RBE_ATS_PROD}
+ elif os == "linux":
+ return {"$build/goma": GOMA_BACKEND_RBE_ATS_PROD}
+ return {}
+
def libyuv_ci_builder(name, dimensions, properties, triggered_by):
return luci.builder(
name = name,
@@ -237,7 +268,8 @@
def ci_builder(name, os, category, short_name = None):
dimensions = get_os_dimensions(os)
- properties = {"$build/reclient": RECLIENT_CI}
+ properties = get_os_properties(os)
+ properties["$build/reclient"] = RECLIENT_CI
dimensions["pool"] = "luci.flex.ci"
properties["builder_group"] = "client.libyuv"
@@ -248,7 +280,8 @@
def try_builder(name, os, experiment_percentage = None):
dimensions = get_os_dimensions(os)
- properties = {"$build/reclient": RECLIENT_CQ}
+ properties = get_os_properties(os, try_builder = True)
+ properties["$build/reclient"] = RECLIENT_CQ
dimensions["pool"] = "luci.flex.try"
properties["builder_group"] = "tryserver.libyuv"
diff --git a/infra/config/project.cfg b/infra/config/project.cfg
index 3c32711..af79cfb 100644
--- a/infra/config/project.cfg
+++ b/infra/config/project.cfg
@@ -7,7 +7,7 @@
name: "libyuv"
access: "group:all"
lucicfg {
- version: "1.39.14"
+ version: "1.39.8"
package_dir: "."
config_dir: "."
entry_point: "main.star"
diff --git a/libyuv.gni b/libyuv.gni
index 343160c..0a6c445 100644
--- a/libyuv.gni
+++ b/libyuv.gni
@@ -7,7 +7,6 @@
# be found in the AUTHORS file in the root of the source tree.
import("//build/config/arm.gni")
-import("//build/config/loongarch64.gni")
import("//build/config/mips.gni")
import("//build_overrides/build.gni")
@@ -22,8 +21,4 @@
(current_cpu == "mips64el" || current_cpu == "mipsel") && mips_use_msa
libyuv_use_mmi =
(current_cpu == "mips64el" || current_cpu == "mipsel") && mips_use_mmi
- libyuv_use_lsx =
- (current_cpu == "loong64") && loongarch64_use_lsx
- libyuv_use_lasx =
- (current_cpu == "loong64") && loongarch64_use_lasx
}
diff --git a/riscv_script/riscv-clang.cmake b/riscv_script/riscv-clang.cmake
index e287941..47dd506 100644
--- a/riscv_script/riscv-clang.cmake
+++ b/riscv_script/riscv-clang.cmake
@@ -28,20 +28,17 @@
set(CMAKE_OBJCOPY "${TOOLCHAIN_PATH}/bin/llvm-objcopy")
# compile options
-set(RISCV_COMPILER_FLAGS "" CACHE STRING "Compile flags")
-# if user provides RISCV_COMPILER_FLAGS, appeding compile flags is avoided.
-if(RISCV_COMPILER_FLAGS STREQUAL "")
- message(STATUS "USE_RVV: ${USE_RVV}")
- message(STATUS "USE_AUTO_VECTORIZER: ${USE_AUTO_VECTORIZER}")
- if(USE_RVV)
- list(APPEND RISCV_COMPILER_FLAGS "-march=rv64gcv")
- if(NOT USE_AUTO_VECTORIZER)
- # Disable auto-vectorizer
- add_compile_options(-fno-vectorize -fno-slp-vectorize)
- endif()
- else()
- list(APPEND RISCV_COMPILER_FLAGS "-march=rv64gc")
+message(STATUS "USE_RVV: ${USE_RVV}")
+message(STATUS "USE_AUTO_VECTORIZER: ${USE_AUTO_VECTORIZER}")
+set(RISCV_COMPILER_FLAGS)
+if(USE_RVV)
+ list(APPEND RISCV_COMPILER_FLAGS "-march=rv64gcv")
+ if(NOT USE_AUTO_VECTORIZER)
+ # Disable auto-vectorizer
+ add_compile_options(-fno-vectorize -fno-slp-vectorize)
endif()
+else()
+ list(APPEND RISCV_COMPILER_FLAGS "-march=rv64gc")
endif()
message(STATUS "RISCV_COMPILER_FLAGS: ${RISCV_COMPILER_FLAGS}")
diff --git a/source/convert.cc b/source/convert.cc
index b68fb1d..b11ab1b 100644
--- a/source/convert.cc
+++ b/source/convert.cc
@@ -2128,11 +2128,6 @@
: ARGBExtractAlphaRow_Any_LSX;
}
#endif
-#if defined(HAS_ARGBEXTRACTALPHAROW_RVV)
- if (TestCpuFlag(kCpuHasRVV)) {
- ARGBExtractAlphaRow = ARGBExtractAlphaRow_RVV;
- }
-#endif
for (y = 0; y < height - 1; y += 2) {
ARGBToUVRow(src_argb, src_stride_argb, dst_u, dst_v, width);
diff --git a/source/convert_argb.cc b/source/convert_argb.cc
index f6ab078..cc6560d 100644
--- a/source/convert_argb.cc
+++ b/source/convert_argb.cc
@@ -3853,11 +3853,6 @@
}
}
#endif
-#if defined(HAS_NV12TOARGBROW_RVV)
- if (TestCpuFlag(kCpuHasRVV)) {
- NV12ToARGBRow = NV12ToARGBRow_RVV;
- }
-#endif
for (y = 0; y < height; ++y) {
NV12ToARGBRow(src_y, src_uv, dst_argb, yuvconstants, width);
@@ -3943,11 +3938,6 @@
}
}
#endif
-#if defined(HAS_NV21TOARGBROW_RVV)
- if (TestCpuFlag(kCpuHasRVV)) {
- NV21ToARGBRow = NV21ToARGBRow_RVV;
- }
-#endif
for (y = 0; y < height; ++y) {
NV21ToARGBRow(src_y, src_vu, dst_argb, yuvconstants, width);
@@ -4068,11 +4058,6 @@
}
}
#endif
-#if defined(HAS_NV12TORGB24ROW_RVV)
- if (TestCpuFlag(kCpuHasRVV)) {
- NV12ToRGB24Row = NV12ToRGB24Row_RVV;
- }
-#endif
for (y = 0; y < height; ++y) {
NV12ToRGB24Row(src_y, src_uv, dst_rgb24, yuvconstants, width);
@@ -4134,11 +4119,6 @@
}
}
#endif
-#if defined(HAS_NV21TORGB24ROW_RVV)
- if (TestCpuFlag(kCpuHasRVV)) {
- NV21ToRGB24Row = NV21ToRGB24Row_RVV;
- }
-#endif
for (y = 0; y < height; ++y) {
NV21ToRGB24Row(src_y, src_vu, dst_rgb24, yuvconstants, width);
@@ -6040,12 +6020,6 @@
ScaleRowUp2_Linear = ScaleRowUp2_Linear_Any_NEON;
}
#endif
-#if defined(HAS_SCALEROWUP2_BILINEAR_RVV)
- if (TestCpuFlag(kCpuHasRVV)) {
- Scale2RowUp_Bilinear = ScaleRowUp2_Bilinear_RVV;
- ScaleRowUp2_Linear = ScaleRowUp2_Linear_RVV;
- }
-#endif
// alloc 4 lines temp
const int row_size = (width + 31) & ~31;
@@ -6177,11 +6151,6 @@
ScaleRowUp2_Linear = ScaleRowUp2_Linear_Any_NEON;
}
#endif
-#if defined(HAS_SCALEROWUP2_LINEAR_RVV)
- if (TestCpuFlag(kCpuHasRVV)) {
- ScaleRowUp2_Linear = ScaleRowUp2_Linear_RVV;
- }
-#endif
// alloc 2 lines temp
const int row_size = (width + 31) & ~31;
@@ -6307,12 +6276,6 @@
ScaleRowUp2_Linear = ScaleRowUp2_Linear_Any_NEON;
}
#endif
-#if defined(HAS_SCALEROWUP2_BILINEAR_RVV)
- if (TestCpuFlag(kCpuHasRVV)) {
- Scale2RowUp_Bilinear = ScaleRowUp2_Bilinear_RVV;
- ScaleRowUp2_Linear = ScaleRowUp2_Linear_RVV;
- }
-#endif
// alloc 4 lines temp
const int row_size = (width + 31) & ~31;
@@ -6874,12 +6837,6 @@
ScaleRowUp2_Linear = ScaleRowUp2_Linear_Any_NEON;
}
#endif
-#if defined(HAS_SCALEROWUP2_BILINEAR_RVV)
- if (TestCpuFlag(kCpuHasRVV)) {
- Scale2RowUp_Bilinear = ScaleRowUp2_Bilinear_RVV;
- ScaleRowUp2_Linear = ScaleRowUp2_Linear_RVV;
- }
-#endif
// alloc 4 lines temp
const int row_size = (width + 31) & ~31;
@@ -7075,11 +7032,6 @@
ScaleRowUp2_Linear = ScaleRowUp2_Linear_Any_NEON;
}
#endif
-#if defined(HAS_SCALEROWUP2_LINEAR_RVV)
- if (TestCpuFlag(kCpuHasRVV)) {
- ScaleRowUp2_Linear = ScaleRowUp2_Linear_RVV;
- }
-#endif
// alloc 2 lines temp
const int row_size = (width + 31) & ~31;
@@ -7818,11 +7770,6 @@
ScaleRowUp2_Linear = ScaleRowUp2_Linear_Any_NEON;
}
#endif
-#if defined(HAS_SCALEROWUP2_LINEAR_RVV)
- if (TestCpuFlag(kCpuHasRVV)) {
- ScaleRowUp2_Linear = ScaleRowUp2_Linear_RVV;
- }
-#endif
// alloc 2 lines temp
const int row_size = (width + 31) & ~31;
diff --git a/source/planar_functions.cc b/source/planar_functions.cc
index f6ec0da..d115a2a 100644
--- a/source/planar_functions.cc
+++ b/source/planar_functions.cc
@@ -2783,38 +2783,17 @@
return 0;
}
-// Alpha Blend 2 ARGB images and store to destination.
+// Get a blender that optimized for the CPU and pixel count.
+// As there are 6 blenders to choose from, the caller should try to use
+// the same blend function for all pixels if possible.
LIBYUV_API
-int ARGBBlend(const uint8_t* src_argb0,
- int src_stride_argb0,
- const uint8_t* src_argb1,
- int src_stride_argb1,
- uint8_t* dst_argb,
- int dst_stride_argb,
- int width,
- int height) {
- int y;
+ARGBBlendRow GetARGBBlend() {
void (*ARGBBlendRow)(const uint8_t* src_argb, const uint8_t* src_argb1,
uint8_t* dst_argb, int width) = ARGBBlendRow_C;
- if (!src_argb0 || !src_argb1 || !dst_argb || width <= 0 || height == 0) {
- return -1;
- }
- // Negative height means invert the image.
- if (height < 0) {
- height = -height;
- dst_argb = dst_argb + (height - 1) * dst_stride_argb;
- dst_stride_argb = -dst_stride_argb;
- }
- // Coalesce rows.
- if (src_stride_argb0 == width * 4 && src_stride_argb1 == width * 4 &&
- dst_stride_argb == width * 4) {
- width *= height;
- height = 1;
- src_stride_argb0 = src_stride_argb1 = dst_stride_argb = 0;
- }
#if defined(HAS_ARGBBLENDROW_SSSE3)
if (TestCpuFlag(kCpuHasSSSE3)) {
ARGBBlendRow = ARGBBlendRow_SSSE3;
+ return ARGBBlendRow;
}
#endif
#if defined(HAS_ARGBBLENDROW_NEON)
@@ -2832,11 +2811,39 @@
ARGBBlendRow = ARGBBlendRow_LSX;
}
#endif
-#if defined(HAS_ARGBBLENDROW_RVV)
- if (TestCpuFlag(kCpuHasRVV)) {
- ARGBBlendRow = ARGBBlendRow_RVV;
+ return ARGBBlendRow;
+}
+
+// Alpha Blend 2 ARGB images and store to destination.
+LIBYUV_API
+int ARGBBlend(const uint8_t* src_argb0,
+ int src_stride_argb0,
+ const uint8_t* src_argb1,
+ int src_stride_argb1,
+ uint8_t* dst_argb,
+ int dst_stride_argb,
+ int width,
+ int height) {
+ int y;
+ void (*ARGBBlendRow)(const uint8_t* src_argb, const uint8_t* src_argb1,
+ uint8_t* dst_argb, int width) = GetARGBBlend();
+ if (!src_argb0 || !src_argb1 || !dst_argb || width <= 0 || height == 0) {
+ return -1;
}
-#endif
+ // Negative height means invert the image.
+ if (height < 0) {
+ height = -height;
+ dst_argb = dst_argb + (height - 1) * dst_stride_argb;
+ dst_stride_argb = -dst_stride_argb;
+ }
+ // Coalesce rows.
+ if (src_stride_argb0 == width * 4 && src_stride_argb1 == width * 4 &&
+ dst_stride_argb == width * 4) {
+ width *= height;
+ height = 1;
+ src_stride_argb0 = src_stride_argb1 = dst_stride_argb = 0;
+ }
+
for (y = 0; y < height; ++y) {
ARGBBlendRow(src_argb0, src_argb1, dst_argb, width);
src_argb0 += src_stride_argb0;
@@ -2896,11 +2903,6 @@
}
}
#endif
-#if defined(HAS_BLENDPLANEROW_RVV)
- if (TestCpuFlag(kCpuHasRVV)) {
- BlendPlaneRow = BlendPlaneRow_RVV;
- }
-#endif
for (y = 0; y < height; ++y) {
BlendPlaneRow(src_y0, src_y1, alpha, dst_y, width);
@@ -2978,11 +2980,6 @@
}
}
#endif
-#if defined(HAS_BLENDPLANEROW_RVV)
- if (TestCpuFlag(kCpuHasRVV)) {
- BlendPlaneRow = BlendPlaneRow_RVV;
- }
-#endif
if (!IS_ALIGNED(width, 2)) {
ScaleRowDown2 = ScaleRowDown2Box_Odd_C;
}
@@ -3019,11 +3016,6 @@
}
}
#endif
-#if defined(HAS_SCALEROWDOWN2_RVV)
- if (TestCpuFlag(kCpuHasRVV)) {
- ScaleRowDown2 = ScaleRowDown2Box_RVV;
- }
-#endif
// Row buffer for intermediate alpha pixels.
align_buffer_64(halfalpha, halfwidth);
@@ -5348,11 +5340,6 @@
: ARGBExtractAlphaRow_Any_LSX;
}
#endif
-#if defined(HAS_ARGBEXTRACTALPHAROW_RVV)
- if (TestCpuFlag(kCpuHasRVV)) {
- ARGBExtractAlphaRow = ARGBExtractAlphaRow_RVV;
- }
-#endif
for (int y = 0; y < height; ++y) {
ARGBExtractAlphaRow(src_argb, dst_a, width);
@@ -5404,11 +5391,6 @@
}
}
#endif
-#if defined(HAS_ARGBCOPYYTOALPHAROW_RVV)
- if (TestCpuFlag(kCpuHasRVV)) {
- ARGBCopyYToAlphaRow = ARGBCopyYToAlphaRow_RVV;
- }
-#endif
for (y = 0; y < height; ++y) {
ARGBCopyYToAlphaRow(src_y, dst_argb, width);
diff --git a/source/rotate.cc b/source/rotate.cc
index 3678b80..8d3978c 100644
--- a/source/rotate.cc
+++ b/source/rotate.cc
@@ -489,12 +489,13 @@
return -1;
}
-static void TransposePlane_16(const uint16_t* src,
- int src_stride,
- uint16_t* dst,
- int dst_stride,
- int width,
- int height) {
+LIBYUV_API
+void TransposePlane_16(const uint16_t* src,
+ int src_stride,
+ uint16_t* dst,
+ int dst_stride,
+ int width,
+ int height) {
int i = height;
// Work across the source in 8x8 tiles
while (i >= 8) {
diff --git a/source/rotate_argb.cc b/source/rotate_argb.cc
index 034d53e..c723901 100644
--- a/source/rotate_argb.cc
+++ b/source/rotate_argb.cc
@@ -69,11 +69,6 @@
}
}
#endif
-#if defined(HAS_SCALEARGBROWDOWNEVEN_RVV)
- if (TestCpuFlag(kCpuHasRVV)) {
- ScaleARGBRowDownEven = ScaleARGBRowDownEven_RVV;
- }
-#endif
for (i = 0; i < width; ++i) { // column of source to row of dest.
ScaleARGBRowDownEven(src_argb, 0, src_pixel_step, dst_argb, height);
diff --git a/source/rotate_common.cc b/source/rotate_common.cc
index e72608e..4b496d1 100644
--- a/source/rotate_common.cc
+++ b/source/rotate_common.cc
@@ -120,6 +120,37 @@
}
}
+void TransposeUVWx8_16_C(const uint16_t* src,
+ int src_stride,
+ uint16_t* dst_a,
+ int dst_stride_a,
+ uint16_t* dst_b,
+ int dst_stride_b,
+ int width) {
+ int i;
+ for (i = 0; i < width; ++i) {
+ dst_a[0] = src[0 * src_stride + 0];
+ dst_b[0] = src[0 * src_stride + 1];
+ dst_a[1] = src[1 * src_stride + 0];
+ dst_b[1] = src[1 * src_stride + 1];
+ dst_a[2] = src[2 * src_stride + 0];
+ dst_b[2] = src[2 * src_stride + 1];
+ dst_a[3] = src[3 * src_stride + 0];
+ dst_b[3] = src[3 * src_stride + 1];
+ dst_a[4] = src[4 * src_stride + 0];
+ dst_b[4] = src[4 * src_stride + 1];
+ dst_a[5] = src[5 * src_stride + 0];
+ dst_b[5] = src[5 * src_stride + 1];
+ dst_a[6] = src[6 * src_stride + 0];
+ dst_b[6] = src[6 * src_stride + 1];
+ dst_a[7] = src[7 * src_stride + 0];
+ dst_b[7] = src[7 * src_stride + 1];
+ src += 2;
+ dst_a += dst_stride_a;
+ dst_b += dst_stride_b;
+ }
+}
+
void TransposeWxH_16_C(const uint16_t* src,
int src_stride,
uint16_t* dst,
diff --git a/source/row_common.cc b/source/row_common.cc
index 7591c6b..8be37fb 100644
--- a/source/row_common.cc
+++ b/source/row_common.cc
@@ -48,6 +48,7 @@
defined(__i386__) || defined(_M_IX86))
#define LIBYUV_ARGBTOUV_PAVGB 1
#define LIBYUV_RGBTOU_TRUNCATE 1
+#define LIBYUV_ATTENUATE_DUP 1
#endif
#if defined(LIBYUV_BIT_EXACT)
#define LIBYUV_UNATTENUATE_DUP 1
@@ -1875,10 +1876,9 @@
int yg = yuvconstants->kYToRgb[0];
#endif
uint32_t y1 = (uint32_t)(y * 0x0101 * yg) >> 16;
- uint8_t b8 = STATIC_CAST(uint8_t, Clamp(((int32_t)(y1) + ygb) >> 6));
- *b = b8;
- *g = b8;
- *r = b8;
+ *b = STATIC_CAST(uint8_t, Clamp(((int32_t)(y1) + ygb) >> 6));
+ *g = STATIC_CAST(uint8_t, Clamp(((int32_t)(y1) + ygb) >> 6));
+ *r = STATIC_CAST(uint8_t, Clamp(((int32_t)(y1) + ygb) >> 6));
}
void I444ToARGBRow_C(const uint8_t* src_y,
@@ -3369,7 +3369,12 @@
}
#undef UBLEND
-#define ATTENUATE(f, a) (f * a + 255) >> 8
+#if LIBYUV_ATTENUATE_DUP
+// Mimics the SSSE3 version for better testability; fully parenthesized.
+#define ATTENUATE(f, a) ((((a) | ((a) << 8)) * ((f) | ((f) << 8))) >> 24)
+#else
+#define ATTENUATE(f, a) (((f) * (a) + 128) >> 8)
+#endif
// Multiply source RGB by alpha and store to destination.
void ARGBAttenuateRow_C(const uint8_t* src_argb, uint8_t* dst_argb, int width) {
diff --git a/source/row_gcc.cc b/source/row_gcc.cc
index d807498..e94fd04 100644
--- a/source/row_gcc.cc
+++ b/source/row_gcc.cc
@@ -7441,106 +7441,93 @@
#ifdef HAS_ARGBATTENUATEROW_SSSE3
// Shuffle table duplicating alpha.
-static const vec8 kAttenuateShuffle = {6, -128, 6, -128, 6, -128,
- -128, -128, 14, -128, 14, -128,
- 14, -128, -128, -128};
-
+static const uvec8 kShuffleAlpha0 = {3u, 3u, 3u, 3u, 3u, 3u, 128u, 128u,
+ 7u, 7u, 7u, 7u, 7u, 7u, 128u, 128u};
+static const uvec8 kShuffleAlpha1 = {11u, 11u, 11u, 11u, 11u, 11u, 128u, 128u,
+ 15u, 15u, 15u, 15u, 15u, 15u, 128u, 128u};
// Attenuate 4 pixels at a time.
void ARGBAttenuateRow_SSSE3(const uint8_t* src_argb,
uint8_t* dst_argb,
int width) {
asm volatile(
+ "pcmpeqb %%xmm3,%%xmm3 \n"
+ "pslld $0x18,%%xmm3 \n"
"movdqa %3,%%xmm4 \n"
- "pcmpeqb %%xmm5,%%xmm5 \n"
- "pslld $0x18,%%xmm5 \n"
- "pxor %%xmm6,%%xmm6 \n"
- "pcmpeqb %%xmm7,%%xmm7 \n"
- "punpcklbw %%xmm6,%%xmm7 \n"
- "sub %0,%1 \n"
+ "movdqa %4,%%xmm5 \n"
// 4 pixel loop.
LABELALIGN
"1: \n"
- "movdqu (%0),%%xmm6 \n"
- "movdqa %%xmm6,%%xmm0 \n"
- "movdqa %%xmm6,%%xmm1 \n"
- "punpcklbw %%xmm5,%%xmm0 \n"
- "punpckhbw %%xmm5,%%xmm1 \n"
- "movdqa %%xmm0,%%xmm2 \n"
- "movdqa %%xmm1,%%xmm3 \n"
- "pshufb %%xmm4,%%xmm2 \n" // a,a,a,0
- "pshufb %%xmm4,%%xmm3 \n"
- "pmullw %%xmm2,%%xmm0 \n" // rgb * alpha
- "pmullw %%xmm3,%%xmm1 \n"
- "paddw %%xmm7,%%xmm0 \n" // + 255
- "paddw %%xmm7,%%xmm1 \n"
+ "movdqu (%0),%%xmm0 \n"
+ "pshufb %%xmm4,%%xmm0 \n"
+ "movdqu (%0),%%xmm1 \n"
+ "punpcklbw %%xmm1,%%xmm1 \n"
+ "pmulhuw %%xmm1,%%xmm0 \n"
+ "movdqu (%0),%%xmm1 \n"
+ "pshufb %%xmm5,%%xmm1 \n"
+ "movdqu (%0),%%xmm2 \n"
+ "punpckhbw %%xmm2,%%xmm2 \n"
+ "pmulhuw %%xmm2,%%xmm1 \n"
+ "movdqu (%0),%%xmm2 \n"
+ "lea 0x10(%0),%0 \n"
+ "pand %%xmm3,%%xmm2 \n"
"psrlw $0x8,%%xmm0 \n"
"psrlw $0x8,%%xmm1 \n"
"packuswb %%xmm1,%%xmm0 \n"
- "pand %%xmm5,%%xmm6 \n"
- "por %%xmm6,%%xmm0 \n"
- "movdqu %%xmm0,(%0,%1) \n"
- "lea 0x10(%0),%0 \n"
+ "por %%xmm2,%%xmm0 \n"
+ "movdqu %%xmm0,(%1) \n"
+ "lea 0x10(%1),%1 \n"
"sub $0x4,%2 \n"
"jg 1b \n"
- : "+r"(src_argb), // %0
- "+r"(dst_argb), // %1
- "+r"(width) // %2
- : "m"(kAttenuateShuffle) // %3
- : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6",
- "xmm7");
+ : "+r"(src_argb), // %0
+ "+r"(dst_argb), // %1
+ "+r"(width) // %2
+ : "m"(kShuffleAlpha0), // %3
+ "m"(kShuffleAlpha1) // %4
+ : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5");
}
#endif // HAS_ARGBATTENUATEROW_SSSE3
#ifdef HAS_ARGBATTENUATEROW_AVX2
-
// Shuffle table duplicating alpha.
-static const lvec8 kAttenuateShuffle_AVX2 = {
- 6, -128, 6, -128, 6, -128, -128, -128, 14, -128, 14,
- -128, 14, -128, -128, -128, 22, -128, 22, -128, 22, -128,
- -128, -128, 30, -128, 30, -128, 30, -128, -128, -128};
-
+static const uvec8 kShuffleAlpha_AVX2 = {6u, 7u, 6u, 7u, 6u, 7u,
+ 128u, 128u, 14u, 15u, 14u, 15u,
+ 14u, 15u, 128u, 128u};
// Attenuate 8 pixels at a time.
void ARGBAttenuateRow_AVX2(const uint8_t* src_argb,
uint8_t* dst_argb,
int width) {
asm volatile(
- "vmovdqa %3,%%ymm4 \n"
+ "vbroadcastf128 %3,%%ymm4 \n"
"vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n"
"vpslld $0x18,%%ymm5,%%ymm5 \n"
- "vpxor %%ymm6,%%ymm6,%%ymm6 \n"
- "vpcmpeqb %%ymm7,%%ymm7,%%ymm7 \n"
- "vpunpcklbw %%ymm6,%%ymm7,%%ymm7 \n"
"sub %0,%1 \n"
// 8 pixel loop.
LABELALIGN
"1: \n"
"vmovdqu (%0),%%ymm6 \n"
- "vpunpcklbw %%ymm5,%%ymm6,%%ymm0 \n"
- "vpunpckhbw %%ymm5,%%ymm6,%%ymm1 \n"
+ "vpunpcklbw %%ymm6,%%ymm6,%%ymm0 \n"
+ "vpunpckhbw %%ymm6,%%ymm6,%%ymm1 \n"
"vpshufb %%ymm4,%%ymm0,%%ymm2 \n"
"vpshufb %%ymm4,%%ymm1,%%ymm3 \n"
- "vpmullw %%ymm2,%%ymm0,%%ymm0 \n"
- "vpmullw %%ymm3,%%ymm1,%%ymm1 \n"
- "vpaddw %%ymm7,%%ymm0,%%ymm0 \n"
- "vpaddw %%ymm7,%%ymm1,%%ymm1 \n"
+ "vpmulhuw %%ymm2,%%ymm0,%%ymm0 \n"
+ "vpmulhuw %%ymm3,%%ymm1,%%ymm1 \n"
+ "vpand %%ymm5,%%ymm6,%%ymm6 \n"
"vpsrlw $0x8,%%ymm0,%%ymm0 \n"
"vpsrlw $0x8,%%ymm1,%%ymm1 \n"
"vpackuswb %%ymm1,%%ymm0,%%ymm0 \n"
- "vpand %%ymm5,%%ymm6,%%ymm1 \n"
- "vpor %%ymm1,%%ymm0,%%ymm0 \n"
+ "vpor %%ymm6,%%ymm0,%%ymm0 \n"
"vmovdqu %%ymm0,0x00(%0,%1,1) \n"
"lea 0x20(%0),%0 \n"
"sub $0x8,%2 \n"
"jg 1b \n"
"vzeroupper \n"
- : "+r"(src_argb), // %0
- "+r"(dst_argb), // %1
- "+r"(width) // %2
- : "m"(kAttenuateShuffle_AVX2) // %3
- : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6",
- "xmm7");
+ : "+r"(src_argb), // %0
+ "+r"(dst_argb), // %1
+ "+r"(width) // %2
+ : "m"(kShuffleAlpha_AVX2) // %3
+ : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6");
}
#endif // HAS_ARGBATTENUATEROW_AVX2
diff --git a/source/row_neon.cc b/source/row_neon.cc
index 31142a9..4ed1363 100644
--- a/source/row_neon.cc
+++ b/source/row_neon.cc
@@ -1827,27 +1827,19 @@
);
}
-struct RgbUVConstants {
- uint8_t kRGBToU[4];
- uint8_t kRGBToV[4];
-};
-
// 8x1 pixels.
-void ARGBToUV444MatrixRow_NEON(const uint8_t* src_argb,
- uint8_t* dst_u,
- uint8_t* dst_v,
- int width,
- const struct RgbUVConstants* rgbuvconstants) {
+void ARGBToUV444Row_NEON(const uint8_t* src_argb,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width) {
asm volatile(
-
- "vld1.8 {d0}, [%4] \n" // load rgbuvconstants
- "vdup.u8 d24, d0[0] \n" // UB 0.875 coefficient
- "vdup.u8 d25, d0[1] \n" // UG -0.5781 coefficient
- "vdup.u8 d26, d0[2] \n" // UR -0.2969 coefficient
- "vdup.u8 d27, d0[4] \n" // VB -0.1406 coefficient
- "vdup.u8 d28, d0[5] \n" // VG -0.7344 coefficient
+ "vmov.u8 d24, #112 \n" // UB / VR 0.875
+ // coefficient
+ "vmov.u8 d25, #74 \n" // UG -0.5781 coefficient
+ "vmov.u8 d26, #38 \n" // UR -0.2969 coefficient
+ "vmov.u8 d27, #18 \n" // VB -0.1406 coefficient
+ "vmov.u8 d28, #94 \n" // VG -0.7344 coefficient
"vmov.u16 q15, #0x8080 \n" // 128.5
-
"1: \n"
"vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 8 ARGB pixels.
"subs %3, %3, #8 \n" // 8 processed per loop.
@@ -1865,53 +1857,15 @@
"vst1.8 {d0}, [%1]! \n" // store 8 pixels U.
"vst1.8 {d1}, [%2]! \n" // store 8 pixels V.
"bgt 1b \n"
- : "+r"(src_argb), // %0
- "+r"(dst_u), // %1
- "+r"(dst_v), // %2
- "+r"(width) // %3
- : "r"(rgbuvconstants) // %4
+ : "+r"(src_argb), // %0
+ "+r"(dst_u), // %1
+ "+r"(dst_v), // %2
+ "+r"(width) // %3
+ :
: "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q12", "q13", "q14",
"q15");
}
-// RGB to bt601 coefficients
-// UB 0.875 coefficient = 112
-// UG -0.5781 coefficient = 74
-// UR -0.2969 coefficient = 38
-// VB -0.1406 coefficient = 18
-// VG -0.7344 coefficient = 94
-// VR 0.875 coefficient = 112 (ignored)
-
-static const struct RgbUVConstants kRgb24I601UVConstants = {{112, 74, 38, 0},
- {18, 94, 112, 0}};
-
-// RGB to JPeg coefficients
-// UB coeff 0.500 = 127
-// UG coeff -0.33126 = 84
-// UR coeff -0.16874 = 43
-// VB coeff -0.08131 = 20
-// VG coeff -0.41869 = 107
-// VR coeff 0.500 = 127 (ignored)
-
-static const struct RgbUVConstants kRgb24JPegUVConstants = {{127, 84, 43, 0},
- {20, 107, 127, 0}};
-
-void ARGBToUV444Row_NEON(const uint8_t* src_argb,
- uint8_t* dst_u,
- uint8_t* dst_v,
- int width) {
- ARGBToUV444MatrixRow_NEON(src_argb, dst_u, dst_v, width,
- &kRgb24I601UVConstants);
-}
-
-void ARGBToUVJ444Row_NEON(const uint8_t* src_argb,
- uint8_t* dst_u,
- uint8_t* dst_v,
- int width) {
- ARGBToUV444MatrixRow_NEON(src_argb, dst_u, dst_v, width,
- &kRgb24JPegUVConstants);
-}
-
// clang-format off
// 16x2 pixels -> 8x1. width is number of argb pixels. e.g. 16.
#define RGBTOUV(QB, QG, QR) \
@@ -2748,6 +2702,7 @@
struct RgbConstants {
uint8_t kRGBToY[4];
uint16_t kAddY;
+ uint16_t pad;
};
// RGB to JPeg coefficients
@@ -2755,9 +2710,11 @@
// G * 0.5870 coefficient = 150
// R * 0.2990 coefficient = 77
// Add 0.5 = 0x80
-static const struct RgbConstants kRgb24JPEGConstants = {{29, 150, 77, 0}, 128};
+static const struct RgbConstants kRgb24JPEGConstants = {{29, 150, 77, 0},
+ 128,
+ 0};
-static const struct RgbConstants kRawJPEGConstants = {{77, 150, 29, 0}, 128};
+static const struct RgbConstants kRawJPEGConstants = {{77, 150, 29, 0}, 128, 0};
// RGB to BT.601 coefficients
// B * 0.1016 coefficient = 25
@@ -2766,9 +2723,12 @@
// Add 16.5 = 0x1080
static const struct RgbConstants kRgb24I601Constants = {{25, 129, 66, 0},
- 0x1080};
+ 0x1080,
+ 0};
-static const struct RgbConstants kRawI601Constants = {{66, 129, 25, 0}, 0x1080};
+static const struct RgbConstants kRawI601Constants = {{66, 129, 25, 0},
+ 0x1080,
+ 0};
// ARGB expects first 3 values to contain RGB and 4th value is ignored.
void ARGBToYMatrixRow_NEON(const uint8_t* src_argb,
@@ -3098,8 +3058,6 @@
uint8_t* dst_argb,
int width) {
asm volatile(
- "vmov.u16 q15, #0x00ff \n" // 255 for rounding up
-
// Attenuate 8 pixels.
"1: \n"
"vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 8 pixels of ARGB.
@@ -3107,16 +3065,16 @@
"vmull.u8 q10, d0, d3 \n" // b * a
"vmull.u8 q11, d1, d3 \n" // g * a
"vmull.u8 q12, d2, d3 \n" // r * a
- "vaddhn.u16 d0, q10, q15 \n" // (b + 255) >> 8
- "vaddhn.u16 d1, q11, q15 \n" // (g + 255) >> 8
- "vaddhn.u16 d2, q12, q15 \n" // (r + 255) >> 8
+ "vqrshrn.u16 d0, q10, #8 \n" // b >>= 8
+ "vqrshrn.u16 d1, q11, #8 \n" // g >>= 8
+ "vqrshrn.u16 d2, q12, #8 \n" // r >>= 8
"vst4.8 {d0, d1, d2, d3}, [%1]! \n" // store 8 pixels of ARGB.
"bgt 1b \n"
: "+r"(src_argb), // %0
"+r"(dst_argb), // %1
"+r"(width) // %2
:
- : "cc", "memory", "q0", "q1", "q10", "q11", "q12", "q15");
+ : "cc", "memory", "q0", "q1", "q10", "q11", "q12");
}
// Quantize 8 ARGB pixels (32 bytes).
diff --git a/source/row_neon64.cc b/source/row_neon64.cc
index 1679f87..74190d6 100644
--- a/source/row_neon64.cc
+++ b/source/row_neon64.cc
@@ -2198,26 +2198,19 @@
);
}
-struct RgbUVConstants {
- uint8_t kRGBToU[4];
- uint8_t kRGBToV[4];
-};
-
// 8x1 pixels.
-void ARGBToUV444MatrixRow_NEON(const uint8_t* src_argb,
- uint8_t* dst_u,
- uint8_t* dst_v,
- int width,
- const struct RgbUVConstants* rgbuvconstants) {
+void ARGBToUV444Row_NEON(const uint8_t* src_argb,
+ uint8_t* dst_u,
+ uint8_t* dst_v,
+ int width) {
asm volatile(
- "ldr d0, [%4] \n" // load rgbuvconstants
- "dup v24.16b, v0.b[0] \n" // UB 0.875 coefficient
- "dup v25.16b, v0.b[1] \n" // UG -0.5781 coefficient
- "dup v26.16b, v0.b[2] \n" // UR -0.2969 coefficient
- "dup v27.16b, v0.b[4] \n" // VB -0.1406 coefficient
- "dup v28.16b, v0.b[5] \n" // VG -0.7344 coefficient
- "movi v29.16b, #0x80 \n" // 128.5
-
+ "movi v24.8b, #112 \n" // UB / VR 0.875
+ // coefficient
+ "movi v25.8b, #74 \n" // UG -0.5781 coefficient
+ "movi v26.8b, #38 \n" // UR -0.2969 coefficient
+ "movi v27.8b, #18 \n" // VB -0.1406 coefficient
+ "movi v28.8b, #94 \n" // VG -0.7344 coefficient
+ "movi v29.16b,#0x80 \n" // 128.5
"1: \n"
"ld4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n" // load 8 ARGB
"subs %w3, %w3, #8 \n" // 8 processed per loop.
@@ -2236,53 +2229,15 @@
"st1 {v0.8b}, [%1], #8 \n" // store 8 pixels U.
"st1 {v1.8b}, [%2], #8 \n" // store 8 pixels V.
"b.gt 1b \n"
- : "+r"(src_argb), // %0
- "+r"(dst_u), // %1
- "+r"(dst_v), // %2
- "+r"(width) // %3
- : "r"(rgbuvconstants) // %4
+ : "+r"(src_argb), // %0
+ "+r"(dst_u), // %1
+ "+r"(dst_v), // %2
+ "+r"(width) // %3
+ :
: "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v24", "v25", "v26",
"v27", "v28", "v29");
}
-// RGB to bt601 coefficients
-// UB 0.875 coefficient = 112
-// UG -0.5781 coefficient = 74
-// UR -0.2969 coefficient = 38
-// VB -0.1406 coefficient = 18
-// VG -0.7344 coefficient = 94
-// VR 0.875 coefficient = 112 (ignored)
-
-static const struct RgbUVConstants kRgb24I601UVConstants = {{112, 74, 38, 0},
- {18, 94, 112, 0}};
-
-// RGB to JPeg coefficients
-// UB coeff 0.500 = 127
-// UG coeff -0.33126 = 84
-// UR coeff -0.16874 = 43
-// VB coeff -0.08131 = 20
-// VG coeff -0.41869 = 107
-// VR coeff 0.500 = 127 (ignored)
-
-static const struct RgbUVConstants kRgb24JPegUVConstants = {{127, 84, 43, 0},
- {20, 107, 127, 0}};
-
-void ARGBToUV444Row_NEON(const uint8_t* src_argb,
- uint8_t* dst_u,
- uint8_t* dst_v,
- int width) {
- ARGBToUV444MatrixRow_NEON(src_argb, dst_u, dst_v, width,
- &kRgb24I601UVConstants);
-}
-
-void ARGBToUVJ444Row_NEON(const uint8_t* src_argb,
- uint8_t* dst_u,
- uint8_t* dst_v,
- int width) {
- ARGBToUV444MatrixRow_NEON(src_argb, dst_u, dst_v, width,
- &kRgb24JPegUVConstants);
-}
-
#define RGBTOUV_SETUP_REG \
"movi v20.8h, #56, lsl #0 \n" /* UB/VR coefficient (0.875) / 2 */ \
"movi v21.8h, #37, lsl #0 \n" /* UG coefficient (-0.5781) / 2 */ \
@@ -2988,8 +2943,34 @@
struct RgbConstants {
uint8_t kRGBToY[4];
uint16_t kAddY;
+ uint16_t pad;
};
+// RGB to JPeg coefficients
+// B * 0.1140 coefficient = 29
+// G * 0.5870 coefficient = 150
+// R * 0.2990 coefficient = 77
+// Add 0.5 = 0x80
+static const struct RgbConstants kRgb24JPEGConstants = {{29, 150, 77, 0},
+ 128,
+ 0};
+
+static const struct RgbConstants kRawJPEGConstants = {{77, 150, 29, 0}, 128, 0};
+
+// RGB to BT.601 coefficients
+// B * 0.1016 coefficient = 25
+// G * 0.5078 coefficient = 129
+// R * 0.2578 coefficient = 66
+// Add 16.5 = 0x1080
+
+static const struct RgbConstants kRgb24I601Constants = {{25, 129, 66, 0},
+ 0x1080,
+ 0};
+
+static const struct RgbConstants kRawI601Constants = {{66, 129, 25, 0},
+ 0x1080,
+ 0};
+
// ARGB expects first 3 values to contain RGB and 4th value is ignored.
void ARGBToYMatrixRow_NEON(const uint8_t* src_argb,
uint8_t* dst_y,
@@ -3024,26 +3005,6 @@
"v17");
}
-// RGB to JPeg coefficients
-// B * 0.1140 coefficient = 29
-// G * 0.5870 coefficient = 150
-// R * 0.2990 coefficient = 77
-// Add 0.5 = 0x80
-static const struct RgbConstants kRgb24JPEGConstants = {{29, 150, 77, 0}, 128};
-
-static const struct RgbConstants kRawJPEGConstants = {{77, 150, 29, 0}, 128};
-
-// RGB to BT.601 coefficients
-// B * 0.1016 coefficient = 25
-// G * 0.5078 coefficient = 129
-// R * 0.2578 coefficient = 66
-// Add 16.5 = 0x1080
-
-static const struct RgbConstants kRgb24I601Constants = {{25, 129, 66, 0},
- 0x1080};
-
-static const struct RgbConstants kRawI601Constants = {{66, 129, 25, 0}, 0x1080};
-
void ARGBToYRow_NEON(const uint8_t* src_argb, uint8_t* dst_y, int width) {
ARGBToYMatrixRow_NEON(src_argb, dst_y, width, &kRgb24I601Constants);
}
@@ -3441,8 +3402,6 @@
uint8_t* dst_argb,
int width) {
asm volatile(
- "movi v7.8h, #0x00ff \n" // 255 for rounding up
-
// Attenuate 8 pixels.
"1: \n"
"ld4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n" // load 8 ARGB
@@ -3451,16 +3410,16 @@
"prfm pldl1keep, [%0, 448] \n"
"umull v5.8h, v1.8b, v3.8b \n" // g * a
"umull v6.8h, v2.8b, v3.8b \n" // r * a
- "addhn v0.8b, v4.8h, v7.8h \n" // (b + 255) >> 8
- "addhn v1.8b, v5.8h, v7.8h \n" // (g + 255) >> 8
- "addhn v2.8b, v6.8h, v7.8h \n" // (r + 255) >> 8
+ "uqrshrn v0.8b, v4.8h, #8 \n" // b >>= 8
+ "uqrshrn v1.8b, v5.8h, #8 \n" // g >>= 8
+ "uqrshrn v2.8b, v6.8h, #8 \n" // r >>= 8
"st4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%1], #32 \n" // store 8 ARGB
"b.gt 1b \n"
: "+r"(src_argb), // %0
"+r"(dst_argb), // %1
"+r"(width) // %2
:
- : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7");
+ : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6");
}
// Quantize 8 ARGB pixels (32 bytes).
@@ -4001,86 +3960,6 @@
: "cc", "memory", "v1", "v2", "v3");
}
-// Convert FP16 Half Floats to FP32 Floats
-void ConvertFP16ToFP32Row_NEON(const uint16_t* src, // fp16
- float* dst,
- int width) {
- asm volatile(
- "1: \n"
- "ld1 {v1.8h}, [%0], #16 \n" // load 8 halffloats
- "subs %w2, %w2, #8 \n" // 8 floats per loop
- "prfm pldl1keep, [%0, 448] \n"
- "fcvtl v2.4s, v1.4h \n" // 8 floats
- "fcvtl2 v3.4s, v1.8h \n"
- "stp q2, q3, [%1], #32 \n" // store 8 floats
- "b.gt 1b \n"
- : "+r"(src), // %0
- "+r"(dst), // %1
- "+r"(width) // %2
- :
- : "cc", "memory", "v1", "v2", "v3");
-}
-
-// Convert FP16 Half Floats to FP32 Floats
-// Read a column and write a row
-void ConvertFP16ToFP32Column_NEON(const uint16_t* src, // fp16
- int src_stride, // stride in elements
- float* dst,
- int width) {
- asm volatile(
- "cmp %w2, #8 \n" // Is there 8 rows?
- "b.lo 2f \n"
- "1: \n"
- "ld1 {v0.h}[0], [%0], %3 \n" // load 8 halffloats
- "ld1 {v0.h}[1], [%0], %3 \n"
- "ld1 {v0.h}[2], [%0], %3 \n"
- "ld1 {v0.h}[3], [%0], %3 \n"
- "ld1 {v1.h}[0], [%0], %3 \n"
- "ld1 {v1.h}[1], [%0], %3 \n"
- "ld1 {v1.h}[2], [%0], %3 \n"
- "ld1 {v1.h}[3], [%0], %3 \n"
- "subs %w2, %w2, #8 \n" // 8 rows per loop
- "prfm pldl1keep, [%0, 448] \n"
- "fcvtl v2.4s, v0.4h \n" // 4 floats
- "fcvtl v3.4s, v1.4h \n" // 4 more floats
- "stp q2, q3, [%1], #32 \n" // store 8 floats
- "b.gt 1b \n"
- "cmp %w2, #1 \n" // Is there 1 value?
- "b.lo 3f \n"
- "2: \n"
- "ld1 {v1.h}[0], [%0], %3 \n" // load 1 halffloats
- "subs %w2, %w2, #1 \n" // 1 floats per loop
- "fcvtl v2.4s, v1.4h \n" // 1 floats
- "str s2, [%1], #4 \n" // store 1 floats
- "b.gt 2b \n"
- "3: \n"
- : "+r"(src), // %0
- "+r"(dst), // %1
- "+r"(width) // %2
- : "r"((ptrdiff_t)(src_stride * 2)) // %3
- : "cc", "memory", "v0", "v1", "v2", "v3");
-}
-
-// Convert FP32 Floats to FP16 Half Floats
-void ConvertFP32ToFP16Row_NEON(const float* src,
- uint16_t* dst, // fp16
- int width) {
- asm volatile(
- "1: \n"
- "ldp q2, q3, [%0], #32 \n" // load 8 floats
- "subs %w2, %w2, #8 \n" // 8 floats per loop
- "prfm pldl1keep, [%0, 448] \n"
- "fcvtn v1.4h, v2.4s \n" // 8 fp16 halffloats
- "fcvtn2 v1.8h, v3.4s \n"
- "str q1, [%1], #16 \n" // store 8 fp16 halffloats
- "b.gt 1b \n"
- : "+r"(src), // %0
- "+r"(dst), // %1
- "+r"(width) // %2
- :
- : "cc", "memory", "v1", "v2", "v3");
-}
-
float ScaleMaxSamples_NEON(const float* src,
float* dst,
float scale,
diff --git a/source/row_rvv.cc b/source/row_rvv.cc
index c875be2..27e91a3 100644
--- a/source/row_rvv.cc
+++ b/source/row_rvv.cc
@@ -17,9 +17,7 @@
#include "libyuv/row.h"
-// This module is for clang rvv. GCC hasn't supported segment load & store.
-#if !defined(LIBYUV_DISABLE_RVV) && defined(__riscv_vector) && \
- defined(__clang__)
+#if !defined(LIBYUV_DISABLE_RVV) && defined(__riscv_vector)
#include <assert.h>
#include <riscv_vector.h>
@@ -31,48 +29,48 @@
// Fill YUV -> RGB conversion constants into vectors
// NOTE: To match behavior on other platforms, vxrm (fixed-point rounding mode
// register) is set to round-to-nearest-up mode(0).
-#define YUVTORGB_SETUP(yuvconst, ub, vr, ug, vg, yg, bb, bg, br) \
- { \
- asm volatile("csrwi vxrm, 0"); \
- ub = yuvconst->kUVCoeff[0]; \
- vr = yuvconst->kUVCoeff[1]; \
- ug = yuvconst->kUVCoeff[2]; \
- vg = yuvconst->kUVCoeff[3]; \
- yg = yuvconst->kRGBCoeffBias[0]; \
- bb = yuvconst->kRGBCoeffBias[1] + 32; \
- bg = yuvconst->kRGBCoeffBias[2] - 32; \
- br = yuvconst->kRGBCoeffBias[3] + 32; \
+#define YUVTORGB_SETUP(vl, yuvconst, ub, vr, ug, vg, yg, bb, bg, br) \
+ { \
+ asm volatile("csrwi vxrm, 0"); \
+ ub = yuvconst->kUVCoeff[0]; \
+ vr = yuvconst->kUVCoeff[1]; \
+ ug = yuvconst->kUVCoeff[2]; \
+ vg = yuvconst->kUVCoeff[3]; \
+ yg = yuvconst->kRGBCoeffBias[0]; \
+ bb = yuvconst->kRGBCoeffBias[1] + 32; \
+ bg = yuvconst->kRGBCoeffBias[2] - 32; \
+ br = yuvconst->kRGBCoeffBias[3] + 32; \
}
-// Read [2*VLEN/8] Y, [VLEN/8] U and [VLEN/8] V from 422
-#define READYUV422(vl, w, src_y, src_u, src_v, v_u, v_v, v_y_16) \
- { \
- vuint8m1_t v_tmp0, v_tmp1; \
- vuint8m2_t v_y; \
- vuint16m2_t v_u_16, v_v_16; \
- vl = __riscv_vsetvl_e8m1((w + 1) / 2); \
- v_tmp0 = __riscv_vle8_v_u8m1(src_u, vl); \
- v_u_16 = __riscv_vwaddu_vx_u16m2(v_tmp0, 0, vl); \
- v_tmp1 = __riscv_vle8_v_u8m1(src_v, vl); \
- v_v_16 = __riscv_vwaddu_vx_u16m2(v_tmp1, 0, vl); \
- v_v_16 = __riscv_vmul_vx_u16m2(v_v_16, 0x0101, vl); \
- v_u_16 = __riscv_vmul_vx_u16m2(v_u_16, 0x0101, vl); \
- v_v = __riscv_vreinterpret_v_u16m2_u8m2(v_v_16); \
- v_u = __riscv_vreinterpret_v_u16m2_u8m2(v_u_16); \
- vl = __riscv_vsetvl_e8m2(w); \
- v_y = __riscv_vle8_v_u8m2(src_y, vl); \
- v_y_16 = __riscv_vwaddu_vx_u16m4(v_y, 0, vl); \
+// Read [2*VLEN/8] Y, [VLEN/8] U and [VLEN/8] V from 422
+#define READYUV422(vl, v_u, v_v, v_y_16) \
+ { \
+ vuint8m1_t v_tmp0, v_tmp1; \
+ vuint8m2_t v_y; \
+ vuint16m2_t v_u_16, v_v_16; \
+ vl = __riscv_vsetvl_e8m1((w + 1) / 2); \
+ v_tmp0 = __riscv_vle8_v_u8m1(src_u, vl); \
+ v_u_16 = __riscv_vwaddu_vx_u16m2(v_tmp0, 0, vl); \
+ v_tmp1 = __riscv_vle8_v_u8m1(src_v, vl); \
+ v_v_16 = __riscv_vwaddu_vx_u16m2(v_tmp1, 0, vl); \
+ v_v_16 = __riscv_vmul_vx_u16m2(v_v_16, 0x0101, vl); \
+ v_u_16 = __riscv_vmul_vx_u16m2(v_u_16, 0x0101, vl); \
+ v_v = __riscv_vreinterpret_v_u16m2_u8m2(v_v_16); \
+ v_u = __riscv_vreinterpret_v_u16m2_u8m2(v_u_16); \
+ vl = __riscv_vsetvl_e8m2(w); \
+ v_y = __riscv_vle8_v_u8m2(src_y, vl); \
+ v_y_16 = __riscv_vwaddu_vx_u16m4(v_y, 0, vl); \
}
-// Read [2*VLEN/8] Y, [2*VLEN/8] U, and [2*VLEN/8] V from 444
-#define READYUV444(vl, w, src_y, src_u, src_v, v_u, v_v, v_y_16) \
- { \
- vuint8m2_t v_y; \
- vl = __riscv_vsetvl_e8m2(w); \
- v_y = __riscv_vle8_v_u8m2(src_y, vl); \
- v_u = __riscv_vle8_v_u8m2(src_u, vl); \
- v_v = __riscv_vle8_v_u8m2(src_v, vl); \
- v_y_16 = __riscv_vwaddu_vx_u16m4(v_y, 0, vl); \
+// Read [2*VLEN/8] Y, [2*VLEN/8] U, and [2*VLEN/8] V from 444
+#define READYUV444(vl, v_u, v_v, v_y_16) \
+ { \
+ vuint8m2_t v_y; \
+ vl = __riscv_vsetvl_e8m2(w); \
+ v_y = __riscv_vle8_v_u8m2(src_y, vl); \
+ v_u = __riscv_vle8_v_u8m2(src_u, vl); \
+ v_v = __riscv_vle8_v_u8m2(src_v, vl); \
+ v_y_16 = __riscv_vwaddu_vx_u16m4(v_y, 0, vl); \
}
// Convert from YUV to fixed point RGB
@@ -103,45 +101,6 @@
v_r = __riscv_vnclipu_wx_u8m2(v_r_16, 6, vl); \
}
-// Read [2*VLEN/8] Y from src_y; Read [VLEN/8] U and [VLEN/8] V from src_uv
-#define READNV12(vl, w, src_y, src_uv, v_u, v_v, v_y_16) \
- { \
- vuint8m1_t v_tmp0, v_tmp1; \
- vuint8m2_t v_y; \
- vuint16m2_t v_u_16, v_v_16; \
- vl = __riscv_vsetvl_e8m1((w + 1) / 2); \
- __riscv_vlseg2e8_v_u8m1(&v_tmp0, &v_tmp1, src_uv, vl); \
- v_u_16 = __riscv_vwaddu_vx_u16m2(v_tmp0, 0, vl); \
- v_v_16 = __riscv_vwaddu_vx_u16m2(v_tmp1, 0, vl); \
- v_v_16 = __riscv_vmul_vx_u16m2(v_v_16, 0x0101, vl); \
- v_u_16 = __riscv_vmul_vx_u16m2(v_u_16, 0x0101, vl); \
- v_v = __riscv_vreinterpret_v_u16m2_u8m2(v_v_16); \
- v_u = __riscv_vreinterpret_v_u16m2_u8m2(v_u_16); \
- vl = __riscv_vsetvl_e8m2(w); \
- v_y = __riscv_vle8_v_u8m2(src_y, vl); \
- v_y_16 = __riscv_vwaddu_vx_u16m4(v_y, 0, vl); \
- }
-
-// Read 2*[VLEN/8] Y from src_y; Read [VLEN/8] U and [VLEN/8] V from src_vu
-#define READNV21(vl, w, src_y, src_vu, v_u, v_v, v_y_16) \
- { \
- vuint8m1_t v_tmp0, v_tmp1; \
- vuint8m2_t v_y; \
- vuint16m2_t v_u_16, v_v_16; \
- vl = __riscv_vsetvl_e8m1((w + 1) / 2); \
- __riscv_vlseg2e8_v_u8m1(&v_tmp0, &v_tmp1, src_vu, vl); \
- v_u_16 = __riscv_vwaddu_vx_u16m2(v_tmp1, 0, vl); \
- v_v_16 = __riscv_vwaddu_vx_u16m2(v_tmp0, 0, vl); \
- v_v_16 = __riscv_vmul_vx_u16m2(v_v_16, 0x0101, vl); \
- v_u_16 = __riscv_vmul_vx_u16m2(v_u_16, 0x0101, vl); \
- v_v = __riscv_vreinterpret_v_u16m2_u8m2(v_v_16); \
- v_u = __riscv_vreinterpret_v_u16m2_u8m2(v_u_16); \
- vl = __riscv_vsetvl_e8m2(w); \
- v_y = __riscv_vle8_v_u8m2(src_y, vl); \
- v_y_16 = __riscv_vwaddu_vx_u16m4(v_y, 0, vl); \
- }
-
-#ifdef HAS_ARGBTOAR64ROW_RVV
void ARGBToAR64Row_RVV(const uint8_t* src_argb, uint16_t* dst_ar64, int width) {
size_t avl = (size_t)4 * width;
do {
@@ -157,9 +116,7 @@
dst_ar64 += vl;
} while (avl > 0);
}
-#endif
-#ifdef HAS_ARGBTOAB64ROW_RVV
void ARGBToAB64Row_RVV(const uint8_t* src_argb, uint16_t* dst_ab64, int width) {
size_t avl = (size_t)width;
do {
@@ -181,9 +138,7 @@
dst_ab64 += 4 * vl;
} while (avl > 0);
}
-#endif
-#ifdef HAS_AR64TOARGBROW_RVV
void AR64ToARGBRow_RVV(const uint16_t* src_ar64, uint8_t* dst_argb, int width) {
size_t avl = (size_t)4 * width;
do {
@@ -198,9 +153,7 @@
dst_argb += vl;
} while (avl > 0);
}
-#endif
-#ifdef HAS_AB64TOARGBROW_RVV
void AB64ToARGBRow_RVV(const uint16_t* src_ab64, uint8_t* dst_argb, int width) {
size_t avl = (size_t)width;
do {
@@ -218,9 +171,7 @@
dst_argb += 4 * vl;
} while (avl > 0);
}
-#endif
-#ifdef HAS_RAWTOARGBROW_RVV
void RAWToARGBRow_RVV(const uint8_t* src_raw, uint8_t* dst_argb, int width) {
size_t w = (size_t)width;
size_t vl = __riscv_vsetvl_e8m2(w);
@@ -235,9 +186,7 @@
vl = __riscv_vsetvl_e8m2(w);
} while (w > 0);
}
-#endif
-#ifdef HAS_RAWTORGBAROW_RVV
void RAWToRGBARow_RVV(const uint8_t* src_raw, uint8_t* dst_rgba, int width) {
size_t w = (size_t)width;
size_t vl = __riscv_vsetvl_e8m2(w);
@@ -252,9 +201,7 @@
vl = __riscv_vsetvl_e8m2(w);
} while (w > 0);
}
-#endif
-#ifdef HAS_RAWTORGB24ROW_RVV
void RAWToRGB24Row_RVV(const uint8_t* src_raw, uint8_t* dst_rgb24, int width) {
size_t w = (size_t)width;
do {
@@ -267,9 +214,7 @@
dst_rgb24 += vl * 3;
} while (w > 0);
}
-#endif
-#ifdef HAS_ARGBTORAWROW_RVV
void ARGBToRAWRow_RVV(const uint8_t* src_argb, uint8_t* dst_raw, int width) {
size_t w = (size_t)width;
do {
@@ -282,9 +227,7 @@
dst_raw += vl * 3;
} while (w > 0);
}
-#endif
-#ifdef HAS_ARGBTORGB24ROW_RVV
void ARGBToRGB24Row_RVV(const uint8_t* src_argb,
uint8_t* dst_rgb24,
int width) {
@@ -299,9 +242,7 @@
dst_rgb24 += vl * 3;
} while (w > 0);
}
-#endif
-#ifdef HAS_RGB24TOARGBROW_RVV
void RGB24ToARGBRow_RVV(const uint8_t* src_rgb24,
uint8_t* dst_argb,
int width) {
@@ -318,26 +259,24 @@
vl = __riscv_vsetvl_e8m2(w);
} while (w > 0);
}
-#endif
-#ifdef HAS_I444TOARGBROW_RVV
void I444ToARGBRow_RVV(const uint8_t* src_y,
const uint8_t* src_u,
const uint8_t* src_v,
uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width) {
size_t w = (size_t)width;
- size_t vl = __riscv_vsetvl_e8m2(w);
+ size_t vl = __riscv_vsetvl_e8m2(w); // v_a below reads vl before the loop.
uint8_t ub, vr, ug, vg;
int16_t yg, bb, bg, br;
vuint8m2_t v_u, v_v;
vuint8m2_t v_b, v_g, v_r, v_a;
vuint16m4_t v_y_16, v_g_16, v_b_16, v_r_16;
- YUVTORGB_SETUP(yuvconstants, ub, vr, ug, vg, yg, bb, bg, br);
+ YUVTORGB_SETUP(vl, yuvconstants, ub, vr, ug, vg, yg, bb, bg, br);
v_a = __riscv_vmv_v_x_u8m2(255u, vl);
do {
- READYUV444(vl, w, src_y, src_u, src_v, v_u, v_v, v_y_16);
+ READYUV444(vl, v_u, v_v, v_y_16);
YUVTORGB(vl, v_u, v_v, ub, vr, ug, vg, yg, bb, bg, br, v_y_16, v_g_16,
v_b_16, v_r_16);
RGBTORGB8(vl, v_g_16, v_b_16, v_r_16, v_g, v_b, v_r);
@@ -349,9 +288,7 @@
dst_argb += vl * 4;
} while (w > 0);
}
-#endif
-#ifdef HAS_I444ALPHATOARGBROW_RVV
void I444AlphaToARGBRow_RVV(const uint8_t* src_y,
const uint8_t* src_u,
const uint8_t* src_v,
@@ -366,9 +303,9 @@
vuint8m2_t v_u, v_v;
vuint8m2_t v_b, v_g, v_r, v_a;
vuint16m4_t v_y_16, v_g_16, v_b_16, v_r_16;
- YUVTORGB_SETUP(yuvconstants, ub, vr, ug, vg, yg, bb, bg, br);
+ YUVTORGB_SETUP(vl, yuvconstants, ub, vr, ug, vg, yg, bb, bg, br);
do {
- READYUV444(vl, w, src_y, src_u, src_v, v_u, v_v, v_y_16);
+ READYUV444(vl, v_u, v_v, v_y_16);
v_a = __riscv_vle8_v_u8m2(src_a, vl);
YUVTORGB(vl, v_u, v_v, ub, vr, ug, vg, yg, bb, bg, br, v_y_16, v_g_16,
v_b_16, v_r_16);
@@ -382,9 +319,7 @@
dst_argb += vl * 4;
} while (w > 0);
}
-#endif
-#ifdef HAS_I444TORGB24ROW_RVV
void I444ToRGB24Row_RVV(const uint8_t* src_y,
const uint8_t* src_u,
const uint8_t* src_v,
@@ -398,9 +333,9 @@
vuint8m2_t v_u, v_v;
vuint8m2_t v_b, v_g, v_r;
vuint16m4_t v_y_16, v_g_16, v_b_16, v_r_16;
- YUVTORGB_SETUP(yuvconstants, ub, vr, ug, vg, yg, bb, bg, br);
+ YUVTORGB_SETUP(vl, yuvconstants, ub, vr, ug, vg, yg, bb, bg, br);
do {
- READYUV444(vl, w, src_y, src_u, src_v, v_u, v_v, v_y_16);
+ READYUV444(vl, v_u, v_v, v_y_16);
YUVTORGB(vl, v_u, v_v, ub, vr, ug, vg, yg, bb, bg, br, v_y_16, v_g_16,
v_b_16, v_r_16);
RGBTORGB8(vl, v_g_16, v_b_16, v_r_16, v_g, v_b, v_r);
@@ -412,26 +347,24 @@
dst_rgb24 += vl * 3;
} while (w > 0);
}
-#endif
-#ifdef HAS_I422TOARGBROW_RVV
void I422ToARGBRow_RVV(const uint8_t* src_y,
const uint8_t* src_u,
const uint8_t* src_v,
uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
int width) {
size_t w = (size_t)width;
- size_t vl = __riscv_vsetvl_e8m2(w);
+ size_t vl = __riscv_vsetvl_e8m2(w); // v_a below reads vl before the loop.
uint8_t ub, vr, ug, vg;
int16_t yg, bb, bg, br;
vuint8m2_t v_u, v_v;
vuint8m2_t v_b, v_g, v_r, v_a;
vuint16m4_t v_y_16, v_g_16, v_b_16, v_r_16;
- YUVTORGB_SETUP(yuvconstants, ub, vr, ug, vg, yg, bb, bg, br);
+ YUVTORGB_SETUP(vl, yuvconstants, ub, vr, ug, vg, yg, bb, bg, br);
v_a = __riscv_vmv_v_x_u8m2(255u, vl);
do {
- READYUV422(vl, w, src_y, src_u, src_v, v_u, v_v, v_y_16);
+ READYUV422(vl, v_u, v_v, v_y_16);
YUVTORGB(vl, v_u, v_v, ub, vr, ug, vg, yg, bb, bg, br, v_y_16, v_g_16,
v_b_16, v_r_16);
RGBTORGB8(vl, v_g_16, v_b_16, v_r_16, v_g, v_b, v_r);
@@ -443,9 +376,7 @@
dst_argb += vl * 4;
} while (w > 0);
}
-#endif
-#ifdef HAS_I422ALPHATOARGBROW_RVV
void I422AlphaToARGBRow_RVV(const uint8_t* src_y,
const uint8_t* src_u,
const uint8_t* src_v,
@@ -460,9 +391,9 @@
vuint8m2_t v_u, v_v;
vuint8m2_t v_b, v_g, v_r, v_a;
vuint16m4_t v_y_16, v_g_16, v_b_16, v_r_16;
- YUVTORGB_SETUP(yuvconstants, ub, vr, ug, vg, yg, bb, bg, br);
+ YUVTORGB_SETUP(vl, yuvconstants, ub, vr, ug, vg, yg, bb, bg, br);
do {
- READYUV422(vl, w, src_y, src_u, src_v, v_u, v_v, v_y_16);
+ READYUV422(vl, v_u, v_v, v_y_16);
v_a = __riscv_vle8_v_u8m2(src_a, vl);
YUVTORGB(vl, v_u, v_v, ub, vr, ug, vg, yg, bb, bg, br, v_y_16, v_g_16,
v_b_16, v_r_16);
@@ -476,26 +407,24 @@
dst_argb += vl * 4;
} while (w > 0);
}
-#endif
-#ifdef HAS_I422TORGBAROW_RVV
void I422ToRGBARow_RVV(const uint8_t* src_y,
const uint8_t* src_u,
const uint8_t* src_v,
uint8_t* dst_rgba,
const struct YuvConstants* yuvconstants,
int width) {
+ size_t vl;
size_t w = (size_t)width;
- size_t vl = __riscv_vsetvl_e8m2(w);
uint8_t ub, vr, ug, vg;
int16_t yg, bb, bg, br;
vuint8m2_t v_u, v_v;
vuint8m2_t v_b, v_g, v_r, v_a;
vuint16m4_t v_y_16, v_g_16, v_b_16, v_r_16;
- YUVTORGB_SETUP(yuvconstants, ub, vr, ug, vg, yg, bb, bg, br);
+ YUVTORGB_SETUP(vl, yuvconstants, ub, vr, ug, vg, yg, bb, bg, br);
v_a = __riscv_vmv_v_x_u8m2(255u, vl);
do {
- READYUV422(vl, w, src_y, src_u, src_v, v_u, v_v, v_y_16);
+ READYUV422(vl, v_u, v_v, v_y_16);
YUVTORGB(vl, v_u, v_v, ub, vr, ug, vg, yg, bb, bg, br, v_y_16, v_g_16,
v_b_16, v_r_16);
RGBTORGB8(vl, v_g_16, v_b_16, v_r_16, v_g, v_b, v_r);
@@ -507,9 +436,7 @@
dst_rgba += vl * 4;
} while (w > 0);
}
-#endif
-#ifdef HAS_I422TORGB24ROW_RVV
void I422ToRGB24Row_RVV(const uint8_t* src_y,
const uint8_t* src_u,
const uint8_t* src_v,
@@ -523,9 +450,9 @@
vuint8m2_t v_u, v_v;
vuint8m2_t v_b, v_g, v_r;
vuint16m4_t v_y_16, v_g_16, v_b_16, v_r_16;
- YUVTORGB_SETUP(yuvconstants, ub, vr, ug, vg, yg, bb, bg, br);
+ YUVTORGB_SETUP(vl, yuvconstants, ub, vr, ug, vg, yg, bb, bg, br);
do {
- READYUV422(vl, w, src_y, src_u, src_v, v_u, v_v, v_y_16);
+ READYUV422(vl, v_u, v_v, v_y_16);
YUVTORGB(vl, v_u, v_v, ub, vr, ug, vg, yg, bb, bg, br, v_y_16, v_g_16,
v_b_16, v_r_16);
RGBTORGB8(vl, v_g_16, v_b_16, v_r_16, v_g, v_b, v_r);
@@ -537,9 +464,7 @@
dst_rgb24 += vl * 3;
} while (w > 0);
}
-#endif
-#ifdef HAS_I400TOARGBROW_RVV
void I400ToARGBRow_RVV(const uint8_t* src_y,
uint8_t* dst_argb,
const struct YuvConstants* yuvconstants,
@@ -578,9 +503,7 @@
dst_argb += vl * 4;
} while (w > 0);
}
-#endif
-#ifdef HAS_J400TOARGBROW_RVV
void J400ToARGBRow_RVV(const uint8_t* src_y, uint8_t* dst_argb, int width) {
size_t w = (size_t)width;
size_t vl = __riscv_vsetvl_e8m2(w);
@@ -595,9 +518,7 @@
vl = __riscv_vsetvl_e8m2(w);
} while (w > 0);
}
-#endif
-#ifdef HAS_COPYROW_RVV
void CopyRow_RVV(const uint8_t* src, uint8_t* dst, int width) {
size_t w = (size_t)width;
do {
@@ -609,125 +530,8 @@
dst += vl;
} while (w > 0);
}
-#endif
-
-#ifdef HAS_NV12TOARGBROW_RVV
-void NV12ToARGBRow_RVV(const uint8_t* src_y,
- const uint8_t* src_uv,
- uint8_t* dst_argb,
- const struct YuvConstants* yuvconstants,
- int width) {
- size_t w = (size_t)width;
- size_t vl = __riscv_vsetvl_e8m2(w);
- uint8_t ub, vr, ug, vg;
- int16_t yg, bb, bg, br;
- vuint8m2_t v_u, v_v;
- vuint8m2_t v_b, v_g, v_r, v_a;
- vuint16m4_t v_y_16, v_g_16, v_b_16, v_r_16;
- YUVTORGB_SETUP(yuvconstants, ub, vr, ug, vg, yg, bb, bg, br);
- v_a = __riscv_vmv_v_x_u8m2(255u, vl);
- do {
- READNV12(vl, w, src_y, src_uv, v_u, v_v, v_y_16);
- YUVTORGB(vl, v_u, v_v, ub, vr, ug, vg, yg, bb, bg, br, v_y_16, v_g_16,
- v_b_16, v_r_16);
- RGBTORGB8(vl, v_g_16, v_b_16, v_r_16, v_g, v_b, v_r);
- __riscv_vsseg4e8_v_u8m2(dst_argb, v_b, v_g, v_r, v_a, vl);
- w -= vl;
- src_y += vl;
- src_uv += vl;
- dst_argb += vl * 4;
- } while (w > 0);
-}
-#endif
-
-#ifdef HAS_NV12TORGB24ROW_RVV
-void NV12ToRGB24Row_RVV(const uint8_t* src_y,
- const uint8_t* src_uv,
- uint8_t* dst_rgb24,
- const struct YuvConstants* yuvconstants,
- int width) {
- size_t w = (size_t)width;
- size_t vl = __riscv_vsetvl_e8m2(w);
- uint8_t ub, vr, ug, vg;
- int16_t yg, bb, bg, br;
- vuint8m2_t v_u, v_v;
- vuint8m2_t v_b, v_g, v_r;
- vuint16m4_t v_y_16, v_g_16, v_b_16, v_r_16;
- YUVTORGB_SETUP(yuvconstants, ub, vr, ug, vg, yg, bb, bg, br);
- do {
- READNV12(vl, w, src_y, src_uv, v_u, v_v, v_y_16);
- YUVTORGB(vl, v_u, v_v, ub, vr, ug, vg, yg, bb, bg, br, v_y_16, v_g_16,
- v_b_16, v_r_16);
- RGBTORGB8(vl, v_g_16, v_b_16, v_r_16, v_g, v_b, v_r);
- __riscv_vsseg3e8_v_u8m2(dst_rgb24, v_b, v_g, v_r, vl);
- w -= vl;
- src_y += vl;
- src_uv += vl;
- dst_rgb24 += vl * 3;
- } while (w > 0);
-}
-#endif
-
-#ifdef HAS_NV21TOARGBROW_RVV
-void NV21ToARGBRow_RVV(const uint8_t* src_y,
- const uint8_t* src_vu,
- uint8_t* dst_argb,
- const struct YuvConstants* yuvconstants,
- int width) {
- size_t w = (size_t)width;
- size_t vl = __riscv_vsetvl_e8m2(w);
- uint8_t ub, vr, ug, vg;
- int16_t yg, bb, bg, br;
- vuint8m2_t v_u, v_v;
- vuint8m2_t v_b, v_g, v_r, v_a;
- vuint16m4_t v_y_16, v_g_16, v_b_16, v_r_16;
- YUVTORGB_SETUP(yuvconstants, ub, vr, ug, vg, yg, bb, bg, br);
- v_a = __riscv_vmv_v_x_u8m2(255u, vl);
- do {
- READNV21(vl, w, src_y, src_vu, v_u, v_v, v_y_16);
- YUVTORGB(vl, v_u, v_v, ub, vr, ug, vg, yg, bb, bg, br, v_y_16, v_g_16,
- v_b_16, v_r_16);
- RGBTORGB8(vl, v_g_16, v_b_16, v_r_16, v_g, v_b, v_r);
- __riscv_vsseg4e8_v_u8m2(dst_argb, v_b, v_g, v_r, v_a, vl);
- w -= vl;
- src_y += vl;
- src_vu += vl;
- dst_argb += vl * 4;
- } while (w > 0);
-}
-#endif
-
-#ifdef HAS_NV21TORGB24ROW_RVV
-void NV21ToRGB24Row_RVV(const uint8_t* src_y,
- const uint8_t* src_vu,
- uint8_t* dst_rgb24,
- const struct YuvConstants* yuvconstants,
- int width) {
- size_t w = (size_t)width;
- size_t vl = __riscv_vsetvl_e8m2(w);
- uint8_t ub, vr, ug, vg;
- int16_t yg, bb, bg, br;
- vuint8m2_t v_u, v_v;
- vuint8m2_t v_b, v_g, v_r;
- vuint16m4_t v_y_16, v_g_16, v_b_16, v_r_16;
- YUVTORGB_SETUP(yuvconstants, ub, vr, ug, vg, yg, bb, bg, br);
- do {
- READNV21(vl, w, src_y, src_vu, v_u, v_v, v_y_16);
- YUVTORGB(vl, v_u, v_v, ub, vr, ug, vg, yg, bb, bg, br, v_y_16, v_g_16,
- v_b_16, v_r_16);
- RGBTORGB8(vl, v_g_16, v_b_16, v_r_16, v_g, v_b, v_r);
- __riscv_vsseg3e8_v_u8m2(dst_rgb24, v_b, v_g, v_r, vl);
- w -= vl;
- src_y += vl;
- src_vu += vl;
- dst_rgb24 += vl * 3;
- } while (w > 0);
-}
-#endif
// Bilinear filter [VLEN/8]x2 -> [VLEN/8]x1
-
-#ifdef HAS_INTERPOLATEROW_RVV
void InterpolateRow_RVV(uint8_t* dst_ptr,
const uint8_t* src_ptr,
ptrdiff_t src_stride,
@@ -750,16 +554,13 @@
} while (dst_w > 0);
return;
}
- // To match behavior on other platforms, vxrm (fixed-point rounding mode
- // register) is set to round-to-nearest-up(0).
- asm volatile("csrwi vxrm, 0");
// Blend 50 / 50.
if (y1_fraction == 128) {
do {
size_t vl = __riscv_vsetvl_e8m8(dst_w);
vuint8m8_t row0 = __riscv_vle8_v_u8m8(src_ptr, vl);
vuint8m8_t row1 = __riscv_vle8_v_u8m8(src_ptr1, vl);
- // Use round-to-nearest-up mode for averaging add
+ // Averaging add
vuint8m8_t row_out = __riscv_vaaddu_vv_u8m8(row0, row1, vl);
__riscv_vse8_v_u8m8(dst_ptr, row_out, vl);
dst_w -= vl;
@@ -770,13 +571,15 @@
return;
}
// General purpose row blend.
+ // To match behavior on other platforms, vxrm (fixed-point rounding mode
+ // register) is set to round-to-nearest-up(0).
+ asm volatile("csrwi vxrm, 0");
do {
size_t vl = __riscv_vsetvl_e8m4(dst_w);
vuint8m4_t row0 = __riscv_vle8_v_u8m4(src_ptr, vl);
vuint16m8_t acc = __riscv_vwmulu_vx_u16m8(row0, y0_fraction, vl);
vuint8m4_t row1 = __riscv_vle8_v_u8m4(src_ptr1, vl);
acc = __riscv_vwmaccu_vx_u16m8(acc, y1_fraction, row1, vl);
- // Use round-to-nearest-up mode for vnclip
__riscv_vse8_v_u8m4(dst_ptr, __riscv_vnclipu_wx_u8m4(acc, 8, vl), vl);
dst_w -= vl;
src_ptr += vl;
@@ -784,9 +587,7 @@
dst_ptr += vl;
} while (dst_w > 0);
}
-#endif
-#ifdef HAS_SPLITRGBROW_RVV
void SplitRGBRow_RVV(const uint8_t* src_rgb,
uint8_t* dst_r,
uint8_t* dst_g,
@@ -807,9 +608,7 @@
src_rgb += vl * 3;
} while (w > 0);
}
-#endif
-#ifdef HAS_MERGERGBROW_RVV
void MergeRGBRow_RVV(const uint8_t* src_r,
const uint8_t* src_g,
const uint8_t* src_b,
@@ -829,9 +628,7 @@
dst_rgb += vl * 3;
} while (w > 0);
}
-#endif
-#ifdef HAS_SPLITARGBROW_RVV
void SplitARGBRow_RVV(const uint8_t* src_argb,
uint8_t* dst_r,
uint8_t* dst_g,
@@ -855,9 +652,7 @@
src_argb += vl * 4;
} while (w > 0);
}
-#endif
-#ifdef HAS_MERGEARGBROW_RVV
void MergeARGBRow_RVV(const uint8_t* src_r,
const uint8_t* src_g,
const uint8_t* src_b,
@@ -880,9 +675,7 @@
dst_argb += vl * 4;
} while (w > 0);
}
-#endif
-#ifdef HAS_SPLITXRGBROW_RVV
void SplitXRGBRow_RVV(const uint8_t* src_argb,
uint8_t* dst_r,
uint8_t* dst_g,
@@ -903,9 +696,7 @@
src_argb += vl * 4;
} while (w > 0);
}
-#endif
-#ifdef HAS_MERGEXRGBROW_RVV
void MergeXRGBRow_RVV(const uint8_t* src_r,
const uint8_t* src_g,
const uint8_t* src_b,
@@ -928,9 +719,7 @@
vl = __riscv_vsetvl_e8m2(w);
} while (w > 0);
}
-#endif
-#ifdef HAS_SPLITUVROW_RVV
void SplitUVRow_RVV(const uint8_t* src_uv,
uint8_t* dst_u,
uint8_t* dst_v,
@@ -948,9 +737,7 @@
src_uv += 2 * vl;
} while (w > 0);
}
-#endif
-#ifdef HAS_MERGEUVROW_RVV
void MergeUVRow_RVV(const uint8_t* src_u,
const uint8_t* src_v,
uint8_t* dst_uv,
@@ -968,7 +755,6 @@
dst_uv += 2 * vl;
} while (w > 0);
}
-#endif
struct RgbConstants {
uint8_t kRGBToY[4];
@@ -1001,8 +787,7 @@
0x1080,
0};
-// ARGB expects first 3 values to contain RGB and 4th value is ignored
-#ifdef HAS_ARGBTOYMATRIXROW_RVV
+// ARGB expects first 3 values to contain RGB and 4th value is ignored.
void ARGBToYMatrixRow_RVV(const uint8_t* src_argb,
uint8_t* dst_y,
int width,
@@ -1032,34 +817,24 @@
dst_y += vl;
} while (w > 0);
}
-#endif
-#ifdef HAS_ARGBTOYROW_RVV
void ARGBToYRow_RVV(const uint8_t* src_argb, uint8_t* dst_y, int width) {
ARGBToYMatrixRow_RVV(src_argb, dst_y, width, &kRgb24I601Constants);
}
-#endif
-#ifdef HAS_ARGBTOYJROW_RVV
void ARGBToYJRow_RVV(const uint8_t* src_argb, uint8_t* dst_yj, int width) {
ARGBToYMatrixRow_RVV(src_argb, dst_yj, width, &kRgb24JPEGConstants);
}
-#endif
-#ifdef HAS_ABGRTOYROW_RVV
void ABGRToYRow_RVV(const uint8_t* src_abgr, uint8_t* dst_y, int width) {
ARGBToYMatrixRow_RVV(src_abgr, dst_y, width, &kRawI601Constants);
}
-#endif
-#ifdef HAS_ABGRTOYJROW_RVV
void ABGRToYJRow_RVV(const uint8_t* src_abgr, uint8_t* dst_yj, int width) {
ARGBToYMatrixRow_RVV(src_abgr, dst_yj, width, &kRawJPEGConstants);
}
-#endif
// RGBA expects first value to be A and ignored, then 3 values to contain RGB.
-#ifdef HAS_RGBATOYMATRIXROW_RVV
void RGBAToYMatrixRow_RVV(const uint8_t* src_rgba,
uint8_t* dst_y,
int width,
@@ -1089,27 +864,19 @@
dst_y += vl;
} while (w > 0);
}
-#endif
-#ifdef HAS_RGBATOYROW_RVV
void RGBAToYRow_RVV(const uint8_t* src_rgba, uint8_t* dst_y, int width) {
RGBAToYMatrixRow_RVV(src_rgba, dst_y, width, &kRgb24I601Constants);
}
-#endif
-#ifdef HAS_RGBATOYJROW_RVV
void RGBAToYJRow_RVV(const uint8_t* src_rgba, uint8_t* dst_yj, int width) {
RGBAToYMatrixRow_RVV(src_rgba, dst_yj, width, &kRgb24JPEGConstants);
}
-#endif
-#ifdef HAS_BGRATOYROW_RVV
void BGRAToYRow_RVV(const uint8_t* src_bgra, uint8_t* dst_y, int width) {
RGBAToYMatrixRow_RVV(src_bgra, dst_y, width, &kRawI601Constants);
}
-#endif
-#ifdef HAS_RGBTOYMATRIXROW_RVV
void RGBToYMatrixRow_RVV(const uint8_t* src_rgb,
uint8_t* dst_y,
int width,
@@ -1139,179 +906,51 @@
dst_y += vl;
} while (w > 0);
}
-#endif
-#ifdef HAS_RGB24TOYJROW_RVV
void RGB24ToYJRow_RVV(const uint8_t* src_rgb24, uint8_t* dst_yj, int width) {
RGBToYMatrixRow_RVV(src_rgb24, dst_yj, width, &kRgb24JPEGConstants);
}
-#endif
-#ifdef HAS_RAWTOYJROW_RVV
void RAWToYJRow_RVV(const uint8_t* src_raw, uint8_t* dst_yj, int width) {
RGBToYMatrixRow_RVV(src_raw, dst_yj, width, &kRawJPEGConstants);
}
-#endif
-#ifdef HAS_RGB24TOYROW_RVV
void RGB24ToYRow_RVV(const uint8_t* src_rgb24, uint8_t* dst_y, int width) {
RGBToYMatrixRow_RVV(src_rgb24, dst_y, width, &kRgb24I601Constants);
}
-#endif
-#ifdef HAS_RAWTOYROW_RVV
void RAWToYRow_RVV(const uint8_t* src_raw, uint8_t* dst_y, int width) {
RGBToYMatrixRow_RVV(src_raw, dst_y, width, &kRawI601Constants);
}
-#endif
-// Blend src_argb over src_argb1 and store to dst_argb.
-// dst_argb may be src_argb or src_argb1.
-// src_argb: RGB values have already been pre-multiplied by the a.
-#ifdef HAS_ARGBBLENDROW_RVV
-void ARGBBlendRow_RVV(const uint8_t* src_argb,
- const uint8_t* src_argb1,
- uint8_t* dst_argb,
- int width) {
- size_t w = (size_t)width;
- size_t vl = __riscv_vsetvlmax_e8m2();
- // clamp255((((256 - a) * b) >> 8) + f)
- // = b * (256 - a) / 256 + f
- // = b - (b * a / 256) + f
- vuint8m2_t v_255 = __riscv_vmv_v_x_u8m2(255, vl);
- do {
- vuint8m2_t v_src0_b, v_src0_g, v_src0_r, v_src0_a;
- vuint8m2_t v_src1_b, v_src1_g, v_src1_r, v_src1_a;
- vuint8m2_t v_tmp_b, v_tmp_g, v_tmp_r;
- vuint8m2_t v_dst_b, v_dst_g, v_dst_r;
- vl = __riscv_vsetvl_e8m2(w);
- __riscv_vlseg4e8_v_u8m2(&v_src0_b, &v_src0_g, &v_src0_r, &v_src0_a,
- src_argb, vl);
- __riscv_vlseg4e8_v_u8m2(&v_src1_b, &v_src1_g, &v_src1_r, &v_src1_a,
- src_argb1, vl);
-
- v_tmp_b = __riscv_vmulhu_vv_u8m2(v_src1_b, v_src0_a, vl);
- v_tmp_g = __riscv_vmulhu_vv_u8m2(v_src1_g, v_src0_a, vl);
- v_tmp_r = __riscv_vmulhu_vv_u8m2(v_src1_r, v_src0_a, vl);
-
- v_dst_b = __riscv_vsub_vv_u8m2(v_src1_b, v_tmp_b, vl);
- v_dst_g = __riscv_vsub_vv_u8m2(v_src1_g, v_tmp_g, vl);
- v_dst_r = __riscv_vsub_vv_u8m2(v_src1_r, v_tmp_r, vl);
-
- v_dst_b = __riscv_vsaddu_vv_u8m2(v_dst_b, v_src0_b, vl);
- v_dst_g = __riscv_vsaddu_vv_u8m2(v_dst_g, v_src0_g, vl);
- v_dst_r = __riscv_vsaddu_vv_u8m2(v_dst_r, v_src0_r, vl);
- __riscv_vsseg4e8_v_u8m2(dst_argb, v_dst_b, v_dst_g, v_dst_r, v_255, vl);
-
- w -= vl;
- src_argb += 4 * vl;
- src_argb1 += 4 * vl;
- dst_argb += 4 * vl;
- } while (w > 0);
-}
-#endif
-
-#ifdef HAS_BLENDPLANEROW_RVV
-void BlendPlaneRow_RVV(const uint8_t* src0,
- const uint8_t* src1,
- const uint8_t* alpha,
- uint8_t* dst,
- int width) {
- size_t w = (size_t)width;
- do {
- vuint16m8_t v_dst_u16;
- vuint8m4_t v_dst;
- size_t vl = __riscv_vsetvl_e8m4(w);
- vuint8m4_t v_src0 = __riscv_vle8_v_u8m4(src0, vl);
- vuint8m4_t v_src1 = __riscv_vle8_v_u8m4(src1, vl);
- vuint8m4_t v_alpha = __riscv_vle8_v_u8m4(alpha, vl);
- vuint8m4_t v_255_minus_alpha = __riscv_vrsub_vx_u8m4(v_alpha, 255u, vl);
-
- // (a * foreground) + (1-a) * background
- v_dst_u16 = __riscv_vwmulu_vv_u16m8(v_alpha, v_src0, vl);
- v_dst_u16 =
- __riscv_vwmaccu_vv_u16m8(v_dst_u16, v_255_minus_alpha, v_src1, vl);
- v_dst_u16 = __riscv_vadd_vx_u16m8(v_dst_u16, 255u, vl);
- v_dst = __riscv_vnsrl_wx_u8m4(v_dst_u16, 8, vl);
-
- __riscv_vse8_v_u8m4(dst, v_dst, vl);
- w -= vl;
- src0 += vl;
- src1 += vl;
- alpha += vl;
- dst += vl;
- } while (w > 0);
-}
-#endif
-
-// Attenuate: (f * a + 255) >> 8
-#ifdef HAS_ARGBATTENUATEROW_RVV
void ARGBAttenuateRow_RVV(const uint8_t* src_argb,
uint8_t* dst_argb,
int width) {
size_t w = (size_t)width;
+ // To match behavior on other platforms, vxrm (fixed-point rounding mode
+ // register) is set to round-to-nearest-up(0).
+ asm volatile("csrwi vxrm, 0");
do {
vuint8m2_t v_b, v_g, v_r, v_a;
vuint16m4_t v_ba_16, v_ga_16, v_ra_16;
size_t vl = __riscv_vsetvl_e8m2(w);
__riscv_vlseg4e8_v_u8m2(&v_b, &v_g, &v_r, &v_a, src_argb, vl);
- // f * a
v_ba_16 = __riscv_vwmulu_vv_u16m4(v_b, v_a, vl);
v_ga_16 = __riscv_vwmulu_vv_u16m4(v_g, v_a, vl);
v_ra_16 = __riscv_vwmulu_vv_u16m4(v_r, v_a, vl);
- // f * a + 255
- v_ba_16 = __riscv_vadd_vx_u16m4(v_ba_16, 255u, vl);
- v_ga_16 = __riscv_vadd_vx_u16m4(v_ga_16, 255u, vl);
- v_ra_16 = __riscv_vadd_vx_u16m4(v_ra_16, 255u, vl);
- // (f * a + 255) >> 8
- v_b = __riscv_vnsrl_wx_u8m2(v_ba_16, 8, vl);
- v_g = __riscv_vnsrl_wx_u8m2(v_ga_16, 8, vl);
- v_r = __riscv_vnsrl_wx_u8m2(v_ra_16, 8, vl);
+ v_b = __riscv_vnclipu_wx_u8m2(v_ba_16, 8, vl);
+ v_g = __riscv_vnclipu_wx_u8m2(v_ga_16, 8, vl);
+ v_r = __riscv_vnclipu_wx_u8m2(v_ra_16, 8, vl);
__riscv_vsseg4e8_v_u8m2(dst_argb, v_b, v_g, v_r, v_a, vl);
w -= vl;
src_argb += vl * 4;
dst_argb += vl * 4;
} while (w > 0);
}
-#endif
-
-#ifdef HAS_ARGBEXTRACTALPHAROW_RVV
-void ARGBExtractAlphaRow_RVV(const uint8_t* src_argb,
- uint8_t* dst_a,
- int width) {
- size_t w = (size_t)width;
- do {
- size_t vl = __riscv_vsetvl_e8m2(w);
- vuint8m2_t v_b, v_g, v_r, v_a;
- __riscv_vlseg4e8_v_u8m2(&v_b, &v_g, &v_r, &v_a, src_argb, vl);
- __riscv_vse8_v_u8m2(dst_a, v_a, vl);
- w -= vl;
- src_argb += vl * 4;
- dst_a += vl;
- } while (w > 0);
-}
-#endif
-
-#ifdef HAS_ARGBCOPYYTOALPHAROW_RVV
-void ARGBCopyYToAlphaRow_RVV(const uint8_t* src, uint8_t* dst, int width) {
- size_t w = (size_t)width;
- const ptrdiff_t dst_stride = 4;
- dst += 3;
- do {
- size_t vl = __riscv_vsetvl_e8m8(w);
- vuint8m8_t v_a = __riscv_vle8_v_u8m8(src, vl);
- __riscv_vsse8_v_u8m8(dst, dst_stride, v_a, vl);
- w -= vl;
- src += vl;
- dst += vl * dst_stride;
- } while (w > 0);
-}
-#endif
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv
#endif
-#endif // !defined(LIBYUV_DISABLE_RVV) && defined(__riscv_vector) &&
- // defined(__clang__)
+#endif // !defined(LIBYUV_DISABLE_RVV) && defined(__riscv_vector)
diff --git a/source/scale.cc b/source/scale.cc
index 43d973a..80b030d 100644
--- a/source/scale.cc
+++ b/source/scale.cc
@@ -135,14 +135,6 @@
}
}
#endif
-#if defined(HAS_SCALEROWDOWN2_RVV)
- if (TestCpuFlag(kCpuHasRVV)) {
- ScaleRowDown2 = filtering == kFilterNone
- ? ScaleRowDown2_RVV
- : (filtering == kFilterLinear ? ScaleRowDown2Linear_RVV
- : ScaleRowDown2Box_RVV);
- }
-#endif
if (filtering == kFilterLinear) {
src_stride = 0;
@@ -320,11 +312,6 @@
}
}
#endif
-#if defined(HAS_SCALEROWDOWN4_RVV)
- if (TestCpuFlag(kCpuHasRVV)) {
- ScaleRowDown4 = filtering ? ScaleRowDown4Box_RVV : ScaleRowDown4_RVV;
- }
-#endif
if (filtering == kFilterLinear) {
src_stride = 0;
@@ -485,17 +472,6 @@
}
}
#endif
-#if defined(HAS_SCALEROWDOWN34_RVV)
- if (TestCpuFlag(kCpuHasRVV)) {
- if (!filtering) {
- ScaleRowDown34_0 = ScaleRowDown34_RVV;
- ScaleRowDown34_1 = ScaleRowDown34_RVV;
- } else {
- ScaleRowDown34_0 = ScaleRowDown34_0_Box_RVV;
- ScaleRowDown34_1 = ScaleRowDown34_1_Box_RVV;
- }
- }
-#endif
for (y = 0; y < dst_height - 2; y += 3) {
ScaleRowDown34_0(src_ptr, filter_stride, dst_ptr, dst_width);
@@ -711,17 +687,6 @@
}
}
#endif
-#if defined(HAS_SCALEROWDOWN38_RVV)
- if (TestCpuFlag(kCpuHasRVV)) {
- if (!filtering) {
- ScaleRowDown38_3 = ScaleRowDown38_RVV;
- ScaleRowDown38_2 = ScaleRowDown38_RVV;
- } else {
- ScaleRowDown38_3 = ScaleRowDown38_3_Box_RVV;
- ScaleRowDown38_2 = ScaleRowDown38_2_Box_RVV;
- }
- }
-#endif
for (y = 0; y < dst_height - 2; y += 3) {
ScaleRowDown38_3(src_ptr, filter_stride, dst_ptr, dst_width);
@@ -1006,11 +971,6 @@
}
}
#endif
-#if defined(HAS_SCALEADDROW_RVV)
- if (TestCpuFlag(kCpuHasRVV)) {
- ScaleAddRow = ScaleAddRow_RVV;
- }
-#endif
for (j = 0; j < dst_height; ++j) {
int boxheight;
@@ -1088,15 +1048,15 @@
}
// Scale plane down with bilinear interpolation.
-static void ScalePlaneBilinearDown(int src_width,
- int src_height,
- int dst_width,
- int dst_height,
- int src_stride,
- int dst_stride,
- const uint8_t* src_ptr,
- uint8_t* dst_ptr,
- enum FilterMode filtering) {
+void ScalePlaneBilinearDown(int src_width,
+ int src_height,
+ int dst_width,
+ int dst_height,
+ int src_stride,
+ int dst_stride,
+ const uint8_t* src_ptr,
+ uint8_t* dst_ptr,
+ enum FilterMode filtering) {
// Initial source x/y coordinate and step values as 16.16 fixed point.
int x = 0;
int y = 0;
@@ -1216,15 +1176,15 @@
free_aligned_buffer_64(row);
}
-static void ScalePlaneBilinearDown_16(int src_width,
- int src_height,
- int dst_width,
- int dst_height,
- int src_stride,
- int dst_stride,
- const uint16_t* src_ptr,
- uint16_t* dst_ptr,
- enum FilterMode filtering) {
+void ScalePlaneBilinearDown_16(int src_width,
+ int src_height,
+ int dst_width,
+ int dst_height,
+ int src_stride,
+ int dst_stride,
+ const uint16_t* src_ptr,
+ uint16_t* dst_ptr,
+ enum FilterMode filtering) {
// Initial source x/y coordinate and step values as 16.16 fixed point.
int x = 0;
int y = 0;
@@ -1308,15 +1268,15 @@
}
// Scale up down with bilinear interpolation.
-static void ScalePlaneBilinearUp(int src_width,
- int src_height,
- int dst_width,
- int dst_height,
- int src_stride,
- int dst_stride,
- const uint8_t* src_ptr,
- uint8_t* dst_ptr,
- enum FilterMode filtering) {
+void ScalePlaneBilinearUp(int src_width,
+ int src_height,
+ int dst_width,
+ int dst_height,
+ int src_stride,
+ int dst_stride,
+ const uint8_t* src_ptr,
+ uint8_t* dst_ptr,
+ enum FilterMode filtering) {
int j;
// Initial source x/y coordinate and step values as 16.16 fixed point.
int x = 0;
@@ -1465,14 +1425,14 @@
// This is an optimized version for scaling up a plane to 2 times of
// its original width, using linear interpolation.
// This is used to scale U and V planes of I422 to I444.
-static void ScalePlaneUp2_Linear(int src_width,
- int src_height,
- int dst_width,
- int dst_height,
- int src_stride,
- int dst_stride,
- const uint8_t* src_ptr,
- uint8_t* dst_ptr) {
+void ScalePlaneUp2_Linear(int src_width,
+ int src_height,
+ int dst_width,
+ int dst_height,
+ int src_stride,
+ int dst_stride,
+ const uint8_t* src_ptr,
+ uint8_t* dst_ptr) {
void (*ScaleRowUp)(const uint8_t* src_ptr, uint8_t* dst_ptr, int dst_width) =
ScaleRowUp2_Linear_Any_C;
int i;
@@ -1505,11 +1465,6 @@
ScaleRowUp = ScaleRowUp2_Linear_Any_NEON;
}
#endif
-#ifdef HAS_SCALEROWUP2_LINEAR_RVV
- if (TestCpuFlag(kCpuHasRVV)) {
- ScaleRowUp = ScaleRowUp2_Linear_RVV;
- }
-#endif
if (dst_height == 1) {
ScaleRowUp(src_ptr + ((src_height - 1) / 2) * (int64_t)src_stride, dst_ptr,
@@ -1529,14 +1484,14 @@
// This is an optimized version for scaling up a plane to 2 times of
// its original size, using bilinear interpolation.
// This is used to scale U and V planes of I420 to I444.
-static void ScalePlaneUp2_Bilinear(int src_width,
- int src_height,
- int dst_width,
- int dst_height,
- int src_stride,
- int dst_stride,
- const uint8_t* src_ptr,
- uint8_t* dst_ptr) {
+void ScalePlaneUp2_Bilinear(int src_width,
+ int src_height,
+ int dst_width,
+ int dst_height,
+ int src_stride,
+ int dst_stride,
+ const uint8_t* src_ptr,
+ uint8_t* dst_ptr) {
void (*Scale2RowUp)(const uint8_t* src_ptr, ptrdiff_t src_stride,
uint8_t* dst_ptr, ptrdiff_t dst_stride, int dst_width) =
ScaleRowUp2_Bilinear_Any_C;
@@ -1569,11 +1524,6 @@
Scale2RowUp = ScaleRowUp2_Bilinear_Any_NEON;
}
#endif
-#ifdef HAS_SCALEROWUP2_BILINEAR_RVV
- if (TestCpuFlag(kCpuHasRVV)) {
- Scale2RowUp = ScaleRowUp2_Bilinear_RVV;
- }
-#endif
Scale2RowUp(src_ptr, 0, dst_ptr, 0, dst_width);
dst_ptr += dst_stride;
@@ -1594,14 +1544,14 @@
// its original width, using linear interpolation.
// stride is in count of uint16_t.
// This is used to scale U and V planes of I210 to I410 and I212 to I412.
-static void ScalePlaneUp2_12_Linear(int src_width,
- int src_height,
- int dst_width,
- int dst_height,
- int src_stride,
- int dst_stride,
- const uint16_t* src_ptr,
- uint16_t* dst_ptr) {
+void ScalePlaneUp2_12_Linear(int src_width,
+ int src_height,
+ int dst_width,
+ int dst_height,
+ int src_stride,
+ int dst_stride,
+ const uint16_t* src_ptr,
+ uint16_t* dst_ptr) {
void (*ScaleRowUp)(const uint16_t* src_ptr, uint16_t* dst_ptr,
int dst_width) = ScaleRowUp2_Linear_16_Any_C;
int i;
@@ -1648,14 +1598,14 @@
// its original size, using bilinear interpolation.
// stride is in count of uint16_t.
// This is used to scale U and V planes of I010 to I410 and I012 to I412.
-static void ScalePlaneUp2_12_Bilinear(int src_width,
- int src_height,
- int dst_width,
- int dst_height,
- int src_stride,
- int dst_stride,
- const uint16_t* src_ptr,
- uint16_t* dst_ptr) {
+void ScalePlaneUp2_12_Bilinear(int src_width,
+ int src_height,
+ int dst_width,
+ int dst_height,
+ int src_stride,
+ int dst_stride,
+ const uint16_t* src_ptr,
+ uint16_t* dst_ptr) {
void (*Scale2RowUp)(const uint16_t* src_ptr, ptrdiff_t src_stride,
uint16_t* dst_ptr, ptrdiff_t dst_stride, int dst_width) =
ScaleRowUp2_Bilinear_16_Any_C;
@@ -1695,14 +1645,14 @@
}
}
-static void ScalePlaneUp2_16_Linear(int src_width,
- int src_height,
- int dst_width,
- int dst_height,
- int src_stride,
- int dst_stride,
- const uint16_t* src_ptr,
- uint16_t* dst_ptr) {
+void ScalePlaneUp2_16_Linear(int src_width,
+ int src_height,
+ int dst_width,
+ int dst_height,
+ int src_stride,
+ int dst_stride,
+ const uint16_t* src_ptr,
+ uint16_t* dst_ptr) {
void (*ScaleRowUp)(const uint16_t* src_ptr, uint16_t* dst_ptr,
int dst_width) = ScaleRowUp2_Linear_16_Any_C;
int i;
@@ -1744,14 +1694,14 @@
}
}
-static void ScalePlaneUp2_16_Bilinear(int src_width,
- int src_height,
- int dst_width,
- int dst_height,
- int src_stride,
- int dst_stride,
- const uint16_t* src_ptr,
- uint16_t* dst_ptr) {
+void ScalePlaneUp2_16_Bilinear(int src_width,
+ int src_height,
+ int dst_width,
+ int dst_height,
+ int src_stride,
+ int dst_stride,
+ const uint16_t* src_ptr,
+ uint16_t* dst_ptr) {
void (*Scale2RowUp)(const uint16_t* src_ptr, ptrdiff_t src_stride,
uint16_t* dst_ptr, ptrdiff_t dst_stride, int dst_width) =
ScaleRowUp2_Bilinear_16_Any_C;
@@ -1791,15 +1741,15 @@
}
}
-static void ScalePlaneBilinearUp_16(int src_width,
- int src_height,
- int dst_width,
- int dst_height,
- int src_stride,
- int dst_stride,
- const uint16_t* src_ptr,
- uint16_t* dst_ptr,
- enum FilterMode filtering) {
+void ScalePlaneBilinearUp_16(int src_width,
+ int src_height,
+ int dst_width,
+ int dst_height,
+ int src_stride,
+ int dst_stride,
+ const uint16_t* src_ptr,
+ uint16_t* dst_ptr,
+ enum FilterMode filtering) {
int j;
// Initial source x/y coordinate and step values as 16.16 fixed point.
int x = 0;
diff --git a/source/scale_argb.cc b/source/scale_argb.cc
index 1d5c1b6..ddd8d29 100644
--- a/source/scale_argb.cc
+++ b/source/scale_argb.cc
@@ -16,7 +16,6 @@
#include "libyuv/cpu_id.h"
#include "libyuv/planar_functions.h" // For CopyARGB
#include "libyuv/row.h"
-#include "libyuv/scale_argb.h"
#include "libyuv/scale_row.h"
#ifdef __cplusplus
@@ -128,15 +127,6 @@
}
}
#endif
-#if defined(HAS_SCALEARGBROWDOWN2_RVV)
- if (TestCpuFlag(kCpuHasRVV)) {
- ScaleARGBRowDown2 =
- filtering == kFilterNone
- ? ScaleARGBRowDown2_RVV
- : (filtering == kFilterLinear ? ScaleARGBRowDown2Linear_RVV
- : ScaleARGBRowDown2Box_RVV);
- }
-#endif
if (filtering == kFilterLinear) {
src_stride = 0;
@@ -194,11 +184,6 @@
}
}
#endif
-#if defined(HAS_SCALEARGBROWDOWN2_RVV)
- if (TestCpuFlag(kCpuHasRVV)) {
- ScaleARGBRowDown2 = ScaleARGBRowDown2Box_RVV;
- }
-#endif
for (j = 0; j < dst_height; ++j) {
ScaleARGBRowDown2(src_argb, src_stride, row, dst_width * 2);
@@ -278,12 +263,6 @@
}
}
#endif
-#if defined(HAS_SCALEARGBROWDOWNEVEN_RVV)
- if (TestCpuFlag(kCpuHasRVV)) {
- ScaleARGBRowDownEven =
- filtering ? ScaleARGBRowDownEvenBox_RVV : ScaleARGBRowDownEven_RVV;
- }
-#endif
if (filtering == kFilterLinear) {
src_stride = 0;
diff --git a/source/scale_common.cc b/source/scale_common.cc
index d07a39a..7745590 100644
--- a/source/scale_common.cc
+++ b/source/scale_common.cc
@@ -1964,6 +1964,35 @@
}
#undef CENTERSTART
+// 2x upsample with filtering: reads 8x2 source pixels and writes 16x1.
+// Actually reads an extra pixel per row, so 9x2.
+void ScaleRowUp2_16_C(const uint16_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint16_t* dst,
+ int dst_width) {
+ const uint16_t* src2 = src_ptr + src_stride;  // second source row
+
+ int x;
+ for (x = 0; x < dst_width - 1; x += 2) {  // two output pixels per pass
+ uint16_t p0 = src_ptr[0];
+ uint16_t p1 = src_ptr[1];
+ uint16_t p2 = src2[0];
+ uint16_t p3 = src2[1];
+ dst[0] = (p0 * 9 + p1 * 3 + p2 * 3 + p3 + 8) >> 4;  // 9:3:3:1, rounded
+ dst[1] = (p0 * 3 + p1 * 9 + p2 + p3 * 3 + 8) >> 4;  // 3:9:1:3, rounded
+ ++src_ptr;
+ ++src2;
+ dst += 2;
+ }
+ if (dst_width & 1) {  // odd width: emit one final pixel
+ uint16_t p0 = src_ptr[0];
+ uint16_t p1 = src_ptr[1];
+ uint16_t p2 = src2[0];
+ uint16_t p3 = src2[1];
+ dst[0] = (p0 * 9 + p1 * 3 + p2 * 3 + p3 + 8) >> 4;
+ }
+}
+
#ifdef __cplusplus
} // extern "C"
} // namespace libyuv
diff --git a/source/scale_neon64.cc b/source/scale_neon64.cc
index 7c07238..ad06ee8 100644
--- a/source/scale_neon64.cc
+++ b/source/scale_neon64.cc
@@ -1118,6 +1118,101 @@
#undef LOAD2_DATA8_LANE
+// 16x2 -> 16x1: blends two source rows into one, weighted by source_y_fraction.
+void ScaleFilterRows_NEON(uint8_t* dst_ptr,
+ const uint8_t* src_ptr,
+ ptrdiff_t src_stride,
+ int dst_width,
+ int source_y_fraction) {
+ int y_fraction = 256 - source_y_fraction;  // weight applied to the first row
+ asm volatile(
+ "cmp %w4, #0 \n"
+ "b.eq 100f \n"
+ "add %2, %2, %1 \n"  // %2 = src_ptr + src_stride (second row)
+ "cmp %w4, #64 \n"
+ "b.eq 75f \n"
+ "cmp %w4, #128 \n"
+ "b.eq 50f \n"
+ "cmp %w4, #192 \n"
+ "b.eq 25f \n"
+
+ "dup v5.8b, %w4 \n"
+ "dup v4.8b, %w5 \n"
+ // General purpose row blend.
+ "1: \n"
+ "ld1 {v0.16b}, [%1], #16 \n"
+ "ld1 {v1.16b}, [%2], #16 \n"
+ "subs %w3, %w3, #16 \n"
+ "umull v6.8h, v0.8b, v4.8b \n"
+ "umull2 v7.8h, v0.16b, v4.16b \n"
+ "prfm pldl1keep, [%1, 448] \n" // prefetch 7 lines ahead
+ "umlal v6.8h, v1.8b, v5.8b \n"
+ "umlal2 v7.8h, v1.16b, v5.16b \n"
+ "prfm pldl1keep, [%2, 448] \n"
+ "rshrn v0.8b, v6.8h, #8 \n"
+ "rshrn2 v0.16b, v7.8h, #8 \n"
+ "st1 {v0.16b}, [%0], #16 \n"
+ "b.gt 1b \n"
+ "b 99f \n"
+
+ // Blend 25 / 75.
+ "25: \n"
+ "ld1 {v0.16b}, [%1], #16 \n"
+ "ld1 {v1.16b}, [%2], #16 \n"
+ "subs %w3, %w3, #16 \n"
+ "urhadd v0.16b, v0.16b, v1.16b \n"
+ "prfm pldl1keep, [%1, 448] \n" // prefetch 7 lines ahead
+ "urhadd v0.16b, v0.16b, v1.16b \n"
+ "prfm pldl1keep, [%2, 448] \n"
+ "st1 {v0.16b}, [%0], #16 \n"
+ "b.gt 25b \n"
+ "b 99f \n"
+
+ // Blend 50 / 50.
+ "50: \n"
+ "ld1 {v0.16b}, [%1], #16 \n"
+ "ld1 {v1.16b}, [%2], #16 \n"
+ "subs %w3, %w3, #16 \n"
+ "prfm pldl1keep, [%1, 448] \n" // prefetch 7 lines ahead
+ "urhadd v0.16b, v0.16b, v1.16b \n"
+ "prfm pldl1keep, [%2, 448] \n"
+ "st1 {v0.16b}, [%0], #16 \n"
+ "b.gt 50b \n"
+ "b 99f \n"
+
+ // Blend 75 / 25.
+ "75: \n"
+ "ld1 {v1.16b}, [%1], #16 \n"
+ "ld1 {v0.16b}, [%2], #16 \n"
+ "subs %w3, %w3, #16 \n"
+ "urhadd v0.16b, v0.16b, v1.16b \n"
+ "prfm pldl1keep, [%1, 448] \n" // prefetch 7 lines ahead
+ "urhadd v0.16b, v0.16b, v1.16b \n"
+ "prfm pldl1keep, [%2, 448] \n"
+ "st1 {v0.16b}, [%0], #16 \n"
+ "b.gt 75b \n"
+ "b 99f \n"
+
+ // Blend 100 / 0 - Copy row unchanged.
+ "100: \n"
+ "ld1 {v0.16b}, [%1], #16 \n"
+ "subs %w3, %w3, #16 \n"
+ "prfm pldl1keep, [%1, 448] \n" // prefetch 7 lines ahead
+ "st1 {v0.16b}, [%0], #16 \n"
+ "b.gt 100b \n"
+
+ "99: \n"
+ "st1 {v0.b}[15], [%0] \n"  // store last byte of v0 again at the advanced dst
+ : "+r"(dst_ptr), // %0
+ "+r"(src_ptr), // %1
+ "+r"(src_stride), // %2
+ "+r"(dst_width), // %3
+ "+r"(source_y_fraction), // %4
+ "+r"(y_fraction) // %5
+ :
+ : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "memory", "cc");
+}
+
void ScaleARGBRowDown2_NEON(const uint8_t* src_ptr,
ptrdiff_t src_stride,
uint8_t* dst,
diff --git a/source/scale_rvv.cc b/source/scale_rvv.cc
deleted file mode 100644
index fd14842..0000000
--- a/source/scale_rvv.cc
+++ /dev/null
@@ -1,1038 +0,0 @@
-/*
- * Copyright 2023 The LibYuv Project Authors. All rights reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-/*
- * Copyright (c) 2023 SiFive, Inc. All rights reserved.
- *
- * Contributed by Darren Hsieh <darren.hsieh@sifive.com>
- * Contributed by Bruce Lai <bruce.lai@sifive.com>
- */
-
-#include "libyuv/row.h"
-#include "libyuv/scale_row.h"
-
-// This module is for clang rvv. GCC hasn't supported segment load & store.
-#if !defined(LIBYUV_DISABLE_RVV) && defined(__riscv_vector) && \
- defined(__clang__)
-#include <assert.h>
-#include <riscv_vector.h>
-#ifdef __cplusplus
-namespace libyuv {
-extern "C" {
-#endif
-
-#ifdef HAS_SCALEADDROW_RVV
-void ScaleAddRow_RVV(const uint8_t* src_ptr, uint16_t* dst_ptr, int src_width) {
- size_t w = (size_t)src_width;
- do {
- size_t vl = __riscv_vsetvl_e8m4(w);
- vuint8m4_t v_src = __riscv_vle8_v_u8m4(src_ptr, vl);
- vuint16m8_t v_dst = __riscv_vle16_v_u16m8(dst_ptr, vl);
- // Use widening multiply-add instead of widening + add
- v_dst = __riscv_vwmaccu_vx_u16m8(v_dst, 1, v_src, vl);
- __riscv_vse16_v_u16m8(dst_ptr, v_dst, vl);
- w -= vl;
- src_ptr += vl;
- dst_ptr += vl;
- } while (w > 0);
-}
-#endif
-
-#ifdef HAS_SCALEARGBROWDOWN2_RVV
-void ScaleARGBRowDown2_RVV(const uint8_t* src_argb,
- ptrdiff_t src_stride,
- uint8_t* dst_argb,
- int dst_width) {
- (void)src_stride;
- size_t w = (size_t)dst_width;
- const uint64_t* src = (const uint64_t*)(src_argb);
- uint32_t* dst = (uint32_t*)(dst_argb);
- do {
- size_t vl = __riscv_vsetvl_e64m8(w);
- vuint64m8_t v_data = __riscv_vle64_v_u64m8(src, vl);
- vuint32m4_t v_dst = __riscv_vnsrl_wx_u32m4(v_data, 32, vl);
- __riscv_vse32_v_u32m4(dst, v_dst, vl);
- w -= vl;
- src += vl;
- dst += vl;
- } while (w > 0);
-}
-#endif
-
-#ifdef HAS_SCALEARGBROWDOWN2LINEAR_RVV
-void ScaleARGBRowDown2Linear_RVV(const uint8_t* src_argb,
- ptrdiff_t src_stride,
- uint8_t* dst_argb,
- int dst_width) {
- (void)src_stride;
- size_t w = (size_t)dst_width;
- const uint32_t* src = (const uint32_t*)(src_argb);
- // NOTE: To match behavior on other platforms, vxrm (fixed-point rounding mode
- // register) is set to round-to-nearest-up mode(0).
- asm volatile("csrwi vxrm, 0");
- do {
- vuint8m4_t v_odd, v_even, v_dst;
- vuint32m4_t v_odd_32, v_even_32;
- size_t vl = __riscv_vsetvl_e32m4(w);
- __riscv_vlseg2e32_v_u32m4(&v_even_32, &v_odd_32, src, vl);
- v_even = __riscv_vreinterpret_v_u32m4_u8m4(v_even_32);
- v_odd = __riscv_vreinterpret_v_u32m4_u8m4(v_odd_32);
- // Use round-to-nearest-up mode for averaging add
- v_dst = __riscv_vaaddu_vv_u8m4(v_even, v_odd, vl * 4);
- __riscv_vse8_v_u8m4(dst_argb, v_dst, vl * 4);
- w -= vl;
- src += vl * 2;
- dst_argb += vl * 4;
- } while (w > 0);
-}
-#endif
-
-#ifdef HAS_SCALEARGBROWDOWN2BOX_RVV
-void ScaleARGBRowDown2Box_RVV(const uint8_t* src_argb,
- ptrdiff_t src_stride,
- uint8_t* dst_argb,
- int dst_width) {
- size_t w = (size_t)dst_width;
- const uint32_t* src0 = (const uint32_t*)(src_argb);
- const uint32_t* src1 = (const uint32_t*)(src_argb + src_stride);
- // NOTE: To match behavior on other platforms, vxrm (fixed-point rounding mode
- // register) is set to round-to-nearest-up mode(0).
- asm volatile("csrwi vxrm, 0");
- do {
- vuint8m4_t v_row0_odd, v_row0_even, v_row1_odd, v_row1_even, v_dst;
- vuint16m8_t v_row0_sum, v_row1_sum, v_dst_16;
- vuint32m4_t v_row0_odd_32, v_row0_even_32, v_row1_odd_32, v_row1_even_32;
- size_t vl = __riscv_vsetvl_e32m4(w);
- __riscv_vlseg2e32_v_u32m4(&v_row0_even_32, &v_row0_odd_32, src0, vl);
- __riscv_vlseg2e32_v_u32m4(&v_row1_even_32, &v_row1_odd_32, src1, vl);
- v_row0_even = __riscv_vreinterpret_v_u32m4_u8m4(v_row0_even_32);
- v_row0_odd = __riscv_vreinterpret_v_u32m4_u8m4(v_row0_odd_32);
- v_row1_even = __riscv_vreinterpret_v_u32m4_u8m4(v_row1_even_32);
- v_row1_odd = __riscv_vreinterpret_v_u32m4_u8m4(v_row1_odd_32);
- v_row0_sum = __riscv_vwaddu_vv_u16m8(v_row0_even, v_row0_odd, vl * 4);
- v_row1_sum = __riscv_vwaddu_vv_u16m8(v_row1_even, v_row1_odd, vl * 4);
- v_dst_16 = __riscv_vadd_vv_u16m8(v_row0_sum, v_row1_sum, vl * 4);
- // Use round-to-nearest-up mode for vnclip
- v_dst = __riscv_vnclipu_wx_u8m4(v_dst_16, 2, vl * 4);
- __riscv_vse8_v_u8m4(dst_argb, v_dst, vl * 4);
- w -= vl;
- src0 += vl * 2;
- src1 += vl * 2;
- dst_argb += vl * 4;
- } while (w > 0);
-}
-#endif
-
-void ScaleARGBRowDownEven_RVV(const uint8_t* src_argb,
- ptrdiff_t src_stride,
- int src_stepx,
- uint8_t* dst_argb,
- int dst_width) {
- size_t w = (size_t)dst_width;
- const uint32_t* src = (const uint32_t*)(src_argb);
- uint32_t* dst = (uint32_t*)(dst_argb);
- const int stride_byte = src_stepx * 4;
- do {
- size_t vl = __riscv_vsetvl_e32m8(w);
- vuint32m8_t v_row = __riscv_vlse32_v_u32m8(src, stride_byte, vl);
- __riscv_vse32_v_u32m8(dst, v_row, vl);
- w -= vl;
- src += vl * src_stepx;
- dst += vl;
- } while (w > 0);
-}
-
-#ifdef HAS_SCALEARGBROWDOWNEVENBOX_RVV
-void ScaleARGBRowDownEvenBox_RVV(const uint8_t* src_argb,
- ptrdiff_t src_stride,
- int src_stepx,
- uint8_t* dst_argb,
- int dst_width) {
- size_t w = (size_t)dst_width;
- const uint32_t* src0 = (const uint32_t*)(src_argb);
- const uint32_t* src1 = (const uint32_t*)(src_argb + src_stride);
- const int stride_byte = src_stepx * 4;
- // NOTE: To match behavior on other platforms, vxrm (fixed-point rounding mode
- // register) is set to round-to-nearest-up mode(0).
- asm volatile("csrwi vxrm, 0");
- do {
- vuint8m4_t v_row0_low, v_row0_high, v_row1_low, v_row1_high, v_dst;
- vuint16m8_t v_row0_sum, v_row1_sum, v_sum;
- vuint32m4_t v_row0_low_32, v_row0_high_32, v_row1_low_32, v_row1_high_32;
- size_t vl = __riscv_vsetvl_e32m4(w);
- __riscv_vlsseg2e32_v_u32m4(&v_row0_low_32, &v_row0_high_32, src0,
- stride_byte, vl);
- __riscv_vlsseg2e32_v_u32m4(&v_row1_low_32, &v_row1_high_32, src1,
- stride_byte, vl);
- v_row0_low = __riscv_vreinterpret_v_u32m4_u8m4(v_row0_low_32);
- v_row0_high = __riscv_vreinterpret_v_u32m4_u8m4(v_row0_high_32);
- v_row1_low = __riscv_vreinterpret_v_u32m4_u8m4(v_row1_low_32);
- v_row1_high = __riscv_vreinterpret_v_u32m4_u8m4(v_row1_high_32);
- v_row0_sum = __riscv_vwaddu_vv_u16m8(v_row0_low, v_row0_high, vl * 4);
- v_row1_sum = __riscv_vwaddu_vv_u16m8(v_row1_low, v_row1_high, vl * 4);
- v_sum = __riscv_vadd_vv_u16m8(v_row0_sum, v_row1_sum, vl * 4);
- // Use round-to-nearest-up mode for vnclip
- v_dst = __riscv_vnclipu_wx_u8m4(v_sum, 2, vl * 4);
- __riscv_vse8_v_u8m4(dst_argb, v_dst, vl * 4);
- w -= vl;
- src0 += vl * src_stepx;
- src1 += vl * src_stepx;
- dst_argb += vl * 4;
- } while (w > 0);
-}
-#endif
-
-#ifdef HAS_SCALEROWDOWN2_RVV
-void ScaleRowDown2_RVV(const uint8_t* src_ptr,
- ptrdiff_t src_stride,
- uint8_t* dst,
- int dst_width) {
- size_t w = (size_t)dst_width;
- const uint16_t* src = (const uint16_t*)src_ptr;
- (void)src_stride;
- do {
- size_t vl = __riscv_vsetvl_e16m8(w);
- vuint16m8_t v_src = __riscv_vle16_v_u16m8(src, vl);
- vuint8m4_t v_dst = __riscv_vnsrl_wx_u8m4(v_src, 8, vl);
- __riscv_vse8_v_u8m4(dst, v_dst, vl);
- w -= vl;
- src += vl;
- dst += vl;
- } while (w > 0);
-}
-#endif
-
-#ifdef HAS_SCALEROWDOWN2LINEAR_RVV
-void ScaleRowDown2Linear_RVV(const uint8_t* src_ptr,
- ptrdiff_t src_stride,
- uint8_t* dst,
- int dst_width) {
- size_t w = (size_t)dst_width;
- (void)src_stride;
- // NOTE: To match behavior on other platforms, vxrm (fixed-point rounding mode
- // register) is set to round-to-nearest-up mode(0).
- asm volatile("csrwi vxrm, 0");
- do {
- vuint8m4_t v_s0, v_s1, v_dst;
- size_t vl = __riscv_vsetvl_e8m4(w);
- __riscv_vlseg2e8_v_u8m4(&v_s0, &v_s1, src_ptr, vl);
- // Use round-to-nearest-up mode for averaging add
- v_dst = __riscv_vaaddu_vv_u8m4(v_s0, v_s1, vl);
- __riscv_vse8_v_u8m4(dst, v_dst, vl);
- w -= vl;
- src_ptr += 2 * vl;
- dst += vl;
- } while (w > 0);
-}
-#endif
-
-#ifdef HAS_SCALEROWDOWN2BOX_RVV
-void ScaleRowDown2Box_RVV(const uint8_t* src_ptr,
- ptrdiff_t src_stride,
- uint8_t* dst,
- int dst_width) {
- const uint8_t* s = src_ptr;
- const uint8_t* t = src_ptr + src_stride;
- size_t w = (size_t)dst_width;
- // NOTE: To match behavior on other platforms, vxrm (fixed-point rounding mode
- // register) is set to round-to-nearest-up mode(0).
- asm volatile("csrwi vxrm, 0");
- do {
- size_t vl = __riscv_vsetvl_e8m4(w);
- vuint8m4_t v_s0, v_s1, v_t0, v_t1;
- vuint16m8_t v_s01, v_t01, v_st01;
- vuint8m4_t v_dst;
- __riscv_vlseg2e8_v_u8m4(&v_s0, &v_s1, s, vl);
- __riscv_vlseg2e8_v_u8m4(&v_t0, &v_t1, t, vl);
- v_s01 = __riscv_vwaddu_vv_u16m8(v_s0, v_s1, vl);
- v_t01 = __riscv_vwaddu_vv_u16m8(v_t0, v_t1, vl);
- v_st01 = __riscv_vadd_vv_u16m8(v_s01, v_t01, vl);
- // Use round-to-nearest-up mode for vnclip
- v_dst = __riscv_vnclipu_wx_u8m4(v_st01, 2, vl);
- __riscv_vse8_v_u8m4(dst, v_dst, vl);
- w -= vl;
- s += 2 * vl;
- t += 2 * vl;
- dst += vl;
- } while (w > 0);
-}
-#endif
-
-#ifdef HAS_SCALEROWDOWN4_RVV
-void ScaleRowDown4_RVV(const uint8_t* src_ptr,
- ptrdiff_t src_stride,
- uint8_t* dst_ptr,
- int dst_width) {
- size_t w = (size_t)dst_width;
- (void)src_stride;
- do {
- size_t vl = __riscv_vsetvl_e8m2(w);
- vuint8m2_t v_s0, v_s1, v_s2, v_s3;
- __riscv_vlseg4e8_v_u8m2(&v_s0, &v_s1, &v_s2, &v_s3, src_ptr, vl);
- __riscv_vse8_v_u8m2(dst_ptr, v_s2, vl);
- w -= vl;
- src_ptr += (4 * vl);
- dst_ptr += vl;
- } while (w > 0);
-}
-#endif
-
-#ifdef HAS_SCALEROWDOWN4BOX_RVV
-void ScaleRowDown4Box_RVV(const uint8_t* src_ptr,
- ptrdiff_t src_stride,
- uint8_t* dst_ptr,
- int dst_width) {
- const uint8_t* src_ptr1 = src_ptr + src_stride;
- const uint8_t* src_ptr2 = src_ptr + src_stride * 2;
- const uint8_t* src_ptr3 = src_ptr + src_stride * 3;
- size_t w = (size_t)dst_width;
- // NOTE: To match behavior on other platforms, vxrm (fixed-point rounding mode
- // register) is set to round-to-nearest-up mode(0).
- asm volatile("csrwi vxrm, 0");
- do {
- vuint8m2_t v_s0, v_s1, v_s2, v_s3;
- vuint8m2_t v_t0, v_t1, v_t2, v_t3;
- vuint8m2_t v_u0, v_u1, v_u2, v_u3;
- vuint8m2_t v_v0, v_v1, v_v2, v_v3;
- vuint16m4_t v_s01, v_s23, v_t01, v_t23;
- vuint16m4_t v_u01, v_u23, v_v01, v_v23;
- vuint16m4_t v_st01, v_st23, v_uv01, v_uv23;
- vuint16m4_t v_st0123, v_uv0123, v_stuv0123;
- vuint8m2_t v_dst;
- size_t vl = __riscv_vsetvl_e8m2(w);
-
- __riscv_vlseg4e8_v_u8m2(&v_s0, &v_s1, &v_s2, &v_s3, src_ptr, vl);
- v_s01 = __riscv_vwaddu_vv_u16m4(v_s0, v_s1, vl);
-
- __riscv_vlseg4e8_v_u8m2(&v_t0, &v_t1, &v_t2, &v_t3, src_ptr1, vl);
- v_t01 = __riscv_vwaddu_vv_u16m4(v_t0, v_t1, vl);
-
- __riscv_vlseg4e8_v_u8m2(&v_u0, &v_u1, &v_u2, &v_u3, src_ptr2, vl);
- v_u01 = __riscv_vwaddu_vv_u16m4(v_u0, v_u1, vl);
- v_u23 = __riscv_vwaddu_vv_u16m4(v_u2, v_u3, vl);
-
- v_s23 = __riscv_vwaddu_vv_u16m4(v_s2, v_s3, vl);
- v_t23 = __riscv_vwaddu_vv_u16m4(v_t2, v_t3, vl);
- v_st01 = __riscv_vadd_vv_u16m4(v_s01, v_t01, vl);
- v_st23 = __riscv_vadd_vv_u16m4(v_s23, v_t23, vl);
-
- __riscv_vlseg4e8_v_u8m2(&v_v0, &v_v1, &v_v2, &v_v3, src_ptr3, vl);
-
- v_v01 = __riscv_vwaddu_vv_u16m4(v_v0, v_v1, vl);
- v_v23 = __riscv_vwaddu_vv_u16m4(v_v2, v_v3, vl);
-
- v_uv01 = __riscv_vadd_vv_u16m4(v_u01, v_v01, vl);
- v_uv23 = __riscv_vadd_vv_u16m4(v_u23, v_v23, vl);
-
- v_st0123 = __riscv_vadd_vv_u16m4(v_st01, v_st23, vl);
- v_uv0123 = __riscv_vadd_vv_u16m4(v_uv01, v_uv23, vl);
- v_stuv0123 = __riscv_vadd_vv_u16m4(v_st0123, v_uv0123, vl);
- // Use round-to-nearest-up mode for vnclip
- v_dst = __riscv_vnclipu_wx_u8m2(v_stuv0123, 4, vl);
- __riscv_vse8_v_u8m2(dst_ptr, v_dst, vl);
- w -= vl;
- src_ptr += 4 * vl;
- src_ptr1 += 4 * vl;
- src_ptr2 += 4 * vl;
- src_ptr3 += 4 * vl;
- dst_ptr += vl;
- } while (w > 0);
-}
-#endif
-
-#ifdef HAS_SCALEROWDOWN34_RVV
-void ScaleRowDown34_RVV(const uint8_t* src_ptr,
- ptrdiff_t src_stride,
- uint8_t* dst_ptr,
- int dst_width) {
- size_t w = (size_t)dst_width / 3u;
- do {
- size_t vl = __riscv_vsetvl_e8m2(w);
- vuint8m2_t v_s0, v_s1, v_s2, v_s3;
- __riscv_vlseg4e8_v_u8m2(&v_s0, &v_s1, &v_s2, &v_s3, src_ptr, vl);
- __riscv_vsseg3e8_v_u8m2(dst_ptr, v_s0, v_s1, v_s3, vl);
- w -= vl;
- src_ptr += 4 * vl;
- dst_ptr += 3 * vl;
- } while (w > 0);
-}
-#endif
-
-#ifdef HAS_SCALEROWDOWN34_0_BOX_RVV
-void ScaleRowDown34_0_Box_RVV(const uint8_t* src_ptr,
- ptrdiff_t src_stride,
- uint8_t* dst_ptr,
- int dst_width) {
- size_t w = (size_t)dst_width / 3u;
- const uint8_t* s = src_ptr;
- const uint8_t* t = src_ptr + src_stride;
- // NOTE: To match behavior on other platforms, vxrm (fixed-point rounding mode
- // register) is set to round-to-nearest-up mode(0).
- asm volatile("csrwi vxrm, 0");
- do {
- vuint8m2_t v_s0, v_s1, v_s2, v_s3;
- vuint16m4_t v_t0_u16, v_t1_u16, v_t2_u16, v_t3_u16;
- vuint8m2_t v_u0, v_u1, v_u2, v_u3;
- vuint16m4_t v_u1_u16;
- vuint8m2_t v_a0, v_a1, v_a2;
- size_t vl = __riscv_vsetvl_e8m2(w);
- __riscv_vlseg4e8_v_u8m2(&v_s0, &v_s1, &v_s2, &v_s3, s, vl);
-
- if (src_stride == 0) {
- v_t0_u16 = __riscv_vwaddu_vx_u16m4(v_s0, 2, vl);
- v_t1_u16 = __riscv_vwaddu_vx_u16m4(v_s1, 2, vl);
- v_t2_u16 = __riscv_vwaddu_vx_u16m4(v_s2, 2, vl);
- v_t3_u16 = __riscv_vwaddu_vx_u16m4(v_s3, 2, vl);
- } else {
- vuint8m2_t v_t0, v_t1, v_t2, v_t3;
- __riscv_vlseg4e8_v_u8m2(&v_t0, &v_t1, &v_t2, &v_t3, t, vl);
- v_t0_u16 = __riscv_vwaddu_vx_u16m4(v_t0, 0, vl);
- v_t1_u16 = __riscv_vwaddu_vx_u16m4(v_t1, 0, vl);
- v_t2_u16 = __riscv_vwaddu_vx_u16m4(v_t2, 0, vl);
- v_t3_u16 = __riscv_vwaddu_vx_u16m4(v_t3, 0, vl);
- t += 4 * vl;
- }
-
- v_t0_u16 = __riscv_vwmaccu_vx_u16m4(v_t0_u16, 3, v_s0, vl);
- v_t1_u16 = __riscv_vwmaccu_vx_u16m4(v_t1_u16, 3, v_s1, vl);
- v_t2_u16 = __riscv_vwmaccu_vx_u16m4(v_t2_u16, 3, v_s2, vl);
- v_t3_u16 = __riscv_vwmaccu_vx_u16m4(v_t3_u16, 3, v_s3, vl);
-
- // Use round-to-nearest-up mode for vnclip & averaging add
- v_u0 = __riscv_vnclipu_wx_u8m2(v_t0_u16, 2, vl);
- v_u1 = __riscv_vnclipu_wx_u8m2(v_t1_u16, 2, vl);
- v_u2 = __riscv_vnclipu_wx_u8m2(v_t2_u16, 2, vl);
- v_u3 = __riscv_vnclipu_wx_u8m2(v_t3_u16, 2, vl);
-
- // a0 = (src[0] * 3 + s[1] * 1 + 2) >> 2
- v_u1_u16 = __riscv_vwaddu_vx_u16m4(v_u1, 0, vl);
- v_u1_u16 = __riscv_vwmaccu_vx_u16m4(v_u1_u16, 3, v_u0, vl);
- v_a0 = __riscv_vnclipu_wx_u8m2(v_u1_u16, 2, vl);
-
- // a1 = (src[1] * 1 + s[2] * 1 + 1) >> 1
- v_a1 = __riscv_vaaddu_vv_u8m2(v_u1, v_u2, vl);
-
- // a2 = (src[2] * 1 + s[3] * 3 + 2) >> 2
- v_u1_u16 = __riscv_vwaddu_vx_u16m4(v_u2, 0, vl);
- v_u1_u16 = __riscv_vwmaccu_vx_u16m4(v_u1_u16, 3, v_u3, vl);
- v_a2 = __riscv_vnclipu_wx_u8m2(v_u1_u16, 2, vl);
-
- __riscv_vsseg3e8_v_u8m2(dst_ptr, v_a0, v_a1, v_a2, vl);
-
- w -= vl;
- s += 4 * vl;
- dst_ptr += 3 * vl;
- } while (w > 0);
-}
-#endif
-
-#ifdef HAS_SCALEROWDOWN34_1_BOX_RVV
-void ScaleRowDown34_1_Box_RVV(const uint8_t* src_ptr,
- ptrdiff_t src_stride,
- uint8_t* dst_ptr,
- int dst_width) {
- size_t w = (size_t)dst_width / 3u;
- const uint8_t* s = src_ptr;
- const uint8_t* t = src_ptr + src_stride;
- // NOTE: To match behavior on other platforms, vxrm (fixed-point rounding mode
- // register) is set to round-to-nearest-up mode(0).
- asm volatile("csrwi vxrm, 0");
- do {
- vuint8m2_t v_s0, v_s1, v_s2, v_s3;
- vuint8m2_t v_ave0, v_ave1, v_ave2, v_ave3;
- vuint16m4_t v_u1_u16;
- vuint8m2_t v_a0, v_a1, v_a2;
- size_t vl = __riscv_vsetvl_e8m2(w);
- __riscv_vlseg4e8_v_u8m2(&v_s0, &v_s1, &v_s2, &v_s3, s, vl);
-
- // Use round-to-nearest-up mode for vnclip & averaging add
- if (src_stride == 0) {
- v_ave0 = __riscv_vaaddu_vv_u8m2(v_s0, v_s0, vl);
- v_ave1 = __riscv_vaaddu_vv_u8m2(v_s1, v_s1, vl);
- v_ave2 = __riscv_vaaddu_vv_u8m2(v_s2, v_s2, vl);
- v_ave3 = __riscv_vaaddu_vv_u8m2(v_s3, v_s3, vl);
- } else {
- vuint8m2_t v_t0, v_t1, v_t2, v_t3;
- __riscv_vlseg4e8_v_u8m2(&v_t0, &v_t1, &v_t2, &v_t3, t, vl);
- v_ave0 = __riscv_vaaddu_vv_u8m2(v_s0, v_t0, vl);
- v_ave1 = __riscv_vaaddu_vv_u8m2(v_s1, v_t1, vl);
- v_ave2 = __riscv_vaaddu_vv_u8m2(v_s2, v_t2, vl);
- v_ave3 = __riscv_vaaddu_vv_u8m2(v_s3, v_t3, vl);
- t += 4 * vl;
- }
- // a0 = (src[0] * 3 + s[1] * 1 + 2) >> 2
- v_u1_u16 = __riscv_vwaddu_vx_u16m4(v_ave1, 0, vl);
- v_u1_u16 = __riscv_vwmaccu_vx_u16m4(v_u1_u16, 3, v_ave0, vl);
- v_a0 = __riscv_vnclipu_wx_u8m2(v_u1_u16, 2, vl);
-
- // a1 = (src[1] * 1 + s[2] * 1 + 1) >> 1
- v_a1 = __riscv_vaaddu_vv_u8m2(v_ave1, v_ave2, vl);
-
- // a2 = (src[2] * 1 + s[3] * 3 + 2) >> 2
- v_u1_u16 = __riscv_vwaddu_vx_u16m4(v_ave2, 0, vl);
- v_u1_u16 = __riscv_vwmaccu_vx_u16m4(v_u1_u16, 3, v_ave3, vl);
- v_a2 = __riscv_vnclipu_wx_u8m2(v_u1_u16, 2, vl);
-
- __riscv_vsseg3e8_v_u8m2(dst_ptr, v_a0, v_a1, v_a2, vl);
-
- w -= vl;
- s += 4 * vl;
- dst_ptr += 3 * vl;
- } while (w > 0);
-}
-#endif
-
-#ifdef HAS_SCALEROWDOWN38_RVV
-void ScaleRowDown38_RVV(const uint8_t* src_ptr,
- ptrdiff_t src_stride,
- uint8_t* dst_ptr,
- int dst_width) {
- size_t w = (size_t)dst_width / 3u;
- (void)src_stride;
- assert(dst_width % 3 == 0);
- do {
- vuint8m1_t v_s0, v_s1, v_s2, v_s3, v_s4, v_s5, v_s6, v_s7;
- size_t vl = __riscv_vsetvl_e8m1(w);
- __riscv_vlseg8e8_v_u8m1(&v_s0, &v_s1, &v_s2, &v_s3, &v_s4, &v_s5, &v_s6,
- &v_s7, src_ptr, vl);
- __riscv_vsseg3e8_v_u8m1(dst_ptr, v_s0, v_s3, v_s6, vl);
- w -= vl;
- src_ptr += 8 * vl;
- dst_ptr += 3 * vl;
- } while (w > 0);
-}
-#endif
-
-#ifdef HAS_SCALEROWDOWN38_2_BOX_RVV
-void ScaleRowDown38_2_Box_RVV(const uint8_t* src_ptr,
- ptrdiff_t src_stride,
- uint8_t* dst_ptr,
- int dst_width) {
- size_t w = (size_t)dst_width / 3u;
- const uint16_t coeff_a = (65536u / 6u);
- const uint16_t coeff_b = (65536u / 4u);
- assert((dst_width % 3 == 0) && (dst_width > 0));
- do {
- vuint8m1_t v_s0, v_s1, v_s2, v_s3, v_s4, v_s5, v_s6, v_s7;
- vuint8m1_t v_t0, v_t1, v_t2, v_t3, v_t4, v_t5, v_t6, v_t7;
- vuint16m2_t v_e0, v_e1, v_e2, v_e;
- vuint16m2_t v_f0, v_f1, v_f2, v_f;
- vuint16m2_t v_g0, v_g1, v_g;
- vuint8m1_t v_dst_e, v_dst_f, v_dst_g;
- size_t vl = __riscv_vsetvl_e8m1(w);
- // s: e00, e10, e20, f00, f10, f20, g00, g10
- // t: e01, e11, e21, f01, f11, f21, g01, g11
- __riscv_vlseg8e8_v_u8m1(&v_s0, &v_s1, &v_s2, &v_s3, &v_s4, &v_s5, &v_s6,
- &v_s7, src_ptr, vl);
- __riscv_vlseg8e8_v_u8m1(&v_t0, &v_t1, &v_t2, &v_t3, &v_t4, &v_t5, &v_t6,
- &v_t7, src_ptr + src_stride, vl);
- // Calculate sum of [e00, e21] to v_e
- // Calculate sum of [f00, f21] to v_f
- // Calculate sum of [g00, g11] to v_g
- v_e0 = __riscv_vwaddu_vv_u16m2(v_s0, v_t0, vl);
- v_e1 = __riscv_vwaddu_vv_u16m2(v_s1, v_t1, vl);
- v_e2 = __riscv_vwaddu_vv_u16m2(v_s2, v_t2, vl);
- v_f0 = __riscv_vwaddu_vv_u16m2(v_s3, v_t3, vl);
- v_f1 = __riscv_vwaddu_vv_u16m2(v_s4, v_t4, vl);
- v_f2 = __riscv_vwaddu_vv_u16m2(v_s5, v_t5, vl);
- v_g0 = __riscv_vwaddu_vv_u16m2(v_s6, v_t6, vl);
- v_g1 = __riscv_vwaddu_vv_u16m2(v_s7, v_t7, vl);
-
- v_e0 = __riscv_vadd_vv_u16m2(v_e0, v_e1, vl);
- v_f0 = __riscv_vadd_vv_u16m2(v_f0, v_f1, vl);
- v_e = __riscv_vadd_vv_u16m2(v_e0, v_e2, vl);
- v_f = __riscv_vadd_vv_u16m2(v_f0, v_f2, vl);
- v_g = __riscv_vadd_vv_u16m2(v_g0, v_g1, vl);
-
- // Average in 16-bit fixed-point
- v_e = __riscv_vmulhu_vx_u16m2(v_e, coeff_a, vl);
- v_f = __riscv_vmulhu_vx_u16m2(v_f, coeff_a, vl);
- v_g = __riscv_vmulhu_vx_u16m2(v_g, coeff_b, vl);
-
- v_dst_e = __riscv_vnsrl_wx_u8m1(v_e, 0, vl);
- v_dst_f = __riscv_vnsrl_wx_u8m1(v_f, 0, vl);
- v_dst_g = __riscv_vnsrl_wx_u8m1(v_g, 0, vl);
-
- __riscv_vsseg3e8_v_u8m1(dst_ptr, v_dst_e, v_dst_f, v_dst_g, vl);
- w -= vl;
- src_ptr += 8 * vl;
- dst_ptr += 3 * vl;
- } while (w > 0);
-}
-#endif
-
-#ifdef HAS_SCALEROWDOWN38_3_BOX_RVV
-void ScaleRowDown38_3_Box_RVV(const uint8_t* src_ptr,
- ptrdiff_t src_stride,
- uint8_t* dst_ptr,
- int dst_width) {
- size_t w = (size_t)dst_width / 3u;
- const uint16_t coeff_a = (65536u / 9u);
- const uint16_t coeff_b = (65536u / 6u);
- assert((dst_width % 3 == 0) && (dst_width > 0));
- do {
- vuint8m1_t v_s0, v_s1, v_s2, v_s3, v_s4, v_s5, v_s6, v_s7;
- vuint8m1_t v_t0, v_t1, v_t2, v_t3, v_t4, v_t5, v_t6, v_t7;
- vuint8m1_t v_u0, v_u1, v_u2, v_u3, v_u4, v_u5, v_u6, v_u7;
- vuint16m2_t v_e0, v_e1, v_e2, v_e3, v_e4, v_e;
- vuint16m2_t v_f0, v_f1, v_f2, v_f3, v_f4, v_f;
- vuint16m2_t v_g0, v_g1, v_g2, v_g;
- vuint8m1_t v_dst_e, v_dst_f, v_dst_g;
- size_t vl = __riscv_vsetvl_e8m1(w);
- // s: e00, e10, e20, f00, f10, f20, g00, g10
- // t: e01, e11, e21, f01, f11, f21, g01, g11
- // u: e02, e12, e22, f02, f12, f22, g02, g12
- __riscv_vlseg8e8_v_u8m1(&v_s0, &v_s1, &v_s2, &v_s3, &v_s4, &v_s5, &v_s6,
- &v_s7, src_ptr, vl);
- __riscv_vlseg8e8_v_u8m1(&v_t0, &v_t1, &v_t2, &v_t3, &v_t4, &v_t5, &v_t6,
- &v_t7, src_ptr + src_stride, vl);
- __riscv_vlseg8e8_v_u8m1(&v_u0, &v_u1, &v_u2, &v_u3, &v_u4, &v_u5, &v_u6,
- &v_u7, src_ptr + 2 * src_stride, vl);
- // Calculate sum of [e00, e22]
- v_e0 = __riscv_vwaddu_vv_u16m2(v_s0, v_t0, vl);
- v_e1 = __riscv_vwaddu_vv_u16m2(v_s1, v_t1, vl);
- v_e2 = __riscv_vwaddu_vv_u16m2(v_s2, v_t2, vl);
- v_e3 = __riscv_vwaddu_vv_u16m2(v_u0, v_u1, vl);
- v_e4 = __riscv_vwaddu_vx_u16m2(v_u2, 0, vl);
-
- v_e0 = __riscv_vadd_vv_u16m2(v_e0, v_e1, vl);
- v_e2 = __riscv_vadd_vv_u16m2(v_e2, v_e3, vl);
- v_e0 = __riscv_vadd_vv_u16m2(v_e0, v_e4, vl);
- v_e = __riscv_vadd_vv_u16m2(v_e0, v_e2, vl);
- // Calculate sum of [f00, f22]
- v_f0 = __riscv_vwaddu_vv_u16m2(v_s3, v_t3, vl);
- v_f1 = __riscv_vwaddu_vv_u16m2(v_s4, v_t4, vl);
- v_f2 = __riscv_vwaddu_vv_u16m2(v_s5, v_t5, vl);
- v_f3 = __riscv_vwaddu_vv_u16m2(v_u3, v_u4, vl);
- v_f4 = __riscv_vwaddu_vx_u16m2(v_u5, 0, vl);
-
- v_f0 = __riscv_vadd_vv_u16m2(v_f0, v_f1, vl);
- v_f2 = __riscv_vadd_vv_u16m2(v_f2, v_f3, vl);
- v_f0 = __riscv_vadd_vv_u16m2(v_f0, v_f4, vl);
- v_f = __riscv_vadd_vv_u16m2(v_f0, v_f2, vl);
- // Calculate sum of [g00, g12]
- v_g0 = __riscv_vwaddu_vv_u16m2(v_s6, v_t6, vl);
- v_g1 = __riscv_vwaddu_vv_u16m2(v_s7, v_t7, vl);
- v_g2 = __riscv_vwaddu_vv_u16m2(v_u6, v_u7, vl);
-
- v_g = __riscv_vadd_vv_u16m2(v_g0, v_g1, vl);
- v_g = __riscv_vadd_vv_u16m2(v_g, v_g2, vl);
-
- // Average in 16-bit fixed-point
- v_e = __riscv_vmulhu_vx_u16m2(v_e, coeff_a, vl);
- v_f = __riscv_vmulhu_vx_u16m2(v_f, coeff_a, vl);
- v_g = __riscv_vmulhu_vx_u16m2(v_g, coeff_b, vl);
-
- v_dst_e = __riscv_vnsrl_wx_u8m1(v_e, 0, vl);
- v_dst_f = __riscv_vnsrl_wx_u8m1(v_f, 0, vl);
- v_dst_g = __riscv_vnsrl_wx_u8m1(v_g, 0, vl);
- __riscv_vsseg3e8_v_u8m1(dst_ptr, v_dst_e, v_dst_f, v_dst_g, vl);
- w -= vl;
- src_ptr += 8 * vl;
- dst_ptr += 3 * vl;
- } while (w > 0);
-}
-#endif
-
-// ScaleUVRowUp2_(Bi)linear_RVV function is equal to other platforms'
-// ScaleRowUp2_(Bi)linear_Any_XXX. We process entire row in this function. Other
-// platforms only implement non-edge part of image and process edge with scalar.
-
-#ifdef HAS_SCALEROWUP2_LINEAR_RVV
-void ScaleRowUp2_Linear_RVV(const uint8_t* src_ptr,
- uint8_t* dst_ptr,
- int dst_width) {
- size_t work_width = (size_t)dst_width - 1u;
- size_t src_width = work_width >> 1u;
- const uint8_t* work_src_ptr = src_ptr;
- uint8_t* work_dst_ptr = dst_ptr + 1;
- size_t vl = __riscv_vsetvlmax_e8m4();
- vuint8m4_t v_3 = __riscv_vmv_v_x_u8m4(3, vl);
- dst_ptr[0] = src_ptr[0];
- while (src_width > 0) {
- vuint8m4_t v_src0, v_src1, v_dst_odd, v_dst_even;
- vuint16m8_t v_src0_u16, v_src1_u16;
- size_t vl = __riscv_vsetvl_e8m4(src_width);
- v_src0 = __riscv_vle8_v_u8m4(work_src_ptr, vl);
- v_src1 = __riscv_vle8_v_u8m4(work_src_ptr + 1, vl);
-
- v_src0_u16 = __riscv_vwaddu_vx_u16m8(v_src0, 2, vl);
- v_src1_u16 = __riscv_vwaddu_vx_u16m8(v_src1, 2, vl);
- v_src0_u16 = __riscv_vwmaccu_vv_u16m8(v_src0_u16, v_3, v_src1, vl);
- v_src1_u16 = __riscv_vwmaccu_vv_u16m8(v_src1_u16, v_3, v_src0, vl);
-
- v_dst_odd = __riscv_vnsrl_wx_u8m4(v_src0_u16, 2, vl);
- v_dst_even = __riscv_vnsrl_wx_u8m4(v_src1_u16, 2, vl);
-
- __riscv_vsseg2e8_v_u8m4(work_dst_ptr, v_dst_even, v_dst_odd, vl);
-
- src_width -= vl;
- work_src_ptr += vl;
- work_dst_ptr += 2 * vl;
- }
- dst_ptr[dst_width - 1] = src_ptr[(dst_width - 1) / 2];
-}
-#endif
-
-#ifdef HAS_SCALEROWUP2_BILINEAR_RVV
-void ScaleRowUp2_Bilinear_RVV(const uint8_t* src_ptr,
- ptrdiff_t src_stride,
- uint8_t* dst_ptr,
- ptrdiff_t dst_stride,
- int dst_width) {
- size_t work_width = ((size_t)dst_width - 1u) & ~1u;
- size_t src_width = work_width >> 1u;
- const uint8_t* work_s = src_ptr;
- const uint8_t* work_t = src_ptr + src_stride;
- const uint8_t* s = work_s;
- const uint8_t* t = work_t;
- uint8_t* d = dst_ptr;
- uint8_t* e = dst_ptr + dst_stride;
- uint8_t* work_d = d + 1;
- uint8_t* work_e = e + 1;
- size_t vl = __riscv_vsetvlmax_e16m4();
- vuint16m4_t v_3_u16 = __riscv_vmv_v_x_u16m4(3, vl);
- vuint8m2_t v_3_u8 = __riscv_vmv_v_x_u8m2(3, vl);
- d[0] = (3 * s[0] + t[0] + 2) >> 2;
- e[0] = (s[0] + 3 * t[0] + 2) >> 2;
- while (src_width > 0) {
- vuint8m2_t v_s0, v_s1, v_t0, v_t1;
- vuint16m4_t v_s0_u16, v_s1_u16, v_t0_u16, v_t1_u16;
- vuint16m4_t v_t0_u16_, v_t1_u16_;
- vuint8m2_t v_dst0_even, v_dst0_odd, v_dst1_even, v_dst1_odd;
- size_t vl = __riscv_vsetvl_e8m2(src_width);
- v_s0 = __riscv_vle8_v_u8m2(work_s, vl);
- v_s1 = __riscv_vle8_v_u8m2(work_s + 1, vl);
-
- v_s0_u16 = __riscv_vwaddu_vx_u16m4(v_s0, 2, vl);
- v_s1_u16 = __riscv_vwaddu_vx_u16m4(v_s1, 2, vl);
- v_s0_u16 = __riscv_vwmaccu_vv_u16m4(v_s0_u16, v_3_u8, v_s1, vl);
- v_s1_u16 = __riscv_vwmaccu_vv_u16m4(v_s1_u16, v_3_u8, v_s0, vl);
-
- v_t0 = __riscv_vle8_v_u8m2(work_t, vl);
- v_t1 = __riscv_vle8_v_u8m2(work_t + 1, vl);
-
- v_t0_u16 = __riscv_vwaddu_vx_u16m4(v_t0, 2, vl);
- v_t1_u16 = __riscv_vwaddu_vx_u16m4(v_t1, 2, vl);
- v_t0_u16 = __riscv_vwmaccu_vv_u16m4(v_t0_u16, v_3_u8, v_t1, vl);
- v_t1_u16 = __riscv_vwmaccu_vv_u16m4(v_t1_u16, v_3_u8, v_t0, vl);
-
- v_t0_u16_ = __riscv_vmv_v_v_u16m4(v_t0_u16, vl);
- v_t1_u16_ = __riscv_vmv_v_v_u16m4(v_t1_u16, vl);
-
- v_t0_u16 = __riscv_vmacc_vv_u16m4(v_t0_u16, v_3_u16, v_s0_u16, vl);
- v_t1_u16 = __riscv_vmacc_vv_u16m4(v_t1_u16, v_3_u16, v_s1_u16, vl);
- v_s0_u16 = __riscv_vmacc_vv_u16m4(v_s0_u16, v_3_u16, v_t0_u16_, vl);
- v_s1_u16 = __riscv_vmacc_vv_u16m4(v_s1_u16, v_3_u16, v_t1_u16_, vl);
-
- v_dst0_odd = __riscv_vnsrl_wx_u8m2(v_t0_u16, 4, vl);
- v_dst0_even = __riscv_vnsrl_wx_u8m2(v_t1_u16, 4, vl);
- v_dst1_odd = __riscv_vnsrl_wx_u8m2(v_s0_u16, 4, vl);
- v_dst1_even = __riscv_vnsrl_wx_u8m2(v_s1_u16, 4, vl);
-
- __riscv_vsseg2e8_v_u8m2(work_d, v_dst0_even, v_dst0_odd, vl);
- __riscv_vsseg2e8_v_u8m2(work_e, v_dst1_even, v_dst1_odd, vl);
-
- src_width -= vl;
- work_s += vl;
- work_t += vl;
- work_d += 2 * vl;
- work_e += 2 * vl;
- }
- d[dst_width - 1] =
- (3 * s[(dst_width - 1) / 2] + t[(dst_width - 1) / 2] + 2) >> 2;
- e[dst_width - 1] =
- (s[(dst_width - 1) / 2] + 3 * t[(dst_width - 1) / 2] + 2) >> 2;
-}
-#endif
-
-#ifdef HAS_SCALEUVROWDOWN2_RVV
-void ScaleUVRowDown2_RVV(const uint8_t* src_uv,
- ptrdiff_t src_stride,
- uint8_t* dst_uv,
- int dst_width) {
- size_t w = (size_t)dst_width;
- const uint32_t* src = (const uint32_t*)src_uv;
- uint16_t* dst = (uint16_t*)dst_uv;
- (void)src_stride;
- do {
- size_t vl = __riscv_vsetvl_e32m8(w);
- vuint32m8_t v_data = __riscv_vle32_v_u32m8(src, vl);
- vuint16m4_t v_u1v1 = __riscv_vnsrl_wx_u16m4(v_data, 16, vl);
- __riscv_vse16_v_u16m4(dst, v_u1v1, vl);
- w -= vl;
- src += vl;
- dst += vl;
- } while (w > 0);
-}
-#endif
-
-#ifdef HAS_SCALEUVROWDOWN2LINEAR_RVV
-void ScaleUVRowDown2Linear_RVV(const uint8_t* src_uv,
- ptrdiff_t src_stride,
- uint8_t* dst_uv,
- int dst_width) {
- size_t w = (size_t)dst_width;
- const uint16_t* src = (const uint16_t*)src_uv;
- (void)src_stride;
- // NOTE: To match behavior on other platforms, vxrm (fixed-point rounding mode
- // register) is set to round-to-nearest-up mode(0).
- asm volatile("csrwi vxrm, 0");
- do {
- vuint8m4_t v_u0v0, v_u1v1, v_avg;
- vuint16m4_t v_u0v0_16, v_u1v1_16;
- size_t vl = __riscv_vsetvl_e16m4(w);
- __riscv_vlseg2e16_v_u16m4(&v_u0v0_16, &v_u1v1_16, src, vl);
- v_u0v0 = __riscv_vreinterpret_v_u16m4_u8m4(v_u0v0_16);
- v_u1v1 = __riscv_vreinterpret_v_u16m4_u8m4(v_u1v1_16);
- // Use round-to-nearest-up mode for averaging add
- v_avg = __riscv_vaaddu_vv_u8m4(v_u0v0, v_u1v1, vl * 2);
- __riscv_vse8_v_u8m4(dst_uv, v_avg, vl * 2);
- w -= vl;
- src += vl * 2;
- dst_uv += vl * 2;
- } while (w > 0);
-}
-#endif
-
-#ifdef HAS_SCALEUVROWDOWN2BOX_RVV
-void ScaleUVRowDown2Box_RVV(const uint8_t* src_uv,
- ptrdiff_t src_stride,
- uint8_t* dst_uv,
- int dst_width) {
- const uint8_t* src_uv_row1 = src_uv + src_stride;
- size_t w = (size_t)dst_width;
- // NOTE: To match behavior on other platforms, vxrm (fixed-point rounding mode
- // register) is set to round-to-nearest-up mode(0).
- asm volatile("csrwi vxrm, 0");
- do {
- vuint8m2_t v_u0_row0, v_v0_row0, v_u1_row0, v_v1_row0;
- vuint8m2_t v_u0_row1, v_v0_row1, v_u1_row1, v_v1_row1;
- vuint16m4_t v_u0u1_row0, v_u0u1_row1, v_v0v1_row0, v_v0v1_row1;
- vuint16m4_t v_sum0, v_sum1;
- vuint8m2_t v_dst_u, v_dst_v;
- size_t vl = __riscv_vsetvl_e8m2(w);
-
- __riscv_vlseg4e8_v_u8m2(&v_u0_row0, &v_v0_row0, &v_u1_row0, &v_v1_row0,
- src_uv, vl);
- __riscv_vlseg4e8_v_u8m2(&v_u0_row1, &v_v0_row1, &v_u1_row1, &v_v1_row1,
- src_uv_row1, vl);
-
- v_u0u1_row0 = __riscv_vwaddu_vv_u16m4(v_u0_row0, v_u1_row0, vl);
- v_u0u1_row1 = __riscv_vwaddu_vv_u16m4(v_u0_row1, v_u1_row1, vl);
- v_v0v1_row0 = __riscv_vwaddu_vv_u16m4(v_v0_row0, v_v1_row0, vl);
- v_v0v1_row1 = __riscv_vwaddu_vv_u16m4(v_v0_row1, v_v1_row1, vl);
-
- v_sum0 = __riscv_vadd_vv_u16m4(v_u0u1_row0, v_u0u1_row1, vl);
- v_sum1 = __riscv_vadd_vv_u16m4(v_v0v1_row0, v_v0v1_row1, vl);
- // Use round-to-nearest-up mode for vnclip
- v_dst_u = __riscv_vnclipu_wx_u8m2(v_sum0, 2, vl);
- v_dst_v = __riscv_vnclipu_wx_u8m2(v_sum1, 2, vl);
-
- __riscv_vsseg2e8_v_u8m2(dst_uv, v_dst_u, v_dst_v, vl);
-
- dst_uv += 2 * vl;
- src_uv += 4 * vl;
- w -= vl;
- src_uv_row1 += 4 * vl;
- } while (w > 0);
-}
-#endif
-
-#ifdef HAS_SCALEUVROWDOWN4_RVV
-void ScaleUVRowDown4_RVV(const uint8_t* src_uv,
- ptrdiff_t src_stride,
- int src_stepx,
- uint8_t* dst_uv,
- int dst_width) {
- // Overflow will never happen here, since sizeof(size_t)/sizeof(int)=2.
- // dst_width = src_width / 4 and src_width is also int.
- size_t w = (size_t)dst_width * 8;
- (void)src_stride;
- (void)src_stepx;
- do {
- size_t vl = __riscv_vsetvl_e8m8(w);
- vuint8m8_t v_row = __riscv_vle8_v_u8m8(src_uv, vl);
- vuint64m8_t v_row_64 = __riscv_vreinterpret_v_u8m8_u64m8(v_row);
- // Narrowing without clipping
- vuint32m4_t v_tmp = __riscv_vncvt_x_x_w_u32m4(v_row_64, vl / 8);
- vuint16m2_t v_dst_16 = __riscv_vncvt_x_x_w_u16m2(v_tmp, vl / 8);
- vuint8m2_t v_dst = __riscv_vreinterpret_v_u16m2_u8m2(v_dst_16);
- __riscv_vse8_v_u8m2(dst_uv, v_dst, vl / 4);
- w -= vl;
- src_uv += vl;
- dst_uv += vl / 4;
- } while (w > 0);
-}
-#endif
-
-#ifdef HAS_SCALEUVROWDOWNEVEN_RVV
-void ScaleUVRowDownEven_RVV(const uint8_t* src_uv,
- ptrdiff_t src_stride,
- int src_stepx,
- uint8_t* dst_uv,
- int dst_width) {
- size_t w = (size_t)dst_width;
- const ptrdiff_t stride_byte = (ptrdiff_t)src_stepx * 2;
- const uint16_t* src = (const uint16_t*)(src_uv);
- uint16_t* dst = (uint16_t*)(dst_uv);
- (void)src_stride;
- do {
- size_t vl = __riscv_vsetvl_e16m8(w);
- vuint16m8_t v_row = __riscv_vlse16_v_u16m8(src, stride_byte, vl);
- __riscv_vse16_v_u16m8(dst, v_row, vl);
- w -= vl;
- src += vl * src_stepx;
- dst += vl;
- } while (w > 0);
-}
-#endif
-
-// ScaleUVRowUp2_(Bi)linear_RVV function is equal to other platforms'
-// ScaleUVRowUp2_(Bi)linear_Any_XXX. We process entire row in this function.
-// Other platforms only implement non-edge part of image and process edge with
-// scalar.
-
-#ifdef HAS_SCALEUVROWUP2_LINEAR_RVV
-void ScaleUVRowUp2_Linear_RVV(const uint8_t* src_ptr,
- uint8_t* dst_ptr,
- int dst_width) {
- size_t work_width = ((size_t)dst_width - 1u) & ~1u;
- uint16_t* work_dst_ptr = (uint16_t*)dst_ptr + 1;
- const uint8_t* work_src_ptr = src_ptr;
- size_t vl = __riscv_vsetvlmax_e8m4();
- vuint8m4_t v_3_u8 = __riscv_vmv_v_x_u8m4(3, vl);
- dst_ptr[0] = src_ptr[0];
- dst_ptr[1] = src_ptr[1];
- while (work_width > 0) {
- vuint8m4_t v_uv0, v_uv1, v_dst_odd_u8, v_dst_even_u8;
- vuint16m4_t v_dst_odd, v_dst_even;
- vuint16m8_t v_uv0_u16, v_uv1_u16;
- size_t vl = __riscv_vsetvl_e8m4(work_width);
- v_uv0 = __riscv_vle8_v_u8m4(work_src_ptr, vl);
- v_uv1 = __riscv_vle8_v_u8m4(work_src_ptr + 2, vl);
-
- v_uv0_u16 = __riscv_vwaddu_vx_u16m8(v_uv0, 2, vl);
- v_uv1_u16 = __riscv_vwaddu_vx_u16m8(v_uv1, 2, vl);
-
- v_uv0_u16 = __riscv_vwmaccu_vv_u16m8(v_uv0_u16, v_3_u8, v_uv1, vl);
- v_uv1_u16 = __riscv_vwmaccu_vv_u16m8(v_uv1_u16, v_3_u8, v_uv0, vl);
-
- v_dst_odd_u8 = __riscv_vnsrl_wx_u8m4(v_uv0_u16, 2, vl);
- v_dst_even_u8 = __riscv_vnsrl_wx_u8m4(v_uv1_u16, 2, vl);
-
- v_dst_even = __riscv_vreinterpret_v_u8m4_u16m4(v_dst_even_u8);
- v_dst_odd = __riscv_vreinterpret_v_u8m4_u16m4(v_dst_odd_u8);
-
- __riscv_vsseg2e16_v_u16m4(work_dst_ptr, v_dst_even, v_dst_odd, vl / 2);
-
- work_width -= vl;
- work_src_ptr += vl;
- work_dst_ptr += vl;
- }
- dst_ptr[2 * dst_width - 2] = src_ptr[((dst_width + 1) & ~1) - 2];
- dst_ptr[2 * dst_width - 1] = src_ptr[((dst_width + 1) & ~1) - 1];
-}
-#endif
-
-#ifdef HAS_SCALEUVROWUP2_BILINEAR_RVV
-void ScaleUVRowUp2_Bilinear_RVV(const uint8_t* src_ptr,
- ptrdiff_t src_stride,
- uint8_t* dst_ptr,
- ptrdiff_t dst_stride,
- int dst_width) {
- size_t work_width = ((size_t)dst_width - 1u) & ~1u;
- const uint8_t* work_s = src_ptr;
- const uint8_t* work_t = src_ptr + src_stride;
- const uint8_t* s = work_s;
- const uint8_t* t = work_t;
- uint8_t* d = dst_ptr;
- uint8_t* e = dst_ptr + dst_stride;
- uint16_t* work_d = (uint16_t*)d + 1;
- uint16_t* work_e = (uint16_t*)e + 1;
- size_t vl = __riscv_vsetvlmax_e16m4();
- vuint16m4_t v_3_u16 = __riscv_vmv_v_x_u16m4(3, vl);
- vuint8m2_t v_3_u8 = __riscv_vmv_v_x_u8m2(3, vl);
- d[0] = (3 * s[0] + t[0] + 2) >> 2;
- e[0] = (s[0] + 3 * t[0] + 2) >> 2;
- d[1] = (3 * s[1] + t[1] + 2) >> 2;
- e[1] = (s[1] + 3 * t[1] + 2) >> 2;
- while (work_width > 0) {
- vuint8m2_t v_s0, v_s1, v_t0, v_t1;
- vuint16m4_t v_s0_u16, v_s1_u16, v_t0_u16, v_t1_u16;
- vuint16m4_t v_t0_u16_, v_t1_u16_;
- vuint8m2_t v_dst0_odd_u8, v_dst0_even_u8, v_dst1_odd_u8, v_dst1_even_u8;
- vuint16m2_t v_dst0_even, v_dst0_odd, v_dst1_even, v_dst1_odd;
- size_t vl = __riscv_vsetvl_e8m2(work_width);
- v_s0 = __riscv_vle8_v_u8m2(work_s, vl);
- v_s1 = __riscv_vle8_v_u8m2(work_s + 2, vl);
-
- v_s0_u16 = __riscv_vwaddu_vx_u16m4(v_s0, 2, vl);
- v_s1_u16 = __riscv_vwaddu_vx_u16m4(v_s1, 2, vl);
- v_s0_u16 = __riscv_vwmaccu_vv_u16m4(v_s0_u16, v_3_u8, v_s1, vl);
- v_s1_u16 = __riscv_vwmaccu_vv_u16m4(v_s1_u16, v_3_u8, v_s0, vl);
-
- v_t0 = __riscv_vle8_v_u8m2(work_t, vl);
- v_t1 = __riscv_vle8_v_u8m2(work_t + 2, vl);
-
- v_t0_u16 = __riscv_vwaddu_vx_u16m4(v_t0, 2, vl);
- v_t1_u16 = __riscv_vwaddu_vx_u16m4(v_t1, 2, vl);
- v_t0_u16 = __riscv_vwmaccu_vv_u16m4(v_t0_u16, v_3_u8, v_t1, vl);
- v_t1_u16 = __riscv_vwmaccu_vv_u16m4(v_t1_u16, v_3_u8, v_t0, vl);
-
- v_t0_u16_ = __riscv_vmv_v_v_u16m4(v_t0_u16, vl);
- v_t1_u16_ = __riscv_vmv_v_v_u16m4(v_t1_u16, vl);
-
- v_t0_u16 = __riscv_vmacc_vv_u16m4(v_t0_u16, v_3_u16, v_s0_u16, vl);
- v_t1_u16 = __riscv_vmacc_vv_u16m4(v_t1_u16, v_3_u16, v_s1_u16, vl);
- v_s0_u16 = __riscv_vmacc_vv_u16m4(v_s0_u16, v_3_u16, v_t0_u16_, vl);
- v_s1_u16 = __riscv_vmacc_vv_u16m4(v_s1_u16, v_3_u16, v_t1_u16_, vl);
-
- v_dst0_odd_u8 = __riscv_vnsrl_wx_u8m2(v_t0_u16, 4, vl);
- v_dst0_even_u8 = __riscv_vnsrl_wx_u8m2(v_t1_u16, 4, vl);
- v_dst1_odd_u8 = __riscv_vnsrl_wx_u8m2(v_s0_u16, 4, vl);
- v_dst1_even_u8 = __riscv_vnsrl_wx_u8m2(v_s1_u16, 4, vl);
-
- v_dst0_even = __riscv_vreinterpret_v_u8m2_u16m2(v_dst0_even_u8);
- v_dst0_odd = __riscv_vreinterpret_v_u8m2_u16m2(v_dst0_odd_u8);
- v_dst1_even = __riscv_vreinterpret_v_u8m2_u16m2(v_dst1_even_u8);
- v_dst1_odd = __riscv_vreinterpret_v_u8m2_u16m2(v_dst1_odd_u8);
-
- __riscv_vsseg2e16_v_u16m2(work_d, v_dst0_even, v_dst0_odd, vl / 2);
- __riscv_vsseg2e16_v_u16m2(work_e, v_dst1_even, v_dst1_odd, vl / 2);
-
- work_width -= vl;
- work_s += vl;
- work_t += vl;
- work_d += vl;
- work_e += vl;
- }
- d[2 * dst_width - 2] =
- (3 * s[((dst_width + 1) & ~1) - 2] + t[((dst_width + 1) & ~1) - 2] + 2) >>
- 2;
- e[2 * dst_width - 2] =
- (s[((dst_width + 1) & ~1) - 2] + 3 * t[((dst_width + 1) & ~1) - 2] + 2) >>
- 2;
- d[2 * dst_width - 1] =
- (3 * s[((dst_width + 1) & ~1) - 1] + t[((dst_width + 1) & ~1) - 1] + 2) >>
- 2;
- e[2 * dst_width - 1] =
- (s[((dst_width + 1) & ~1) - 1] + 3 * t[((dst_width + 1) & ~1) - 1] + 2) >>
- 2;
-}
-#endif
-
-#ifdef __cplusplus
-} // extern "C"
-} // namespace libyuv
-#endif
-
-#endif // !defined(LIBYUV_DISABLE_RVV) && defined(__riscv_vector) &&
- // defined(__clang__)
diff --git a/source/scale_uv.cc b/source/scale_uv.cc
index 536b943..1556071 100644
--- a/source/scale_uv.cc
+++ b/source/scale_uv.cc
@@ -128,15 +128,6 @@
}
}
#endif
-#if defined(HAS_SCALEUVROWDOWN2_RVV)
- if (TestCpuFlag(kCpuHasRVV)) {
- ScaleUVRowDown2 =
- filtering == kFilterNone
- ? ScaleUVRowDown2_RVV
- : (filtering == kFilterLinear ? ScaleUVRowDown2Linear_RVV
- : ScaleUVRowDown2Box_RVV);
- }
-#endif
// This code is not enabled. Only box filter is available at this time.
#if defined(HAS_SCALEUVROWDOWN2_SSSE3)
@@ -240,11 +231,6 @@
}
}
#endif
-#if defined(HAS_SCALEUVROWDOWN2BOX_RVV)
- if (TestCpuFlag(kCpuHasRVV)) {
- ScaleUVRowDown2 = ScaleUVRowDown2Box_RVV;
- }
-#endif
for (j = 0; j < dst_height; ++j) {
ScaleUVRowDown2(src_uv, src_stride, row, dst_width * 2);
@@ -324,12 +310,6 @@
}
}
#endif
-#if defined(HAS_SCALEUVROWDOWNEVEN_RVV)
- if (TestCpuFlag(kCpuHasRVV) && !filtering) {
- ScaleUVRowDownEven =
- (col_step == 4) ? ScaleUVRowDown4_RVV : ScaleUVRowDownEven_RVV;
- }
-#endif
if (filtering == kFilterLinear) {
src_stride = 0;
@@ -657,14 +637,14 @@
// This is an optimized version for scaling up a plane to 2 times of
// its original width, using linear interpolation.
// This is used to scale U and V planes of NV16 to NV24.
-static void ScaleUVLinearUp2(int src_width,
- int src_height,
- int dst_width,
- int dst_height,
- int src_stride,
- int dst_stride,
- const uint8_t* src_uv,
- uint8_t* dst_uv) {
+void ScaleUVLinearUp2(int src_width,
+ int src_height,
+ int dst_width,
+ int dst_height,
+ int src_stride,
+ int dst_stride,
+ const uint8_t* src_uv,
+ uint8_t* dst_uv) {
void (*ScaleRowUp)(const uint8_t* src_uv, uint8_t* dst_uv, int dst_width) =
ScaleUVRowUp2_Linear_Any_C;
int i;
@@ -692,12 +672,6 @@
}
#endif
-#ifdef HAS_SCALEUVROWUP2_LINEAR_RVV
- if (TestCpuFlag(kCpuHasRVV)) {
- ScaleRowUp = ScaleUVRowUp2_Linear_RVV;
- }
-#endif
-
if (dst_height == 1) {
ScaleRowUp(src_uv + ((src_height - 1) / 2) * (intptr_t)src_stride, dst_uv,
dst_width);
@@ -716,14 +690,14 @@
// This is an optimized version for scaling up a plane to 2 times of
// its original size, using bilinear interpolation.
// This is used to scale U and V planes of NV12 to NV24.
-static void ScaleUVBilinearUp2(int src_width,
- int src_height,
- int dst_width,
- int dst_height,
- int src_stride,
- int dst_stride,
- const uint8_t* src_ptr,
- uint8_t* dst_ptr) {
+void ScaleUVBilinearUp2(int src_width,
+ int src_height,
+ int dst_width,
+ int dst_height,
+ int src_stride,
+ int dst_stride,
+ const uint8_t* src_ptr,
+ uint8_t* dst_ptr) {
void (*Scale2RowUp)(const uint8_t* src_ptr, ptrdiff_t src_stride,
uint8_t* dst_ptr, ptrdiff_t dst_stride, int dst_width) =
ScaleUVRowUp2_Bilinear_Any_C;
@@ -751,12 +725,6 @@
}
#endif
-#ifdef HAS_SCALEUVROWUP2_BILINEAR_RVV
- if (TestCpuFlag(kCpuHasRVV)) {
- Scale2RowUp = ScaleUVRowUp2_Bilinear_RVV;
- }
-#endif
-
Scale2RowUp(src_ptr, 0, dst_ptr, 0, dst_width);
dst_ptr += dst_stride;
for (x = 0; x < src_height - 1; ++x) {
@@ -776,14 +744,14 @@
// This is an optimized version for scaling up a plane to 2 times of
// its original width, using linear interpolation.
// This is used to scale U and V planes of P210 to P410.
-static void ScaleUVLinearUp2_16(int src_width,
- int src_height,
- int dst_width,
- int dst_height,
- int src_stride,
- int dst_stride,
- const uint16_t* src_uv,
- uint16_t* dst_uv) {
+void ScaleUVLinearUp2_16(int src_width,
+ int src_height,
+ int dst_width,
+ int dst_height,
+ int src_stride,
+ int dst_stride,
+ const uint16_t* src_uv,
+ uint16_t* dst_uv) {
void (*ScaleRowUp)(const uint16_t* src_uv, uint16_t* dst_uv, int dst_width) =
ScaleUVRowUp2_Linear_16_Any_C;
int i;
@@ -829,14 +797,14 @@
// This is an optimized version for scaling up a plane to 2 times of
// its original size, using bilinear interpolation.
// This is used to scale U and V planes of P010 to P410.
-static void ScaleUVBilinearUp2_16(int src_width,
- int src_height,
- int dst_width,
- int dst_height,
- int src_stride,
- int dst_stride,
- const uint16_t* src_ptr,
- uint16_t* dst_ptr) {
+void ScaleUVBilinearUp2_16(int src_width,
+ int src_height,
+ int dst_width,
+ int dst_height,
+ int src_stride,
+ int dst_stride,
+ const uint16_t* src_ptr,
+ uint16_t* dst_ptr) {
void (*Scale2RowUp)(const uint16_t* src_ptr, ptrdiff_t src_stride,
uint16_t* dst_ptr, ptrdiff_t dst_stride, int dst_width) =
ScaleUVRowUp2_Bilinear_16_Any_C;
diff --git a/tools_libyuv/OWNERS b/tools_libyuv/OWNERS
deleted file mode 100644
index aae4fb6..0000000
--- a/tools_libyuv/OWNERS
+++ /dev/null
@@ -1,4 +0,0 @@
-mbonadei@chromium.org
-fbarchard@chromium.org
-pbos@chromium.org
-
diff --git a/tools_libyuv/autoroller/roll_deps.py b/tools_libyuv/autoroller/roll_deps.py
index d5c1089..2b57eb6 100755
--- a/tools_libyuv/autoroller/roll_deps.py
+++ b/tools_libyuv/autoroller/roll_deps.py
@@ -31,7 +31,6 @@
# Skip these dependencies (list without solution name prefix).
DONT_AUTOROLL_THESE = [
- 'third_party/fuchsia-gn-sdk',
'src/third_party/gflags/src',
'src/third_party/mockito/src',
]
diff --git a/tools_libyuv/msan/OWNERS b/tools_libyuv/msan/OWNERS
deleted file mode 100644
index 9b67a8f..0000000
--- a/tools_libyuv/msan/OWNERS
+++ /dev/null
@@ -1,3 +0,0 @@
-mbonadei@chromium.org
-fbarchard@chromium.org
-pbos@chromium.org
diff --git a/tools_libyuv/ubsan/OWNERS b/tools_libyuv/ubsan/OWNERS
deleted file mode 100644
index 9b67a8f..0000000
--- a/tools_libyuv/ubsan/OWNERS
+++ /dev/null
@@ -1,3 +0,0 @@
-mbonadei@chromium.org
-fbarchard@chromium.org
-pbos@chromium.org
diff --git a/unit_test/cpu_test.cc b/unit_test/cpu_test.cc
index 431343e..93867fa 100644
--- a/unit_test/cpu_test.cc
+++ b/unit_test/cpu_test.cc
@@ -137,9 +137,6 @@
#ifdef __riscv_vector
printf("__riscv_vector %d\n", __riscv_vector);
#endif
-#ifdef __riscv_v_intrinsic
- printf("__riscv_v_intrinsic %d\n", __riscv_v_intrinsic);
-#endif
#ifdef __APPLE__
printf("__APPLE__ %d\n", __APPLE__);
#endif
diff --git a/unit_test/planar_test.cc b/unit_test/planar_test.cc
index ec1d72e..ad97b87 100644
--- a/unit_test/planar_test.cc
+++ b/unit_test/planar_test.cc
@@ -30,9 +30,9 @@
#endif
#if defined(LIBYUV_BIT_EXACT)
-#define EXPECTED_UNATTENUATE_DIFF 0
+#define EXPECTED_ATTENUATE_DIFF 0
#else
-#define EXPECTED_UNATTENUATE_DIFF 2
+#define EXPECTED_ATTENUATE_DIFF 2
#endif
namespace libyuv {
@@ -57,17 +57,12 @@
orig_pixels[2 * 4 + 0] = 16u;
orig_pixels[2 * 4 + 1] = 64u;
orig_pixels[2 * 4 + 2] = 192u;
- orig_pixels[2 * 4 + 3] = 128u;
+ orig_pixels[2 * 4 + 3] = 255u;
orig_pixels[3 * 4 + 0] = 16u;
orig_pixels[3 * 4 + 1] = 64u;
orig_pixels[3 * 4 + 2] = 192u;
- orig_pixels[3 * 4 + 3] = 255u;
- orig_pixels[4 * 4 + 0] = 255u;
- orig_pixels[4 * 4 + 1] = 255u;
- orig_pixels[4 * 4 + 2] = 255u;
- orig_pixels[4 * 4 + 3] = 255u;
-
- ARGBUnattenuate(orig_pixels, 0, unatten_pixels, 0, 5, 1);
+ orig_pixels[3 * 4 + 3] = 128u;
+ ARGBUnattenuate(orig_pixels, 0, unatten_pixels, 0, 4, 1);
EXPECT_EQ(255u, unatten_pixels[0 * 4 + 0]);
EXPECT_EQ(255u, unatten_pixels[0 * 4 + 1]);
EXPECT_EQ(254u, unatten_pixels[0 * 4 + 2]);
@@ -76,55 +71,14 @@
EXPECT_EQ(0u, unatten_pixels[1 * 4 + 1]);
EXPECT_EQ(0u, unatten_pixels[1 * 4 + 2]);
EXPECT_EQ(0u, unatten_pixels[1 * 4 + 3]);
- EXPECT_EQ(32u, unatten_pixels[2 * 4 + 0]);
- EXPECT_EQ(128u, unatten_pixels[2 * 4 + 1]);
- EXPECT_EQ(255u, unatten_pixels[2 * 4 + 2]);
- EXPECT_EQ(128u, unatten_pixels[2 * 4 + 3]);
- EXPECT_EQ(16u, unatten_pixels[3 * 4 + 0]);
- EXPECT_EQ(64u, unatten_pixels[3 * 4 + 1]);
- EXPECT_EQ(192u, unatten_pixels[3 * 4 + 2]);
- EXPECT_EQ(255u, unatten_pixels[3 * 4 + 3]);
- EXPECT_EQ(255u, unatten_pixels[4 * 4 + 0]);
- EXPECT_EQ(255u, unatten_pixels[4 * 4 + 1]);
- EXPECT_EQ(255u, unatten_pixels[4 * 4 + 2]);
- EXPECT_EQ(255u, unatten_pixels[4 * 4 + 3]);
-
- ARGBAttenuate(orig_pixels, 0, atten_pixels, 0, 5, 1);
- EXPECT_EQ(100u, atten_pixels[0 * 4 + 0]);
- EXPECT_EQ(65u, atten_pixels[0 * 4 + 1]);
- EXPECT_EQ(64u, atten_pixels[0 * 4 + 2]);
- EXPECT_EQ(128u, atten_pixels[0 * 4 + 3]);
- EXPECT_EQ(0u, atten_pixels[1 * 4 + 0]);
- EXPECT_EQ(0u, atten_pixels[1 * 4 + 1]);
- EXPECT_EQ(0u, atten_pixels[1 * 4 + 2]);
- EXPECT_EQ(0u, atten_pixels[1 * 4 + 3]);
- EXPECT_EQ(8u, atten_pixels[2 * 4 + 0]);
- EXPECT_EQ(32u, atten_pixels[2 * 4 + 1]);
- EXPECT_EQ(96u, atten_pixels[2 * 4 + 2]);
- EXPECT_EQ(128u, atten_pixels[2 * 4 + 3]);
- EXPECT_EQ(16u, atten_pixels[3 * 4 + 0]);
- EXPECT_EQ(64u, atten_pixels[3 * 4 + 1]);
- EXPECT_EQ(192u, atten_pixels[3 * 4 + 2]);
- EXPECT_EQ(255u, atten_pixels[3 * 4 + 3]);
- EXPECT_EQ(255u, atten_pixels[4 * 4 + 0]);
- EXPECT_EQ(255u, atten_pixels[4 * 4 + 1]);
- EXPECT_EQ(255u, atten_pixels[4 * 4 + 2]);
- EXPECT_EQ(255u, atten_pixels[4 * 4 + 3]);
-
- // test 255
- for (int i = 0; i < 256; ++i) {
- orig_pixels[i * 4 + 0] = i;
- orig_pixels[i * 4 + 1] = 0;
- orig_pixels[i * 4 + 2] = 0;
- orig_pixels[i * 4 + 3] = 255;
- }
- ARGBAttenuate(orig_pixels, 0, atten_pixels, 0, 256, 1);
- for (int i = 0; i < 256; ++i) {
- EXPECT_EQ(orig_pixels[i * 4 + 0], atten_pixels[i * 4 + 0]);
- EXPECT_EQ(0, atten_pixels[i * 4 + 1]);
- EXPECT_EQ(0, atten_pixels[i * 4 + 2]);
- EXPECT_EQ(255, atten_pixels[i * 4 + 3]);
- }
+ EXPECT_EQ(16u, unatten_pixels[2 * 4 + 0]);
+ EXPECT_EQ(64u, unatten_pixels[2 * 4 + 1]);
+ EXPECT_EQ(192u, unatten_pixels[2 * 4 + 2]);
+ EXPECT_EQ(255u, unatten_pixels[2 * 4 + 3]);
+ EXPECT_EQ(32u, unatten_pixels[3 * 4 + 0]);
+ EXPECT_EQ(128u, unatten_pixels[3 * 4 + 1]);
+ EXPECT_EQ(255u, unatten_pixels[3 * 4 + 2]);
+ EXPECT_EQ(128u, unatten_pixels[3 * 4 + 3]);
for (int i = 0; i < 1280; ++i) {
orig_pixels[i * 4 + 0] = i;
@@ -138,10 +92,10 @@
ARGBAttenuate(unatten_pixels, 0, atten2_pixels, 0, 1280, 1);
}
for (int i = 0; i < 1280; ++i) {
- EXPECT_NEAR(atten_pixels[i * 4 + 0], atten2_pixels[i * 4 + 0], 1);
- EXPECT_NEAR(atten_pixels[i * 4 + 1], atten2_pixels[i * 4 + 1], 1);
- EXPECT_NEAR(atten_pixels[i * 4 + 2], atten2_pixels[i * 4 + 2], 1);
- EXPECT_NEAR(atten_pixels[i * 4 + 3], atten2_pixels[i * 4 + 3], 1);
+ EXPECT_NEAR(atten_pixels[i * 4 + 0], atten2_pixels[i * 4 + 0], 2);
+ EXPECT_NEAR(atten_pixels[i * 4 + 1], atten2_pixels[i * 4 + 1], 2);
+ EXPECT_NEAR(atten_pixels[i * 4 + 2], atten2_pixels[i * 4 + 2], 2);
+ EXPECT_NEAR(atten_pixels[i * 4 + 3], atten2_pixels[i * 4 + 3], 2);
}
// Make sure transparent, 50% and opaque are fully accurate.
EXPECT_EQ(0, atten_pixels[0 * 4 + 0]);
@@ -152,9 +106,9 @@
EXPECT_EQ(32, atten_pixels[128 * 4 + 1]);
EXPECT_EQ(21, atten_pixels[128 * 4 + 2]);
EXPECT_EQ(128, atten_pixels[128 * 4 + 3]);
- EXPECT_EQ(255, atten_pixels[255 * 4 + 0]);
- EXPECT_EQ(127, atten_pixels[255 * 4 + 1]);
- EXPECT_EQ(85, atten_pixels[255 * 4 + 2]);
+ EXPECT_NEAR(254, atten_pixels[255 * 4 + 0], EXPECTED_ATTENUATE_DIFF);
+ EXPECT_NEAR(127, atten_pixels[255 * 4 + 1], EXPECTED_ATTENUATE_DIFF);
+ EXPECT_NEAR(85, atten_pixels[255 * 4 + 2], EXPECTED_ATTENUATE_DIFF);
EXPECT_EQ(255, atten_pixels[255 * 4 + 3]);
free_aligned_buffer_page_end(atten2_pixels);
@@ -211,28 +165,28 @@
benchmark_iterations_, disable_cpu_flags_,
benchmark_cpu_info_, +1, 0);
- EXPECT_EQ(max_diff, 0);
+ EXPECT_LE(max_diff, EXPECTED_ATTENUATE_DIFF);
}
TEST_F(LibYUVPlanarTest, ARGBAttenuate_Unaligned) {
int max_diff =
TestAttenuateI(benchmark_width_, benchmark_height_, benchmark_iterations_,
disable_cpu_flags_, benchmark_cpu_info_, +1, 1);
- EXPECT_EQ(max_diff, 0);
+ EXPECT_LE(max_diff, EXPECTED_ATTENUATE_DIFF);
}
TEST_F(LibYUVPlanarTest, ARGBAttenuate_Invert) {
int max_diff =
TestAttenuateI(benchmark_width_, benchmark_height_, benchmark_iterations_,
disable_cpu_flags_, benchmark_cpu_info_, -1, 0);
- EXPECT_EQ(max_diff, 0);
+ EXPECT_LE(max_diff, EXPECTED_ATTENUATE_DIFF);
}
TEST_F(LibYUVPlanarTest, ARGBAttenuate_Opt) {
int max_diff =
TestAttenuateI(benchmark_width_, benchmark_height_, benchmark_iterations_,
disable_cpu_flags_, benchmark_cpu_info_, +1, 0);
- EXPECT_EQ(max_diff, 0);
+ EXPECT_LE(max_diff, EXPECTED_ATTENUATE_DIFF);
}
static int TestUnattenuateI(int width,
@@ -284,28 +238,28 @@
int max_diff = TestUnattenuateI(benchmark_width_ + 1, benchmark_height_,
benchmark_iterations_, disable_cpu_flags_,
benchmark_cpu_info_, +1, 0);
- EXPECT_LE(max_diff, EXPECTED_UNATTENUATE_DIFF);
+ EXPECT_LE(max_diff, EXPECTED_ATTENUATE_DIFF);
}
TEST_F(LibYUVPlanarTest, ARGBUnattenuate_Unaligned) {
int max_diff = TestUnattenuateI(benchmark_width_, benchmark_height_,
benchmark_iterations_, disable_cpu_flags_,
benchmark_cpu_info_, +1, 1);
- EXPECT_LE(max_diff, EXPECTED_UNATTENUATE_DIFF);
+ EXPECT_LE(max_diff, EXPECTED_ATTENUATE_DIFF);
}
TEST_F(LibYUVPlanarTest, ARGBUnattenuate_Invert) {
int max_diff = TestUnattenuateI(benchmark_width_, benchmark_height_,
benchmark_iterations_, disable_cpu_flags_,
benchmark_cpu_info_, -1, 0);
- EXPECT_LE(max_diff, EXPECTED_UNATTENUATE_DIFF);
+ EXPECT_LE(max_diff, EXPECTED_ATTENUATE_DIFF);
}
TEST_F(LibYUVPlanarTest, ARGBUnattenuate_Opt) {
int max_diff = TestUnattenuateI(benchmark_width_, benchmark_height_,
benchmark_iterations_, disable_cpu_flags_,
benchmark_cpu_info_, +1, 0);
- EXPECT_LE(max_diff, EXPECTED_UNATTENUATE_DIFF);
+ EXPECT_LE(max_diff, EXPECTED_ATTENUATE_DIFF);
}
TEST_F(LibYUVPlanarTest, TestARGBComputeCumulativeSum) {
@@ -2795,23 +2749,12 @@
MaskCpuFlags(disable_cpu_flags_);
ARGBExtractAlpha(src_pixels, benchmark_width_ * 4, dst_pixels_c,
benchmark_width_, benchmark_width_, benchmark_height_);
- double c_time = get_time();
- ARGBExtractAlpha(src_pixels, benchmark_width_ * 4, dst_pixels_c,
- benchmark_width_, benchmark_width_, benchmark_height_);
- c_time = (get_time() - c_time);
-
MaskCpuFlags(benchmark_cpu_info_);
- ARGBExtractAlpha(src_pixels, benchmark_width_ * 4, dst_pixels_opt,
- benchmark_width_, benchmark_width_, benchmark_height_);
- double opt_time = get_time();
+
for (int i = 0; i < benchmark_iterations_; ++i) {
ARGBExtractAlpha(src_pixels, benchmark_width_ * 4, dst_pixels_opt,
benchmark_width_, benchmark_width_, benchmark_height_);
}
- opt_time = (get_time() - opt_time) / benchmark_iterations_;
- // Report performance of C vs OPT
- printf("%8d us C - %8d us OPT\n", static_cast<int>(c_time * 1e6),
- static_cast<int>(opt_time * 1e6));
for (int i = 0; i < kPixels; ++i) {
EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]);
}
@@ -2834,24 +2777,12 @@
MaskCpuFlags(disable_cpu_flags_);
ARGBCopyYToAlpha(orig_pixels, benchmark_width_, dst_pixels_c,
benchmark_width_ * 4, benchmark_width_, benchmark_height_);
- double c_time = get_time();
- ARGBCopyYToAlpha(orig_pixels, benchmark_width_, dst_pixels_c,
- benchmark_width_ * 4, benchmark_width_, benchmark_height_);
- c_time = (get_time() - c_time);
-
MaskCpuFlags(benchmark_cpu_info_);
- ARGBCopyYToAlpha(orig_pixels, benchmark_width_, dst_pixels_opt,
- benchmark_width_ * 4, benchmark_width_, benchmark_height_);
- double opt_time = get_time();
+
for (int i = 0; i < benchmark_iterations_; ++i) {
ARGBCopyYToAlpha(orig_pixels, benchmark_width_, dst_pixels_opt,
benchmark_width_ * 4, benchmark_width_, benchmark_height_);
}
- opt_time = (get_time() - opt_time) / benchmark_iterations_;
-
- // Report performance of C vs OPT
- printf("%8d us C - %8d us OPT\n", static_cast<int>(c_time * 1e6),
- static_cast<int>(opt_time * 1e6));
for (int i = 0; i < kPixels * 4; ++i) {
EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]);
}
@@ -4537,83 +4468,4 @@
free_aligned_buffer_page_end(dst_vu);
}
-#if defined(ENABLE_ROW_TESTS) && !defined(LIBYUV_DISABLE_NEON) && \
- defined(__aarch64__)
-
-TEST_F(LibYUVPlanarTest, TestConvertFP16ToFP32) {
- int i, j;
- const int y_plane_size = benchmark_width_ * benchmark_height_;
-
- align_buffer_page_end(orig_f, y_plane_size * 4);
- align_buffer_page_end(orig_y, y_plane_size * 2);
- align_buffer_page_end(dst_opt, y_plane_size * 4);
- align_buffer_page_end(rec_opt, y_plane_size * 2);
-
- for (i = 0; i < y_plane_size; ++i) {
- ((float*)orig_f)[i] = (float)(i % 10000) * 3.14f;
- }
- memset(orig_y, 1, y_plane_size * 2);
- memset(dst_opt, 2, y_plane_size * 4);
- memset(rec_opt, 3, y_plane_size * 2);
-
- ConvertFP32ToFP16Row_NEON((const float*)orig_f, (uint16_t*)orig_y,
- y_plane_size);
-
- for (j = 0; j < benchmark_iterations_; j++) {
- ConvertFP16ToFP32Row_NEON((const uint16_t*)orig_y, (float*)dst_opt,
- y_plane_size);
- }
-
- ConvertFP32ToFP16Row_NEON((const float*)dst_opt, (uint16_t*)rec_opt,
- y_plane_size);
-
- for (i = 0; i < y_plane_size; ++i) {
- EXPECT_EQ(((const uint16_t*)orig_y)[i], ((const uint16_t*)rec_opt)[i]);
- }
-
- free_aligned_buffer_page_end(orig_f);
- free_aligned_buffer_page_end(orig_y);
- free_aligned_buffer_page_end(dst_opt);
- free_aligned_buffer_page_end(rec_opt);
-}
-
-TEST_F(LibYUVPlanarTest, TestConvertFP16ToFP32Column) {
- int i, j;
- const int y_plane_size = benchmark_width_ * benchmark_height_;
-
- align_buffer_page_end(orig_f, y_plane_size * 4);
- align_buffer_page_end(orig_y, y_plane_size * 2);
- align_buffer_page_end(dst_opt, y_plane_size * 4);
- align_buffer_page_end(rec_opt, y_plane_size * 2);
-
- for (i = 0; i < y_plane_size; ++i) {
- ((float*)orig_f)[i] = (float)(i % 10000) * 3.14f;
- }
- memset(orig_y, 1, y_plane_size * 2);
- memset(dst_opt, 2, y_plane_size * 4);
- memset(rec_opt, 3, y_plane_size * 2);
-
- ConvertFP32ToFP16Row_NEON((const float*)orig_f, (uint16_t*)orig_y,
- y_plane_size);
-
- for (j = 0; j < benchmark_iterations_; j++) {
- ConvertFP16ToFP32Column_NEON((const uint16_t*)orig_y, 1, (float*)dst_opt,
- y_plane_size);
- }
-
- ConvertFP32ToFP16Row_NEON((const float*)dst_opt, (uint16_t*)rec_opt,
- y_plane_size);
-
- for (i = 0; i < y_plane_size; ++i) {
- EXPECT_EQ(((const uint16_t*)orig_y)[i], ((const uint16_t*)rec_opt)[i]);
- }
-
- free_aligned_buffer_page_end(orig_f);
- free_aligned_buffer_page_end(orig_y);
- free_aligned_buffer_page_end(dst_opt);
- free_aligned_buffer_page_end(rec_opt);
-}
-
-#endif // defined(ENABLE_ROW_TESTS) && defined(__aarch64__)
-
} // namespace libyuv
diff --git a/unit_test/scale_test.cc b/unit_test/scale_test.cc
index c2232e6..a8c9526 100644
--- a/unit_test/scale_test.cc
+++ b/unit_test/scale_test.cc
@@ -1217,6 +1217,48 @@
}
#endif // HAS_SCALEROWDOWN2_SSSE3
+extern "C" void ScaleRowUp2_16_NEON(const uint16_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint16_t* dst,
+ int dst_width);
+extern "C" void ScaleRowUp2_16_C(const uint16_t* src_ptr,
+ ptrdiff_t src_stride,
+ uint16_t* dst,
+ int dst_width);
+
+TEST_F(LibYUVScaleTest, TestScaleRowUp2_16) {
+ SIMD_ALIGNED(uint16_t orig_pixels[640 * 2 + 1]); // 2 rows + 1 pixel overrun.
+ SIMD_ALIGNED(uint16_t dst_pixels_opt[1280]);
+ SIMD_ALIGNED(uint16_t dst_pixels_c[1280]);
+
+ memset(orig_pixels, 0, sizeof(orig_pixels));
+ memset(dst_pixels_opt, 1, sizeof(dst_pixels_opt));
+ memset(dst_pixels_c, 2, sizeof(dst_pixels_c));
+
+ for (int i = 0; i < 640 * 2 + 1; ++i) {
+ orig_pixels[i] = i;
+ }
+ ScaleRowUp2_16_C(&orig_pixels[0], 640, &dst_pixels_c[0], 1280);
+ for (int i = 0; i < benchmark_pixels_div1280_; ++i) {
+#if !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__)
+ int has_neon = TestCpuFlag(kCpuHasNEON);
+ if (has_neon) {
+ ScaleRowUp2_16_NEON(&orig_pixels[0], 640, &dst_pixels_opt[0], 1280);
+ } else {
+ ScaleRowUp2_16_C(&orig_pixels[0], 640, &dst_pixels_opt[0], 1280);
+ }
+#else
+ ScaleRowUp2_16_C(&orig_pixels[0], 640, &dst_pixels_opt[0], 1280);
+#endif
+ }
+
+ for (int i = 0; i < 1280; ++i) {
+ EXPECT_EQ(dst_pixels_c[i], dst_pixels_opt[i]);
+ }
+ EXPECT_EQ(dst_pixels_c[0], (0 * 9 + 1 * 3 + 640 * 3 + 641 * 1 + 8) / 16);
+ EXPECT_EQ(dst_pixels_c[1279], 800);
+}
+
extern "C" void ScaleRowDown2Box_16_NEON(const uint16_t* src_ptr,
ptrdiff_t src_stride,
uint16_t* dst,