// src/dsp/arm/common_neon_test.cc - platform/external/libgav1 - Git at Google

 // Copyright 2021 The libgav1 Authors
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 //      http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.

 #include "src/dsp/arm/common_neon.h"

 #include "gtest/gtest.h"
 #include "src/utils/cpu.h"

 #if LIBGAV1_ENABLE_NEON
 #include <cstdint>
 #include <cstring>

 #include "tests/block_utils.h"

 namespace libgav1 {
 namespace dsp {
 namespace {

 // Dimensions of the pixel blocks owned by the TransposeTest fixture. They size
 // |src_block_| and |expected_transpose_| and bound the loops that fill them.
 constexpr int kMaxBlockWidth = 16;
 constexpr int kMaxBlockHeight = 16;

 // Fixture shared by the transpose tests.
 //
 // |src_block_| is filled so that the pixel at (row, column) holds
 // row * 16 + column; written in hexadecimal, the high digit is the row and the
 // low digit is the column. |expected_transpose_| holds the same values with
 // the roles of row and column exchanged, i.e. the transposed block.
 template <typename Pixel>
 class TransposeTest : public testing::Test {
  public:
   TransposeTest() {
     for (int row = 0; row < kMaxBlockHeight; ++row) {
       const int row_base = row * 16;
       for (int col = 0; col < kMaxBlockWidth; ++col) {
         src_block_[row][col] = row_base + col;
         expected_transpose_[row][col] = col * 16 + row;
       }
     }
   }

   // The fixture is neither copyable nor copy-assignable.
   TransposeTest(const TransposeTest&) = delete;
   TransposeTest& operator=(const TransposeTest&) = delete;
   ~TransposeTest() override = default;

  protected:
   // Input block read by the tests.
   Pixel src_block_[kMaxBlockHeight][kMaxBlockWidth];
   // Reference result: the transpose of |src_block_|.
   Pixel expected_transpose_[kMaxBlockHeight][kMaxBlockWidth];
 };

 // Fixture instantiated with 8-bit pixels for the uint8_t based tests.
 using TransposeTestLowBitdepth = TransposeTest<uint8_t>;

 // Checks the 8-bit Transpose4x4() on the top-left 4x4 corner of |src_block_|.
 // The two vectors passed in are built from source rows {0, 1} and {2, 3}
 // (presumably Load4<1> fills the upper four bytes -- confirm in
 // common_neon.h). Afterwards the first vector is stored as output rows 0
 // (StoreLo4) and 2 (StoreHi4), the second as output rows 1 and 3.
 TEST_F(TransposeTestLowBitdepth, Transpose4x4Test) {
   const auto row0 = Load4(src_block_[0]);
   const auto row2 = Load4(src_block_[2]);
   uint8x8_t rows_01 = Load4<1>(src_block_[1], row0);
   uint8x8_t rows_23 = Load4<1>(src_block_[3], row2);
   Transpose4x4(&rows_01, &rows_23);

   uint8_t transposed[4][4];
   StoreLo4(transposed[0], rows_01);
   StoreHi4(transposed[2], rows_01);
   StoreLo4(transposed[1], rows_23);
   StoreHi4(transposed[3], rows_23);

   // Arguments after the two blocks are presumably width, height, the two
   // strides and a padding-check flag -- confirm in tests/block_utils.h.
   EXPECT_TRUE(test_utils::CompareBlocks(expected_transpose_[0], transposed[0],
                                         4, 4, kMaxBlockWidth, 4, false));
 }

 // Checks the 8-bit Transpose8x4(). Vector i is built from source rows i and
 // i + 4 (four pixels each), and after the call each vector is stored as one
 // 8-pixel output row that must match the matching row of
 // |expected_transpose_|.
 TEST_F(TransposeTestLowBitdepth, Transpose8x4Test) {
   uint8x8_t rows[4];
   for (int i = 0; i < 4; ++i) {
     rows[i] = Load4<1>(src_block_[i + 4], Load4(src_block_[i]));
   }
   Transpose8x4(&rows[0], &rows[1], &rows[2], &rows[3]);

   uint8_t transposed[4][8];
   for (int i = 0; i < 4; ++i) {
     vst1_u8(transposed[i], rows[i]);
   }
   EXPECT_TRUE(test_utils::CompareBlocks(expected_transpose_[0], transposed[0],
                                         8, 4, kMaxBlockWidth, 8, false));
 }

 // Checks the 8-bit Transpose8x8(): loads the first eight pixels of source rows
 // 0-7, transposes them in place and compares the stored result with the
 // top-left 8x8 corner of |expected_transpose_|.
 TEST_F(TransposeTestLowBitdepth, Transpose8x8Test) {
   uint8x8_t rows[8];
   for (int row = 0; row < 8; ++row) {
     rows[row] = vld1_u8(src_block_[row]);
   }
   Transpose8x8(rows);

   uint8_t transposed[8][8];
   for (int row = 0; row < 8; ++row) {
     vst1_u8(transposed[row], rows[row]);
   }
   EXPECT_TRUE(test_utils::CompareBlocks(expected_transpose_[0], transposed[0],
                                         8, 8, kMaxBlockWidth, 8, false));
 }

 // Checks the 8-bit Transpose8x16(). Vector i combines the first eight pixels
 // of source row i (low half) with those of source row i + 8 (high half). The
 // eight 16-pixel vectors stored after the call must equal the first eight rows
 // of |expected_transpose_|.
 TEST_F(TransposeTestLowBitdepth, Transpose8x16Test) {
   uint8x16_t vectors[8];
   for (int i = 0; i < 8; ++i) {
     const uint8x8_t low_half = vld1_u8(src_block_[i]);
     const uint8x8_t high_half = vld1_u8(src_block_[i + 8]);
     vectors[i] = vcombine_u8(low_half, high_half);
   }
   Transpose8x16(vectors);

   uint8_t transposed[8][16];
   for (int i = 0; i < 8; ++i) {
     vst1q_u8(transposed[i], vectors[i]);
   }
   EXPECT_TRUE(test_utils::CompareBlocks(expected_transpose_[0], transposed[0],
                                         16, 8, kMaxBlockWidth, 16, false));
 }

 // Fixture instantiated with 16-bit pixels for the uint16_t based tests.
 using TransposeTestHighBitdepth = TransposeTest<uint16_t>;

 // Checks the 16-bit Transpose4x4(): loads the first four pixels of source rows
 // 0-3, transposes them in place and compares the stored result with the
 // top-left 4x4 corner of |expected_transpose_|.
 TEST_F(TransposeTestHighBitdepth, Transpose4x4Test) {
   uint16x4_t rows[4];
   for (int row = 0; row < 4; ++row) {
     rows[row] = vld1_u16(src_block_[row]);
   }
   Transpose4x4(rows);

   uint16_t transposed[4][4];
   vst1_u16(transposed[0], rows[0]);
   vst1_u16(transposed[1], rows[1]);
   vst1_u16(transposed[2], rows[2]);
   vst1_u16(transposed[3], rows[3]);
   EXPECT_TRUE(test_utils::CompareBlocks(expected_transpose_[0], transposed[0],
                                         4, 4, kMaxBlockWidth, 4, false));
 }

 // Checks the 16-bit Transpose4x8(): loads the first eight pixels of source
 // rows 0-3, transposes them in place and compares the stored vectors with the
 // expected layout built below.
 TEST_F(TransposeTestHighBitdepth, Transpose4x8Test) {
   uint16x8_t input_4x8[4];
   for (int i = 0; i < 4; ++i) {
     input_4x8[i] = vld1q_u16(src_block_[i]);
   }
   Transpose4x8(input_4x8);
   uint16_t output_4x8[4][8];
   for (int i = 0; i < 4; ++i) {
     vst1q_u16(output_4x8[i], input_4x8[i]);
   }
   // Build the expected layout in place: expected row i is the first four
   // entries of transposed row i followed by the first four entries of
   // transposed row i + 4. In other words, each 4x4 half of the 4x8 input is
   // expected to be transposed on its own. Source rows (4-7) and destination
   // rows (0-3) never overlap, so memcpy is safe. gtest creates a fresh
   // fixture per test, so this mutation is not seen by other tests.
   for (int i = 0; i < 4; ++i) {
     std::memcpy(&expected_transpose_[i][4], &expected_transpose_[i + 4][0],
                 4 * sizeof(expected_transpose_[0][0]));
   }
   EXPECT_TRUE(test_utils::CompareBlocks(expected_transpose_[0], output_4x8[0],
                                         8, 4, kMaxBlockWidth, 8, false));
 }

 // Checks LoopFilterTranspose4x8() against a hand-written table. Source pixels
 // hold 0xRC (R = row, C = column), so the table reads: output row k is column
 // 3 - k of source rows 0-3 followed by column 4 + k of source rows 0-3.
 TEST_F(TransposeTestHighBitdepth, LoopFilterTranspose4x8Test) {
   // Expected vectors, labelled as in the original test:
   //   [0]: 03 13 23 33 04 14 24 34  p0q0
   //   [1]: 02 12 22 32 05 15 25 35  p1q1
   //   [2]: 01 11 21 31 06 16 26 36  p2q2
   //   [3]: 00 10 20 30 07 17 27 37  p3q3
   static constexpr uint16_t expected_output[4][8] = {
       {0x03, 0x13, 0x23, 0x33, 0x04, 0x14, 0x24, 0x34},
       {0x02, 0x12, 0x22, 0x32, 0x05, 0x15, 0x25, 0x35},
       {0x01, 0x11, 0x21, 0x31, 0x06, 0x16, 0x26, 0x36},
       {0x00, 0x10, 0x20, 0x30, 0x07, 0x17, 0x27, 0x37},
   };

   uint16x8_t rows[4];
   for (int row = 0; row < 4; ++row) {
     rows[row] = vld1q_u16(src_block_[row]);
   }
   LoopFilterTranspose4x8(rows);

   uint16_t actual[4][8];
   for (int row = 0; row < 4; ++row) {
     vst1q_u16(actual[row], rows[row]);
   }
   EXPECT_TRUE(test_utils::CompareBlocks(expected_output[0], actual[0], 8, 4, 8,
                                         8, false));
 }

 // Checks the unsigned 16-bit Transpose8x8(): loads the first eight pixels of
 // source rows 0-7, transposes them in place and compares the stored result
 // with the top-left 8x8 corner of |expected_transpose_|.
 TEST_F(TransposeTestHighBitdepth, Transpose8x8Test) {
   uint16x8_t rows[8];
   for (int row = 0; row < 8; ++row) {
     rows[row] = vld1q_u16(src_block_[row]);
   }
   Transpose8x8(rows);

   uint16_t transposed[8][8];
   for (int row = 0; row < 8; ++row) {
     vst1q_u16(transposed[row], rows[row]);
   }
   EXPECT_TRUE(test_utils::CompareBlocks(expected_transpose_[0], transposed[0],
                                         8, 8, kMaxBlockWidth, 8, false));
 }

 // Checks the signed 16-bit Transpose8x8() overload. The unsigned fixture data
 // is reinterpreted as int16x8_t on the way in and back to uint16x8_t on the
 // way out, so the same |expected_transpose_| reference applies.
 TEST_F(TransposeTestHighBitdepth, Transpose8x8SignedTest) {
   int16x8_t rows[8];
   for (int row = 0; row < 8; ++row) {
     const uint16x8_t unsigned_row = vld1q_u16(src_block_[row]);
     rows[row] = vreinterpretq_s16_u16(unsigned_row);
   }
   Transpose8x8(rows);

   uint16_t transposed[8][8];
   for (int row = 0; row < 8; ++row) {
     const uint16x8_t unsigned_row = vreinterpretq_u16_s16(rows[row]);
     vst1q_u16(transposed[row], unsigned_row);
   }
   EXPECT_TRUE(test_utils::CompareBlocks(expected_transpose_[0], transposed[0],
                                         8, 8, kMaxBlockWidth, 8, false));
 }

 }  // namespace
 }  // namespace dsp
 }  // namespace libgav1

 #else  // !LIBGAV1_ENABLE_NEON

 // Compiled only when LIBGAV1_ENABLE_NEON is false. Reports itself as skipped
 // and tells the reader how to enable the real tests.
 TEST(CommonDspTest, NEON) {
   GTEST_SKIP()
       << "Build this module for Arm with NEON enabled to enable the tests.";
 }

 #endif  // LIBGAV1_ENABLE_NEON
	// Copyright 2021 The libgav1 Authors
	//
	// Licensed under the Apache License, Version 2.0 (the "License");
	// you may not use this file except in compliance with the License.
	// You may obtain a copy of the License at
	//
	// http://www.apache.org/licenses/LICENSE-2.0
	//
	// Unless required by applicable law or agreed to in writing, software
	// distributed under the License is distributed on an "AS IS" BASIS,
	// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	// See the License for the specific language governing permissions and
	// limitations under the License.

	#include "src/dsp/arm/common_neon.h"

	#include "gtest/gtest.h"
	#include "src/utils/cpu.h"

	#if LIBGAV1_ENABLE_NEON
	#include <cstdint>

	#include "tests/block_utils.h"

	namespace libgav1 {
	namespace dsp {
	namespace {

	// Dimensions of the pixel blocks owned by the TransposeTest fixture. They size
	// |src_block_| and |expected_transpose_| and bound the loops that fill them.
	constexpr int kMaxBlockWidth = 16;
	constexpr int kMaxBlockHeight = 16;

	// Fixture shared by the transpose tests.
	//
	// |src_block_| is filled so that the pixel at (row, column) holds
	// row * 16 + column; written in hexadecimal, the high digit is the row and the
	// low digit is the column. |expected_transpose_| holds the same values with
	// the roles of row and column exchanged, i.e. the transposed block.
	template <typename Pixel>
	class TransposeTest : public testing::Test {
	 public:
	  TransposeTest() {
	    for (int row = 0; row < kMaxBlockHeight; ++row) {
	      const int row_base = row * 16;
	      for (int col = 0; col < kMaxBlockWidth; ++col) {
	        src_block_[row][col] = row_base + col;
	        expected_transpose_[row][col] = col * 16 + row;
	      }
	    }
	  }

	  // The fixture is neither copyable nor copy-assignable.
	  TransposeTest(const TransposeTest&) = delete;
	  TransposeTest& operator=(const TransposeTest&) = delete;
	  ~TransposeTest() override = default;

	 protected:
	  // Input block read by the tests.
	  Pixel src_block_[kMaxBlockHeight][kMaxBlockWidth];
	  // Reference result: the transpose of |src_block_|.
	  Pixel expected_transpose_[kMaxBlockHeight][kMaxBlockWidth];
	};

	// Fixture instantiated with 8-bit pixels for the uint8_t based tests.
	using TransposeTestLowBitdepth = TransposeTest<uint8_t>;

	// Checks the 8-bit Transpose4x4() on the top-left 4x4 corner of |src_block_|.
	// The two vectors passed in are built from source rows {0, 1} and {2, 3}
	// (presumably Load4<1> fills the upper four bytes -- confirm in
	// common_neon.h). Afterwards the first vector is stored as output rows 0
	// (StoreLo4) and 2 (StoreHi4), the second as output rows 1 and 3.
	TEST_F(TransposeTestLowBitdepth, Transpose4x4Test) {
	  const auto row0 = Load4(src_block_[0]);
	  const auto row2 = Load4(src_block_[2]);
	  uint8x8_t rows_01 = Load4<1>(src_block_[1], row0);
	  uint8x8_t rows_23 = Load4<1>(src_block_[3], row2);
	  Transpose4x4(&rows_01, &rows_23);

	  uint8_t transposed[4][4];
	  StoreLo4(transposed[0], rows_01);
	  StoreHi4(transposed[2], rows_01);
	  StoreLo4(transposed[1], rows_23);
	  StoreHi4(transposed[3], rows_23);

	  // Arguments after the two blocks are presumably width, height, the two
	  // strides and a padding-check flag -- confirm in tests/block_utils.h.
	  EXPECT_TRUE(test_utils::CompareBlocks(expected_transpose_[0], transposed[0],
	                                        4, 4, kMaxBlockWidth, 4, false));
	}

	// Checks the 8-bit Transpose8x4(). Vector i is built from source rows i and
	// i + 4 (four pixels each), and after the call each vector is stored as one
	// 8-pixel output row that must match the matching row of
	// |expected_transpose_|.
	TEST_F(TransposeTestLowBitdepth, Transpose8x4Test) {
	  uint8x8_t rows[4];
	  for (int i = 0; i < 4; ++i) {
	    rows[i] = Load4<1>(src_block_[i + 4], Load4(src_block_[i]));
	  }
	  Transpose8x4(&rows[0], &rows[1], &rows[2], &rows[3]);

	  uint8_t transposed[4][8];
	  for (int i = 0; i < 4; ++i) {
	    vst1_u8(transposed[i], rows[i]);
	  }
	  EXPECT_TRUE(test_utils::CompareBlocks(expected_transpose_[0], transposed[0],
	                                        8, 4, kMaxBlockWidth, 8, false));
	}

	// Checks the 8-bit Transpose8x8(): loads the first eight pixels of source rows
	// 0-7, transposes them in place and compares the stored result with the
	// top-left 8x8 corner of |expected_transpose_|.
	TEST_F(TransposeTestLowBitdepth, Transpose8x8Test) {
	  uint8x8_t rows[8];
	  for (int row = 0; row < 8; ++row) {
	    rows[row] = vld1_u8(src_block_[row]);
	  }
	  Transpose8x8(rows);

	  uint8_t transposed[8][8];
	  for (int row = 0; row < 8; ++row) {
	    vst1_u8(transposed[row], rows[row]);
	  }
	  EXPECT_TRUE(test_utils::CompareBlocks(expected_transpose_[0], transposed[0],
	                                        8, 8, kMaxBlockWidth, 8, false));
	}

	// Checks the 8-bit Transpose8x16(). Vector i combines the first eight pixels
	// of source row i (low half) with those of source row i + 8 (high half). The
	// eight 16-pixel vectors stored after the call must equal the first eight rows
	// of |expected_transpose_|.
	TEST_F(TransposeTestLowBitdepth, Transpose8x16Test) {
	  uint8x16_t vectors[8];
	  for (int i = 0; i < 8; ++i) {
	    const uint8x8_t low_half = vld1_u8(src_block_[i]);
	    const uint8x8_t high_half = vld1_u8(src_block_[i + 8]);
	    vectors[i] = vcombine_u8(low_half, high_half);
	  }
	  Transpose8x16(vectors);

	  uint8_t transposed[8][16];
	  for (int i = 0; i < 8; ++i) {
	    vst1q_u8(transposed[i], vectors[i]);
	  }
	  EXPECT_TRUE(test_utils::CompareBlocks(expected_transpose_[0], transposed[0],
	                                        16, 8, kMaxBlockWidth, 16, false));
	}

	// Fixture instantiated with 16-bit pixels for the uint16_t based tests.
	using TransposeTestHighBitdepth = TransposeTest<uint16_t>;

	// Checks the 16-bit Transpose4x4(): loads the first four pixels of source rows
	// 0-3, transposes them in place and compares the stored result with the
	// top-left 4x4 corner of |expected_transpose_|.
	TEST_F(TransposeTestHighBitdepth, Transpose4x4Test) {
	  uint16x4_t rows[4];
	  for (int row = 0; row < 4; ++row) {
	    rows[row] = vld1_u16(src_block_[row]);
	  }
	  Transpose4x4(rows);

	  uint16_t transposed[4][4];
	  vst1_u16(transposed[0], rows[0]);
	  vst1_u16(transposed[1], rows[1]);
	  vst1_u16(transposed[2], rows[2]);
	  vst1_u16(transposed[3], rows[3]);
	  EXPECT_TRUE(test_utils::CompareBlocks(expected_transpose_[0], transposed[0],
	                                        4, 4, kMaxBlockWidth, 4, false));
	}

	// Checks the 16-bit Transpose4x8(): loads the first eight pixels of source
	// rows 0-3, transposes them in place and compares the stored vectors with the
	// expected layout built below.
	TEST_F(TransposeTestHighBitdepth, Transpose4x8Test) {
	  uint16x8_t input_4x8[4];
	  for (int i = 0; i < 4; ++i) {
	    input_4x8[i] = vld1q_u16(src_block_[i]);
	  }
	  Transpose4x8(input_4x8);
	  uint16_t output_4x8[4][8];
	  for (int i = 0; i < 4; ++i) {
	    vst1q_u16(output_4x8[i], input_4x8[i]);
	  }
	  // Build the expected layout in place: expected row i is the first four
	  // entries of transposed row i followed by the first four entries of
	  // transposed row i + 4. In other words, each 4x4 half of the 4x8 input is
	  // expected to be transposed on its own. Source rows (4-7) and destination
	  // rows (0-3) never overlap, so memcpy is safe. gtest creates a fresh
	  // fixture per test, so this mutation is not seen by other tests.
	  for (int i = 0; i < 4; ++i) {
	    std::memcpy(&expected_transpose_[i][4], &expected_transpose_[i + 4][0],
	                4 * sizeof(expected_transpose_[0][0]));
	  }
	  EXPECT_TRUE(test_utils::CompareBlocks(expected_transpose_[0], output_4x8[0],
	                                        8, 4, kMaxBlockWidth, 8, false));
	}

	// Checks LoopFilterTranspose4x8() against a hand-written table. Source pixels
	// hold 0xRC (R = row, C = column), so the table reads: output row k is column
	// 3 - k of source rows 0-3 followed by column 4 + k of source rows 0-3.
	TEST_F(TransposeTestHighBitdepth, LoopFilterTranspose4x8Test) {
	  // Expected vectors, labelled as in the original test:
	  //   [0]: 03 13 23 33 04 14 24 34  p0q0
	  //   [1]: 02 12 22 32 05 15 25 35  p1q1
	  //   [2]: 01 11 21 31 06 16 26 36  p2q2
	  //   [3]: 00 10 20 30 07 17 27 37  p3q3
	  static constexpr uint16_t expected_output[4][8] = {
	      {0x03, 0x13, 0x23, 0x33, 0x04, 0x14, 0x24, 0x34},
	      {0x02, 0x12, 0x22, 0x32, 0x05, 0x15, 0x25, 0x35},
	      {0x01, 0x11, 0x21, 0x31, 0x06, 0x16, 0x26, 0x36},
	      {0x00, 0x10, 0x20, 0x30, 0x07, 0x17, 0x27, 0x37},
	  };

	  uint16x8_t rows[4];
	  for (int row = 0; row < 4; ++row) {
	    rows[row] = vld1q_u16(src_block_[row]);
	  }
	  LoopFilterTranspose4x8(rows);

	  uint16_t actual[4][8];
	  for (int row = 0; row < 4; ++row) {
	    vst1q_u16(actual[row], rows[row]);
	  }
	  EXPECT_TRUE(test_utils::CompareBlocks(expected_output[0], actual[0], 8, 4, 8,
	                                        8, false));
	}

	// Checks the unsigned 16-bit Transpose8x8(): loads the first eight pixels of
	// source rows 0-7, transposes them in place and compares the stored result
	// with the top-left 8x8 corner of |expected_transpose_|.
	TEST_F(TransposeTestHighBitdepth, Transpose8x8Test) {
	  uint16x8_t rows[8];
	  for (int row = 0; row < 8; ++row) {
	    rows[row] = vld1q_u16(src_block_[row]);
	  }
	  Transpose8x8(rows);

	  uint16_t transposed[8][8];
	  for (int row = 0; row < 8; ++row) {
	    vst1q_u16(transposed[row], rows[row]);
	  }
	  EXPECT_TRUE(test_utils::CompareBlocks(expected_transpose_[0], transposed[0],
	                                        8, 8, kMaxBlockWidth, 8, false));
	}

	// Checks the signed 16-bit Transpose8x8() overload. The unsigned fixture data
	// is reinterpreted as int16x8_t on the way in and back to uint16x8_t on the
	// way out, so the same |expected_transpose_| reference applies.
	TEST_F(TransposeTestHighBitdepth, Transpose8x8SignedTest) {
	  int16x8_t rows[8];
	  for (int row = 0; row < 8; ++row) {
	    const uint16x8_t unsigned_row = vld1q_u16(src_block_[row]);
	    rows[row] = vreinterpretq_s16_u16(unsigned_row);
	  }
	  Transpose8x8(rows);

	  uint16_t transposed[8][8];
	  for (int row = 0; row < 8; ++row) {
	    const uint16x8_t unsigned_row = vreinterpretq_u16_s16(rows[row]);
	    vst1q_u16(transposed[row], unsigned_row);
	  }
	  EXPECT_TRUE(test_utils::CompareBlocks(expected_transpose_[0], transposed[0],
	                                        8, 8, kMaxBlockWidth, 8, false));
	}

	} // namespace
	} // namespace dsp
	} // namespace libgav1

	#else // !LIBGAV1_ENABLE_NEON

	// Compiled only when LIBGAV1_ENABLE_NEON is false. Reports itself as skipped
	// and tells the reader how to enable the real tests.
	TEST(CommonDspTest, NEON) {
	GTEST_SKIP()
	<< "Build this module for Arm with NEON enabled to enable the tests.";
	}

	#endif // LIBGAV1_ENABLE_NEON