| /****************************************************************************** |
| * |
| * Copyright (C) 2015 The Android Open Source Project |
| * |
| * Licensed under the Apache License, Version 2.0 (the "License"); |
| * you may not use this file except in compliance with the License. |
| * You may obtain a copy of the License at: |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| * |
| ***************************************************************************** |
| * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore |
| */ |
| /** |
| ******************************************************************************* |
| * @file |
| * icv_sad.c |
| * |
| * @brief |
| * This file contains the functions to compute SAD |
| * |
| * @author |
| * Ittiam |
| * |
| * @par List of Functions: |
| * icv_sad_8x4_ssse3() |
| * |
| * @remarks |
| * None |
| * |
| ******************************************************************************* |
| */ |
| /*****************************************************************************/ |
| /* File Includes */ |
| /*****************************************************************************/ |
| /* System include files */ |
| #include <stdio.h> |
| #include <stdint.h> |
| #include <string.h> |
| #include <stdlib.h> |
| #include <assert.h> |
| #include <immintrin.h> |
| |
| /* User include files */ |
| #include "icv_datatypes.h" |
| #include "icv_macros.h" |
| #include "icv_platform_macros.h" |
| #include "icv.h" |
| |
| /** |
| ******************************************************************************* |
| * |
| * @brief |
| * Compute 8x4 SAD |
| * |
| * @par Description |
| * Compute 8x4 sum of absolute differences between source and reference block |
| * |
| * @param[in] pu1_src |
| * Source buffer |
| * |
| * @param[in] pu1_ref |
| * Reference buffer |
| * |
| * @param[in] src_strd |
| * Source stride |
| * |
| * @param[in] ref_strd |
| * Reference stride |
| * |
| * @param[in] wd |
| * Assumed to be 8 |
| * |
| * @param[in] ht |
| * Assumed to be 4 |
| |
| * @returns |
| * SAD |
| * |
| * @remarks |
| * |
| ******************************************************************************* |
| */ |
| WORD32 icv_sad_8x4_ssse3(UWORD8 *pu1_src, |
| UWORD8 *pu1_ref, |
| WORD32 src_strd, |
| WORD32 ref_strd, |
| WORD32 wd, |
| WORD32 ht) |
| { |
| WORD32 sad; |
| __m128 src_r0, src_r1; |
| __m128 ref_r0, ref_r1; |
| __m128i res_r0, res_r1; |
| |
| UNUSED(wd); |
| UNUSED(ht); |
| ASSERT(wd == 8); |
| ASSERT(ht == 4); |
| |
| /* Load source */ |
| src_r0 = (__m128)_mm_loadl_epi64((__m128i *) (pu1_src)); |
| pu1_src += src_strd; |
| |
| src_r1 = (__m128)_mm_loadl_epi64((__m128i *) (pu1_src)); |
| pu1_src += src_strd; |
| |
| src_r0 = _mm_loadh_pi (src_r0, (__m64 *) (pu1_src)); |
| pu1_src += src_strd; |
| |
| src_r1 = _mm_loadh_pi (src_r1, (__m64 *) (pu1_src)); |
| pu1_src += src_strd; |
| |
| |
| /* Load reference */ |
| ref_r0 = (__m128)_mm_loadl_epi64((__m128i *) (pu1_ref)); |
| pu1_ref += ref_strd; |
| |
| ref_r1 = (__m128)_mm_loadl_epi64((__m128i *) (pu1_ref)); |
| pu1_ref += ref_strd; |
| |
| ref_r0 = _mm_loadh_pi (ref_r0, (__m64 *) (pu1_ref)); |
| pu1_ref += ref_strd; |
| |
| ref_r1 = _mm_loadh_pi (ref_r1, (__m64 *) (pu1_ref)); |
| pu1_ref += ref_strd; |
| |
| /* Compute SAD for each row */ |
| res_r0 = _mm_sad_epu8((__m128i)src_r0, (__m128i)ref_r0); |
| res_r1 = _mm_sad_epu8((__m128i)src_r1, (__m128i)ref_r1); |
| |
| /* Accumulate SAD */ |
| res_r0 = _mm_add_epi64(res_r0, res_r1); |
| res_r0 = _mm_add_epi64(res_r0, _mm_srli_si128(res_r0, 8)); |
| |
| sad = _mm_cvtsi128_si32(res_r0); |
| |
| return sad; |
| } |