| #ifndef LIBGAV1_SRC_DSP_DSP_H_ |
| #define LIBGAV1_SRC_DSP_DSP_H_ |
| |
| #include <cstddef> // ptrdiff_t |
| #include <cstdint> |
| #include <cstdlib> |
| |
| #include "src/dsp/common.h" |
| #include "src/dsp/constants.h" |
| #include "src/dsp/cpu.h" |
| |
| namespace libgav1 { |
| namespace dsp { |
| |
// When non-zero, the DSP_ENABLED_*() checks at the end of this header always
// evaluate to true. Defaults to 0 unless the build already defined it.
#ifndef LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS
#define LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS 0
#endif

// Temporary helper (undefined again below): set when compiling for x86 or x64
// with MSVC.
#if defined(_MSC_VER)
#if defined(_M_X64) || defined(_M_IX86)
#define LIBGAV1_X86_MSVC
#endif
#endif

// SSE4.1: an explicit setting from the build wins. Otherwise enable it when
// the compiler defines __SSE4_1__ or when targeting x86/x64 with MSVC.
#ifndef LIBGAV1_ENABLE_SSE4_1
#if defined(LIBGAV1_X86_MSVC) || defined(__SSE4_1__)
#define LIBGAV1_ENABLE_SSE4_1 1
#else
#define LIBGAV1_ENABLE_SSE4_1 0
#endif
#endif  // LIBGAV1_ENABLE_SSE4_1

#undef LIBGAV1_X86_MSVC

// NEON: an explicit setting from the build wins. Otherwise enable it for MSVC
// ARM/ARM64 targets, for aarch64, and whenever __ARM_NEON__ is defined.
#ifndef LIBGAV1_ENABLE_NEON
#if defined(_MSC_VER) && (defined(_M_ARM) || defined(_M_ARM64))
#define LIBGAV1_ENABLE_NEON 1
#elif defined(__ARM_NEON__) || defined(__aarch64__)
#define LIBGAV1_ENABLE_NEON 1
#else
#define LIBGAV1_ENABLE_NEON 0
#endif
#endif  // LIBGAV1_ENABLE_NEON
| |
// Intra predictor kinds. kNumIntraPredictors sizes the second dimension of
// IntraPredictorFuncs, so the enumerators are kept dense and zero-based.
enum IntraPredictor : uint8_t {
  kIntraPredictorDcFill = 0,
  kIntraPredictorDcTop = 1,
  kIntraPredictorDcLeft = 2,
  kIntraPredictorDc = 3,
  kIntraPredictorVertical = 4,
  kIntraPredictorHorizontal = 5,
  kIntraPredictorPaeth = 6,
  kIntraPredictorSmooth = 7,
  kIntraPredictorSmoothVertical = 8,
  kIntraPredictorSmoothHorizontal = 9,
  kNumIntraPredictors  // Count of the entries above; must stay last.
};
| |
// The 1D transform kinds. kNum1DTransforms sizes the second dimension of
// InverseTransformAddFuncs.
enum Transform1D : uint8_t {
  k1DTransformDct = 0,       // Discrete Cosine Transform.
  k1DTransformAdst = 1,      // Asymmetric Discrete Sine Transform.
  k1DTransformIdentity = 2,
  k1DTransformWht = 3,       // Walsh Hadamard Transform.
  kNum1DTransforms           // Count of the entries above; must stay last.
};
| |
// The 1D transform sizes. A given transform is not necessarily available at
// every size. kNum1DTransformSizes sizes the first dimension of
// InverseTransformAddFuncs.
enum TransformSize1D : uint8_t {
  k1DTransformSize4 = 0,
  k1DTransformSize8 = 1,
  k1DTransformSize16 = 2,
  k1DTransformSize32 = 3,
  k1DTransformSize64 = 4,
  kNum1DTransformSizes  // Count of the entries above; must stay last.
};
| |
// Maximum width of the loop filter. Depending on the strength thresholds,
// fewer pixels than this may actually be filtered. kNumLoopFilterSizes sizes
// the first dimension of LoopFilterFuncs.
enum LoopFilterSize : uint8_t {
  kLoopFilterSize4 = 0,
  kLoopFilterSize6 = 1,
  kLoopFilterSize8 = 2,
  kLoopFilterSize14 = 3,
  kNumLoopFilterSizes  // Count of the entries above; must stay last.
};
| |
| //------------------------------------------------------------------------------ |
| // ToString() |
| // |
| // These functions are meant to be used only in debug logging and within tests. |
| // They are defined inline to avoid including the strings in the release |
| // library when logging is disabled; unreferenced functions will not be added to |
| // any object file in that case. |
| |
| inline const char* ToString(const IntraPredictor predictor) { |
| switch (predictor) { |
| case kIntraPredictorDcFill: |
| return "kIntraPredictorDcFill"; |
| case kIntraPredictorDcTop: |
| return "kIntraPredictorDcTop"; |
| case kIntraPredictorDcLeft: |
| return "kIntraPredictorDcLeft"; |
| case kIntraPredictorDc: |
| return "kIntraPredictorDc"; |
| case kIntraPredictorVertical: |
| return "kIntraPredictorVertical"; |
| case kIntraPredictorHorizontal: |
| return "kIntraPredictorHorizontal"; |
| case kIntraPredictorPaeth: |
| return "kIntraPredictorPaeth"; |
| case kIntraPredictorSmooth: |
| return "kIntraPredictorSmooth"; |
| case kIntraPredictorSmoothVertical: |
| return "kIntraPredictorSmoothVertical"; |
| case kIntraPredictorSmoothHorizontal: |
| return "kIntraPredictorSmoothHorizontal"; |
| case kNumIntraPredictors: |
| return "kNumIntraPredictors"; |
| } |
| abort(); |
| } |
| |
| inline const char* ToString(const Transform1D transform) { |
| switch (transform) { |
| case k1DTransformDct: |
| return "k1DTransformDct"; |
| case k1DTransformAdst: |
| return "k1DTransformAdst"; |
| case k1DTransformIdentity: |
| return "k1DTransformIdentity"; |
| case k1DTransformWht: |
| return "k1DTransformWht"; |
| case kNum1DTransforms: |
| return "kNum1DTransforms"; |
| } |
| abort(); |
| } |
| |
| inline const char* ToString(const TransformSize1D transform_size) { |
| switch (transform_size) { |
| case k1DTransformSize4: |
| return "k1DTransformSize4"; |
| case k1DTransformSize8: |
| return "k1DTransformSize8"; |
| case k1DTransformSize16: |
| return "k1DTransformSize16"; |
| case k1DTransformSize32: |
| return "k1DTransformSize32"; |
| case k1DTransformSize64: |
| return "k1DTransformSize64"; |
| case kNum1DTransformSizes: |
| return "kNum1DTransformSizes"; |
| } |
| abort(); |
| } |
| |
| inline const char* ToString(const LoopFilterSize filter_size) { |
| switch (filter_size) { |
| case kLoopFilterSize4: |
| return "kLoopFilterSize4"; |
| case kLoopFilterSize6: |
| return "kLoopFilterSize6"; |
| case kLoopFilterSize8: |
| return "kLoopFilterSize8"; |
| case kLoopFilterSize14: |
| return "kLoopFilterSize14"; |
| case kNumLoopFilterSizes: |
| return "kNumLoopFilterSizes"; |
| } |
| abort(); |
| } |
| |
| inline const char* ToString(const LoopFilterType filter_type) { |
| switch (filter_type) { |
| case kLoopFilterTypeVertical: |
| return "kLoopFilterTypeVertical"; |
| case kLoopFilterTypeHorizontal: |
| return "kLoopFilterTypeHorizontal"; |
| case kNumLoopFilterTypes: |
| return "kNumLoopFilterTypes"; |
| } |
| abort(); |
| } |
| |
| //------------------------------------------------------------------------------ |
| // Intra predictors. Section 7.11.2. |
| // These require access to one or both of the top row and left column. Some may |
| // access the top-left (top[-1]), top-right (top[width+N]), bottom-left |
| // (left[height+N]) or upper-left (left[-1]). |
| |
| // Intra predictor function signature. Sections 7.11.2.2, 7.11.2.4 (#10,#11), |
| // 7.11.2.5, 7.11.2.6. |
| // |dst| is an unaligned pointer to the output block. Pixel size is determined |
| // by bitdepth with |stride| given in bytes. |top| is an unaligned pointer to |
| // the row above |dst|. |left| is an aligned vector of the column to the left |
| // of |dst|. top-left and bottom-left may be accessed. |
| using IntraPredictorFunc = void (*)(void* dst, ptrdiff_t stride, |
| const void* top, const void* left); |
| using IntraPredictorFuncs = |
| IntraPredictorFunc[kNumTransformSizes][kNumIntraPredictors]; |
| |
// Zone 1 directional intra predictor (0 < angle < 90). Section 7.11.2.4 (#7).
//   |dst|             - unaligned pointer to the output block. The pixel size
//                       is determined by the bitdepth.
//   |stride|          - stride of |dst|, in bytes.
//   |top|             - unaligned pointer to the row above |dst|. The
//                       top-right pixels are accessed.
//   |width|, |height| - dimensions of the block.
//   |xstep|           - scaled starting index into |top|, taken from
//                       kDirectionalIntraPredictorDerivative.
//   |upsampled_top|   - whether |top| has been upsampled as described in
//                       '7.11.2.11. Intra edge upsample process'. This can
//                       occur when |width| + |height| <= 16.
using DirectionalIntraPredictorZone1Func = void (*)(
    void* dst, ptrdiff_t stride, const void* top, int width, int height,
    int xstep, bool upsampled_top);
| |
// Zone 2 directional intra predictor (90 < angle < 180). Section 7.11.2.4
// (#8).
//   |dst|              - unaligned pointer to the output block. The pixel
//                        size is determined by the bitdepth.
//   |stride|           - stride of |dst|, in bytes.
//   |top|              - unaligned pointer to the row above |dst|.
//   |left|             - aligned vector holding the column to the left of
//                        |dst|.
//   |width|, |height|  - dimensions of the block.
//   |xstep|, |ystep|   - scaled starting indices into |top| and |left|,
//                        respectively, taken from
//                        kDirectionalIntraPredictorDerivative.
//   |upsampled_top|,
//   |upsampled_left|   - whether |top| / |left| have been upsampled as
//                        described in '7.11.2.11. Intra edge upsample
//                        process'. This can occur when
//                        |width| + |height| <= 16.
// The top-left and upper-left pixels are accessed, reaching back as far as
// index [-2] in |top| / |left| when the matching upsampled flag is set.
using DirectionalIntraPredictorZone2Func =
    void (*)(void* dst, ptrdiff_t stride, const void* top, const void* left,
             int width, int height, int xstep, int ystep, bool upsampled_top,
             bool upsampled_left);
| |
// Zone 3 directional intra predictor (180 < angle < 270). Section 7.11.2.4
// (#9).
//   |dst|             - unaligned pointer to the output block. The pixel size
//                       is determined by the bitdepth.
//   |stride|          - stride of |dst|, in bytes.
//   |left|            - aligned vector holding the column to the left of
//                       |dst|. The bottom-left pixels are accessed.
//   |width|, |height| - dimensions of the block.
//   |ystep|           - scaled starting index into |left|, taken from
//                       kDirectionalIntraPredictorDerivative.
//   |upsampled_left|  - whether |left| has been upsampled as described in
//                       '7.11.2.11. Intra edge upsample process'. This can
//                       occur when |width| + |height| <= 16.
using DirectionalIntraPredictorZone3Func = void (*)(
    void* dst, ptrdiff_t stride, const void* left, int width, int height,
    int ystep, bool upsampled_left);
| |
| // Filter intra predictor function signature. Section 7.11.2.3. |
| // |dst| is an unaligned pointer to the output block. Pixel size is determined |
| // by bitdepth with |stride| given in bytes. |top| is an unaligned pointer to |
| // the row above |dst|. |left| is an aligned vector of the column to the left |
| // of |dst|. |width| and |height| are the size of the block in pixels. |
| using FilterIntraPredictorFunc = void (*)(void* dst, ptrdiff_t stride, |
| const void* top, const void* left, |
| FilterIntraPredictor pred, int width, |
| int height); |
| |
| //------------------------------------------------------------------------------ |
| // Chroma from Luma (Cfl) prediction. Section 7.11.5. |
| |
| // Chroma from Luma (Cfl) intra prediction function signature. |dst| is an |
| // unaligned pointer to the output block. Pixel size is determined by bitdepth |
| // with |stride| given in bytes. |luma| contains subsampled luma pixels with 3 |
| // fractional bits of precision. |alpha| is the signed Cfl alpha value for the |
| // appropriate plane. |
| using CflIntraPredictorFunc = void (*)( |
| void* dst, ptrdiff_t stride, |
| const int16_t luma[kCflLumaBufferStride][kCflLumaBufferStride], int alpha); |
| using CflIntraPredictorFuncs = CflIntraPredictorFunc[kNumTransformSizes]; |
| |
| // Chroma from Luma (Cfl) subsampler function signature. |luma| is an unaligned |
| // pointer to the output block. |src| is an unaligned pointer to the input |
| // block. Pixel size is determined by bitdepth with |stride| given in bytes. |
| using CflSubsamplerFunc = |
| void (*)(int16_t luma[kCflLumaBufferStride][kCflLumaBufferStride], |
| int max_luma_width, int max_luma_height, const void* source, |
| ptrdiff_t stride); |
| using CflSubsamplerFuncs = |
| CflSubsamplerFunc[kNumTransformSizes][kNumSubsamplingTypes]; |
| |
| //------------------------------------------------------------------------------ |
| // Intra Edge Filtering and Upsampling. Step 4 in section 7.11.2.4. |
| |
// Intra edge filter. Section 7.11.2.12 in the spec.
//   |buffer|   - the top_row or left_column to filter. The pixel size is
//                determined by the bitdepth. The element at index -1 of
//                |top_row| and |left_column| typically needs filtering too,
//                so callers can simply pass top_row[-1] or left_column[-1]
//                as |buffer|.
//   |size|     - number of pixels to filter.
//   |strength| - filter strength.
using IntraEdgeFilterFunc = void (*)(void* buffer,
                                     int size,
                                     int strength);
| |
// Intra edge upsampler. Section 7.11.2.11 in the spec.
//   |buffer| - the top_row or left_column to upsample. The pixel size is
//              determined by the bitdepth. Indices -1 and -2 of |buffer|
//              must be accessible to the function.
//   |size|   - number of pixels to upsample; valid values are 4, 8, 12, 16.
using IntraEdgeUpsamplerFunc = void (*)(void* buffer,
                                        int size);
| |
| //------------------------------------------------------------------------------ |
| // Inverse transform add function signature. |
| // |
| // Steps 2 and 3 of section 7.12.3 (contains the implementation of section |
| // 7.13.3). |
| // Apply the inverse transforms and add the residual to the destination frame |
| // for the transform type and block size |tx_size| starting at position |
| // |start_x| and |start_y|. |dst_frame| is a pointer to an Array2d. |is_row| |
| // signals the direction of the transform loop. |non_zero_coeff_count| is the |
| // number of non zero coefficients in the block. |
| using InverseTransformAddFunc = void (*)(TransformType tx_type, |
| TransformSize tx_size, |
| void* src_buffer, int start_x, |
| int start_y, void* dst_frame, |
| bool is_row, int non_zero_coeff_count); |
| using InverseTransformAddFuncs = |
| InverseTransformAddFunc[kNum1DTransformSizes][kNum1DTransforms]; |
| |
| //------------------------------------------------------------------------------ |
| // Post processing. |
| |
| // Loop filter function signature. Section 7.14. |
| // |dst| is an unaligned pointer to the output block. Pixel size is determined |
| // by bitdepth with |stride| given in bytes. |
| using LoopFilterFunc = void (*)(void* dst, ptrdiff_t stride, int outer_thresh, |
| int inner_thresh, int hev_thresh); |
| using LoopFilterFuncs = |
| LoopFilterFunc[kNumLoopFilterSizes][kNumLoopFilterTypes]; |
| |
// Cdef direction search. Section 7.15.2.
//   |src|       - pointer to the source block. The pixel size is determined
//                 by the bitdepth.
//   |stride|    - stride of |src|, in bytes.
//   |direction|,
//   |variance|  - output parameters; neither may be nullptr.
using CdefDirectionFunc = void (*)(
    const void* src, ptrdiff_t stride, int* direction, int* variance);
| |
// Cdef filtering. Section 7.15.3.
//   |source|, |source_stride| - input block and its stride in bytes.
//   |rows4x4|, |columns4x4|   - frame size in units of 4x4 pixels.
//   |curr_x|, |curr_y|        - current position, in units of pixels.
//   |subsampling_x|,
//   |subsampling_y|           - subsampling factors of the current plane.
//   |primary_strength|, |secondary_strength|, |damping|
//                             - Cdef filtering parameters.
//   |direction|               - filtering direction.
//   |dest|, |dest_stride|     - output buffer and its stride in bytes.
using CdefFilteringFunc = void (*)(
    const void* source, ptrdiff_t source_stride, int rows4x4, int columns4x4,
    int curr_x, int curr_y, int subsampling_x, int subsampling_y,
    int primary_strength, int secondary_strength, int damping, int direction,
    void* dest, ptrdiff_t dest_stride);
| |
| // Loop restoration function signature. Sections 7.16, 7.17. |
| // |source| is the input frame buffer, which is deblocked and cdef filtered. |
| // |dest| is the output. |
| // |restoration_info| contains loop restoration information, such as filter |
| // type, strength. |source| and |dest| share the same stride given in bytes. |
| // |buffer| contains buffers required for self guided filter and wiener filter. |
| // They must be initialized before calling. |
| using LoopRestorationFunc = void (*)( |
| const void* source, void* dest, const RestorationUnitInfo& restoration_info, |
| ptrdiff_t source_stride, ptrdiff_t dest_stride, int width, int height, |
| RestorationBuffer* buffer); |
| |
| // Index 0 is Wiener Filter. |
| // Index 1 is Self Guided Restoration Filter. |
| // This can be accessed as LoopRestorationType - 2. |
| using LoopRestorationFuncs = LoopRestorationFunc[2]; |
| |
// Convolve. Section 7.11.3.4.
// Applies a horizontal filter followed by a vertical filter.
//   |reference|, |reference_stride|
//       Input block (reference frame buffer) and the corresponding frame
//       stride.
//   |vertical_filter_index|, |horizontal_filter_index|
//       Indices used to retrieve, from the filter lookup table
//       'kSubPixelFilters', the type of filter to apply in the vertical /
//       horizontal direction.
//   |inter_round_bits_vertical|
//       Rounding precision used after vertical filtering (7 or 11).
//       kInterRoundBitsHorizontal & kInterRoundBitsHorizontal12bpp can be
//       used after the horizontal pass.
//   |subpixel_x|, |subpixel_y|
//       Starting positions, in units of 1/1024.
//   |step_x|, |step_y|
//       Step sizes, in units of 1/1024 of a pixel.
//   |width|, |height|
//       Width and height of the block to be filtered.
//   |prediction|, |pred_stride|
//       Output block (output frame buffer) and its stride. NOTE(review): the
//       unit of |pred_stride| is not documented -- confirm.
// NOTE(review): earlier documentation also described |ref_last_x| and
// |ref_last_y| (last pixel of the reference frame in x/y direction); this
// signature has no such parameters.
using ConvolveFunc = void (*)(
    const void* reference, ptrdiff_t reference_stride,
    int vertical_filter_index, int horizontal_filter_index,
    int inter_round_bits_vertical, int subpixel_x, int subpixel_y, int step_x,
    int step_y, int width, int height, void* prediction,
    ptrdiff_t pred_stride);

// Convolve function table. Every entry is a convolve function specialized for
// one setting:
//   ConvolveFunc[is_intra_block_copy][is_compound][has_vertical_filter]
//               [has_horizontal_filter].
// When is_compound is false the prediction is clipped to pixel.
// When is_compound is true the range of the prediction is:
//    8bpp: [0, 15471]
//   10bpp: [0, 61983]
//   12bpp: [0, 62007]
// See:
// https://docs.google.com/document/d/1f5YlLk02ETNxpilvsmjBtWgDXjtZYO33hjl6bAdvmxc
using ConvolveFuncs = ConvolveFunc[2][2][2][2];

// Convolve function table for the scaling version.
// Index 0: single predictor. Index 1: compound predictor.
using ConvolveScaleFuncs = ConvolveFunc[2];
| |
// Average blending.
// Averages two predictors to produce the output; see the bottom of Section
// 7.11.3.1 (COMPOUND_AVERAGE).
// The input predictors are passed as 16-bit values (const uint16_t*). The
// output is written through |dest| and stays within the range of a Pixel
// value.
// NOTE(review): the original documentation calls the inputs int16_t and the
// output uint8_t, which does not match the pointer types in the signature --
// confirm.
//   |prediction_0|, |prediction_1| - first and second input blocks.
//   |prediction_stride_0|,
//   |prediction_stride_1|          - the corresponding strides.
//   |width|, |height|              - identical for both input blocks. The
//                                    valid range of block size is
//                                    [8x8, 128x128] for the luma plane.
//   |dest|, |dest_stride|          - output buffer and its stride.
using AverageBlendFunc = void (*)(
    const uint16_t* prediction_0, ptrdiff_t prediction_stride_0,
    const uint16_t* prediction_1, ptrdiff_t prediction_stride_1, int width,
    int height, void* dest, ptrdiff_t dest_stride);
| |
// Distance weighted blending.
// Takes two blocks (inter frame predictions) and produces a weighted output.
// The weights are generated in Section 7.11.3.15; the weighted blending itself
// is at the bottom of Section 7.11.3.1 (COMPOUND_DISTANCE).
//   |prediction_0|, |prediction_1| - first and second input blocks.
//   |prediction_stride_0|,
//   |prediction_stride_1|          - the corresponding strides.
//   |weight_0|                     - weight of the first block, derived from
//                                    the relative distance of the first
//                                    reference frame and the current frame.
//   |weight_1|                     - weight of the second block, derived from
//                                    the relative distance of the second
//                                    reference frame and the current frame.
//   |width|, |height|              - identical for both input blocks. The
//                                    valid range of block size is
//                                    [8x8, 128x128] for the luma plane.
//   |dest|, |dest_stride|          - output buffer and its stride.
using DistanceWeightedBlendFunc = void (*)(
    const uint16_t* prediction_0, ptrdiff_t prediction_stride_0,
    const uint16_t* prediction_1, ptrdiff_t prediction_stride_1,
    uint8_t weight_0, uint8_t weight_1, int width, int height, void* dest,
    ptrdiff_t dest_stride);
| |
// Mask blending. Section 7.11.3.14.
// Takes two blocks and stores a blended result into the output block |dest|.
// The blending is a weighted average process controlled by the values of the
// mask.
//   |prediction_0|        - first input block. When the prediction mode is
//                           inter_intra (or wedge_inter_intra) this is the
//                           inter frame prediction.
//   |prediction_stride_0| - its stride, in units of uint16_t.
//   |prediction_1|        - second input block. When the prediction mode is
//                           inter_intra (or wedge_inter_intra) this is the
//                           intra frame prediction.
//   |prediction_stride_1| - its stride, in units of uint16_t.
//   |mask|, |mask_stride| - integer array whose values give the blending
//                           weights, and the corresponding stride.
//   |width|, |height|     - identical for both input blocks. For inter_intra
//                           (or wedge_inter_intra) the valid range of block
//                           size is [8x8, 32x32]. Otherwise (including
//                           difference weighted prediction and compound
//                           average prediction) it is [8x8, 128x128]. With
//                           subsampling, the corresponding width and height
//                           are halved for chroma planes.
//   |dest|, |dest_stride| - output block and the corresponding stride.
//
// The subsampling factors (subsampling_x, subsampling_y) and is_inter_intra
// are not arguments of this signature; they select the entry of
// MaskBlendFuncs. is_inter_intra is true when one of the prediction blocks
// comes from intra prediction of the current frame; otherwise both prediction
// blocks are inter frame predictions.
// NOTE(review): earlier documentation also listed |is_wedge_inter_intra|
// (whether the mask is for the wedge prediction); it is neither an argument
// nor a table index here.
using MaskBlendFunc =
    void (*)(const uint16_t* prediction_0, ptrdiff_t prediction_stride_0,
             const uint16_t* prediction_1, ptrdiff_t prediction_stride_1,
             const uint8_t* mask, ptrdiff_t mask_stride, int width, int height,
             void* dest, ptrdiff_t dest_stride);

// Mask blending function table. Every entry is a function specialized for
// one setting:
//   MaskBlendFunc[subsampling_x + subsampling_y][is_inter_intra].
using MaskBlendFuncs = MaskBlendFunc[3][2];
| |
| // Obmc (overlapped block motion compensation) blending function signature. |
| // Section 7.11.3.10. |
| // This function takes two blocks and produces a blended output stored into the |
| // first input block. The blending is a weighted average process, controlled by |
| // values of the mask. |
| // Obmc is not a compound mode. It is different from other compound blending, |
| // in terms of precision. The current block is computed using convolution with |
| // clipping to the range of pixel values. Its above and left blocks are also |
| // clipped. Therefore obmc blending process doesn't need to clip the output. |
| // |prediction| is the first input block, which will be overwritten. |
| // |prediction_stride| is the stride, given in bytes. |
| // |width|, |height| are the same for both input blocks. |
| // |obmc_prediction| is the second input block. |
| // |obmc_prediction_stride| is its stride, given in bytes. |
| using ObmcBlendFunc = void (*)(void* prediction, ptrdiff_t prediction_stride, |
| int width, int height, |
| const void* obmc_prediction, |
| ptrdiff_t obmc_prediction_stride); |
| using ObmcBlendFuncs = ObmcBlendFunc[kNumObmcDirections]; |
| |
// Warp. Section 7.11.3.5.
// Applies warp filtering to each 8x8 block inside the current coding block.
// The filtering process is similar to 2d convolve filtering: the horizontal
// filter is applied first, followed by the vertical filter. The function has
// to calculate the corresponding pixel positions before and after warping.
//   |source|
//       Input reference frame buffer.
//   |source_stride|, |source_width|, |source_height|
//       The corresponding frame stride, width and height. |source_stride| is
//       given in bytes.
//   |warp_params|
//       The matrix of warp motion, with warp_params[i] = m<i>:
//              [x'     (m2 m3 m0   [x
//          z .  y'  =   m4 m5 m1 *  y
//               1]      m6 m7  1)   1]
//   |subsampling_x|, |subsampling_y|
//       Plane subsampling factors of the current frame.
//   |inter_round_bits_vertical|
//       Rounding precision used after vertical filtering (7 or 11).
//       kInterRoundBitsHorizontal & kInterRoundBitsHorizontal12bpp can be
//       used for the horizontal pass.
//   |block_start_x|, |block_start_y|
//       Starting position of the current coding block.
//   |block_width|, |block_height|
//       Width and height of the current coding block; both are at least 8.
//   |alpha|, |beta|, |gamma|, |delta|
//       Valid warp parameters. See the comments in the definition of struct
//       GlobalMotion for the range of their values.
//   |dest|, |dest_stride|
//       Output buffer holding the predictor, and its stride in units of
//       16-bit elements. NOTE(review): the original documentation gives the
//       element type as int16_t, whereas the signature uses uint16_t.
//
// NOTE: WarpFunc assumes the source frame has left and right borders that
// extend the frame boundary pixels. The left and right borders must be at
// least 13 pixels wide. In addition, Warp_NEON() may read up to 14 bytes after
// a row in the |source| buffer. Therefore, there must be at least one extra
// padding byte after the right border of the last row in the source buffer.
using WarpFunc = void (*)(
    const void* source, ptrdiff_t source_stride, int source_width,
    int source_height, const int* warp_params, int subsampling_x,
    int subsampling_y, int inter_round_bits_vertical, int block_start_x,
    int block_start_y, int block_width, int block_height, int16_t alpha,
    int16_t beta, int16_t gamma, int16_t delta, uint16_t* dest,
    ptrdiff_t dest_stride);
| |
| // Film grain synthesis function signature. Section 7.18.3. |
| // This function generates film grain noise and blends the noise with the |
| // decoded frame. |
| // |source_plane_y|, |source_plane_u|, and |source_plane_v| are the plane |
| // buffers of the decoded frame. They are blended with the film grain noise and |
| // written to |dest_plane_y|, |dest_plane_u|, and |dest_plane_v| as final |
| // output for display. |source_plane_p| and |dest_plane_p| (where p is y, u, or |
| // v) may point to the same buffer, in which case the film grain noise is added |
| // in place. |
| // |film_grain_params| are parameters read from frame header. |
| // |is_monochrome| is true indicates only Y plane needs to be processed. |
| // |color_matrix_is_identity| is true if the matrix_coefficients field in the |
| // sequence header's color config is is MC_IDENTITY. |
| // |width| is the upscaled width of the frame. |
| // |height| is the frame height. |
| // |subsampling_x| and |subsampling_y| are subsamplings for UV planes, not used |
| // if |is_monochrome| is true. |
| // Returns true on success, or false on failure (e.g., out of memory). |
| using FilmGrainSynthesisFunc = bool (*)( |
| const void* source_plane_y, ptrdiff_t source_stride_y, |
| const void* source_plane_u, ptrdiff_t source_stride_u, |
| const void* source_plane_v, ptrdiff_t source_stride_v, |
| const FilmGrainParams& film_grain_params, bool is_monochrome, |
| bool color_matrix_is_identity, int width, int height, int subsampling_x, |
| int subsampling_y, void* dest_plane_y, ptrdiff_t dest_stride_y, |
| void* dest_plane_u, ptrdiff_t dest_stride_u, void* dest_plane_v, |
| ptrdiff_t dest_stride_v); |
| //------------------------------------------------------------------------------ |
| |
| struct Dsp { |
| IntraPredictorFuncs intra_predictors; |
| DirectionalIntraPredictorZone1Func directional_intra_predictor_zone1; |
| DirectionalIntraPredictorZone2Func directional_intra_predictor_zone2; |
| DirectionalIntraPredictorZone3Func directional_intra_predictor_zone3; |
| FilterIntraPredictorFunc filter_intra_predictor; |
| CflIntraPredictorFuncs cfl_intra_predictors; |
| CflSubsamplerFuncs cfl_subsamplers; |
| IntraEdgeFilterFunc intra_edge_filter; |
| IntraEdgeUpsamplerFunc intra_edge_upsampler; |
| InverseTransformAddFuncs inverse_transforms; |
| LoopFilterFuncs loop_filters; |
| CdefDirectionFunc cdef_direction; |
| CdefFilteringFunc cdef_filter; |
| LoopRestorationFuncs loop_restorations; |
| ConvolveFuncs convolve; |
| ConvolveScaleFuncs convolve_scale; |
| AverageBlendFunc average_blend; |
| DistanceWeightedBlendFunc distance_weighted_blend; |
| MaskBlendFuncs mask_blend; |
| ObmcBlendFuncs obmc_blend; |
| WarpFunc warp; |
| FilmGrainSynthesisFunc film_grain_synthesis; |
| }; |
| |
// Sets up the function pointers according to the build configuration and the
// runtime environment. Call it once before the tables are first used.
// Thread-safe.
void DspInit();
| |
| // Returns the appropriate Dsp table for |bitdepth| or nullptr if one doesn't |
| // exist. |
| const Dsp* GetDspTable(int bitdepth); |
| |
| } // namespace dsp |
| |
| namespace dsp_internal { |
| |
// DSP_ENABLED_<bitdepth>BPP_SSE4_1(func) is true if a more highly optimized
// version of |func| is not defined for the associated bitdepth, or if |func|
// is forcibly enabled with LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS. The define that
// is checked for |func| is LIBGAV1_Dsp<bitdepth>bpp_|func|, which comes from
// the header file associated with the module.
// |func| is one of:
//   - FunctionName, e.g., SelfGuidedFilter.
//   - [sub-table-index1][...-indexN] e.g.,
//     TransformSize4x4_IntraPredictorDc. The indices correspond to enum values
//     used as lookups with leading 'k' removed.
//
// There is no DSP_ENABLED_8BPP_NEON(func): NEON support is the only extension
// available for ARM and it is always required, so such a check would always
// be true and can be omitted.
#define DSP_ENABLED_8BPP_SSE4_1(func)  \
  (LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS || \
   LIBGAV1_DSP_SSE4_1 == LIBGAV1_Dsp8bpp_##func)
#define DSP_ENABLED_10BPP_SSE4_1(func) \
  (LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS || \
   LIBGAV1_DSP_SSE4_1 == LIBGAV1_Dsp10bpp_##func)
| |
| // Returns the appropriate Dsp table for |bitdepth| or nullptr if one doesn't |
| // exist. This version is meant for use by test or dsp/*Init() functions only. |
| dsp::Dsp* GetWritableDspTable(int bitdepth); |
| |
| } // namespace dsp_internal |
| } // namespace libgav1 |
| |
| #endif // LIBGAV1_SRC_DSP_DSP_H_ |