// blob: 079e7f88792eba5b508dcf61195d2e807a65cd05 [file] [log] [blame]
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_VIDEO_CODING_QM_SELECT_H_
#define WEBRTC_MODULES_VIDEO_CODING_QM_SELECT_H_
#include "webrtc/common_types.h"
#include "webrtc/typedefs.h"
/******************************************************/
/* Quality Modes: Resolution and Robustness settings */
/******************************************************/
namespace webrtc {
struct VideoContentMetrics;
// Resolution scale settings: the codec frame size and frame rate, the
// spatial (width/height) and temporal factors, and two flags marking whether
// a spatial and/or temporal resolution change is signalled.
struct VCMResolutionScale {
  // Starts at 640x480, 30.0 frame rate, unity factors and no change flagged.
  VCMResolutionScale()
      : codec_width(640),
        codec_height(480),
        frame_rate(30.0f),
        spatial_width_fact(1.0f),
        spatial_height_fact(1.0f),
        temporal_fact(1.0f),
        change_resolution_spatial(false),
        change_resolution_temporal(false) {}

  // Codec frame size.
  uint16_t codec_width;
  uint16_t codec_height;

  // Codec frame rate.
  float frame_rate;

  // Spatial factors for the width and the height (1.0 by default).
  float spatial_width_fact;
  float spatial_height_fact;

  // Temporal factor (1.0 by default).
  float temporal_fact;

  // Flags for a spatial / temporal resolution change (both false by default).
  bool change_resolution_spatial;
  bool change_resolution_temporal;
};
// Frame-size classes, listed from smallest to largest frame.
// kNumImageTypes is the number of classes and must stay last.
enum ImageType {
  kQCIF = 0,  // 176x144
  kHCIF,      // 264x216 = half(~3/4x3/4) CIF.
  kQVGA,      // 320x240 = quarter VGA.
  kCIF,       // 352x288
  kHVGA,      // 480x360 = half(~3/4x3/4) VGA.
  kVGA,       // 640x480
  kQFULLHD,   // 960x540 = quarter FULLHD, and half(~3/4x3/4) WHD.
  kWHD,       // 1280x720
  kFULLHD,    // 1920x1080
  kNumImageTypes
};
// Number of pixels (width * height) of each ImageType, in enum order.
const uint32_t kSizeOfImageType[kNumImageTypes] = {
    176 * 144,    // kQCIF    = 25344
    264 * 216,    // kHCIF    = 57024
    320 * 240,    // kQVGA    = 76800
    352 * 288,    // kCIF     = 101376
    480 * 360,    // kHVGA    = 172800
    640 * 480,    // kVGA     = 307200
    960 * 540,    // kQFULLHD = 518400
    1280 * 720,   // kWHD     = 921600
    1920 * 1080   // kFULLHD  = 2073600
};
// Frame rate level classes, from low to high.
enum FrameRateLevelClass {
  kFrameRateLow = 0,
  kFrameRateMiddle1,
  kFrameRateMiddle2,
  kFrameRateHigh
};
// Level class of a content feature. kDefault is the value a VCMContFeature
// holds after construction and after Reset().
enum ContentLevelClass {
  kLow = 0,
  kHigh,
  kDefault
};
// A content feature: a float value together with its level class.
struct VCMContFeature {
  // Starts with a zero value and the kDefault level.
  VCMContFeature()
      : value(0.0f),
        level(kDefault) {}

  // Restores the freshly-constructed state (value 0.0, level kDefault).
  // Implemented via the constructor so the defaults are stated only once.
  void Reset() { *this = VCMContFeature(); }

  float value;
  ContentLevelClass level;
};
// Direction of a resolution change: going up or going down.
enum UpDownAction {
  kUpResolution = 0,
  kDownResolution
};
// Spatial down-sampling modes. kNumModesSpatial is the number of modes and
// must stay last.
enum SpatialAction {
  kNoChangeSpatial = 0,
  kOneHalfSpatialUniform,     // 3/4 x 3/4: 9/16 ~1/2 pixel reduction.
  kOneQuarterSpatialUniform,  // 1/2 x 1/2: 1/4 pixel reduction.
  kNumModesSpatial
};
// Temporal (frame rate) down-sampling modes. kNumModesTemporal is the number
// of modes and must stay last.
enum TemporalAction {
  kNoChangeTemporal = 0,
  kTwoThirdsTemporal,  // 2/3 frame rate reduction
  kOneHalfTemporal,    // 1/2 frame rate reduction
  kNumModesTemporal
};
// A resolution action: one spatial mode paired with one temporal mode.
struct ResolutionAction {
  // Starts as "no change" in both dimensions.
  ResolutionAction()
      : spatial(kNoChangeSpatial),
        temporal(kNoChangeTemporal) {}

  SpatialAction spatial;
  TemporalAction temporal;
};
// Down-sampling factors for spatial (width and height), and temporal.
// Each table has one entry per mode of the matching action enum
// (SpatialAction for the spatial tables, TemporalAction for the temporal one).
const float kFactorWidthSpatial[kNumModesSpatial] = {
    1.0f,         // kNoChangeSpatial
    4.0f / 3.0f,  // kOneHalfSpatialUniform
    2.0f          // kOneQuarterSpatialUniform
};
const float kFactorHeightSpatial[kNumModesSpatial] = {
    1.0f,         // kNoChangeSpatial
    4.0f / 3.0f,  // kOneHalfSpatialUniform
    2.0f          // kOneQuarterSpatialUniform
};
const float kFactorTemporal[kNumModesTemporal] = {
    1.0f,  // kNoChangeTemporal
    1.5f,  // kTwoThirdsTemporal
    2.0f   // kOneHalfTemporal
};
// Encoder state, classified by how the encoder tracks its target rate.
enum EncoderState {
  // Low rate mis-match, stable buffer levels.
  kStableEncoding = 0,
  // Significant over-shooting of target rate, buffer under-flow, etc.
  kStressedEncoding,
  // Significant under-shooting of target rate.
  kEasyEncoding
};
// QmMethod class: main class for resolution and robustness settings.
// Abstract base (Reset() is pure virtual) of VCMQmResolution and
// VCMQmRobustness. It declares the content-analysis helpers and holds the
// frame-size, frame-rate and content-class state the derived classes see
// through the protected section.
class VCMQmMethod {
public:
VCMQmMethod();
virtual ~VCMQmMethod();
// Reset values
void ResetQM();
// Reset hook; pure virtual, so every concrete subclass supplies its own.
virtual void Reset() = 0;
// Compute content class.
uint8_t ComputeContentClass();
// Update with the content metrics.
// NOTE(review): the pointer is presumably stored in |content_metrics_|;
// ownership and lifetime are not visible in this header - confirm.
void UpdateContent(const VideoContentMetrics* content_metrics);
// Compute spatial texture magnitude and level.
// Spatial texture is a spatial prediction error measure.
void ComputeSpatial();
// Compute motion magnitude and level for NFD metric.
// NFD is normalized frame difference (normalized by spatial variance).
void ComputeMotionNFD();
// Get the imageType (CIF, VGA, HD, etc) for the system width/height.
ImageType GetImageType(uint16_t width, uint16_t height);
// Return the closest image type.
ImageType FindClosestImageType(uint16_t width, uint16_t height);
// Get the frame rate level.
FrameRateLevelClass FrameRateLevel(float frame_rate);
protected:
// Content Data.
const VideoContentMetrics* content_metrics_;
// Encoder frame sizes and native frame sizes.
uint16_t width_;
uint16_t height_;
float user_frame_rate_;
uint16_t native_width_;
uint16_t native_height_;
float native_frame_rate_;
float aspect_ratio_;
// Image type and frame rate level, for the current encoder resolution.
ImageType image_type_;
FrameRateLevelClass framerate_level_;
// Content class data.
VCMContFeature motion_;
VCMContFeature spatial_;
uint8_t content_class_;
// NOTE(review): presumably set once initialization has happened; the
// exact meaning is defined in the implementation file - confirm.
bool init_;
};
// Resolution settings class.
// Derived from VCMQmMethod. It is fed per-frame encoded sizes and periodic
// (~1 sec) rate updates, and SelectResolution() reports the spatial and/or
// temporal scale change through a VCMResolutionScale.
class VCMQmResolution : public VCMQmMethod {
public:
VCMQmResolution();
virtual ~VCMQmResolution();
// Reset all quantities.
virtual void Reset();
// Reset rate quantities and counters after every SelectResolution() call.
void ResetRates();
// Reset down-sampling state.
void ResetDownSamplingState();
// Get the encoder state.
EncoderState GetEncoderState();
// Initialize after SetEncodingData in media_opt.
// NOTE(review): the meaning of the int return value is not visible in this
// header - confirm against the implementation.
int Initialize(float bitrate,
float user_framerate,
uint16_t width,
uint16_t height,
int num_layers);
// Update the encoder frame size.
void UpdateCodecParameters(float frame_rate, uint16_t width, uint16_t height);
// Update with actual bit rate (size of the latest encoded frame)
// and frame type, after every encoded frame.
void UpdateEncodedSize(size_t encoded_size);
// Update with new target bitrate, actual encoder sent rate, frame_rate,
// loss rate: every ~1 sec from SetTargetRates in media_opt.
void UpdateRates(float target_bitrate,
float encoder_sent_rate,
float incoming_framerate,
uint8_t packet_loss);
// Extract ST (spatio-temporal) resolution action.
// Inputs: qm: Reference to the quality modes pointer.
// Output: the spatial and/or temporal scale change.
// NOTE(review): the meaning of the int return value is not visible in this
// header - confirm against the implementation.
int SelectResolution(VCMResolutionScale** qm);
private:
// Set the default resolution action.
void SetDefaultAction();
// Compute rates for the selection of down-sampling action.
void ComputeRatesForSelection();
// Compute the encoder state.
void ComputeEncoderState();
// Return true if the action is to go back up in resolution.
bool GoingUpResolution();
// Return true if the action is to go down in resolution.
bool GoingDownResolution();
// Check the condition for going up in resolution by the scale factors:
// |fac_width|, |fac_height|, |fac_temp|.
// |scale_fac| is a scale factor for the transition rate.
bool ConditionForGoingUp(float fac_width,
float fac_height,
float fac_temp,
float scale_fac);
// Get the bitrate threshold for the resolution action.
// The case |fac_width|=|fac_height|=|fac_temp|==1 is for down-sampling
// action.
// |scale_fac| is a scale factor for the transition rate.
float GetTransitionRate(float fac_width,
float fac_height,
float fac_temp,
float scale_fac);
// Update the down-sampling state.
void UpdateDownsamplingState(UpDownAction up_down);
// Update the codec frame size and frame rate.
void UpdateCodecResolution();
// Return a state based on average target rate relative transition rate.
uint8_t RateClass(float transition_rate);
// Adjust the action selected from the table.
void AdjustAction();
// Convert 2 stages of 3/4 (=9/16) spatial decimation to 1/2.
void ConvertSpatialFractionalToWhole();
// Returns true if the new frame sizes, under the selected spatial action,
// are of even size.
bool EvenFrameSize();
// Insert latest down-sampling action into the history list.
void InsertLatestDownAction();
// Remove the last (first element) down-sampling action from the list.
void RemoveLastDownAction();
// Check constraints on the amount of down-sampling allowed.
void ConstrainAmountOfDownSampling();
// For going up in resolution: pick spatial or temporal action,
// if both actions were separately selected.
void PickSpatialOrTemporal();
// Select the directional (1x2 or 2x1) spatial down-sampling action.
// NOTE(review): the SpatialAction enum in this header lists only uniform
// modes (no 1x2 / 2x1 entries) - confirm this comment is still accurate.
void SelectSpatialDirectionMode(float transition_rate);
// Capacity of |down_action_history_|.
enum { kDownActionHistorySize = 10};
// Scale settings handed out through SelectResolution().
// NOTE(review): ownership is not visible in this header - confirm.
VCMResolutionScale* qm_;
// Encoder rate control parameters.
float target_bitrate_;
float incoming_framerate_;
float per_frame_bandwidth_;
float buffer_level_;
// Data accumulated every ~1sec from MediaOpt.
float sum_target_rate_;
float sum_incoming_framerate_;
float sum_rate_MM_;
float sum_rate_MM_sgn_;
float sum_packet_loss_;
// Counters.
uint32_t frame_cnt_;
uint32_t frame_cnt_delta_;
uint32_t update_rate_cnt_;
uint32_t low_buffer_cnt_;
// Resolution state parameters.
float state_dec_factor_spatial_;
float state_dec_factor_temporal_;
// Quantities used for selection.
float avg_target_rate_;
float avg_incoming_framerate_;
float avg_ratio_buffer_low_;
float avg_rate_mismatch_;
float avg_rate_mismatch_sgn_;
float avg_packet_loss_;
EncoderState encoder_state_;
ResolutionAction action_;
// Short history of the down-sampling actions from the Initialize() state.
// This is needed for going up in resolution. Since the total amount of
// down-sampling actions are constrained, the length of the list need not be
// large: i.e., (4/3) ^{kDownActionHistorySize} <= kMaxDownSample.
ResolutionAction down_action_history_[kDownActionHistorySize];
int num_layers_;
};
// Robustness settings class.
// Derived from VCMQmMethod. It declares the FEC adjustment and UEP
// protection helpers and keeps the previous network parameters.
class VCMQmRobustness : public VCMQmMethod {
 public:
  VCMQmRobustness();
  // Declared virtual explicitly, matching VCMQmResolution and the base class
  // (it is virtual either way because ~VCMQmMethod() is virtual).
  virtual ~VCMQmRobustness();

  // Reset override of the pure virtual VCMQmMethod::Reset().
  virtual void Reset();

  // Adjust FEC rate based on content: every ~1 sec from SetTargetRates.
  // Returns an adjustment factor.
  // NOTE(review): units of |total_rate| and |rtt_time| are not visible in
  // this header - confirm against the caller.
  float AdjustFecFactor(uint8_t code_rate_delta,
                        float total_rate,
                        float framerate,
                        int64_t rtt_time,
                        uint8_t packet_loss);

  // Set the UEP protection on/off.
  // NOTE(review): |frame_type| is a bool and the meaning of the returned
  // bool is not visible in this header - confirm against the implementation.
  bool SetUepProtection(uint8_t code_rate_delta,
                        float total_rate,
                        uint8_t packet_loss,
                        bool frame_type);

 private:
  // Previous state of network parameters.
  float prev_total_rate_;
  int64_t prev_rtt_time_;
  uint8_t prev_packet_loss_;
  uint8_t prev_code_rate_delta_;
};
} // namespace webrtc
#endif // WEBRTC_MODULES_VIDEO_CODING_QM_SELECT_H_