| // Copyright 2020 The Chromium Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style license that can be |
| // found in the LICENSE file. |
| |
| #ifndef CAST_STANDALONE_SENDER_STREAMING_VP8_ENCODER_H_ |
| #define CAST_STANDALONE_SENDER_STREAMING_VP8_ENCODER_H_ |
| |
| #include <vpx/vpx_encoder.h> |
| #include <vpx/vpx_image.h> |
| |
| #include <algorithm> |
| #include <condition_variable> // NOLINT |
| #include <functional> |
| #include <memory> |
| #include <mutex> |
| #include <queue> |
| #include <thread> |
| #include <vector> |
| |
| #include "absl/base/thread_annotations.h" |
| #include "cast/streaming/frame_id.h" |
| #include "cast/streaming/rtp_time.h" |
| #include "platform/api/task_runner.h" |
| #include "platform/api/time.h" |
| |
| namespace openscreen { |
| |
| class TaskRunner; |
| |
| namespace cast { |
| |
| class Sender; |
| |
| // Uses libvpx to encode VP8 video and streams it to a Sender. Includes |
| // extensive logic for fine-tuning the encoder parameters in real-time, to |
| // provide the best quality results given external, uncontrollable factors: |
| // CPU/network availability, and the complexity of the video frame content. |
| // |
| // Internally, a separate encode thread is created and used to prevent blocking |
| // the main thread while frames are being encoded. All public API methods are |
| // assumed to be called on the same sequence/thread as the main TaskRunner |
| // (injected via the constructor). |
| // |
| // Usage: |
| // |
| // 1. EncodeAndSend() is used to queue-up video frames for encoding and sending, |
| // which will be done on a best-effort basis. |
| // |
| // 2. The client is expected to call SetTargetBitrate() frequently based on its |
| // own bandwidth estimates and congestion control logic. In addition, a client |
| // may provide a callback for each frame's encode statistics, which can be used |
| // to further optimize the user experience. For example, the stats can be used |
| // as a signal to reduce the data volume (i.e., resolution and/or frame rate) |
| // coming from the video capture source. |
| class StreamingVp8Encoder { |
| public: |
| // Configurable parameters passed to the StreamingVp8Encoder constructor. |
| struct Parameters { |
| // Number of threads to parallelize frame encoding. This should be set based |
| // on the number of CPU cores available for encoding, but no more than 8. |
| int num_encode_threads = |
| std::min(std::max<int>(std::thread::hardware_concurrency(), 1), 8); |
| |
| // Best-quality quantizer (lower is better quality). Range: [0,63] |
| int min_quantizer = 4; |
| |
| // Worst-quality quantizer (lower is better quality). Range: [0,63] |
| int max_quantizer = 63; |
| |
| // Worst-quality quantizer to use when the CPU is extremely constrained. |
| // Range: [min_quantizer,max_quantizer] |
| int max_cpu_saver_quantizer = 25; |
| |
| // Maximum amount of wall-time a frame's encode can take, relative to the |
| // frame's duration, before the CPU-saver logic is activated. The default |
| // (70%) is appropriate for systems with four or more cores, but should be |
| // reduced (e.g., 50%) for systems with fewer than three cores. |
| // |
| // Example: For 30 FPS (continuous) video, the frame duration is ~33.3ms, |
| // and a value of 0.5 here would mean that the CPU-saver logic starts |
| // sacrificing quality when frame encodes start taking longer than ~16.7ms. |
| double max_time_utilization = 0.7; |
| }; |
| |
| // Represents an input VideoFrame, passed to EncodeAndSend(). |
| struct VideoFrame { |
| // Image width and height. |
| int width; |
| int height; |
| |
| // I420 format image pointers and row strides (the number of bytes between |
| // the start of successive rows). The pointers only need to remain valid |
| // until the EncodeAndSend() call returns. |
| const uint8_t* yuv_planes[3]; |
| int yuv_strides[3]; |
| |
| // How long this frame will be held before the next frame will be displayed, |
| // or zero if unknown. The frame duration is passed to the VP8 codec, |
| // affecting a number of important behaviors, including: per-frame |
| // bandwidth, CPU time spent encoding, temporal quality trade-offs, and |
| // key/golden/alt-ref frame generation intervals. |
| Clock::duration duration; |
| }; |
| |
| // Performance statistics for a single frame's encode. |
| // |
| // For full details on how to use these stats in an end-to-end system, see: |
| // https://www.chromium.org/developers/design-documents/ |
| // auto-throttled-screen-capture-and-mirroring |
| // and https://source.chromium.org/chromium/chromium/src/+/master: |
| // media/cast/sender/performance_metrics_overlay.h |
| struct Stats { |
| // The Cast Streaming ID that was assigned to the frame. |
| FrameId frame_id; |
| |
| // The RTP timestamp of the frame. |
| RtpTimeTicks rtp_timestamp; |
| |
| // How long the frame took to encode. This is wall time, not CPU time or |
| // some other load metric. |
| Clock::duration encode_wall_time; |
| |
| // The frame's predicted duration; or, the actual duration if it was |
| // provided in the VideoFrame. |
| Clock::duration frame_duration; |
| |
| // The encoded frame's size in bytes. |
| int encoded_size; |
| |
| // The average size of an encoded frame in bytes, having this |
| // |frame_duration| and current target bitrate. |
| double target_size; |
| |
| // The actual quantizer the VP8 encoder used, in the range [0,63]. |
| int quantizer; |
| |
| // The "hindsight" quantizer value that would have produced the best quality |
| // encoding of the frame at the current target bitrate. The nominal range is |
| // [0.0,63.0]. If it is larger than 63.0, then it was impossible for VP8 to |
| // encode the frame within the current target bitrate (e.g., too much |
| // "entropy" in the image, or too low a target bitrate). |
| double perfect_quantizer; |
| |
| // Utilization feedback metrics. The nominal range for each of these is |
| // [0.0,1.0] where 1.0 means "the entire budget available for the frame was |
| // exhausted." Going above 1.0 is okay for one or a few frames, since it's |
| // the average over many frames that matters before the system is considered |
| // "redlining." |
| // |
| // The max of these three provides an overall utilization control signal. |
| // The usual approach is for upstream control logic to increase/decrease the |
| // data volume (e.g., video resolution and/or frame rate) to maintain a good |
| // target point. |
| double time_utilization() const { |
| return static_cast<double>(encode_wall_time.count()) / |
| frame_duration.count(); |
| } |
| double space_utilization() const { return encoded_size / target_size; } |
| double entropy_utilization() const { |
| return perfect_quantizer / kMaxQuantizer; |
| } |
| }; |
| |
| StreamingVp8Encoder(const Parameters& params, |
| TaskRunner* task_runner, |
| Sender* sender); |
| |
| ~StreamingVp8Encoder(); |
| |
| // Get/Set the target bitrate. This may be changed at any time, as frequently |
| // as desired, and it will take effect internally as soon as possible. |
| int GetTargetBitrate() const; |
| void SetTargetBitrate(int new_bitrate); |
| |
| // Encode |frame| using the VP8 encoder, assemble an EncodedFrame, and enqueue |
| // into the Sender. The frame may be dropped if too many frames are in-flight. |
| // If provided, the |stats_callback| is run after the frame is enqueued in the |
| // Sender (via the main TaskRunner). |
| void EncodeAndSend(const VideoFrame& frame, |
| Clock::time_point reference_time, |
| std::function<void(Stats)> stats_callback); |
| |
| static constexpr int kMinQuantizer = 0; |
| static constexpr int kMaxQuantizer = 63; |
| |
| private: |
| // Syntactic convenience to wrap the vpx_image_t alloc/free API in a smart |
| // pointer. |
| struct VpxImageDeleter { |
| void operator()(vpx_image_t* ptr) const { vpx_img_free(ptr); } |
| }; |
| using VpxImageUniquePtr = std::unique_ptr<vpx_image_t, VpxImageDeleter>; |
| |
| // Represents the state of one frame encode. This is created in |
| // EncodeAndSend(), and passed to the encode thread via the |encode_queue_|. |
| struct WorkUnit { |
| VpxImageUniquePtr image; |
| Clock::duration duration; |
| Clock::time_point reference_time; |
| RtpTimeTicks rtp_timestamp; |
| std::function<void(Stats)> stats_callback; |
| }; |
| |
| // Same as WorkUnit, but with additional fields to carry the encode results. |
| struct WorkUnitWithResults : public WorkUnit { |
| std::vector<uint8_t> payload; |
| bool is_key_frame; |
| Stats stats; |
| }; |
| |
| bool is_encoder_initialized() const { return config_.g_threads != 0; } |
| |
| // Destroys the VP8 encoder context if it has been initialized. |
| void DestroyEncoder(); |
| |
| // The procedure for the |encode_thread_| that loops, processing work units |
| // from the |encode_queue_| by calling Encode() until it's time to end the |
| // thread. |
| void ProcessWorkUnitsUntilTimeToQuit(); |
| |
| // If the |encoder_| is live, attempt reconfiguration to allow it to encode |
| // frames at a new frame size, target bitrate, or "CPU encoding speed." If |
| // reconfiguration is not possible, destroy the existing instance and |
| // re-create a new |encoder_| instance. |
| void PrepareEncoder(int width, int height, int target_bitrate); |
| |
| // Wraps the complex libvpx vpx_codec_encode() call using inputs from |
| // |work_unit| and populating results there. |
| void EncodeFrame(bool force_key_frame, WorkUnitWithResults* work_unit); |
| |
| // Computes and populates |work_unit.stats| after the last call to |
| // EncodeFrame(). |
| void ComputeFrameEncodeStats(Clock::duration encode_wall_time, |
| int target_bitrate, |
| WorkUnitWithResults* work_unit); |
| |
| // Updates the |ideal_speed_setting_|, to take effect with the next frame |
| // encode, based on the given performance |stats|. |
| void UpdateSpeedSettingForNextFrame(const Stats& stats); |
| |
| // Assembles and enqueues an EncodedFrame with the Sender on the main thread. |
| void SendEncodedFrame(WorkUnitWithResults results); |
| |
| // Allocates a vpx_image_t and copies the content from |frame| to it. |
| static VpxImageUniquePtr CloneAsVpxImage(const VideoFrame& frame); |
| |
| const Parameters params_; |
| TaskRunner* const main_task_runner_; |
| Sender* const sender_; |
| |
| // The reference time of the first frame passed to EncodeAndSend(). |
| Clock::time_point start_time_ = Clock::time_point::min(); |
| |
| // The RTP timestamp of the last frame that was pushed into the |
| // |encode_queue_| by EncodeAndSend(). This is used to check whether |
| // timestamps are monotonically increasing. |
| RtpTimeTicks last_enqueued_rtp_timestamp_; |
| |
| // Guards a few members shared by both the main and encode threads. |
| std::mutex mutex_; |
| |
| // Used by the encode thread to sleep until more work is available. |
| std::condition_variable cv_ ABSL_GUARDED_BY(mutex_); |
| |
| // These encode parameters not passed in the WorkUnit struct because it is |
| // desirable for them to be applied as soon as possible, with the very next |
| // WorkUnit popped from the |encode_queue_| on the encode thread, and not to |
| // wait until some later WorkUnit is processed. |
| bool needs_key_frame_ ABSL_GUARDED_BY(mutex_) = true; |
| int target_bitrate_ ABSL_GUARDED_BY(mutex_) = 2 << 20; // Default: 2 Mbps. |
| |
| // The queue of frame encodes. The size of this queue is implicitly bounded by |
| // EncodeAndSend(), where it checks for the total in-flight media duration and |
| // maybe drops a frame. |
| std::queue<WorkUnit> encode_queue_ ABSL_GUARDED_BY(mutex_); |
| |
| // Current VP8 encoder configuration. Most of the fields are unchanging, and |
| // are populated in the ctor; but thereafter, only the encode thread accesses |
| // this struct. |
| // |
| // The speed setting is controlled via a separate libvpx API (see members |
| // below). |
| vpx_codec_enc_cfg_t config_{}; |
| |
| // These represent the magnitude of the VP8 speed setting, where larger values |
| // (i.e., faster speed) request less CPU usage but will provide lower video |
| // quality. Only the encode thread accesses these. |
| double ideal_speed_setting_; // A time-weighted average, from measurements. |
| int current_speed_setting_; // Current |encoder_| speed setting. |
| |
| // libvpx VP8 encoder instance. Only the encode thread accesses this. |
| vpx_codec_ctx_t encoder_; |
| |
| // This member should be last in the class since the thread should not start |
| // until all above members have been initialized by the constructor. |
| std::thread encode_thread_; |
| }; |
| |
| } // namespace cast |
| } // namespace openscreen |
| |
| #endif // CAST_STANDALONE_SENDER_STREAMING_VP8_ENCODER_H_ |