cricket::VideoAdapter: Drop frames before spending time converting/scaling, not after.

In VideoCapturer::OnFrameCaptured, we currently convert cricket::CapturedFrame to cricket::VideoFrame and then send that to VideoAdapter::AdaptFrame. AdaptFrame may then decide to drop the frame. It would be faster to drop the frame before converting to cricket::VideoFrame.

This CL refactors VideoAdapter with a new function AdaptFrameResolution that takes captured resolution as input and output adapted resolution, or 0x0 if the frame should be dropped. Using that function, frames can be dropped before any conversion takes place.

R=fbarchard@google.com, perkj@webrtc.org, tommi@webrtc.org

Committed: https://code.google.com/p/webrtc/source/detail?r=7702

Committed: https://code.google.com/p/webrtc/source/detail?r=7707

Review URL: https://webrtc-codereview.appspot.com/29949004

git-svn-id: http://webrtc.googlecode.com/svn/trunk@7721 4adac7df-926f-26a2-2b94-8c16560cd09d
diff --git a/talk/libjingle.gyp b/talk/libjingle.gyp
index 90d1f41..7405324 100755
--- a/talk/libjingle.gyp
+++ b/talk/libjingle.gyp
@@ -377,6 +377,7 @@
         'media/base/videocommon.h',
         'media/base/videoframe.cc',
         'media/base/videoframe.h',
+        'media/base/videoframefactory.cc',
         'media/base/videoframefactory.h',
         'media/base/videoprocessor.h',
         'media/base/videorenderer.h',
diff --git a/talk/media/base/videoadapter.cc b/talk/media/base/videoadapter.cc
index f8dcd38..7fdf254 100644
--- a/talk/media/base/videoadapter.cc
+++ b/talk/media/base/videoadapter.cc
@@ -144,7 +144,9 @@
 
 // There are several frame sizes used by Adapter.  This explains them
 // input_format - set once by server to frame size expected from the camera.
+//   The input frame size is also updated in every call to AdaptFrame.
 // output_format - size that output would like to be.  Includes framerate.
+//   The output frame size is also updated in every call to AdaptFrame.
 // output_num_pixels - size that output should be constrained to.  Used to
 //   compute output_format from in_frame.
 // in_frame - actual camera captured frame size, which is typically the same
@@ -250,6 +252,11 @@
   black_output_ = black;
 }
 
+bool VideoAdapter::IsBlackOutput() {
+  rtc::CritScope cs(&critical_section_);
+  return black_output_;
+}
+
 // Constrain output resolution to this many pixels overall
 void VideoAdapter::SetOutputNumPixels(int num_pixels) {
   output_num_pixels_ = num_pixels;
@@ -259,21 +266,12 @@
   return output_num_pixels_;
 }
 
-// TODO(fbarchard): Add AdaptFrameRate function that only drops frames but
-// not resolution.
-bool VideoAdapter::AdaptFrame(VideoFrame* in_frame,
-                              VideoFrame** out_frame) {
+VideoFormat VideoAdapter::AdaptFrameResolution(int in_width, int in_height) {
   rtc::CritScope cs(&critical_section_);
-  if (!in_frame || !out_frame) {
-    return false;
-  }
   ++frames_in_;
 
-  // Update input to actual frame dimensions.
-  VideoFormat format(static_cast<int>(in_frame->GetWidth()),
-                     static_cast<int>(in_frame->GetHeight()),
-                     input_format_.interval, input_format_.fourcc);
-  SetInputFormat(format);
+  SetInputFormat(VideoFormat(
+      in_width, in_height, input_format_.interval, input_format_.fourcc));
 
   // Drop the input frame if necessary.
   bool should_drop = false;
@@ -303,48 +301,23 @@
                    << " / out " << frames_out_
                    << " / in " << frames_in_
                    << " Changes: " << adaption_changes_
-                   << " Input: " << in_frame->GetWidth()
-                   << "x" << in_frame->GetHeight()
+                   << " Input: " << in_width
+                   << "x" << in_height
                    << " i" << input_format_.interval
                    << " Output: i" << output_format_.interval;
     }
-    *out_frame = NULL;
-    return true;
+
+    return VideoFormat();  // Drop frame.
   }
 
-  float scale = 1.f;
-  if (output_num_pixels_ < input_format_.width * input_format_.height) {
-    scale = VideoAdapter::FindClosestViewScale(
-        static_cast<int>(in_frame->GetWidth()),
-        static_cast<int>(in_frame->GetHeight()),
-        output_num_pixels_);
-    output_format_.width = static_cast<int>(in_frame->GetWidth() * scale + .5f);
-    output_format_.height = static_cast<int>(in_frame->GetHeight() * scale +
-                                             .5f);
-  } else {
-    output_format_.width = static_cast<int>(in_frame->GetWidth());
-    output_format_.height = static_cast<int>(in_frame->GetHeight());
-  }
-
-  if (!black_output_ &&
-      in_frame->GetWidth() == static_cast<size_t>(output_format_.width) &&
-      in_frame->GetHeight() == static_cast<size_t>(output_format_.height)) {
-    // The dimensions are correct and we aren't muting, so use the input frame.
-    *out_frame = in_frame;
-  } else {
-    if (!StretchToOutputFrame(in_frame)) {
-      LOG(LS_VERBOSE) << "VAdapt Stretch Failed.";
-      return false;
-    }
-
-    *out_frame = output_frame_.get();
-  }
+  const float scale = VideoAdapter::FindClosestViewScale(
+      in_width, in_height, output_num_pixels_);
+  const int output_width = static_cast<int>(in_width * scale + .5f);
+  const int output_height = static_cast<int>(in_height * scale + .5f);
 
   ++frames_out_;
-  if (in_frame->GetWidth() != (*out_frame)->GetWidth() ||
-      in_frame->GetHeight() != (*out_frame)->GetHeight()) {
+  if (scale != 1)
     ++frames_scaled_;
-  }
   // Show VAdapt log every 90 frames output. (3 seconds)
   // TODO(fbarchard): Consider GetLogSeverity() to change interval to less
   // for LS_VERBOSE and more for LS_INFO.
@@ -354,8 +327,8 @@
   // resolution changes as well.  Consider dropping the statistics into their
   // own class which could be queried publically.
   bool changed = false;
-  if (previous_width_ && (previous_width_ != (*out_frame)->GetWidth() ||
-      previous_height_ != (*out_frame)->GetHeight())) {
+  if (previous_width_ && (previous_width_ != output_width ||
+                          previous_height_ != output_height)) {
     show = true;
     ++adaption_changes_;
     changed = true;
@@ -367,17 +340,53 @@
                  << " / out " << frames_out_
                  << " / in " << frames_in_
                  << " Changes: " << adaption_changes_
-                 << " Input: " << in_frame->GetWidth()
-                 << "x" << in_frame->GetHeight()
+                 << " Input: " << in_width
+                 << "x" << in_height
                  << " i" << input_format_.interval
                  << " Scale: " << scale
-                 << " Output: " << (*out_frame)->GetWidth()
-                 << "x" << (*out_frame)->GetHeight()
+                 << " Output: " << output_width
+                 << "x" << output_height
                  << " i" << output_format_.interval
                  << " Changed: " << (changed ? "true" : "false");
   }
-  previous_width_ = (*out_frame)->GetWidth();
-  previous_height_ = (*out_frame)->GetHeight();
+
+  output_format_.width = output_width;
+  output_format_.height = output_height;
+  previous_width_ = output_width;
+  previous_height_ = output_height;
+
+  return output_format_;
+}
+
+// TODO(fbarchard): Add AdaptFrameRate function that only drops frames but
+// not resolution.
+bool VideoAdapter::AdaptFrame(VideoFrame* in_frame, VideoFrame** out_frame) {
+  if (!in_frame || !out_frame)
+    return false;
+
+  const VideoFormat adapted_format =
+      AdaptFrameResolution(static_cast<int>(in_frame->GetWidth()),
+                           static_cast<int>(in_frame->GetHeight()));
+
+  rtc::CritScope cs(&critical_section_);
+  if (adapted_format.IsSize0x0()) {
+    *out_frame = NULL;
+    return true;
+  }
+
+  if (!black_output_ &&
+      in_frame->GetWidth() == static_cast<size_t>(adapted_format.width) &&
+      in_frame->GetHeight() == static_cast<size_t>(adapted_format.height)) {
+    // The dimensions are correct and we aren't muting, so use the input frame.
+    *out_frame = in_frame;
+  } else {
+    if (!StretchToOutputFrame(in_frame)) {
+      LOG(LS_VERBOSE) << "VAdapt Stretch Failed.";
+      return false;
+    }
+
+    *out_frame = output_frame_.get();
+  }
 
   return true;
 }
diff --git a/talk/media/base/videoadapter.h b/talk/media/base/videoadapter.h
index 0a9e27e..9aab3d2 100644
--- a/talk/media/base/videoadapter.h
+++ b/talk/media/base/videoadapter.h
@@ -56,6 +56,11 @@
   const VideoFormat& output_format();
   // If the parameter black is true, the adapted frames will be black.
   void SetBlackOutput(bool black);
+  bool IsBlackOutput();
+
+  // Return the adapted resolution given the input resolution. The returned
+  // resolution will be 0x0 if the frame should be dropped.
+  VideoFormat AdaptFrameResolution(int in_width, int in_height);
 
   // Adapt the input frame from the input format to the output format. Return
   // true and set the output frame to NULL if the input frame is dropped. Return
diff --git a/talk/media/base/videoadapter_unittest.cc b/talk/media/base/videoadapter_unittest.cc
index 04bf3d1..518c183 100755
--- a/talk/media/base/videoadapter_unittest.cc
+++ b/talk/media/base/videoadapter_unittest.cc
@@ -292,6 +292,52 @@
   EXPECT_GT(listener_->GetStats().dropped_frames, 0);
 }
 
+// Set a very high output pixel resolution. Expect no resolution change.
+TEST_F(VideoAdapterTest, AdaptFrameResolutionHighLimit) {
+  adapter_->SetOutputNumPixels(INT_MAX);
+  VideoFormat adapted_format = adapter_->AdaptFrameResolution(
+      capture_format_.width, capture_format_.height);
+  EXPECT_EQ(capture_format_.width, adapted_format.width);
+  EXPECT_EQ(capture_format_.height, adapted_format.height);
+
+  adapter_->SetOutputNumPixels(987654321);
+  adapted_format = capture_format_,
+  adapter_->AdaptFrameResolution(capture_format_.width, capture_format_.height);
+  EXPECT_EQ(capture_format_.width, adapted_format.width);
+  EXPECT_EQ(capture_format_.height, adapted_format.height);
+}
+
+// Adapt the frame resolution to be the same as capture resolution. Expect no
+// resolution change.
+TEST_F(VideoAdapterTest, AdaptFrameResolutionIdentical) {
+  adapter_->SetOutputFormat(capture_format_);
+  const VideoFormat adapted_format = adapter_->AdaptFrameResolution(
+      capture_format_.width, capture_format_.height);
+  EXPECT_EQ(capture_format_.width, adapted_format.width);
+  EXPECT_EQ(capture_format_.height, adapted_format.height);
+}
+
+// Adapt the frame resolution to be a quarter of the capture resolution. Expect
+// resolution change.
+TEST_F(VideoAdapterTest, AdaptFrameResolutionQuarter) {
+  VideoFormat request_format = capture_format_;
+  request_format.width /= 2;
+  request_format.height /= 2;
+  adapter_->SetOutputFormat(request_format);
+  const VideoFormat adapted_format = adapter_->AdaptFrameResolution(
+      request_format.width, request_format.height);
+  EXPECT_EQ(request_format.width, adapted_format.width);
+  EXPECT_EQ(request_format.height, adapted_format.height);
+}
+
+// Adapt the pixel resolution to 0. Expect frame drop.
+TEST_F(VideoAdapterTest, AdaptFrameResolutionDrop) {
+  adapter_->SetOutputNumPixels(0);
+  EXPECT_TRUE(
+      adapter_->AdaptFrameResolution(capture_format_.width,
+                                     capture_format_.height).IsSize0x0());
+}
+
 // Adapt the frame resolution to be a quarter of the capture resolution at the
 // beginning. Expect resolution change.
 TEST_F(VideoAdapterTest, AdaptResolution) {
diff --git a/talk/media/base/videocapturer.cc b/talk/media/base/videocapturer.cc
index c722012..3000038 100644
--- a/talk/media/base/videocapturer.cc
+++ b/talk/media/base/videocapturer.cc
@@ -482,8 +482,8 @@
   // adapter to better match ratio_w_ x ratio_h_.
   // Note that abs() of frame height is passed in, because source may be
   // inverted, but output will be positive.
-  int desired_width = captured_frame->width;
-  int desired_height = captured_frame->height;
+  int cropped_width = captured_frame->width;
+  int cropped_height = captured_frame->height;
 
   // TODO(fbarchard): Improve logic to pad or crop.
   // MJPG can crop vertically, but not horizontally.  This logic disables crop.
@@ -503,7 +503,21 @@
     ComputeCrop(ratio_w_, ratio_h_, captured_frame->width,
                 abs(captured_frame->height), captured_frame->pixel_width,
                 captured_frame->pixel_height, captured_frame->rotation,
-                &desired_width, &desired_height);
+                &cropped_width, &cropped_height);
+  }
+
+  int adapted_width = cropped_width;
+  int adapted_height = cropped_height;
+  if (enable_video_adapter_ && !IsScreencast()) {
+    const VideoFormat adapted_format =
+        video_adapter_.AdaptFrameResolution(cropped_width, cropped_height);
+    if (adapted_format.IsSize0x0()) {
+      // VideoAdapter dropped the frame.
+      ++adapt_frame_drops_;
+      return;
+    }
+    adapted_width = adapted_format.width;
+    adapted_height = adapted_format.height;
   }
 
   if (!frame_factory_) {
@@ -511,39 +525,29 @@
     return;
   }
 
-  rtc::scoped_ptr<VideoFrame> i420_frame(
-      frame_factory_->CreateAliasedFrame(
-          captured_frame, desired_width, desired_height));
-  if (!i420_frame) {
+  rtc::scoped_ptr<VideoFrame> adapted_frame(
+      frame_factory_->CreateAliasedFrame(captured_frame,
+                                         cropped_width, cropped_height,
+                                         adapted_width, adapted_height));
+
+  if (!adapted_frame) {
     // TODO(fbarchard): LOG more information about captured frame attributes.
     LOG(LS_ERROR) << "Couldn't convert to I420! "
                   << "From " << ToString(captured_frame) << " To "
-                  << desired_width << " x " << desired_height;
+                  << cropped_width << " x " << cropped_height;
     return;
   }
 
-  VideoFrame* adapted_frame = i420_frame.get();
-  if (enable_video_adapter_ && !IsScreencast()) {
-    VideoFrame* out_frame = NULL;
-    video_adapter_.AdaptFrame(adapted_frame, &out_frame);
-    if (!out_frame) {
-      // VideoAdapter dropped the frame.
-      ++adapt_frame_drops_;
-      return;
-    }
-    adapted_frame = out_frame;
-  }
-
-  if (!muted_ && !ApplyProcessors(adapted_frame)) {
+  if (!muted_ && !ApplyProcessors(adapted_frame.get())) {
     // Processor dropped the frame.
     ++effect_frame_drops_;
     return;
   }
-  if (muted_) {
+  if (muted_ || (enable_video_adapter_ && video_adapter_.IsBlackOutput())) {
     // TODO(pthatcher): Use frame_factory_->CreateBlackFrame() instead.
     adapted_frame->SetToBlack();
   }
-  SignalVideoFrame(this, adapted_frame);
+  SignalVideoFrame(this, adapted_frame.get());
 
   UpdateStats(captured_frame);
 }
diff --git a/talk/media/base/videoframefactory.cc b/talk/media/base/videoframefactory.cc
index b5a7919..9c01b1b 100644
--- a/talk/media/base/videoframefactory.cc
+++ b/talk/media/base/videoframefactory.cc
@@ -23,11 +23,46 @@
 // OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
 // ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 //
-// This file is intentionally left empty. The purpose of this file is to add a
-// new file in libjingle without breaking Chromium in the process. The plan is
-// to do the following:
-// 1. Land a no-op videoframefactory.cc in webrtc (this file).
-// 2. Wait for it to roll into Chromium.
-// 3. Modify libjingle.gyp in Chromium to include this file.
-// 4. Make the real change in webrtc with the real implementation of this file.
-// 5. Wait for the change to roll into Chromium.
+
+#include "talk/media/base/videoframefactory.h"
+
+#include "talk/media/base/videocapturer.h"
+
+namespace cricket {
+
+VideoFrame* VideoFrameFactory::CreateAliasedFrame(
+    const CapturedFrame* input_frame,
+    int cropped_input_width,
+    int cropped_input_height,
+    int output_width,
+    int output_height) const {
+  rtc::scoped_ptr<VideoFrame> cropped_input_frame(CreateAliasedFrame(
+      input_frame, cropped_input_width, cropped_input_height));
+
+  if (cropped_input_width == output_width &&
+      cropped_input_height == output_height) {
+    // No scaling needed.
+    return cropped_input_frame.release();
+  }
+
+  // Create and stretch the output frame if it has not been created yet or its
+  // size is not same as the expected.
+  if (!output_frame_ ||
+      output_frame_->GetWidth() != static_cast<size_t>(output_width) ||
+      output_frame_->GetHeight() != static_cast<size_t>(output_height)) {
+    output_frame_.reset(
+        cropped_input_frame->Stretch(output_width, output_height, true, true));
+    if (!output_frame_) {
+      LOG(LS_WARNING) << "Failed to stretch frame to " << output_width << "x"
+                      << output_height;
+      return NULL;
+    }
+  } else {
+    cropped_input_frame->StretchToFrame(output_frame_.get(), true, true);
+    output_frame_->SetElapsedTime(cropped_input_frame->GetElapsedTime());
+    output_frame_->SetTimeStamp(cropped_input_frame->GetTimeStamp());
+  }
+  return output_frame_->Copy();
+}
+
+}  // namespace cricket
diff --git a/talk/media/base/videoframefactory.h b/talk/media/base/videoframefactory.h
old mode 100755
new mode 100644
index 301ec1c..efefd81
--- a/talk/media/base/videoframefactory.h
+++ b/talk/media/base/videoframefactory.h
@@ -27,6 +27,9 @@
 #ifndef TALK_MEDIA_BASE_VIDEOFRAMEFACTORY_H_
 #define TALK_MEDIA_BASE_VIDEOFRAMEFACTORY_H_
 
+#include "talk/media/base/videoframe.h"
+#include "webrtc/base/scoped_ptr.h"
+
 namespace cricket {
 
 struct CapturedFrame;
@@ -43,8 +46,28 @@
   // space allows for aliasing, otherwise a color conversion will
   // occur.  For safety, |input_frame| must outlive the returned
   // frame.  Returns NULL if conversion fails.
-  virtual VideoFrame* CreateAliasedFrame(
-      const CapturedFrame* input_frame, int width, int height) const = 0;
+
+  // The returned frame will be a center crop of |input_frame| with
+  // size |cropped_width| x |cropped_height|.
+  virtual VideoFrame* CreateAliasedFrame(const CapturedFrame* input_frame,
+                                         int cropped_width,
+                                         int cropped_height) const = 0;
+
+  // The returned frame will be a center crop of |input_frame| with size
+  // |cropped_width| x |cropped_height|, scaled to |output_width| x
+  // |output_height|. If scaling has taken place, i.e. cropped input
+  // resolution != output resolution, the returned frame will remain valid
+  // until this function is called again.
+  virtual VideoFrame* CreateAliasedFrame(const CapturedFrame* input_frame,
+                                         int cropped_input_width,
+                                         int cropped_input_height,
+                                         int output_width,
+                                         int output_height) const;
+
+ private:
+  // An internal frame buffer to avoid reallocations. It is mutable because it
+  // does not affect behaviour, only performance.
+  mutable rtc::scoped_ptr<VideoFrame> output_frame_;
 };
 
 }  // namespace cricket