Add h.264 AVC SPS parsing for resolution (re-land)

Re-land of noharic@'s CL at https://webrtc-codereview.appspot.com/48129004,
which was reverted due to a Mac compile error that was most likely
a Goma flake (it passed on all trybots).

TBR=stefan@webrtc.org, noharic@webrtc.org

Review URL: https://webrtc-codereview.appspot.com/44329005

Cr-Commit-Position: refs/heads/master@{#9079}
diff --git a/webrtc/modules/modules.gyp b/webrtc/modules/modules.gyp
index 771e672..5fbd501 100644
--- a/webrtc/modules/modules.gyp
+++ b/webrtc/modules/modules.gyp
@@ -223,6 +223,7 @@
             'rtp_rtcp/source/fec_receiver_unittest.cc',
             'rtp_rtcp/source/fec_test_helper.cc',
             'rtp_rtcp/source/fec_test_helper.h',
+            'rtp_rtcp/source/h264_sps_parser_unittest.cc',
             'rtp_rtcp/source/nack_rtx_unittest.cc',
             'rtp_rtcp/source/producer_fec_unittest.cc',
             'rtp_rtcp/source/receive_statistics_unittest.cc',
diff --git a/webrtc/modules/rtp_rtcp/BUILD.gn b/webrtc/modules/rtp_rtcp/BUILD.gn
index 528637f..bbf69f4 100644
--- a/webrtc/modules/rtp_rtcp/BUILD.gn
+++ b/webrtc/modules/rtp_rtcp/BUILD.gn
@@ -67,6 +67,8 @@
     "source/forward_error_correction.h",
     "source/forward_error_correction_internal.cc",
     "source/forward_error_correction_internal.h",
+    "source/h264_sps_parser.cc",
+    "source/h264_sps_parser.h",
     "source/producer_fec.cc",
     "source/producer_fec.h",
     "source/rtp_packet_history.cc",
diff --git a/webrtc/modules/rtp_rtcp/rtp_rtcp.gypi b/webrtc/modules/rtp_rtcp/rtp_rtcp.gypi
index 7a144e4..e73b43a 100644
--- a/webrtc/modules/rtp_rtcp/rtp_rtcp.gypi
+++ b/webrtc/modules/rtp_rtcp/rtp_rtcp.gypi
@@ -74,6 +74,8 @@
         'source/forward_error_correction.h',
         'source/forward_error_correction_internal.cc',
         'source/forward_error_correction_internal.h',
+        'source/h264_sps_parser.cc',
+        'source/h264_sps_parser.h',
         'source/producer_fec.cc',
         'source/producer_fec.h',
         'source/rtp_packet_history.cc',
diff --git a/webrtc/modules/rtp_rtcp/source/h264_sps_parser.cc b/webrtc/modules/rtp_rtcp/source/h264_sps_parser.cc
new file mode 100644
index 0000000..aa9d3f3
--- /dev/null
+++ b/webrtc/modules/rtp_rtcp/source/h264_sps_parser.cc
@@ -0,0 +1,226 @@
+/*
+ *  Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "webrtc/modules/rtp_rtcp/source/h264_sps_parser.h"
+
+#include "webrtc/base/bytebuffer.h"
+#include "webrtc/base/bitbuffer.h"
+#include "webrtc/system_wrappers/interface/logging.h"
+
+#define RETURN_FALSE_ON_FAIL(x) \
+  if (!(x)) {                   \
+    return false;               \
+  }
+
+namespace webrtc {
+
+// Stores the (unowned) SPS buffer; dimensions stay 0 until Parse() succeeds.
+H264SpsParser::H264SpsParser(const uint8* sps, size_t byte_length)
+    : sps_(sps), byte_length_(byte_length), width_(0), height_(0) {}
+
+bool H264SpsParser::Parse() {
+  // General note: this is based off the 02/2014 version of the H.264 standard.
+  // You can find it on this page:
+  // http://www.itu.int/rec/T-REC-H.264
+
+  const char* sps_bytes = reinterpret_cast<const char*>(sps_);
+  // First, extract the RBSP: the source buffer minus each emulation prevention
+  // byte (the 0x03 in a 0x00 0x00 0x03 run). See section 7.3.1 of H.264.
+  rtc::ByteBuffer rbsp_buffer;
+  for (size_t i = 0; i < byte_length_;) {
+    if (byte_length_ - i >= 3 && !sps_[i] && !sps_[i + 1] && sps_[i + 2] == 3) {
+      // Keep the two zero bytes; drop the emulation prevention byte.
+      rbsp_buffer.WriteBytes(sps_bytes + i, 2);
+      i += 3;
+    } else {
+      // Single rbsp byte.
+      rbsp_buffer.WriteBytes(sps_bytes + i, 1);
+      i++;
+    }
+  }
+
+  // Now, we need to use a bit buffer to parse through the actual AVC SPS
+  // format. See Section 7.3.2.1.1 ("Sequence parameter set data syntax") of the
+  // H.264 standard for a complete description.
+  // Since we only care about resolution, we ignore the majority of fields, but
+  // we still have to actively parse through a lot of the data, since many of
+  // the fields have variable size.
+  // We're particularly interested in:
+  // chroma_format_idc -> affects crop units
+  // pic_{width,height}_* -> resolution of the frame in macroblocks (16x16).
+  // frame_crop_*_offset -> crop information
+  rtc::BitBuffer parser(reinterpret_cast<const uint8*>(rbsp_buffer.Data()),
+                        rbsp_buffer.Length());
+
+  // The golomb values we have to read, not just consume.
+  uint32 golomb_ignored;
+
+  // separate_colour_plane_flag is optional (assumed 0), but has implications
+  // about the ChromaArrayType, which modifies how we treat crop coordinates.
+  uint32 separate_colour_plane_flag = 0;
+  // chroma_format_idc will be ChromaArrayType if separate_colour_plane_flag is
+  // 0. It defaults to 1, when not specified.
+  uint32 chroma_format_idc = 1;
+
+  // profile_idc: u(8). We need it to determine if we need to read/skip chroma
+  // formats.
+  uint8 profile_idc;
+  RETURN_FALSE_ON_FAIL(parser.ReadUInt8(&profile_idc));
+  // constraint_set0_flag through constraint_set5_flag + reserved_zero_2bits
+  // 1 bit each for the flags + 2 bits = 8 bits = 1 byte.
+  RETURN_FALSE_ON_FAIL(parser.ConsumeBytes(1));
+  // level_idc: u(8)
+  RETURN_FALSE_ON_FAIL(parser.ConsumeBytes(1));
+  // seq_parameter_set_id: ue(v)
+  RETURN_FALSE_ON_FAIL(parser.ReadExponentialGolomb(&golomb_ignored));
+  // See if profile_idc has chroma format information.
+  if (profile_idc == 100 || profile_idc == 110 || profile_idc == 122 ||
+      profile_idc == 244 || profile_idc == 44 || profile_idc == 83 ||
+      profile_idc == 86 || profile_idc == 118 || profile_idc == 128 ||
+      profile_idc == 138 || profile_idc == 139 || profile_idc == 134) {
+    // chroma_format_idc: ue(v)
+    RETURN_FALSE_ON_FAIL(parser.ReadExponentialGolomb(&chroma_format_idc));
+    if (chroma_format_idc == 3) {
+      // separate_colour_plane_flag: u(1)
+      RETURN_FALSE_ON_FAIL(parser.ReadBits(&separate_colour_plane_flag, 1));
+    }
+    // bit_depth_luma_minus8: ue(v)
+    RETURN_FALSE_ON_FAIL(parser.ReadExponentialGolomb(&golomb_ignored));
+    // bit_depth_chroma_minus8: ue(v)
+    RETURN_FALSE_ON_FAIL(parser.ReadExponentialGolomb(&golomb_ignored));
+    // qpprime_y_zero_transform_bypass_flag: u(1)
+    RETURN_FALSE_ON_FAIL(parser.ConsumeBits(1));
+    // seq_scaling_matrix_present_flag: u(1)
+    uint32 seq_scaling_matrix_present_flag;
+    RETURN_FALSE_ON_FAIL(parser.ReadBits(&seq_scaling_matrix_present_flag, 1));
+    if (seq_scaling_matrix_present_flag) {
+      // seq_scaling_list_present_flags. Either 8 or 12, depending on
+      // chroma_format_idc.
+      uint32 seq_scaling_list_present_flags;
+      if (chroma_format_idc != 3) {
+        RETURN_FALSE_ON_FAIL(
+            parser.ReadBits(&seq_scaling_list_present_flags, 8));
+      } else {
+        RETURN_FALSE_ON_FAIL(
+            parser.ReadBits(&seq_scaling_list_present_flags, 12));
+      }
+      // We don't support reading the sequence scaling list, and we don't really
+      // see/use them in practice, so we'll just reject the full sps if we see
+      // any provided.
+      if (seq_scaling_list_present_flags > 0) {
+        LOG(LS_WARNING) << "SPS contains scaling lists, which are unsupported.";
+        return false;
+      }
+    }
+  }
+  // log2_max_frame_num_minus4: ue(v)
+  RETURN_FALSE_ON_FAIL(parser.ReadExponentialGolomb(&golomb_ignored));
+  // pic_order_cnt_type: ue(v)
+  uint32 pic_order_cnt_type;
+  RETURN_FALSE_ON_FAIL(parser.ReadExponentialGolomb(&pic_order_cnt_type));
+  if (pic_order_cnt_type == 0) {
+    // log2_max_pic_order_cnt_lsb_minus4: ue(v)
+    RETURN_FALSE_ON_FAIL(parser.ReadExponentialGolomb(&golomb_ignored));
+  } else if (pic_order_cnt_type == 1) {
+    // delta_pic_order_always_zero_flag: u(1)
+    RETURN_FALSE_ON_FAIL(parser.ConsumeBits(1));
+    // offset_for_non_ref_pic: se(v)
+    RETURN_FALSE_ON_FAIL(parser.ReadExponentialGolomb(&golomb_ignored));
+    // offset_for_top_to_bottom_field: se(v)
+    RETURN_FALSE_ON_FAIL(parser.ReadExponentialGolomb(&golomb_ignored));
+    // num_ref_frames_in_pic_order_cnt_cycle: ue(v)
+    uint32 num_ref_frames_in_pic_order_cnt_cycle;
+    RETURN_FALSE_ON_FAIL(
+        parser.ReadExponentialGolomb(&num_ref_frames_in_pic_order_cnt_cycle));
+    for (size_t i = 0; i < num_ref_frames_in_pic_order_cnt_cycle; ++i) {
+      // offset_for_ref_frame[i]: se(v)
+      RETURN_FALSE_ON_FAIL(parser.ReadExponentialGolomb(&golomb_ignored));
+    }
+  }
+  // max_num_ref_frames: ue(v)
+  RETURN_FALSE_ON_FAIL(parser.ReadExponentialGolomb(&golomb_ignored));
+  // gaps_in_frame_num_value_allowed_flag: u(1)
+  RETURN_FALSE_ON_FAIL(parser.ConsumeBits(1));
+  //
+  // IMPORTANT ONES! Now we're getting to resolution. First we read the pic
+  // width/height in macroblocks (16x16), which gives us the base resolution,
+  // and then we continue on until we hit the frame crop offsets, which are used
+  // to signify resolutions that aren't multiples of 16.
+  //
+  // pic_width_in_mbs_minus1: ue(v)
+  uint32 pic_width_in_mbs_minus1;
+  RETURN_FALSE_ON_FAIL(parser.ReadExponentialGolomb(&pic_width_in_mbs_minus1));
+  // pic_height_in_map_units_minus1: ue(v)
+  uint32 pic_height_in_map_units_minus1;
+  RETURN_FALSE_ON_FAIL(
+      parser.ReadExponentialGolomb(&pic_height_in_map_units_minus1));
+  // frame_mbs_only_flag: u(1)
+  uint32 frame_mbs_only_flag;
+  RETURN_FALSE_ON_FAIL(parser.ReadBits(&frame_mbs_only_flag, 1));
+  if (!frame_mbs_only_flag) {
+    // mb_adaptive_frame_field_flag: u(1)
+    RETURN_FALSE_ON_FAIL(parser.ConsumeBits(1));
+  }
+  // direct_8x8_inference_flag: u(1)
+  RETURN_FALSE_ON_FAIL(parser.ConsumeBits(1));
+  //
+  // MORE IMPORTANT ONES! Now we're at the frame crop information.
+  //
+  // frame_cropping_flag: u(1)
+  uint32 frame_cropping_flag;
+  uint32 frame_crop_left_offset = 0;
+  uint32 frame_crop_right_offset = 0;
+  uint32 frame_crop_top_offset = 0;
+  uint32 frame_crop_bottom_offset = 0;
+  RETURN_FALSE_ON_FAIL(parser.ReadBits(&frame_cropping_flag, 1));
+  if (frame_cropping_flag) {
+    // frame_crop_{left, right, top, bottom}_offset: ue(v)
+    RETURN_FALSE_ON_FAIL(parser.ReadExponentialGolomb(&frame_crop_left_offset));
+    RETURN_FALSE_ON_FAIL(
+        parser.ReadExponentialGolomb(&frame_crop_right_offset));
+    RETURN_FALSE_ON_FAIL(parser.ReadExponentialGolomb(&frame_crop_top_offset));
+    RETURN_FALSE_ON_FAIL(
+        parser.ReadExponentialGolomb(&frame_crop_bottom_offset));
+  }
+
+  // Far enough! We don't use the rest of the SPS.
+
+  // Start with the resolution determined by the pic_width/pic_height fields.
+  int width = 16 * (pic_width_in_mbs_minus1 + 1);
+  int height =
+      16 * (2 - frame_mbs_only_flag) * (pic_height_in_map_units_minus1 + 1);
+
+  // Figure out the crop units in pixels. That's based on the chroma format's
+  // sampling, which is indicated by chroma_format_idc.
+  if (separate_colour_plane_flag || chroma_format_idc == 0) {
+    frame_crop_bottom_offset *= (2 - frame_mbs_only_flag);
+    frame_crop_top_offset *= (2 - frame_mbs_only_flag);
+  } else if (!separate_colour_plane_flag && chroma_format_idc > 0) {
+    // Width multipliers for formats 1 (4:2:0) and 2 (4:2:2).
+    if (chroma_format_idc == 1 || chroma_format_idc == 2) {
+      frame_crop_left_offset *= 2;
+      frame_crop_right_offset *= 2;
+    }
+    // Height multipliers for format 1 (4:2:0).
+    if (chroma_format_idc == 1) {
+      frame_crop_top_offset *= 2;
+      frame_crop_bottom_offset *= 2;
+    }
+  }
+  // Subtract the crop for each dimension.
+  width -= (frame_crop_left_offset + frame_crop_right_offset);
+  height -= (frame_crop_top_offset + frame_crop_bottom_offset);
+
+  width_ = width;
+  height_ = height;
+  return true;
+}
+
+}  // namespace webrtc
diff --git a/webrtc/modules/rtp_rtcp/source/h264_sps_parser.h b/webrtc/modules/rtp_rtcp/source/h264_sps_parser.h
new file mode 100644
index 0000000..ab8cca3
--- /dev/null
+++ b/webrtc/modules/rtp_rtcp/source/h264_sps_parser.h
@@ -0,0 +1,37 @@
+/*
+ *  Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef WEBRTC_MODULES_RTP_RTCP_SOURCE_H264_SPS_PARSER_H_
+#define WEBRTC_MODULES_RTP_RTCP_SOURCE_H264_SPS_PARSER_H_
+
+#include "webrtc/base/common.h"
+
+namespace webrtc {
+
+// Parses a sequence parameter set (SPS) from an H264 NALU, reading only the
+// resolution. |sps| is stored unowned and is read again by Parse().
+class H264SpsParser {
+ public:
+  H264SpsParser(const uint8* sps, size_t byte_length);
+  // Parses the SPS to completion. Returns true if the SPS was parsed correctly.
+  bool Parse();
+  uint16 width() const { return width_; }    // 0 until Parse() succeeds.
+  uint16 height() const { return height_; }  // 0 until Parse() succeeds.
+
+ private:
+  const uint8* const sps_;
+  const size_t byte_length_;
+
+  uint16 width_;
+  uint16 height_;
+};
+
+}  // namespace webrtc
+#endif  // WEBRTC_MODULES_RTP_RTCP_SOURCE_H264_SPS_PARSER_H_
diff --git a/webrtc/modules/rtp_rtcp/source/h264_sps_parser_unittest.cc b/webrtc/modules/rtp_rtcp/source/h264_sps_parser_unittest.cc
new file mode 100644
index 0000000..c8d9754
--- /dev/null
+++ b/webrtc/modules/rtp_rtcp/source/h264_sps_parser_unittest.cc
@@ -0,0 +1,68 @@
+/*
+ *  Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "webrtc/modules/rtp_rtcp/source/h264_sps_parser.h"
+
+#include "testing/gtest/include/gtest/gtest.h"
+
+namespace webrtc {
+
+// Example SPS can be generated with ffmpeg. Here's an example set of commands,
+// runnable on OS X:
+// 1) Generate a video, from the camera:
+// ffmpeg -f avfoundation -i "0" -video_size 640x360 camera.mov
+//
+// 2) Scale the video to the desired size:
+// ffmpeg -i camera.mov -vf scale=640x360 scaled.mov
+//
+// 3) Get just the H.264 bitstream in AnnexB:
+// ffmpeg -i scaled.mov -vcodec copy -vbsf h264_mp4toannexb -an out.h264
+//
+// 4) Open out.h264 and find the SPS, generally everything between the first
+// two start codes (0 0 0 1 or 0 0 1). The first byte should be 0x67,
+// which should be stripped out before being passed to the parser.
+
+TEST(H264SpsParserTest, TestSampleSPSHdLandscape) {
+  // 1280x720 camera capture produced by ffmpeg on OS X. The SPS includes
+  // emulation bytes and carries no cropping.
+  const uint8 sps[] = {0x7A, 0x00, 0x1F, 0xBC, 0xD9, 0x40, 0x50, 0x05,
+                       0xBA, 0x10, 0x00, 0x00, 0x03, 0x00, 0xC0, 0x00,
+                       0x00, 0x2A, 0xE0, 0xF1, 0x83, 0x19, 0x60};
+  H264SpsParser sps_parser(sps, ARRAY_SIZE(sps));
+  EXPECT_TRUE(sps_parser.Parse());
+  EXPECT_EQ(1280u, sps_parser.width());
+  EXPECT_EQ(720u, sps_parser.height());
+}
+
+TEST(H264SpsParserTest, TestSampleSPSVgaLandscape) {
+  // 640x360 camera capture produced by ffmpeg on OS X. The SPS includes
+  // emulation bytes and cropping (360 isn't divisible by 16).
+  const uint8 sps[] = {0x7A, 0x00, 0x1E, 0xBC, 0xD9, 0x40, 0xA0, 0x2F,
+                       0xF8, 0x98, 0x40, 0x00, 0x00, 0x03, 0x01, 0x80,
+                       0x00, 0x00, 0x56, 0x83, 0xC5, 0x8B, 0x65, 0x80};
+  H264SpsParser sps_parser(sps, ARRAY_SIZE(sps));
+  EXPECT_TRUE(sps_parser.Parse());
+  EXPECT_EQ(640u, sps_parser.width());
+  EXPECT_EQ(360u, sps_parser.height());
+}
+
+TEST(H264SpsParserTest, TestSampleSPSWeirdResolution) {
+  // 200x400 camera capture produced by ffmpeg on OS X. Horizontal and
+  // vertical crop (neither dimension is divisible by 16).
+  const uint8 sps[] = {0x7A, 0x00, 0x0D, 0xBC, 0xD9, 0x43, 0x43, 0x3E,
+                       0x5E, 0x10, 0x00, 0x00, 0x03, 0x00, 0x60, 0x00,
+                       0x00, 0x15, 0xA0, 0xF1, 0x42, 0x99, 0x60};
+  H264SpsParser sps_parser(sps, ARRAY_SIZE(sps));
+  EXPECT_TRUE(sps_parser.Parse());
+  EXPECT_EQ(200u, sps_parser.width());
+  EXPECT_EQ(400u, sps_parser.height());
+}
+
+}  // namespace webrtc
diff --git a/webrtc/modules/rtp_rtcp/source/rtp_format_h264.cc b/webrtc/modules/rtp_rtcp/source/rtp_format_h264.cc
index e297b7c..ebd46b0 100644
--- a/webrtc/modules/rtp_rtcp/source/rtp_format_h264.cc
+++ b/webrtc/modules/rtp_rtcp/source/rtp_format_h264.cc
@@ -12,6 +12,7 @@
 
 #include "webrtc/modules/interface/module_common_types.h"
 #include "webrtc/modules/rtp_rtcp/source/byte_io.h"
+#include "webrtc/modules/rtp_rtcp/source/h264_sps_parser.h"
 #include "webrtc/modules/rtp_rtcp/source/rtp_format_h264.h"
 
 namespace webrtc {
@@ -30,6 +31,7 @@
 static const size_t kNalHeaderSize = 1;
 static const size_t kFuAHeaderSize = 2;
 static const size_t kLengthFieldSize = 2;
+static const size_t kStapAHeaderSize = kNalHeaderSize + kLengthFieldSize;
 
 // Bit masks for FU (A and B) indicators.
 enum NalDefs { kFBit = 0x80, kNriMask = 0x60, kTypeMask = 0x1F };
@@ -47,15 +49,28 @@
   RTPVideoHeaderH264* h264_header =
       &parsed_payload->type.Video.codecHeader.H264;
 
+  const uint8_t* nalu_start = payload_data + kNalHeaderSize;
+  size_t nalu_length = payload_data_length - kNalHeaderSize;
   uint8_t nal_type = payload_data[0] & kTypeMask;
   if (nal_type == kStapA) {
-    nal_type = payload_data[3] & kTypeMask;
+    // Skip the StapA header (StapA nal type + length).
+    nal_type = payload_data[kStapAHeaderSize] & kTypeMask;
+    nalu_start += kStapAHeaderSize;
+    nalu_length -= kStapAHeaderSize;
     h264_header->packetization_type = kH264StapA;
   } else {
     h264_header->packetization_type = kH264SingleNalu;
   }
   h264_header->nalu_type = nal_type;
 
+  // We can read resolution out of sps packets.
+  if (nal_type == kSps) {
+    H264SpsParser parser(nalu_start, nalu_length);
+    if (parser.Parse()) {
+      parsed_payload->type.Video.width = parser.width();
+      parsed_payload->type.Video.height = parser.height();
+    }
+  }
   switch (nal_type) {
     case kSps:
     case kPps:
diff --git a/webrtc/modules/rtp_rtcp/source/rtp_format_h264_unittest.cc b/webrtc/modules/rtp_rtcp/source/rtp_format_h264_unittest.cc
index 5ccd853..66a19dd 100644
--- a/webrtc/modules/rtp_rtcp/source/rtp_format_h264_unittest.cc
+++ b/webrtc/modules/rtp_rtcp/source/rtp_format_h264_unittest.cc
@@ -411,6 +411,23 @@
   EXPECT_EQ(kIdr, payload.type.Video.codecHeader.H264.nalu_type);
 }
 
+TEST_F(RtpDepacketizerH264Test, TestSingleNaluSpsWithResolution) {
+  uint8_t sps_packet[] = {kSps, 0x7A, 0x00, 0x1F, 0xBC, 0xD9, 0x40, 0x50,
+                          0x05, 0xBA, 0x10, 0x00, 0x00, 0x03, 0x00, 0xC0,
+                          0x00, 0x00, 0x2A, 0xE0, 0xF1, 0x83, 0x19, 0x60};
+  RtpDepacketizer::ParsedPayload parsed;
+
+  ASSERT_TRUE(depacketizer_->Parse(&parsed, sps_packet, sizeof(sps_packet)));
+  ExpectPacket(&parsed, sps_packet, sizeof(sps_packet));
+  EXPECT_EQ(kVideoFrameKey, parsed.frame_type);
+  EXPECT_EQ(kRtpVideoH264, parsed.type.Video.codec);
+  EXPECT_TRUE(parsed.type.Video.isFirstPacket);
+  EXPECT_EQ(kH264SingleNalu,
+            parsed.type.Video.codecHeader.H264.packetization_type);
+  EXPECT_EQ(1280u, parsed.type.Video.width);
+  EXPECT_EQ(720u, parsed.type.Video.height);
+}
+
 TEST_F(RtpDepacketizerH264Test, TestStapAKey) {
   uint8_t packet[16] = {kStapA,  // F=0, NRI=0, Type=24.
                         // Length, nal header, payload.
@@ -429,6 +446,26 @@
   EXPECT_EQ(kSps, payload.type.Video.codecHeader.H264.nalu_type);
 }
 
+TEST_F(RtpDepacketizerH264Test, TestStapANaluSpsWithResolution) {
+  uint8_t stap_a[] = {kStapA,  // F=0, NRI=0, Type=24.
+                      // Per NALU: length (2 bytes), nal header, payload.
+                      0, 24, kSps, 0x7A, 0x00, 0x1F, 0xBC, 0xD9, 0x40, 0x50,
+                      0x05, 0xBA, 0x10, 0x00, 0x00, 0x03, 0x00, 0xC0, 0x00,
+                      0x00, 0x2A, 0xE0, 0xF1, 0x83, 0x19, 0x60,
+                      0, 0x03, kIdr, 0xFF, 0x00,
+                      0, 0x04, kIdr, 0xFF, 0x00, 0x11};
+  RtpDepacketizer::ParsedPayload parsed;
+
+  ASSERT_TRUE(depacketizer_->Parse(&parsed, stap_a, sizeof(stap_a)));
+  ExpectPacket(&parsed, stap_a, sizeof(stap_a));
+  EXPECT_EQ(kVideoFrameKey, parsed.frame_type);
+  EXPECT_EQ(kRtpVideoH264, parsed.type.Video.codec);
+  EXPECT_TRUE(parsed.type.Video.isFirstPacket);
+  EXPECT_EQ(kH264StapA, parsed.type.Video.codecHeader.H264.packetization_type);
+  EXPECT_EQ(1280u, parsed.type.Video.width);
+  EXPECT_EQ(720u, parsed.type.Video.height);
+}
+
 TEST_F(RtpDepacketizerH264Test, TestStapADelta) {
   uint8_t packet[16] = {kStapA,  // F=0, NRI=0, Type=24.
                         // Length, nal header, payload.