Don't use the bitrate of the xing header

Use the one for the mp3 frame that follows instead.

Bug: 19622311
Bug: https://code.google.com/p/android/issues/detail?id=158627

Change-Id: I7f1ad702cad06aa29100a66f9796ca920aaa0592
diff --git a/media/libstagefright/MP3Extractor.cpp b/media/libstagefright/MP3Extractor.cpp
index 55e3c19..2e54e8c 100644
--- a/media/libstagefright/MP3Extractor.cpp
+++ b/media/libstagefright/MP3Extractor.cpp
@@ -282,6 +282,41 @@
 
     mFirstFramePos = pos;
     mFixedHeader = header;
+    mMeta = new MetaData;
+    sp<XINGSeeker> seeker = XINGSeeker::CreateFromSource(mDataSource, mFirstFramePos);
+
+    if (seeker == NULL) {
+        mSeeker = VBRISeeker::CreateFromSource(mDataSource, post_id3_pos);
+    } else {
+        mSeeker = seeker;
+        int encd = seeker->getEncoderDelay();
+        int encp = seeker->getEncoderPadding();
+        if (encd != 0 || encp != 0) {
+            mMeta->setInt32(kKeyEncoderDelay, encd);
+            mMeta->setInt32(kKeyEncoderPadding, encp);
+        }
+    }
+
+    if (mSeeker != NULL) {
+        // While it is safe to send the XING/VBRI frame to the decoder, this will
+        // result in an extra 1152 samples being output. In addition, the bitrate
+        // of the Xing header might not match the rest of the file, which could
+        // lead to problems when seeking. The real first frame to decode is after
+        // the XING/VBRI frame, so skip there.
+        size_t frame_size;
+        int sample_rate;
+        int num_channels;
+        int bitrate;
+        GetMPEGAudioFrameSize(
+                header, &frame_size, &sample_rate, &num_channels, &bitrate);
+        pos += frame_size;
+        if (!Resync(mDataSource, 0, &pos, &post_id3_pos, &header)) {
+            // mInitCheck will remain NO_INIT
+            return;
+        }
+        mFirstFramePos = pos;
+        mFixedHeader = header;
+    }
 
     size_t frame_size;
     int sample_rate;
@@ -292,8 +327,6 @@
 
     unsigned layer = 4 - ((header >> 17) & 3);
 
-    mMeta = new MetaData;
-
     switch (layer) {
         case 1:
             mMeta->setCString(kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_MPEG_LAYER_I);
@@ -312,27 +345,6 @@
     mMeta->setInt32(kKeyBitRate, bitrate * 1000);
     mMeta->setInt32(kKeyChannelCount, num_channels);
 
-    sp<XINGSeeker> seeker = XINGSeeker::CreateFromSource(mDataSource, mFirstFramePos);
-
-    if (seeker == NULL) {
-        mSeeker = VBRISeeker::CreateFromSource(mDataSource, post_id3_pos);
-    } else {
-        mSeeker = seeker;
-        int encd = seeker->getEncoderDelay();
-        int encp = seeker->getEncoderPadding();
-        if (encd != 0 || encp != 0) {
-            mMeta->setInt32(kKeyEncoderDelay, encd);
-            mMeta->setInt32(kKeyEncoderPadding, encp);
-        }
-    }
-
-    if (mSeeker != NULL) {
-        // While it is safe to send the XING/VBRI frame to the decoder, this will
-        // result in an extra 1152 samples being output. The real first frame to
-        // decode is after the XING/VBRI frame, so skip there.
-        mFirstFramePos += frame_size;
-    }
-
     int64_t durationUs;
 
     if (mSeeker == NULL || !mSeeker->getDuration(&durationUs)) {