/*
* Copyright (C) 2009 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
//#define LOG_NDEBUG 0
#define LOG_TAG "MPEG4Extractor"
#include <ctype.h>
#include <inttypes.h>
#include <memory>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <log/log.h>
#include <utils/Log.h>
#include "AC4Parser.h"
#include "MPEG4Extractor.h"
#include "SampleTable.h"
#include "ItemTable.h"
#include "include/ESDS.h"
#include <media/DataSourceBase.h>
#include <media/ExtractorUtils.h>
#include <media/stagefright/foundation/ABitReader.h>
#include <media/stagefright/foundation/ABuffer.h>
#include <media/stagefright/foundation/ADebug.h>
#include <media/stagefright/foundation/AMessage.h>
#include <media/stagefright/foundation/AudioPresentationInfo.h>
#include <media/stagefright/foundation/AUtils.h>
#include <media/stagefright/foundation/ByteUtils.h>
#include <media/stagefright/foundation/ColorUtils.h>
#include <media/stagefright/foundation/avc_utils.h>
#include <media/stagefright/foundation/hexdump.h>
#include <media/stagefright/foundation/OpusHeader.h>
#include <media/stagefright/MediaBufferGroup.h>
#include <media/stagefright/MediaDefs.h>
#include <media/stagefright/MetaDataBase.h>
#include <utils/String8.h>
#include <byteswap.h>
#include "include/ID3.h"
#ifndef UINT32_MAX
#define UINT32_MAX (4294967295U)
#endif
#define ALAC_SPECIFIC_INFO_SIZE (36)
namespace android {
enum {
// max track header chunk to return
kMaxTrackHeaderSize = 32,
// maximum size of an atom. Some atoms can be bigger according to the spec,
// but we only allow up to this size.
kMaxAtomSize = 64 * 1024 * 1024,
};
class MPEG4Source : public MediaTrackHelper {
static const size_t kMaxPcmFrameSize = 8192;
public:
// Caller retains ownership of both "dataSource" and "sampleTable".
MPEG4Source(AMediaFormat *format,
DataSourceHelper *dataSource,
int32_t timeScale,
const sp<SampleTable> &sampleTable,
Vector<SidxEntry> &sidx,
const Trex *trex,
off64_t firstMoofOffset,
const sp<ItemTable> &itemTable,
uint64_t elstShiftStartTicks);
virtual status_t init();
virtual media_status_t start();
virtual media_status_t stop();
virtual media_status_t getFormat(AMediaFormat *);
virtual media_status_t read(MediaBufferHelper **buffer, const ReadOptions *options = NULL);
bool supportsNonBlockingRead() override { return true; }
virtual media_status_t fragmentedRead(
MediaBufferHelper **buffer, const ReadOptions *options = NULL);
virtual ~MPEG4Source();
private:
Mutex mLock;
AMediaFormat *mFormat;
DataSourceHelper *mDataSource;
int32_t mTimescale;
sp<SampleTable> mSampleTable;
uint32_t mCurrentSampleIndex;
uint32_t mCurrentFragmentIndex;
Vector<SidxEntry> &mSegments;
const Trex *mTrex;
off64_t mFirstMoofOffset;
off64_t mCurrentMoofOffset;
off64_t mNextMoofOffset;
uint32_t mCurrentTime; // in media timescale ticks
int32_t mLastParsedTrackId;
int32_t mTrackId;
int32_t mCryptoMode; // passed in from extractor
int32_t mDefaultIVSize; // passed in from extractor
uint8_t mCryptoKey[16]; // passed in from extractor
int32_t mDefaultEncryptedByteBlock;
int32_t mDefaultSkipByteBlock;
uint32_t mCurrentAuxInfoType;
uint32_t mCurrentAuxInfoTypeParameter;
int32_t mCurrentDefaultSampleInfoSize;
uint32_t mCurrentSampleInfoCount;
uint32_t mCurrentSampleInfoAllocSize;
uint8_t* mCurrentSampleInfoSizes;
uint32_t mCurrentSampleInfoOffsetCount;
uint32_t mCurrentSampleInfoOffsetsAllocSize;
uint64_t* mCurrentSampleInfoOffsets;
bool mIsAVC;
bool mIsHEVC;
bool mIsAC4;
bool mIsPcm;
size_t mNALLengthSize;
bool mStarted;
MediaBufferHelper *mBuffer;
uint8_t *mSrcBuffer;
bool mIsHeif;
bool mIsAudio;
sp<ItemTable> mItemTable;
// Start offset from composition time to presentation time.
// Support shift only for video tracks through mElstShiftStartTicks for now.
uint64_t mElstShiftStartTicks;
size_t parseNALSize(const uint8_t *data) const;
status_t parseChunk(off64_t *offset);
status_t parseTrackFragmentHeader(off64_t offset, off64_t size);
status_t parseTrackFragmentRun(off64_t offset, off64_t size);
status_t parseSampleAuxiliaryInformationSizes(off64_t offset, off64_t size);
status_t parseSampleAuxiliaryInformationOffsets(off64_t offset, off64_t size);
status_t parseClearEncryptedSizes(off64_t offset, bool isSubsampleEncryption, uint32_t flags);
status_t parseSampleEncryption(off64_t offset);
// returns -1 for invalid layer ID
int32_t parseHEVCLayerId(const uint8_t *data, size_t size);
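// Parsed fields of a tfhd (track fragment header) box; the flag bit values below
// mirror the tf_flags defined in ISO/IEC 14496-12.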
struct TrackFragmentHeaderInfo {
enum Flags {
kBaseDataOffsetPresent = 0x01,
kSampleDescriptionIndexPresent = 0x02,
kDefaultSampleDurationPresent = 0x08,
kDefaultSampleSizePresent = 0x10,
kDefaultSampleFlagsPresent = 0x20,
kDurationIsEmpty = 0x10000,
};
uint32_t mTrackID;
uint32_t mFlags;
uint64_t mBaseDataOffset;
uint32_t mSampleDescriptionIndex;
uint32_t mDefaultSampleDuration;
uint32_t mDefaultSampleSize;
uint32_t mDefaultSampleFlags;
uint64_t mDataOffset;
};
TrackFragmentHeaderInfo mTrackFragmentHeaderInfo;
struct Sample {
off64_t offset;
size_t size;
uint32_t duration;
int32_t compositionOffset;
uint8_t iv[16];
Vector<size_t> clearsizes;
Vector<size_t> encryptedsizes;
};
Vector<Sample> mCurrentSamples;
MPEG4Source(const MPEG4Source &);
MPEG4Source &operator=(const MPEG4Source &);
};
// This custom data source wraps an existing one and satisfies requests
// falling entirely within a cached range from the cache while forwarding
// all remaining requests to the wrapped datasource.
// This is used to cache the full sampletable metadata for a single track,
// possibly wrapping the source multiple times to cover all tracks; i.e.
// each CachedRangedDataSource caches the sampletable metadata for a single track.
class CachedRangedDataSource : public DataSourceHelper {
public:
explicit CachedRangedDataSource(DataSourceHelper *source);
virtual ~CachedRangedDataSource();
ssize_t readAt(off64_t offset, void *data, size_t size) override;
status_t getSize(off64_t *size) override;
uint32_t flags() override;
status_t setCachedRange(off64_t offset, size_t size, bool assumeSourceOwnershipOnSuccess);
private:
Mutex mLock;
DataSourceHelper *mSource;
bool mOwnsDataSource;
off64_t mCachedOffset;
size_t mCachedSize;
uint8_t *mCache;
void clearCache();
CachedRangedDataSource(const CachedRangedDataSource &);
CachedRangedDataSource &operator=(const CachedRangedDataSource &);
};
CachedRangedDataSource::CachedRangedDataSource(DataSourceHelper *source)
: DataSourceHelper(source),
mSource(source),
mOwnsDataSource(false),
mCachedOffset(0),
mCachedSize(0),
mCache(NULL) {
}
CachedRangedDataSource::~CachedRangedDataSource() {
clearCache();
if (mOwnsDataSource) {
delete mSource;
}
}
void CachedRangedDataSource::clearCache() {
if (mCache) {
free(mCache);
mCache = NULL;
}
mCachedOffset = 0;
mCachedSize = 0;
}
ssize_t CachedRangedDataSource::readAt(off64_t offset, void *data, size_t size) {
Mutex::Autolock autoLock(mLock);
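// Serve the request from the cache only when it falls entirely within the
// cached range; otherwise forward it to the wrapped source.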
if (isInRange(mCachedOffset, mCachedSize, offset, size)) {
memcpy(data, &mCache[offset - mCachedOffset], size);
return size;
}
return mSource->readAt(offset, data, size);
}
status_t CachedRangedDataSource::getSize(off64_t *size) {
return mSource->getSize(size);
}
uint32_t CachedRangedDataSource::flags() {
return mSource->flags();
}
status_t CachedRangedDataSource::setCachedRange(off64_t offset,
size_t size,
bool assumeSourceOwnershipOnSuccess) {
Mutex::Autolock autoLock(mLock);
clearCache();
mCache = (uint8_t *)malloc(size);
if (mCache == NULL) {
return -ENOMEM;
}
mCachedOffset = offset;
mCachedSize = size;
ssize_t err = mSource->readAt(mCachedOffset, mCache, mCachedSize);
if (err < (ssize_t)size) {
clearCache();
return ERROR_IO;
}
mOwnsDataSource = assumeSourceOwnershipOnSuccess;
return OK;
}
////////////////////////////////////////////////////////////////////////////////
static const bool kUseHexDump = false;
static const char *FourCC2MIME(uint32_t fourcc) {
switch (fourcc) {
case FOURCC("mp4a"):
return MEDIA_MIMETYPE_AUDIO_AAC;
case FOURCC("samr"):
return MEDIA_MIMETYPE_AUDIO_AMR_NB;
case FOURCC("sawb"):
return MEDIA_MIMETYPE_AUDIO_AMR_WB;
case FOURCC("ec-3"):
return MEDIA_MIMETYPE_AUDIO_EAC3;
case FOURCC("mp4v"):
return MEDIA_MIMETYPE_VIDEO_MPEG4;
case FOURCC("s263"):
case FOURCC("h263"):
case FOURCC("H263"):
return MEDIA_MIMETYPE_VIDEO_H263;
case FOURCC("avc1"):
return MEDIA_MIMETYPE_VIDEO_AVC;
case FOURCC("hvc1"):
case FOURCC("hev1"):
return MEDIA_MIMETYPE_VIDEO_HEVC;
case FOURCC("ac-4"):
return MEDIA_MIMETYPE_AUDIO_AC4;
case FOURCC("Opus"):
return MEDIA_MIMETYPE_AUDIO_OPUS;
case FOURCC("twos"):
case FOURCC("sowt"):
return MEDIA_MIMETYPE_AUDIO_RAW;
case FOURCC("alac"):
return MEDIA_MIMETYPE_AUDIO_ALAC;
case FOURCC("fLaC"):
return MEDIA_MIMETYPE_AUDIO_FLAC;
case FOURCC("av01"):
return MEDIA_MIMETYPE_VIDEO_AV1;
case FOURCC(".mp3"):
case 0x6D730055: // "ms U" mp3 audio
return MEDIA_MIMETYPE_AUDIO_MPEG;
default:
ALOGW("Unknown fourcc: %c%c%c%c",
(fourcc >> 24) & 0xff,
(fourcc >> 16) & 0xff,
(fourcc >> 8) & 0xff,
fourcc & 0xff
);
return "application/octet-stream";
}
}
static bool AdjustChannelsAndRate(uint32_t fourcc, uint32_t *channels, uint32_t *rate) {
if (!strcasecmp(MEDIA_MIMETYPE_AUDIO_AMR_NB, FourCC2MIME(fourcc))) {
// AMR NB audio is always mono, 8kHz
*channels = 1;
*rate = 8000;
return true;
} else if (!strcasecmp(MEDIA_MIMETYPE_AUDIO_AMR_WB, FourCC2MIME(fourcc))) {
// AMR WB audio is always mono, 16kHz
*channels = 1;
*rate = 16000;
return true;
}
return false;
}
MPEG4Extractor::MPEG4Extractor(DataSourceHelper *source, const char *mime)
: mMoofOffset(0),
mMoofFound(false),
mMdatFound(false),
mDataSource(source),
mInitCheck(NO_INIT),
mHeaderTimescale(0),
mIsQT(false),
mIsHeif(false),
mHasMoovBox(false),
mPreferHeif(mime != NULL && !strcasecmp(mime, MEDIA_MIMETYPE_CONTAINER_HEIF)),
mFirstTrack(NULL),
mLastTrack(NULL) {
ALOGV("mime=%s, mPreferHeif=%d", mime, mPreferHeif);
mFileMetaData = AMediaFormat_new();
}
MPEG4Extractor::~MPEG4Extractor() {
Track *track = mFirstTrack;
while (track) {
Track *next = track->next;
delete track;
track = next;
}
mFirstTrack = mLastTrack = NULL;
for (size_t i = 0; i < mPssh.size(); i++) {
delete [] mPssh[i].data;
}
mPssh.clear();
delete mDataSource;
AMediaFormat_delete(mFileMetaData);
}
uint32_t MPEG4Extractor::flags() const {
return CAN_PAUSE |
((mMoofOffset == 0 || mSidxEntries.size() != 0) ?
(CAN_SEEK_BACKWARD | CAN_SEEK_FORWARD | CAN_SEEK) : 0);
}
media_status_t MPEG4Extractor::getMetaData(AMediaFormat *meta) {
status_t err;
if ((err = readMetaData()) != OK) {
return AMEDIA_ERROR_UNKNOWN;
}
AMediaFormat_copy(meta, mFileMetaData);
return AMEDIA_OK;
}
size_t MPEG4Extractor::countTracks() {
status_t err;
if ((err = readMetaData()) != OK) {
ALOGV("MPEG4Extractor::countTracks: no tracks");
return 0;
}
size_t n = 0;
Track *track = mFirstTrack;
while (track) {
++n;
track = track->next;
}
ALOGV("MPEG4Extractor::countTracks: %zu tracks", n);
return n;
}
media_status_t MPEG4Extractor::getTrackMetaData(
AMediaFormat *meta,
size_t index, uint32_t flags) {
status_t err;
if ((err = readMetaData()) != OK) {
return AMEDIA_ERROR_UNKNOWN;
}
Track *track = mFirstTrack;
while (index > 0) {
if (track == NULL) {
return AMEDIA_ERROR_UNKNOWN;
}
track = track->next;
--index;
}
if (track == NULL) {
return AMEDIA_ERROR_UNKNOWN;
}
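// Immediately-invoked lambda: the edit-list (elst) processing below can bail
// out with 'return' without leaving getTrackMetaData() itself.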
[=] {
int64_t duration;
int32_t samplerate;
// Only for audio track.
if (track->has_elst && mHeaderTimescale != 0 &&
AMediaFormat_getInt64(track->meta, AMEDIAFORMAT_KEY_DURATION, &duration) &&
AMediaFormat_getInt32(track->meta, AMEDIAFORMAT_KEY_SAMPLE_RATE, &samplerate)) {
// Elst has to be processed only the first time this function is called.
track->has_elst = false;
if (track->elst_segment_duration > INT64_MAX) {
return;
}
int64_t segment_duration = track->elst_segment_duration;
int64_t media_time = track->elst_media_time;
int64_t halfscale = track->timescale / 2;
ALOGV("segment_duration = %" PRId64 ", media_time = %" PRId64
", halfscale = %" PRId64 ", mdhd_timescale = %d, track_timescale = %u",
segment_duration, media_time,
halfscale, mHeaderTimescale, track->timescale);
if ((uint32_t)samplerate != track->timescale){
ALOGV("samplerate:%" PRId32 ", track->timescale and samplerate are different!",
samplerate);
}
// Both delay and paddingsamples have to be set in order for either to be
// effective in the lower layers.
int64_t delay = 0;
if (media_time > 0) { // Gapless playback
// delay = ((media_time * samplerate) + halfscale) / track->timescale;
if (__builtin_mul_overflow(media_time, samplerate, &delay) ||
__builtin_add_overflow(delay, halfscale, &delay) ||
(delay /= track->timescale, false) ||
delay > INT32_MAX ||
delay < INT32_MIN) {
ALOGW("ignoring edit list with bogus values");
return;
}
}
ALOGV("delay = %" PRId64, delay);
AMediaFormat_setInt32(track->meta, AMEDIAFORMAT_KEY_ENCODER_DELAY, delay);
int64_t paddingsamples = 0;
if (segment_duration > 0) {
int64_t scaled_duration;
// scaled_duration = duration * mHeaderTimescale;
if (__builtin_mul_overflow(duration, mHeaderTimescale, &scaled_duration)) {
return;
}
ALOGV("scaled_duration = %" PRId64, scaled_duration);
int64_t segment_end;
int64_t padding;
int64_t segment_duration_e6;
int64_t media_time_scaled_e6;
int64_t media_time_scaled;
// padding = scaled_duration - ((segment_duration * 1000000) +
// ((media_time * mHeaderTimescale * 1000000)/track->timescale) )
// segment_duration is based on timescale in movie header box(mdhd)
// media_time is based on timescale track header/media timescale
if (__builtin_mul_overflow(segment_duration, 1000000, &segment_duration_e6) ||
__builtin_mul_overflow(media_time, mHeaderTimescale, &media_time_scaled) ||
__builtin_mul_overflow(media_time_scaled, 1000000, &media_time_scaled_e6)) {
return;
}
media_time_scaled_e6 /= track->timescale;
if (__builtin_add_overflow(segment_duration_e6, media_time_scaled_e6, &segment_end)
|| __builtin_sub_overflow(scaled_duration, segment_end, &padding)) {
return;
}
ALOGV("segment_end = %" PRId64 ", padding = %" PRId64, segment_end, padding);
// track duration from media header (which is what AMEDIAFORMAT_KEY_DURATION is)
// might be slightly shorter than the segment duration, which would make the
// padding negative. Clamp to zero.
if (padding > 0) {
int64_t halfscale_mht = mHeaderTimescale / 2;
int64_t halfscale_e6;
int64_t timescale_e6;
// paddingsamples = ((padding * samplerate) + (halfscale_mht * 1000000))
// / (mHeaderTimescale * 1000000);
if (__builtin_mul_overflow(padding, samplerate, &paddingsamples) ||
__builtin_mul_overflow(halfscale_mht, 1000000, &halfscale_e6) ||
__builtin_mul_overflow(mHeaderTimescale, 1000000, &timescale_e6) ||
__builtin_add_overflow(paddingsamples, halfscale_e6, &paddingsamples) ||
(paddingsamples /= timescale_e6, false) ||
paddingsamples > INT32_MAX) {
return;
}
}
}
ALOGV("paddingsamples = %" PRId64, paddingsamples);
AMediaFormat_setInt32(track->meta, AMEDIAFORMAT_KEY_ENCODER_PADDING, paddingsamples);
}
}();
if ((flags & kIncludeExtensiveMetaData)
&& !track->includes_expensive_metadata) {
track->includes_expensive_metadata = true;
const char *mime;
CHECK(AMediaFormat_getString(track->meta, AMEDIAFORMAT_KEY_MIME, &mime));
if (!strncasecmp("video/", mime, 6)) {
// MPEG2 tracks do not provide CSD, so read the stream header
if (!strcmp(mime, MEDIA_MIMETYPE_VIDEO_MPEG2)) {
off64_t offset;
size_t size;
if (track->sampleTable->getMetaDataForSample(
0 /* sampleIndex */, &offset, &size, NULL /* sampleTime */) == OK) {
if (size > kMaxTrackHeaderSize) {
size = kMaxTrackHeaderSize;
}
uint8_t header[kMaxTrackHeaderSize];
if (mDataSource->readAt(offset, &header, size) == (ssize_t)size) {
AMediaFormat_setBuffer(track->meta,
AMEDIAFORMAT_KEY_MPEG2_STREAM_HEADER, header, size);
}
}
}
if (mMoofOffset > 0) {
int64_t duration;
if (AMediaFormat_getInt64(track->meta,
AMEDIAFORMAT_KEY_DURATION, &duration)) {
// nothing fancy, just pick a frame near 1/4th of the duration
AMediaFormat_setInt64(track->meta,
AMEDIAFORMAT_KEY_THUMBNAIL_TIME, duration / 4);
}
} else {
uint32_t sampleIndex;
uint64_t sampleTime;
if (track->timescale != 0 &&
track->sampleTable->findThumbnailSample(&sampleIndex) == OK
&& track->sampleTable->getMetaDataForSample(
sampleIndex, NULL /* offset */, NULL /* size */,
&sampleTime) == OK) {
AMediaFormat_setInt64(track->meta,
AMEDIAFORMAT_KEY_THUMBNAIL_TIME,
((int64_t)sampleTime * 1000000) / track->timescale);
}
}
}
}
AMediaFormat_copy(meta, track->meta);
return AMEDIA_OK;
}
status_t MPEG4Extractor::readMetaData() {
if (mInitCheck != NO_INIT) {
return mInitCheck;
}
off64_t offset = 0;
status_t err;
bool sawMoovOrSidx = false;
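// Keep parsing top-level boxes until moov has been seen along with mdat or
// moof, or, for HEIF, until the item table is valid.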
while (!((mHasMoovBox && sawMoovOrSidx && (mMdatFound || mMoofFound)) ||
(mIsHeif && (mPreferHeif || !mHasMoovBox) &&
(mItemTable != NULL) && mItemTable->isValid()))) {
off64_t orig_offset = offset;
err = parseChunk(&offset, 0);
if (err != OK && err != UNKNOWN_ERROR) {
break;
} else if (offset <= orig_offset) {
// only continue parsing if the offset was advanced,
// otherwise we might end up in an infinite loop
ALOGE("did not advance: %lld->%lld", (long long)orig_offset, (long long)offset);
err = ERROR_MALFORMED;
break;
} else if (err == UNKNOWN_ERROR) {
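// parseChunk() returns the dummy error UNKNOWN_ERROR once a complete moov
// has been parsed (see the moov case in parseChunk), so treat it as "seen".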
sawMoovOrSidx = true;
}
}
if (mIsHeif && (mItemTable != NULL) && (mItemTable->countImages() > 0)) {
off64_t exifOffset;
size_t exifSize;
if (mItemTable->getExifOffsetAndSize(&exifOffset, &exifSize) == OK) {
AMediaFormat_setInt64(mFileMetaData,
AMEDIAFORMAT_KEY_EXIF_OFFSET, (int64_t)exifOffset);
AMediaFormat_setInt64(mFileMetaData,
AMEDIAFORMAT_KEY_EXIF_SIZE, (int64_t)exifSize);
}
for (uint32_t imageIndex = 0;
imageIndex < mItemTable->countImages(); imageIndex++) {
AMediaFormat *meta = mItemTable->getImageMeta(imageIndex);
if (meta == NULL) {
ALOGE("heif image %u has no meta!", imageIndex);
continue;
}
// Some heif files advertise image sequence brands (e.g. 'hevc') in
// ftyp box, but don't have any valid tracks in them. Instead of
// reporting the entire file as malformed, we override the error
// to allow still images to be extracted.
if (err != OK) {
ALOGW("Extracting still images only");
err = OK;
}
mInitCheck = OK;
ALOGV("adding HEIF image track %u", imageIndex);
Track *track = new Track;
if (mLastTrack != NULL) {
mLastTrack->next = track;
} else {
mFirstTrack = track;
}
mLastTrack = track;
track->meta = meta;
AMediaFormat_setInt32(track->meta, AMEDIAFORMAT_KEY_TRACK_ID, imageIndex);
track->timescale = 1000000;
}
}
if (mInitCheck == OK) {
if (findTrackByMimePrefix("video/") != NULL) {
AMediaFormat_setString(mFileMetaData,
AMEDIAFORMAT_KEY_MIME, MEDIA_MIMETYPE_CONTAINER_MPEG4);
} else if (findTrackByMimePrefix("audio/") != NULL) {
AMediaFormat_setString(mFileMetaData,
AMEDIAFORMAT_KEY_MIME, "audio/mp4");
} else if (findTrackByMimePrefix(
MEDIA_MIMETYPE_IMAGE_ANDROID_HEIC) != NULL) {
AMediaFormat_setString(mFileMetaData,
AMEDIAFORMAT_KEY_MIME, MEDIA_MIMETYPE_CONTAINER_HEIF);
} else {
AMediaFormat_setString(mFileMetaData,
AMEDIAFORMAT_KEY_MIME, "application/octet-stream");
}
} else {
mInitCheck = err;
}
CHECK_NE(err, (status_t)NO_INIT);
// copy pssh data into file metadata
uint64_t psshsize = 0;
for (size_t i = 0; i < mPssh.size(); i++) {
psshsize += 20 + mPssh[i].datalen;
}
if (psshsize > 0 && psshsize <= UINT32_MAX) {
char *buf = (char*)malloc(psshsize);
if (!buf) {
ALOGE("b/28471206");
return NO_MEMORY;
}
char *ptr = buf;
for (size_t i = 0; i < mPssh.size(); i++) {
memcpy(ptr, mPssh[i].uuid, 20); // uuid + length
memcpy(ptr + 20, mPssh[i].data, mPssh[i].datalen);
ptr += (20 + mPssh[i].datalen);
}
AMediaFormat_setBuffer(mFileMetaData, AMEDIAFORMAT_KEY_PSSH, buf, psshsize);
free(buf);
}
return mInitCheck;
}
struct PathAdder {
PathAdder(Vector<uint32_t> *path, uint32_t chunkType)
: mPath(path) {
mPath->push(chunkType);
}
~PathAdder() {
mPath->pop();
}
private:
Vector<uint32_t> *mPath;
PathAdder(const PathAdder &);
PathAdder &operator=(const PathAdder &);
};
static bool underMetaDataPath(const Vector<uint32_t> &path) {
return path.size() >= 5
&& path[0] == FOURCC("moov")
&& path[1] == FOURCC("udta")
&& path[2] == FOURCC("meta")
&& path[3] == FOURCC("ilst");
}
static bool underQTMetaPath(const Vector<uint32_t> &path, int32_t depth) {
return path.size() >= 2
&& path[0] == FOURCC("moov")
&& path[1] == FOURCC("meta")
&& (depth == 2
|| (depth == 3
&& (path[2] == FOURCC("hdlr")
|| path[2] == FOURCC("ilst")
|| path[2] == FOURCC("keys"))));
}
// Given a time in seconds since Jan 1 1904, produce a human-readable string.
static bool convertTimeToDate(int64_t time_1904, String8 *s) {
// delta between mpeg4 time and unix epoch time
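// (66 years from 1904 to 1970, 17 of which contain a leap day)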
static const int64_t delta = (((66 * 365 + 17) * 24) * 3600);
if (time_1904 < INT64_MIN + delta) {
return false;
}
time_t time_1970 = time_1904 - delta;
char tmp[32];
struct tm* tm = gmtime(&time_1970);
if (tm != NULL &&
strftime(tmp, sizeof(tmp), "%Y%m%dT%H%M%S.000Z", tm) > 0) {
s->setTo(tmp);
return true;
}
return false;
}
status_t MPEG4Extractor::parseChunk(off64_t *offset, int depth) {
ALOGV("entering parseChunk %lld/%d", (long long)*offset, depth);
if (*offset < 0) {
ALOGE("b/23540914");
return ERROR_MALFORMED;
}
if (depth > 100) {
ALOGE("b/27456299");
return ERROR_MALFORMED;
}
uint32_t hdr[2];
if (mDataSource->readAt(*offset, hdr, 8) < 8) {
return ERROR_IO;
}
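// ISO/IEC 14496-12 box header: a 32-bit size followed by a 32-bit type, both
// big-endian. A size of 1 means a 64-bit "largesize" follows; a size of 0
// means the box extends to the end of the file.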
uint64_t chunk_size = ntohl(hdr[0]);
int32_t chunk_type = ntohl(hdr[1]);
off64_t data_offset = *offset + 8;
if (chunk_size == 1) {
if (mDataSource->readAt(*offset + 8, &chunk_size, 8) < 8) {
return ERROR_IO;
}
chunk_size = ntoh64(chunk_size);
data_offset += 8;
if (chunk_size < 16) {
// The smallest valid chunk is 16 bytes long in this case.
return ERROR_MALFORMED;
}
} else if (chunk_size == 0) {
if (depth == 0) {
// atom extends to end of file
off64_t sourceSize;
if (mDataSource->getSize(&sourceSize) == OK) {
chunk_size = (sourceSize - *offset);
} else {
// XXX could we just pick a "sufficiently large" value here?
ALOGE("atom size is 0, and data source has no size");
return ERROR_MALFORMED;
}
} else {
// not allowed for non-toplevel atoms, skip it
*offset += 4;
return OK;
}
} else if (chunk_size < 8) {
// The smallest valid chunk is 8 bytes long.
ALOGE("invalid chunk size: %" PRIu64, chunk_size);
return ERROR_MALFORMED;
}
char chunk[5];
MakeFourCCString(chunk_type, chunk);
ALOGV("chunk: %s @ %lld, %d", chunk, (long long)*offset, depth);
if (kUseHexDump) {
static const char kWhitespace[] = " ";
const char *indent = &kWhitespace[sizeof(kWhitespace) - 1 - 2 * depth];
printf("%sfound chunk '%s' of size %" PRIu64 "\n", indent, chunk, chunk_size);
char buffer[256];
size_t n = chunk_size;
if (n > sizeof(buffer)) {
n = sizeof(buffer);
}
if (mDataSource->readAt(*offset, buffer, n)
< (ssize_t)n) {
return ERROR_IO;
}
hexdump(buffer, n);
}
PathAdder autoAdder(&mPath, chunk_type);
// (data_offset - *offset) is either 8 or 16
off64_t chunk_data_size = chunk_size - (data_offset - *offset);
if (chunk_data_size < 0) {
ALOGE("b/23540914");
return ERROR_MALFORMED;
}
if (chunk_type != FOURCC("mdat") && chunk_data_size > kMaxAtomSize) {
char errMsg[100];
sprintf(errMsg, "%s atom has size %" PRId64, chunk, chunk_data_size);
ALOGE("%s (b/28615448)", errMsg);
android_errorWriteWithInfoLog(0x534e4554, "28615448", -1, errMsg, strlen(errMsg));
return ERROR_MALFORMED;
}
if (chunk_type != FOURCC("cprt")
&& chunk_type != FOURCC("covr")
&& mPath.size() == 5 && underMetaDataPath(mPath)) {
off64_t stop_offset = *offset + chunk_size;
*offset = data_offset;
while (*offset < stop_offset) {
status_t err = parseChunk(offset, depth + 1);
if (err != OK) {
return err;
}
}
if (*offset != stop_offset) {
return ERROR_MALFORMED;
}
return OK;
}
switch(chunk_type) {
case FOURCC("moov"):
case FOURCC("trak"):
case FOURCC("mdia"):
case FOURCC("minf"):
case FOURCC("dinf"):
case FOURCC("stbl"):
case FOURCC("mvex"):
case FOURCC("moof"):
case FOURCC("traf"):
case FOURCC("mfra"):
case FOURCC("udta"):
case FOURCC("ilst"):
case FOURCC("sinf"):
case FOURCC("schi"):
case FOURCC("edts"):
case FOURCC("wave"):
{
if (chunk_type == FOURCC("moov") && depth != 0) {
ALOGE("moov: depth %d", depth);
return ERROR_MALFORMED;
}
if (chunk_type == FOURCC("moov") && mInitCheck == OK) {
ALOGE("duplicate moov");
return ERROR_MALFORMED;
}
if (chunk_type == FOURCC("moof") && !mMoofFound) {
// store the offset of the first segment
mMoofFound = true;
mMoofOffset = *offset;
}
if (chunk_type == FOURCC("stbl")) {
ALOGV("sampleTable chunk is %" PRIu64 " bytes long.", chunk_size);
if (mDataSource->flags()
& (DataSourceBase::kWantsPrefetching
| DataSourceBase::kIsCachingDataSource)) {
CachedRangedDataSource *cachedSource =
new CachedRangedDataSource(mDataSource);
if (cachedSource->setCachedRange(
*offset, chunk_size,
true /* assume ownership on success */) == OK) {
mDataSource = cachedSource;
} else {
delete cachedSource;
}
}
if (mLastTrack == NULL) {
return ERROR_MALFORMED;
}
mLastTrack->sampleTable = new SampleTable(mDataSource);
}
bool isTrack = false;
if (chunk_type == FOURCC("trak")) {
if (depth != 1) {
ALOGE("trak: depth %d", depth);
return ERROR_MALFORMED;
}
isTrack = true;
ALOGV("adding new track");
Track *track = new Track;
if (mLastTrack) {
mLastTrack->next = track;
} else {
mFirstTrack = track;
}
mLastTrack = track;
track->meta = AMediaFormat_new();
AMediaFormat_setString(track->meta,
AMEDIAFORMAT_KEY_MIME, "application/octet-stream");
}
off64_t stop_offset = *offset + chunk_size;
*offset = data_offset;
while (*offset < stop_offset) {
// pass udta terminator
if (mIsQT && stop_offset - *offset == 4 && chunk_type == FOURCC("udta")) {
// handle the case where udta terminates with the terminator code 0x00000000
// note that the 0 terminator is optional and we just handle this case.
uint32_t terminate_code = 1;
mDataSource->readAt(*offset, &terminate_code, 4);
if (0 == terminate_code) {
*offset += 4;
ALOGD("Terminal code for udta");
continue;
} else {
ALOGW("invalid udta Terminal code");
}
}
status_t err = parseChunk(offset, depth + 1);
if (err != OK) {
if (isTrack) {
mLastTrack->skipTrack = true;
break;
}
return err;
}
}
if (*offset != stop_offset) {
return ERROR_MALFORMED;
}
if (isTrack) {
int32_t trackId;
// There must be exactly one track header per track.
if (!AMediaFormat_getInt32(mLastTrack->meta,
AMEDIAFORMAT_KEY_TRACK_ID, &trackId)) {
mLastTrack->skipTrack = true;
}
status_t err = verifyTrack(mLastTrack);
if (err != OK) {
mLastTrack->skipTrack = true;
}
if (mLastTrack->skipTrack) {
ALOGV("skipping this track...");
Track *cur = mFirstTrack;
if (cur == mLastTrack) {
delete cur;
mFirstTrack = mLastTrack = NULL;
} else {
while (cur && cur->next != mLastTrack) {
cur = cur->next;
}
if (cur) {
cur->next = NULL;
}
delete mLastTrack;
mLastTrack = cur;
}
return OK;
}
// place things we built elsewhere into their final locations
// put aggregated tx3g data into the metadata
if (mLastTrack->mTx3gFilled > 0) {
ALOGV("Putting %zu bytes of tx3g data into meta data",
mLastTrack->mTx3gFilled);
AMediaFormat_setBuffer(mLastTrack->meta,
AMEDIAFORMAT_KEY_TEXT_FORMAT_DATA,
mLastTrack->mTx3gBuffer, mLastTrack->mTx3gFilled);
// drop it now to reduce our footprint
free(mLastTrack->mTx3gBuffer);
mLastTrack->mTx3gBuffer = NULL;
}
} else if (chunk_type == FOURCC("moov")) {
mInitCheck = OK;
return UNKNOWN_ERROR; // Return a dummy error.
}
break;
}
case FOURCC("schm"):
{
*offset += chunk_size;
if (!mLastTrack) {
return ERROR_MALFORMED;
}
uint32_t scheme_type;
if (mDataSource->readAt(data_offset + 4, &scheme_type, 4) < 4) {
return ERROR_IO;
}
scheme_type = ntohl(scheme_type);
int32_t mode = kCryptoModeUnencrypted;
switch(scheme_type) {
case FOURCC("cbc1"):
{
mode = kCryptoModeAesCbc;
break;
}
case FOURCC("cbcs"):
{
mode = kCryptoModeAesCbc;
mLastTrack->subsample_encryption = true;
break;
}
case FOURCC("cenc"):
{
mode = kCryptoModeAesCtr;
break;
}
case FOURCC("cens"):
{
mode = kCryptoModeAesCtr;
mLastTrack->subsample_encryption = true;
break;
}
}
if (mode != kCryptoModeUnencrypted) {
AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_CRYPTO_MODE, mode);
}
break;
}
case FOURCC("elst"):
{
*offset += chunk_size;
if (!mLastTrack) {
return ERROR_MALFORMED;
}
// See 14496-12 8.6.6
uint8_t version;
if (mDataSource->readAt(data_offset, &version, 1) < 1) {
return ERROR_IO;
}
uint32_t entry_count;
if (!mDataSource->getUInt32(data_offset + 4, &entry_count)) {
return ERROR_IO;
}
if (entry_count != 1) {
// we only support a single entry at the moment, for gapless playback
// or start offset
ALOGW("ignoring edit list with %d entries", entry_count);
} else {
off64_t entriesoffset = data_offset + 8;
uint64_t segment_duration;
int64_t media_time;
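// Version 1 entries carry 64-bit segment_duration/media_time values;
// version 0 entries carry 32-bit values.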
if (version == 1) {
if (!mDataSource->getUInt64(entriesoffset, &segment_duration) ||
!mDataSource->getUInt64(entriesoffset + 8, (uint64_t*)&media_time)) {
return ERROR_IO;
}
} else if (version == 0) {
uint32_t sd;
int32_t mt;
if (!mDataSource->getUInt32(entriesoffset, &sd) ||
!mDataSource->getUInt32(entriesoffset + 4, (uint32_t*)&mt)) {
return ERROR_IO;
}
segment_duration = sd;
media_time = mt;
} else {
return ERROR_IO;
}
// save these for later, because the elst atom might precede
// the atoms that actually give us the duration and sample rate
// needed to calculate the padding and delay values
mLastTrack->has_elst = true;
mLastTrack->elst_media_time = media_time;
mLastTrack->elst_segment_duration = segment_duration;
}
break;
}
case FOURCC("frma"):
{
*offset += chunk_size;
uint32_t original_fourcc;
if (mDataSource->readAt(data_offset, &original_fourcc, 4) < 4) {
return ERROR_IO;
}
original_fourcc = ntohl(original_fourcc);
ALOGV("read original format: %d", original_fourcc);
if (mLastTrack == NULL) {
return ERROR_MALFORMED;
}
AMediaFormat_setString(mLastTrack->meta,
AMEDIAFORMAT_KEY_MIME, FourCC2MIME(original_fourcc));
uint32_t num_channels = 0;
uint32_t sample_rate = 0;
if (AdjustChannelsAndRate(original_fourcc, &num_channels, &sample_rate)) {
AMediaFormat_setInt32(mLastTrack->meta,
AMEDIAFORMAT_KEY_CHANNEL_COUNT, num_channels);
AMediaFormat_setInt32(mLastTrack->meta,
AMEDIAFORMAT_KEY_SAMPLE_RATE, sample_rate);
}
if (!mIsQT && original_fourcc == FOURCC("alac")) {
off64_t tmpOffset = *offset;
status_t err = parseALACSampleEntry(&tmpOffset);
if (err != OK) {
ALOGE("parseALACSampleEntry err:%d Line:%d", err, __LINE__);
return err;
}
*offset = tmpOffset + 8;
}
break;
}
case FOURCC("tenc"):
{
*offset += chunk_size;
if (chunk_size < 32) {
return ERROR_MALFORMED;
}
// The tenc box contains a 1-byte version, 3 bytes of flags, a 3-byte default
// algorithm id, a 1-byte default IV size, and a 16-byte default KeyID
// (ISO/IEC 23001-7).
uint8_t version;
if (mDataSource->readAt(data_offset, &version, sizeof(version))
< (ssize_t)sizeof(version)) {
return ERROR_IO;
}
uint8_t buf[4];
memset(buf, 0, 4);
if (mDataSource->readAt(data_offset + 4, buf + 1, 3) < 3) {
return ERROR_IO;
}
if (mLastTrack == NULL) {
return ERROR_MALFORMED;
}
uint8_t defaultEncryptedByteBlock = 0;
uint8_t defaultSkipByteBlock = 0;
uint32_t defaultAlgorithmId = ntohl(*((int32_t*)buf));
if (version == 1) {
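// The pattern byte packs the crypt byte-block count in the high nibble and
// the skip byte-block count in the low nibble (cbcs/cens pattern encryption).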
uint32_t pattern = buf[2];
defaultEncryptedByteBlock = pattern >> 4;
defaultSkipByteBlock = pattern & 0xf;
if (defaultEncryptedByteBlock == 0 && defaultSkipByteBlock == 0) {
// use (1,0) to mean "encrypt everything"
defaultEncryptedByteBlock = 1;
}
} else if (mLastTrack->subsample_encryption) {
ALOGW("subsample_encryption should be version 1");
} else if (defaultAlgorithmId > 1) {
// only 0 (clear) and 1 (AES-128) are valid
ALOGW("defaultAlgorithmId: %u is a reserved value", defaultAlgorithmId);
defaultAlgorithmId = 1;
}
memset(buf, 0, 4);
if (mDataSource->readAt(data_offset + 7, buf + 3, 1) < 1) {
return ERROR_IO;
}
uint32_t defaultIVSize = ntohl(*((int32_t*)buf));
if (defaultAlgorithmId == 0 && defaultIVSize != 0) {
// unencrypted data must have an IV size of 0
return ERROR_MALFORMED;
} else if (defaultIVSize != 0 &&
defaultIVSize != 8 &&
defaultIVSize != 16) {
return ERROR_MALFORMED;
}
uint8_t defaultKeyId[16];
if (mDataSource->readAt(data_offset + 8, &defaultKeyId, 16) < 16) {
return ERROR_IO;
}
sp<ABuffer> defaultConstantIv;
if (defaultAlgorithmId != 0 && defaultIVSize == 0) {
uint8_t ivlength;
if (mDataSource->readAt(data_offset + 24, &ivlength, sizeof(ivlength))
< (ssize_t)sizeof(ivlength)) {
return ERROR_IO;
}
if (ivlength != 8 && ivlength != 16) {
ALOGW("unsupported IV length: %u", ivlength);
return ERROR_MALFORMED;
}
defaultConstantIv = new ABuffer(ivlength);
if (mDataSource->readAt(data_offset + 25, defaultConstantIv->data(), ivlength)
< (ssize_t)ivlength) {
return ERROR_IO;
}
defaultConstantIv->setRange(0, ivlength);
}
int32_t tmpAlgorithmId;
if (!AMediaFormat_getInt32(mLastTrack->meta,
AMEDIAFORMAT_KEY_CRYPTO_MODE, &tmpAlgorithmId)) {
AMediaFormat_setInt32(mLastTrack->meta,
AMEDIAFORMAT_KEY_CRYPTO_MODE, defaultAlgorithmId);
}
AMediaFormat_setInt32(mLastTrack->meta,
AMEDIAFORMAT_KEY_CRYPTO_DEFAULT_IV_SIZE, defaultIVSize);
AMediaFormat_setBuffer(mLastTrack->meta,
AMEDIAFORMAT_KEY_CRYPTO_KEY, defaultKeyId, 16);
AMediaFormat_setInt32(mLastTrack->meta,
AMEDIAFORMAT_KEY_CRYPTO_ENCRYPTED_BYTE_BLOCK, defaultEncryptedByteBlock);
AMediaFormat_setInt32(mLastTrack->meta,
AMEDIAFORMAT_KEY_CRYPTO_SKIP_BYTE_BLOCK, defaultSkipByteBlock);
if (defaultConstantIv != NULL) {
AMediaFormat_setBuffer(mLastTrack->meta, AMEDIAFORMAT_KEY_CRYPTO_IV,
defaultConstantIv->data(), defaultConstantIv->size());
}
break;
}
case FOURCC("tkhd"):
{
*offset += chunk_size;
status_t err;
if ((err = parseTrackHeader(data_offset, chunk_data_size)) != OK) {
return err;
}
break;
}
case FOURCC("tref"):
{
off64_t stop_offset = *offset + chunk_size;
*offset = data_offset;
while (*offset < stop_offset) {
status_t err = parseChunk(offset, depth + 1);
if (err != OK) {
return err;
}
}
if (*offset != stop_offset) {
return ERROR_MALFORMED;
}
break;
}
case FOURCC("thmb"):
{
*offset += chunk_size;
if (mLastTrack != NULL) {
// Skip thumbnail track for now since we don't have an
// API to retrieve it yet.
// The thumbnail track can't be accessed by negative index or time,
// because each timed sample has its own corresponding thumbnail
// in the thumbnail track. We'll need a dedicated API to retrieve
// thumbnail at time instead.
mLastTrack->skipTrack = true;
}
break;
}
case FOURCC("pssh"):
{
*offset += chunk_size;
PsshInfo pssh;
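// Assumes a version 0 pssh layout: 4 bytes version/flags, a 16-byte system ID,
// a 4-byte data length, then the data itself.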
if (mDataSource->readAt(data_offset + 4, &pssh.uuid, 16) < 16) {
return ERROR_IO;
}
uint32_t psshdatalen = 0;
if (mDataSource->readAt(data_offset + 20, &psshdatalen, 4) < 4) {
return ERROR_IO;
}
pssh.datalen = ntohl(psshdatalen);
ALOGV("pssh data size: %d", pssh.datalen);
if (chunk_size < 20 || pssh.datalen > chunk_size - 20) {
// pssh data length exceeds size of containing box
return ERROR_MALFORMED;
}
pssh.data = new (std::nothrow) uint8_t[pssh.datalen];
if (pssh.data == NULL) {
return ERROR_MALFORMED;
}
ALOGV("allocated pssh @ %p", pssh.data);
ssize_t requested = (ssize_t) pssh.datalen;
if (mDataSource->readAt(data_offset + 24, pssh.data, requested) < requested) {
delete[] pssh.data;
return ERROR_IO;
}
mPssh.push_back(pssh);
break;
}
case FOURCC("mdhd"):
{
*offset += chunk_size;
if (chunk_data_size < 4 || mLastTrack == NULL) {
return ERROR_MALFORMED;
}
uint8_t version;
if (mDataSource->readAt(
data_offset, &version, sizeof(version))
< (ssize_t)sizeof(version)) {
return ERROR_IO;
}
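// mdhd version 1 stores 64-bit creation/modification times, version 0 stores
// 32-bit ones, which shifts where the timescale field sits.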
off64_t timescale_offset;
if (version == 1) {
timescale_offset = data_offset + 4 + 16;
} else if (version == 0) {
timescale_offset = data_offset + 4 + 8;
} else {
return ERROR_IO;
}
uint32_t timescale;
if (mDataSource->readAt(
timescale_offset, &timescale, sizeof(timescale))
< (ssize_t)sizeof(timescale)) {
return ERROR_IO;
}
if (!timescale) {
ALOGE("timescale should not be ZERO.");
return ERROR_MALFORMED;
}
mLastTrack->timescale = ntohl(timescale);
// 14496-12 says all ones means indeterminate, but some files seem to use
// 0 instead. We treat both the same.
int64_t duration = 0;
if (version == 1) {
if (mDataSource->readAt(
timescale_offset + 4, &duration, sizeof(duration))
< (ssize_t)sizeof(duration)) {
return ERROR_IO;
}
if (duration != -1) {
duration = ntoh64(duration);
}
} else {
uint32_t duration32;
if (mDataSource->readAt(
timescale_offset + 4, &duration32, sizeof(duration32))
< (ssize_t)sizeof(duration32)) {
return ERROR_IO;
}
if (duration32 != 0xffffffff) {
duration = ntohl(duration32);
}
}
if (duration != 0 && mLastTrack->timescale != 0) {
long double durationUs = ((long double)duration * 1000000) / mLastTrack->timescale;
if (durationUs < 0 || durationUs > INT64_MAX) {
ALOGE("cannot represent %lld * 1000000 / %lld in 64 bits",
(long long) duration, (long long) mLastTrack->timescale);
return ERROR_MALFORMED;
}
AMediaFormat_setInt64(mLastTrack->meta, AMEDIAFORMAT_KEY_DURATION, durationUs);
}
uint8_t lang[2];
off64_t lang_offset;
if (version == 1) {
lang_offset = timescale_offset + 4 + 8;
} else if (version == 0) {
lang_offset = timescale_offset + 4 + 4;
} else {
return ERROR_IO;
}
if (mDataSource->readAt(lang_offset, &lang, sizeof(lang))
< (ssize_t)sizeof(lang)) {
return ERROR_IO;
}
// The ISO-639-2/T three-character language code is packed as a 1-bit pad
// followed by three 5-bit characters; each character is stored as the
// difference between its ASCII value and 0x60.
char lang_code[4];
lang_code[0] = ((lang[0] >> 2) & 0x1f) + 0x60;
lang_code[1] = ((lang[0] & 0x3) << 3 | (lang[1] >> 5)) + 0x60;
lang_code[2] = (lang[1] & 0x1f) + 0x60;
lang_code[3] = '\0';
AMediaFormat_setString(mLastTrack->meta, AMEDIAFORMAT_KEY_LANGUAGE, lang_code);
break;
}
case FOURCC("stsd"):
{
uint8_t buffer[8];
if (chunk_data_size < (off64_t)sizeof(buffer)) {
return ERROR_MALFORMED;
}
if (mDataSource->readAt(
data_offset, buffer, 8) < 8) {
return ERROR_IO;
}
if (U32_AT(buffer) != 0) {
// Should be version 0, flags 0.
return ERROR_MALFORMED;
}
uint32_t entry_count = U32_AT(&buffer[4]);
if (entry_count > 1) {
// For 3GPP timed text, there could be multiple tx3g boxes containing
// multiple text display formats. These formats will be used to
// display the timed text.
// For encrypted files, there may also be more than one entry.
const char *mime;
if (mLastTrack == NULL)
return ERROR_MALFORMED;
CHECK(AMediaFormat_getString(mLastTrack->meta, AMEDIAFORMAT_KEY_MIME, &mime));
if (strcasecmp(mime, MEDIA_MIMETYPE_TEXT_3GPP) &&
strcasecmp(mime, "application/octet-stream")) {
// For now we only support a single type of media per track.
mLastTrack->skipTrack = true;
*offset += chunk_size;
break;
}
}
off64_t stop_offset = *offset + chunk_size;
*offset = data_offset + 8;
for (uint32_t i = 0; i < entry_count; ++i) {
status_t err = parseChunk(offset, depth + 1);
if (err != OK) {
return err;
}
}
if (*offset != stop_offset) {
return ERROR_MALFORMED;
}
break;
}
case FOURCC("mett"):
{
*offset += chunk_size;
if (mLastTrack == NULL)
return ERROR_MALFORMED;
auto buffer = heapbuffer<uint8_t>(chunk_data_size);
if (buffer.get() == NULL) {
return NO_MEMORY;
}
if (mDataSource->readAt(
data_offset, buffer.get(), chunk_data_size) < chunk_data_size) {
return ERROR_IO;
}
// Prior to API 29, the metadata track was not compliant with ISO/IEC
// 14496-12-2015. This led to some ISO-compliant parsers failing to read the
// metatrack. As of API 29 and onwards, a change was made to metadata track to
// make it compliant with the standard. The workaround is to write the
// null-terminated mime_format string twice. This allows compliant parsers to
// read the missing reserved, data_reference_index, and content_encoding fields
// from the first mime_type string. The actual mime_format field would then be
// read correctly from the second string. The non-compliant Android frameworks
// from API 28 and earlier would still be able to read the mime_format correctly
// as it would only read the first null-terminated mime_format string. To enable
// reading metadata tracks generated from both the non-compliant and compliant
// formats, a check needs to be done to see which format is used.
int null_pos = 0;
const unsigned char *str = buffer.get();
while (null_pos < chunk_data_size) {
if (*(str + null_pos) == '\0') {
break;
}
++null_pos;
}
if (null_pos == chunk_data_size - 1) {
// This is not a standard-compliant metadata track.
String8 mimeFormat((const char *)(buffer.get()), chunk_data_size);
AMediaFormat_setString(mLastTrack->meta,
AMEDIAFORMAT_KEY_MIME, mimeFormat.string());
} else {
// This is a standard-compliant metadata track.
String8 contentEncoding((const char *)(buffer.get() + 8));
String8 mimeFormat((const char *)(buffer.get() + 8 + contentEncoding.size() + 1),
chunk_data_size - 8 - contentEncoding.size() - 1);
AMediaFormat_setString(mLastTrack->meta,
AMEDIAFORMAT_KEY_MIME, mimeFormat.string());
}
break;
}
case FOURCC("mp4a"):
case FOURCC("enca"):
case FOURCC("samr"):
case FOURCC("sawb"):
case FOURCC("Opus"):
case FOURCC("twos"):
case FOURCC("sowt"):
case FOURCC("alac"):
case FOURCC("fLaC"):
case FOURCC(".mp3"):
case 0x6D730055: // "ms U" mp3 audio
{
if (mIsQT && depth >= 1 && mPath[depth - 1] == FOURCC("wave")) {
if (chunk_type == FOURCC("alac")) {
off64_t offsetTmp = *offset;
status_t err = parseALACSampleEntry(&offsetTmp);
if (err != OK) {
ALOGE("parseALACSampleEntry err:%d Line:%d", err, __LINE__);
return err;
}
}
// Ignore all atoms embedded in QT wave atom
ALOGV("Ignore all atoms embedded in QT wave atom");
*offset += chunk_size;
break;
}
uint8_t buffer[8 + 20];
if (chunk_data_size < (ssize_t)sizeof(buffer)) {
// Basic AudioSampleEntry size.
return ERROR_MALFORMED;
}
if (mDataSource->readAt(
data_offset, buffer, sizeof(buffer)) < (ssize_t)sizeof(buffer)) {
return ERROR_IO;
}
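// Sample entry header (6 reserved bytes + data_reference_index), then the
// sound sample description: version at offset 8, channel count at 16,
// sample size at 18, and a 16.16 fixed-point sample rate at 24.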
uint16_t data_ref_index __unused = U16_AT(&buffer[6]);
uint16_t version = U16_AT(&buffer[8]);
uint32_t num_channels = U16_AT(&buffer[16]);
uint16_t sample_size = U16_AT(&buffer[18]);
uint32_t sample_rate = U32_AT(&buffer[24]) >> 16;
if (mLastTrack == NULL)
return ERROR_MALFORMED;
off64_t stop_offset = *offset + chunk_size;
*offset = data_offset + sizeof(buffer);
if (mIsQT) {
if (version == 1) {
if (mDataSource->readAt(*offset, buffer, 16) < 16) {
return ERROR_IO;
}
#if 0
U32_AT(buffer); // samples per packet
U32_AT(&buffer[4]); // bytes per packet
U32_AT(&buffer[8]); // bytes per frame
U32_AT(&buffer[12]); // bytes per sample
#endif
*offset += 16;
} else if (version == 2) {
uint8_t v2buffer[36];
if (mDataSource->readAt(*offset, v2buffer, 36) < 36) {
return ERROR_IO;
}
#if 0
U32_AT(v2buffer); // size of struct only
sample_rate = (uint32_t)U64_AT(&v2buffer[4]); // audio sample rate
num_channels = U32_AT(&v2buffer[12]); // num audio channels
U32_AT(&v2buffer[16]); // always 0x7f000000
sample_size = (uint16_t)U32_AT(&v2buffer[20]); // const bits per channel
U32_AT(&v2buffer[24]); // format specific flags
U32_AT(&v2buffer[28]); // const bytes per audio packet
U32_AT(&v2buffer[32]); // const LPCM frames per audio packet
#endif
*offset += 36;
}
}
if (chunk_type != FOURCC("enca")) {
// if the chunk type is enca, we'll get the type from the frma box later
AMediaFormat_setString(mLastTrack->meta,
AMEDIAFORMAT_KEY_MIME, FourCC2MIME(chunk_type));
AdjustChannelsAndRate(chunk_type, &num_channels, &sample_rate);
if (!strcasecmp(MEDIA_MIMETYPE_AUDIO_RAW, FourCC2MIME(chunk_type))) {
AMediaFormat_setInt32(mLastTrack->meta,
AMEDIAFORMAT_KEY_BITS_PER_SAMPLE, sample_size);
if (chunk_type == FOURCC("twos")) {
AMediaFormat_setInt32(mLastTrack->meta,
AMEDIAFORMAT_KEY_PCM_BIG_ENDIAN, 1);
}
}
}
ALOGV("*** coding='%s' %d channels, size %d, rate %d\n",
chunk, num_channels, sample_size, sample_rate);
AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_CHANNEL_COUNT, num_channels);
AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_SAMPLE_RATE, sample_rate);
if (chunk_type == FOURCC("Opus")) {
uint8_t opusInfo[AOPUS_OPUSHEAD_MAXSIZE];
data_offset += sizeof(buffer);
size_t opusInfoSize = chunk_data_size - sizeof(buffer);
if (opusInfoSize < AOPUS_OPUSHEAD_MINSIZE ||
opusInfoSize > AOPUS_OPUSHEAD_MAXSIZE) {
return ERROR_MALFORMED;
}
// Read Opus Header
if (mDataSource->readAt(
data_offset, opusInfo, opusInfoSize) < opusInfoSize) {
return ERROR_IO;
}
// OpusHeader must start with this magic sequence, overwrite first 8 bytes
// http://wiki.xiph.org/OggOpus#ID_Header
strncpy((char *)opusInfo, "OpusHead", 8);
// Version shall be 0 as per mp4 Opus Specific Box
// (https://opus-codec.org/docs/opus_in_isobmff.html#4.3.2)
if (opusInfo[8]) {
return ERROR_MALFORMED;
}
// Force version to 1 as per OpusHead definition
// (http://wiki.xiph.org/OggOpus#ID_Header)
opusInfo[8] = 1;
// Read Opus Specific Box values
size_t opusOffset = 10;
uint16_t pre_skip = U16_AT(&opusInfo[opusOffset]);
uint32_t sample_rate = U32_AT(&opusInfo[opusOffset + 2]);
uint16_t out_gain = U16_AT(&opusInfo[opusOffset + 6]);
// Convert Opus Specific Box values. ParseOpusHeader expects
// the values in LE, however MP4 stores these values as BE
// https://opus-codec.org/docs/opus_in_isobmff.html#4.3.2
memcpy(&opusInfo[opusOffset], &pre_skip, sizeof(pre_skip));
memcpy(&opusInfo[opusOffset + 2], &sample_rate, sizeof(sample_rate));
memcpy(&opusInfo[opusOffset + 6], &out_gain, sizeof(out_gain));
static const int64_t kSeekPreRollNs = 80000000; // Fixed 80 msec
static const int32_t kOpusSampleRate = 48000;
int64_t codecDelay = pre_skip * 1000000000ll / kOpusSampleRate;
AMediaFormat_setBuffer(mLastTrack->meta,
AMEDIAFORMAT_KEY_CSD_0, opusInfo, opusInfoSize);
AMediaFormat_setBuffer(mLastTrack->meta,
AMEDIAFORMAT_KEY_CSD_1, &codecDelay, sizeof(codecDelay));
AMediaFormat_setBuffer(mLastTrack->meta,
AMEDIAFORMAT_KEY_CSD_2, &kSeekPreRollNs, sizeof(kSeekPreRollNs));
data_offset += opusInfoSize;
*offset = data_offset;
CHECK_EQ(*offset, stop_offset);
}
if (!mIsQT && chunk_type == FOURCC("alac")) {
data_offset += sizeof(buffer);
status_t err = parseALACSampleEntry(&data_offset);
if (err != OK) {
ALOGE("parseALACSampleEntry err:%d Line:%d", err, __LINE__);
return err;
}
*offset = data_offset;
CHECK_EQ(*offset, stop_offset);
}
if (chunk_type == FOURCC("fLaC")) {
// From https://github.com/xiph/flac/blob/master/doc/isoflac.txt
// 4 for mime, 4 for blockType and BlockLen, 34 for metadata
uint8_t flacInfo[4 + 4 + 34];
// skipping dFla, version
data_offset += sizeof(buffer) + 12;
size_t flacOffset = 4;
// Add flaC header mime type to CSD
strncpy((char *)flacInfo, "fLaC", 4);
if (mDataSource->readAt(
data_offset, flacInfo + flacOffset, sizeof(flacInfo) - flacOffset) <
(ssize_t)sizeof(flacInfo) - flacOffset) {
return ERROR_IO;
}
data_offset += sizeof(flacInfo) - flacOffset;
AMediaFormat_setBuffer(mLastTrack->meta, AMEDIAFORMAT_KEY_CSD_0, flacInfo,
sizeof(flacInfo));
*offset = data_offset;
CHECK_EQ(*offset, stop_offset);
}
while (*offset < stop_offset) {
status_t err = parseChunk(offset, depth + 1);
if (err != OK) {
return err;
}
}
if (*offset != stop_offset) {
return ERROR_MALFORMED;
}
break;
}
case FOURCC("mp4v"):
case FOURCC("encv"):
case FOURCC("s263"):
case FOURCC("H263"):
case FOURCC("h263"):
case FOURCC("avc1"):
case FOURCC("hvc1"):
case FOURCC("hev1"):
case FOURCC("av01"):
{
uint8_t buffer[78];
if (chunk_data_size < (ssize_t)sizeof(buffer)) {
// Basic VideoSampleEntry size.
return ERROR_MALFORMED;
}
if (mDataSource->readAt(
data_offset, buffer, sizeof(buffer)) < (ssize_t)sizeof(buffer)) {
return ERROR_IO;
}
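// Visual sample entry: the 16-bit width and height live at offsets 24 and 26,
// past the 8-byte sample entry prefix and 16 bytes of pre_defined/reserved fields.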
uint16_t data_ref_index __unused = U16_AT(&buffer[6]);
uint16_t width = U16_AT(&buffer[6 + 18]);
uint16_t height = U16_AT(&buffer[6 + 20]);
// The video sample is not standard-compliant if it has invalid dimensions.
// Use some default width and height values, and
// let the decoder figure out the actual width and height (and thus
// be prepared for an INFO_FORMAT_CHANGED event).
if (width == 0) width = 352;
if (height == 0) height = 288;
// printf("*** coding='%s' width=%d height=%d\n",
// chunk, width, height);
if (mLastTrack == NULL)
return ERROR_MALFORMED;
if (chunk_type != FOURCC("encv")) {
// if the chunk type is encv, we'll get the type from the frma box later
AMediaFormat_setString(mLastTrack->meta,
AMEDIAFORMAT_KEY_MIME, FourCC2MIME(chunk_type));
}
AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_WIDTH, width);
AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_HEIGHT, height);
off64_t stop_offset = *offset + chunk_size;
*offset = data_offset + sizeof(buffer);
while (*offset < stop_offset) {
status_t err = parseChunk(offset, depth + 1);
if (err != OK) {
return err;
}
}
if (*offset != stop_offset) {
return ERROR_MALFORMED;
}
break;
}
case FOURCC("stco"):
case FOURCC("co64"):
{
if ((mLastTrack == NULL) || (mLastTrack->sampleTable == NULL)) {
return ERROR_MALFORMED;
}
status_t err =
mLastTrack->sampleTable->setChunkOffsetParams(
chunk_type, data_offset, chunk_data_size);
*offset += chunk_size;
if (err != OK) {
return err;
}
break;
}
case FOURCC("stsc"):
{
if ((mLastTrack == NULL) || (mLastTrack->sampleTable == NULL))
return ERROR_MALFORMED;
status_t err =
mLastTrack->sampleTable->setSampleToChunkParams(
data_offset, chunk_data_size);
*offset += chunk_size;
if (err != OK) {
return err;
}
break;
}
case FOURCC("stsz"):
case FOURCC("stz2"):
{
if ((mLastTrack == NULL) || (mLastTrack->sampleTable == NULL)) {
return ERROR_MALFORMED;
}
status_t err =
mLastTrack->sampleTable->setSampleSizeParams(
chunk_type, data_offset, chunk_data_size);
*offset += chunk_size;
if (err != OK) {
return err;
}
adjustRawDefaultFrameSize();
size_t max_size;
err = mLastTrack->sampleTable->getMaxSampleSize(&max_size);
if (err != OK) {
return err;
}
if (max_size != 0) {
// Assume that a given buffer only contains at most 10 chunks,
// each chunk originally prefixed with a 2 byte length will
// have a 4 byte header (0x00 0x00 0x00 0x01) after conversion,
// and thus will grow by 2 bytes per chunk.
if (max_size > SIZE_MAX - 10 * 2) {
ALOGE("max sample size too big: %zu", max_size);
return ERROR_MALFORMED;
}
AMediaFormat_setInt32(mLastTrack->meta,
AMEDIAFORMAT_KEY_MAX_INPUT_SIZE, max_size + 10 * 2);
} else {
// No size was specified. Pick a conservatively large size.
uint32_t width, height;
if (!AMediaFormat_getInt32(mLastTrack->meta,
AMEDIAFORMAT_KEY_WIDTH, (int32_t*)&width) ||
!AMediaFormat_getInt32(mLastTrack->meta,
AMEDIAFORMAT_KEY_HEIGHT,(int32_t*) &height)) {
ALOGE("No width or height, assuming worst case 1080p");
width = 1920;
height = 1080;
} else {
// A resolution was specified; check that it's not too big. The values below
// were chosen so that the calculations below don't cause overflows; they're
// not indicating that resolutions up to 32kx32k are actually supported.
if (width > 32768 || height > 32768) {
ALOGE("can't support %u x %u video", width, height);
return ERROR_MALFORMED;
}
}
const char *mime;
CHECK(AMediaFormat_getString(mLastTrack->meta, AMEDIAFORMAT_KEY_MIME, &mime));
if (!strncmp(mime, "audio/", 6)) {
// for audio, use 128KB
max_size = 1024 * 128;
} else if (!strcmp(mime, MEDIA_MIMETYPE_VIDEO_AVC)
|| !strcmp(mime, MEDIA_MIMETYPE_VIDEO_HEVC)) {
// AVC & HEVC requires compression ratio of at least 2, and uses
// macroblocks
max_size = ((width + 15) / 16) * ((height + 15) / 16) * 192;
} else {
// For all other formats there is no minimum compression
// ratio. Use compression ratio of 1.
max_size = width * height * 3 / 2;
}
// HACK: allow 10% overhead
// TODO: read sample size from traf atom for fragmented MPEG4.
max_size += max_size / 10;
AMediaFormat_setInt32(mLastTrack->meta, AMEDIAFORMAT_KEY_MAX_INPUT_SIZE, max_size);
}
// NOTE: setting another piece of metadata invalidates any pointers (such as the
// mimetype) previously obtained, so don't cache them.
const char *mime;
CHECK(AMediaFormat_getString(mLastTrack->meta, AMEDIAFORMAT_KEY_MIME, &mime));
// Calculate average frame rate.
if (!strncasecmp("video/", mime, 6)) {
size_t nSamples = mLastTrack->sampleTable->countSamples();
if (nSamples == 0) {
int32_t trackId;
if (AMediaFormat_getInt32(mLastTrack->meta,
AMEDIAFORMAT_KEY_TRACK_ID, &trackId)) {
for (size_t i = 0; i < mTrex.size(); i++) {
Trex *t = &mTrex.editItemAt(i);
if (t->track_ID == (uint32_t) trackId) {
if (t->default_sample_duration > 0) {
int32_t frameRate =
mLastTrack->timescale / t->default_sample_duration;
AMediaFormat_setInt32(mLastTrack->meta,
AMEDIAFORMAT_KEY_FRAME_RATE, frameRate);
}
break;
}
}
}
} else {
int64_t durationUs;
if (AMediaFormat_getInt64(mLastTrack->meta,
AMEDIAFORMAT_KEY_DURATION, &durationUs)) {
if (durationUs > 0) {
int32_t frameRate = (nSamples * 1000000LL +
(durationUs >> 1)) / durationUs;
AMediaFormat_setInt32(mLastTrack->meta,
AMEDIAFORMAT_KEY_FRAME_RATE, frameRate);
}
}
ALOGV("setting frame count %zu", nSamples);
AMediaFormat_setInt32(mLastTrack->meta,
AMEDIAFORMAT_KEY_FRAME_COUNT, nSamples);
}
}
break;
}
case FOURCC("stts"):
{
if ((mLastTrack == NULL) || (mLastTrack->sampleTable == NULL))
return ERROR_MALFORMED;
*offset += chunk_size;
if (depth >= 1 && mPath[depth - 1] != FOURCC("stbl")) {
char chunk[5];
MakeFourCCString(mPath[depth - 1], chunk);
ALOGW("stts's parent box (%s) is not stbl, skip it.", chunk);
break;
}
status_t err =
mLastTrack->sampleTable->setTimeToSampleParams(
data_offset, chunk_data_size);
if (err != OK) {
return err;
}
break;
}
case FOURCC("ctts"):
{
if ((mLastTrack == NULL) || (mLastTrack->sampleTable == NULL))
return ERROR_MALFORMED;
*offset += chunk_size;
status_t err =
mLastTrack->sampleTable->setCompositionTimeToSampleParams(
data_offset, chunk_data_size);
if (err != OK) {
return err;
}
break;
}
case FOURCC("stss"):
{
if ((mLastTrack == NULL) || (mLastTrack->sampleTable == NULL))
return ERROR_MALFORMED;
*offset += chunk_size;
status_t err =
mLastTrack->sampleTable->setSyncSampleParams(
data_offset, chunk_data_size);
if (err != OK) {
return err;
}
break;
}
// \xA9xyz
case FOURCC("\251xyz"):
{
*offset += chunk_size;
// In the smallest valid case, the total data length inside the "\xA9xyz" box
// would be 9, for instance "\xA9xyz" + "\x00\x05\x15\xc7" + "+0+0/",
// where "\x00\x05" is the text string length with value = 5,
// "\x15\xc7" is the language code = eng, and "+0+0/" is a
// location (string) value with longitude = 0 and latitude = 0.
// Since some devices encountered in the wild omit the trailing
// slash, we'll allow that.
if (chunk_data_size < 8) { // 8 instead of 9 to allow for missing /
return ERROR_MALFORMED;
}
uint16_t len;
if (!mDataSource->getUInt16(data_offset, &len)) {
return ERROR_IO;
}
// allow "+0+0" without trailing slash
if (len < 4 || len > chunk_data_size - 4) {
return ERROR_MALFORMED;
}
// The location string following the language code is formatted
// according to ISO 6709:2008 (https://en.wikipedia.org/wiki/ISO_6709).
// Allocate 2 extra bytes, in case we need to add a trailing slash,
// and to add a terminating 0.
std::unique_ptr<char[]> buffer(new (std::nothrow) char[len+2]());
if (!buffer) {
return NO_MEMORY;
}
if (mDataSource->readAt(
data_offset + 4, &buffer[0], len) < len) {
return ERROR_IO;
}
len = strlen(&buffer[0]);
if (len < 4) {
return ERROR_MALFORMED;
}
// Add a trailing slash if there wasn't one.
if (buffer[len - 1] != '/') {
buffer[len] = '/';
}
AMediaFormat_setString(mFileMetaData, AMEDIAFORMAT_KEY_LOCATION, &buffer[0]);
break;
}
case FOURCC("esds"):
{
*offset += chunk_size;
if (chunk_data_size < 4) {
return ERROR_MALFORMED;
}
auto tmp = heapbuffer<uint8_t>(chunk_data_size);
uint8_t *buffer = tmp.get();
if (buffer == NULL) {
return -ENOMEM;
}
if (mDataSource->readAt(
data_offset, buffer, chunk_data_size) < chunk_data_size) {
return ERROR_IO;
}
if (U32_AT(buffer) != 0) {
// Should be version 0, flags 0.
return ERROR_MALFORMED;
}
if (mLastTrack == NULL)
return ERROR_MALFORMED;
AMediaFormat_setBuffer(mLastTrack->meta,
AMEDIAFORMAT_KEY_ESDS, &buffer[4], chunk_data_size - 4);
if (mPath.size() >= 2
&& mPath[mPath.size() - 2] == FOURCC("mp4a")) {
// Information from the ESDS must be relied on for proper
// setup of sample rate and channel count for MPEG4 Audio.
// The generic header appears to only contain generic
// information...
status_t err = updateAudioTrackInfoFromESDS_MPEG4Audio(
&buffer[4], chunk_data_size - 4);
if (err != OK) {
return err;
}
}
if (mPath.size() >= 2
&& mPath[mPath.size() - 2] == FOURCC("mp4v")) {
// Check if the video is MPEG2
ESDS esds(&buffer[4], chunk_data_size - 4);
uint8_t objectTypeIndication;
if (esds.getObjectTypeIndication(&objectTypeIndication) == OK) {
if (objectTypeIndication >= 0x60 && objectTypeIndication <= 0x65) {
AMediaFormat_setString(mLastTrack->meta,
AMEDIAFORMAT_KEY_MIME, MEDIA_MIMETYPE_VIDEO_MPEG2);
}
}
}
break;
}
case FOURCC("btrt"):
{
*offset += chunk_size;
if (mLastTrack == NULL) {
return ERROR_MALFORMED;
}
uint8_t buffer[12];
if (chunk_data_size != sizeof(buffer)) {
return ERROR_MALFORMED;
}
if (mDataSource->readAt(
data_offset, buffer, chunk_data_size) < chunk_data_size) {
return ERROR_IO;
}
uint32_t maxBitrate = U32_AT(&buffer[4]);
uint32_t avgBitrate = U32_AT(&buffer[8]);
if (maxBitrate > 0 && maxBitrate < INT32_MAX) {
AMediaFormat_setInt32(mLastTrack->meta,
AMEDIAFORMAT_KEY_MAX_BIT_RATE, (int32_t)maxBitrate);
}
if (avgBitrate > 0 && avgBitrate < INT32_MAX) {
AMediaFormat_setInt32(mLastTrack->meta,
AMEDIAFORMAT_KEY_BIT_RATE, (int32_t)avgBitrate);
}
break;
}
case FOURCC("avcC"):
{
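// 'avcC' carries the AVCDecoderConfigurationRecord (ISO/IEC 14496-15);
// it is stored as-is as the AVC codec-specific data.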
*offset += chunk_size;
auto buffer = heapbuffer<uint8_t>(chunk_data_size);
if (buffer.get() == NULL) {
ALOGE("b/28471206");
return NO_MEMORY;
}
if (mDataSource->readAt(
data_offset, buffer.get(), chunk_data_size) < chunk_data_size) {
return ERROR_IO;
}
if (mLastTrack == NULL)
return ERROR_MALFORMED;
AMediaFormat_setBuffer(mLastTrack->meta,
AMEDIAFORMAT_KEY_CSD_AVC, buffer.get(), chunk_data_size);
break;
}
case FOURCC("hvcC"):
{
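// 'hvcC' carries the HEVCDecoderConfigurationRecord (ISO/IEC 14496-15),
// stored as-is as the HEVC codec-specific data.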
auto buffer = heapbuffer<uint8_t>(chunk_data_size);
if (buffer.get() == NULL) {
ALOGE("b/28471206");
return NO_MEMORY;
}
if (mDataSource->readAt(
data_offset, buffer.get(), chunk_data_size) < chunk_data_size) {
return ERROR_IO;
}
if (mLastTrack == NULL)
return ERROR_MALFORMED;
AMediaFormat_setBuffer(mLastTrack->meta,
AMEDIAFORMAT_KEY_CSD_HEVC, buffer.get(), chunk_data_size);
*offset += chunk_size;
break;
}
case FOURCC("av1C"):
{
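// 'av1C' carries the AV1CodecConfigurationRecord (AV1 ISOBMFF binding),
// stored as-is as the first codec-specific-data buffer.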
auto buffer = heapbuffer<uint8_t>(chunk_data_size);
if (buffer.get() == NULL) {
ALOGE("b/28471206");
return NO_MEMORY;
}
if (mDataSource->readAt(
data_offset, buffer.get(), chunk_data_size) < chunk_data_size) {
return ERROR_IO;
}
if (mLastTrack == NULL)
return ERROR_MALFORMED;
AMediaFormat_setBuffer(mLastTrack->meta,
AMEDIAFORMAT_KEY_CSD_0, buffer.get(), chunk_data_size);
*offset += chunk_size;
break;
}
case FOURCC("d263"):
{
*offset += chunk_size;
/*
* d263 contains a fixed 7 bytes part:
* vendor - 4 bytes
* version - 1 byte
* level - 1 byte
* profile - 1 byte
* optionally, "d263" box itself may contain a 16-byte
* bit rate box (bitr)
* average bit rate - 4 bytes
* max bit rate - 4 bytes
*/
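// The 16 bytes of the optional 'bitr' box include its own 4-byte size and
// 4-byte type, which is why the accepted sizes below are 7 and 7 + 16 = 23.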
char buffer[23];
if (chunk_data_size != 7 &&
chunk_data_size != 23) {
ALOGE("Incorrect D263 box size %lld", (long long)chunk_data_size);
return ERROR_MALFORMED;
}
if (mDataSource->readAt(
data_offset, buffer, chunk_data_size) < chunk_data_size) {
return ERROR_IO;
}
if (mLastTrack == NULL)
return ERROR_MALFORMED;
AMediaFormat_setBuffer(mLastTrack->meta,
AMEDIAFORMAT_KEY_D263, buffer, chunk_data_size);
break;
}
case FOURCC("meta"):
{
off64_t stop_offset = *offset + chunk_size;
*offset = data_offset;
bool isParsingMetaKeys = underQTMetaPath(mPath, 2);
if (!isParsingMetaKeys) {
uint8_t buffer[4];
if (chunk_data_size < (off64_t)sizeof(buffer)) {
*offset = stop_offset;
return ERROR_MALFORMED;
}
if (mDataSource->readAt(
data_offset, buffer, 4) < 4) {
*offset = stop_offset;
return ERROR_IO;
}
if (U32_AT(buffer) != 0) {
// Should be version 0, flags 0.
// If it's not, assume this is one of those apparently
// malformed chunks that lack the flags field and have
// completely different semantics from what the MPEG4 spec
// describes, and skip it.
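// (Most likely a QuickTime-style 'meta' atom, which is not a FullBox
// and therefore has no version/flags field.)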
*offset = stop_offset;
return OK;
}
*offset += sizeof(buffer);
}
while (*offset < stop_offset) {
status_t err = parseChunk(offset, depth + 1);
if (err != OK) {
return err;
}
}
if (*offset != stop_offset) {
return ERROR_MALFORMED;
}
break;
}
case FOURCC("iloc"):
case FOURCC("iinf"):
case FOURCC("iprp"):
case FOURCC("pitm"):
case FOURCC("idat"):
case FOURCC("iref"):
case FOURCC("ipro"):
{
if (mIsHeif) {
if (mItemTable == NULL) {
mItemTable = new ItemTable(mDataSource);
}
status_t err = mItemTable->parse(
chunk_type, data_offset, chunk_data_size);
if (err != OK) {
return err;
}
}
*offset += chunk_size;
break;
}
case FOURCC("mean"):
case FOURCC("name"):
case FOURCC("data"):
{
*offset += chunk_size;
if (mPath.size() == 6 && underMetaDataPath(mPath)) {
status_t err = parseITunesMetaData(data_offset, chunk_data_size);
if (err != OK) {
return err;
}
}
break;
}
case FOURCC("mvhd"):
{
*offset += chunk_size;
if (depth != 1) {
ALOGE("mvhd: depth %d", depth);
return ERROR_MALFORMED;
}
if (chunk_data_size < 32) {
return ERROR_MALFORMED;
}
uint8_t header[32];
if (mDataSource->readAt(
data_offset, header, sizeof(header))
< (ssize_t)sizeof(header)) {
return ERROR_IO;
}
uint64_t creationTime;
uint64_t duration = 0;
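// mvhd version 1 uses 64-bit creation/modification times and duration
// (timescale at payload offset 20, duration at 24); version 0 uses
// 32-bit fields (timescale at 12, duration at 16).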
if (header[0] == 1) {
creationTime = U64_AT(&header[4]);
mHeaderTimescale = U32_AT(&header[20]);
duration = U64_AT(&header[24]);
if (duration == 0xffffffffffffffff) {
duration = 0;
}
} else if (header[0] != 0) {
return ERROR_MALFORMED;
} else {
creationTime = U32_AT(&header[4]);
mHeaderTimescale = U32_AT(&header[12]);
uint32_t d32 = U32_AT(&header[16]);
if (d32 == 0xffffffff) {
d32 = 0;
}
duration = d32;
}
if (duration != 0 && mHeaderTimescale != 0 && duration < UINT64_MAX / 1000000) {
AMediaFormat_setInt64(mFileMetaData,
AMEDIAFORMAT_KEY_DURATION, duration * 1000000 / mHeaderTimescale);
}
String8 s;
if (convertTimeToDate(creationTime, &s)) {
AMediaFormat_setString(mFileMetaData, AMEDIAFORMAT_KEY_DATE, s.string());
}
break;
}
case FOURCC("mehd"):
{
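// 'mehd' (MovieExtendsHeaderBox) declares the fragment_duration of the
// whole movie including fragments: 64-bit when version == 1, 32-bit
// when version == 0.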
*offset += chunk_size;
if (chunk_data_size < 8) {
return ERROR_MALFORMED;
}
uint8_t flags[4];
if (mDataSource->readAt(
data_offset, flags, sizeof(flags))
< (ssize_t)sizeof(flags)) {
return ERROR_IO;
}
uint64_t duration = 0;
if (flags[0] == 1) {
// 64 bit
if (chunk_data_size < 12) {
return ERROR_MALFORMED;
}
mDataSource->getUInt64(data_offset + 4, &duration);
if (duration == 0xffffffffffffffff) {
duration = 0;
}
} else if (flags[0] == 0) {
// 32 bit
uint32_t d32;
mDataSource->getUInt32(data_offset + 4, &d32);
if (d32 == 0xffffffff) {
d32 = 0;
}
duration = d32;
} else {
return ERROR_MALFORMED;
}
if (duration != 0 && mHeaderTimescale != 0) {
AMediaFormat_setInt64(mFileMetaData,
AMEDIAFORMAT_KEY_DURATION, duration * 1000000 / mHeaderTimescale);
}
break;
}
case FOURCC("mdat"):
{
mMdatFound = true;
*offset += chunk_size;
break;
}
case FOURCC("hdlr"):
{
*offset += chunk_size;
if (underQTMetaPath(mPath, 3)) {
break;
}
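// handler_type sits 8 bytes into the box payload: 4 bytes of FullBox
// version/flags followed by a 4-byte pre_defined field.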
uint32_t buffer;
if (mDataSource->readAt(
data_offset + 8, &buffer, 4) < 4) {
return ERROR_IO;
}
uint32_t type = ntohl(buffer);
// For the 3GPP file format, the handler-type within the 'hdlr' box
// shall be 'text'. We also support the 'sbtl' handler type
// for practical reasons, as various MPEG4 containers use it.
if (type == FOURCC("text") || type == FOURCC("sbtl")) {
if (mLastTrack != NULL) {
AMediaFormat_setString(mLastTrack->meta,
AMEDIAFORMAT_KEY_MIME, MEDIA_MIMETYPE_TEXT_3GPP);
}
}
break;
}
case FOURCC("keys"):
{
*offset += chunk_size;
if (underQTMetaPath(mPath, 3)) {
status_t err = parseQTMetaKey(data_offset, chunk_data_size);
if (err != OK) {
return err;
}
}
break;
}
case FOURCC("trex"):
{
*offset += chunk_size;
if (chunk_data_size < 24) {
return ERROR_IO;
}
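// TrackExtendsBox layout after the 4-byte version/flags: track_ID,
// default_sample_description_index, default_sample_duration,
// default_sample_size and default_sample_flags (4 bytes each), giving
// the per-track defaults applied to movie fragments.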
Trex trex;
if (!mDataSource->getUInt32(data_offset + 4, &trex.track_ID) ||
!mDataSource->getUInt32(data_offset + 8, &trex.default_sample_description_index) ||
!mDataSource->getUInt32(data_offset + 12, &trex.default_sample_duration) ||
!mDataSource->getUInt32(data_offset + 16, &trex.default_sample_size) ||
!mDataSource->getUInt32(data_offset + 20, &trex.default_sample_flags)) {
return ERROR_IO;
}
mTrex.add(trex);
break;
}
case FOURCC("tx3g"):
{
if (mLastTrack == NULL)
return ERROR_MALFORMED;
// reject ridiculously large chunks
if (chunk_size > kMaxAtomSize) {
return ERROR_MALFORMED;
}
// complain about empty atoms
if (chunk_data_size <= 0) {
ALOGE("b/124330204");
android_errorWriteLog(0x534e4554, "124330204");
return ERROR_MALFORMED;
}
// Ideally we would fill the buffer based on "data_offset" and
// "chunk_data_size" instead of *offset and chunk_size;
// but we've been feeding the extra (box header) data to consumers for
// multiple releases, and if those apps are compensating for it, such a
// change would break them.
//
if (mLastTrack->mTx3gSize - mLastTrack->mTx3gFilled < chunk_size) {
size_t growth = kTx3gGrowth;
if (growth < chunk_size) {
growth = chunk_size;
}
// although this disallows 2 tx3g atoms of nearly kMaxAtomSize...
if ((uint64_t) mLastTrack->mTx3gSize + growth > kMaxAtomSize) {
ALOGE("b/124330204 - too much space");
android_errorWriteLog(0x534e4554, "124330204");
return ERROR_MALFORMED;
}
uint8_t *updated = (uint8_t *)realloc(mLastTrack->mTx3gBuffer,
mLastTrack->mTx3gSize + growth);
if (updated == NULL) {
return ERROR_MALFORMED;
}
mLastTrack->mTx3gBuffer = updated;
mLastTrack->mTx3gSize += growth;
}
if ((size_t)(mDataSource->readAt(*offset,
mLastTrack->mTx3gBuffer + mLastTrack->mTx3gFilled,
chunk_size))
< chunk_size) {
// advance read pointer so we don't end up reading this again
*offset += chunk_size;
return ERROR_IO;
}
mLastTrack->mTx3gFilled += chunk_size;
*offset += chunk_size;
break;
}
case FOURCC("covr"):
{
*offset += chunk_size;
ALOGV("chunk_data_size = %" PRId64 " and data_offset = %" PRId64,
chunk_data_size, data_offset);
if (chunk_data_size < 0 || static_cast<uint64_t>(chunk_data_size) >= SIZE_MAX - 1) {
return ERROR_MALFORMED;
}
auto buffer = heapbuffer<uint8_t>(chunk_data_size);
if (buffer.get() == NULL) {
ALOGE("b/28471206");
return NO_MEMORY;
}
if (mDataSource->readAt(
data_offset, buffer.get(), chunk_data_size) != (ssize_t)chunk_data_size) {
return ERROR_IO;
}
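// The artwork sits inside an iTunes-style 'data' atom; the 16 skipped
// bytes are its size (4), 'data' type (4), type indicator (4) and
// locale (4).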
const int kSkipBytesOfDataBox = 16;
if (chunk_data_size <= kSkipBytesOfDataBox) {
return ERROR_MALFORMED;
}
AMediaFormat_setBuffer(mFileMetaData,
AMEDIAFORMAT_KEY_ALBUMART,
buffer.get() + kSkipBytesOfDataBox, chunk_data_size - kSkipBytesOfDataBox);
break;
}
case FOURCC("colr"):
{
*offset += chunk_size;
// 'colr' is only meaningful inside a VisualSampleEntry box under the
// Sample Description Box ('stsd'); ignore it otherwise.
if (depth >= 2 && mPath[depth - 2] == FOURCC("stsd")) {
status_t err = parseColorInfo(data_offset, chunk_data_size);
if (err != OK) {
return err;
}
}
break;
}
case FOURCC("titl"):
case FOURCC("perf"):
case FOURCC("auth"):
case FOURCC("gnre"):
case FOURCC("albm"):
case FOURCC("yrrc"):
{
*offset += chunk_size;
status_t err = parse3GPPMetaData(data_offset, chunk_data_size, depth);
if (err != OK) {
return err;
}
break;
}
case FOURCC("ID32"):
{
*offset += chunk_size;
if (chunk_data_size < 6) {
return ERROR_MALFORMED;
}
parseID3v2MetaData(data_offset + 6);
break;
}
case FOURCC("----"):
{
mLastCommentMean.clear();
mLastCommentName.clear();
mLastCommentData.clear();
*offset += chunk_size;
break;
}
case FOURCC("sidx"):
{
status_t err = parseSegmentIndex(data_offset, chunk_data_size);
if (err != OK) {
return err;
}
*offset += chunk_size;
return UNKNOWN_ERROR; // stop parsing after sidx
}
case FOURCC("ac-3"):
{
*offset += chunk_size;
// bypass the ac-3 track if parsing fails
if (parseAC3SpecificBox(data_offset) != OK) {
if (mLastTrack != NULL) {
ALOGW("Fail to parse ac-3");
mLastTrack->skipTrack = true;
}
}
return OK;
}
case FOURCC("ec-3"):
{
*offset += chunk_size;
// bypass the ec-3 track if parsing fails
if (parseEAC3SpecificBox(data_offset) != OK) {
if (mLastTrack != NULL) {
ALOGW("Fail to parse ec-3");
mLastTrack->skipTrack = true;
}
}
return OK;
}
case FOURCC("ac-4"):
{
*offset += chunk_size;
// bypass the ac-4 track if parsing fails
if (parseAC4SpecificBox(data_offset) != OK) {
if (mLastTrack != NULL) {
ALOGW("Fail to parse ac-4");
mLastTrack->skipTrack = true;
}
}
return OK;
}
case FOURCC("ftyp"):
{
if (chunk_data_size < 8 || depth != 0) {
return ERROR_MALFORMED;
}
off64_t stop_offset = *offset + chunk_size;
uint32_t numCompatibleBrands = (chunk_data_size - 8) / 4;
std::set<uint32_t> brandSet;
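// 'ftyp' payload layout: major_brand (4 bytes), minor_version (4),
// then compatible_brands (4 each); the loop below visits major_brand
// at i == 0 and the compatible brands from i == 2 onward.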
for (size_t i = 0; i < numCompatibleBrands + 2; ++i) {
if (i == 1) {
// Skip this index; it refers to the minorVersion,
// not a brand.
continue;
}
uint32_t brand;
if (mDataSource->readAt(data_offset + 4 * i, &brand, 4) < 4) {
return ERROR_MALFORMED;
}
brand = ntohl(brand);
brandSet.insert(brand);
}
if (brandSet.count(FOURCC("qt ")) > 0) {
mIsQT = true;
} else {
if (brandSet.count(FOURCC("mif1")) > 0
&& brandSet.count(FOURCC("heic")) > 0) {
ALOGV("identified HEIF image");
mIsHeif = true;
brandSet.erase(FOURCC("mif1"));
brandSet.erase(FOURCC("heic"));
}
if (!brandSet.empty()) {
// This means that the file should have a moov box.
// It could be any ISO file (mp4, heif, etc.).
mHasMoovBox = true;
if (mIsHeif) {
ALOGV("identified HEIF image with other tracks");
}
}
}
*offset = stop_offset;
break;
}
default:
{
// Check whether we're parsing 'ilst' for QuickTime meta keys;
// if so, treat the chunk type as a number (key-id).
if (underQTMetaPath(mPath, 3)) {
status_t err = parseQTMetaVal(chunk_type, data_offset, chunk_data_size);
if (err != OK) {
return err;
}
}
*offset += chunk_size;
break;
}
}
return OK;
}
status_t MPEG4Extractor::parseChannelCountSampleRate(
off64_t *offset, uint16_t *channelCount, uint16_t *sampleRate) {
// skip 16 bytes:
// + 6-byte reserved,
// + 2-byte data reference index,
// + 8-byte reserved
*offset += 16;
if (!mDataSource->getUInt16(*offset, channelCount)) {
ALOGE("MPEG4Extractor: error while reading sample entry box: cannot read channel count");
return ERROR_MALFORMED;
}
// skip 8 bytes:
// + 2-byte channelCount,
// + 2-byte sample size,
// + 4-byte reserved
*offset += 8;
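// samplerate is stored as a 32-bit 16.16 fixed-point value; reading the
// high-order 16 bits here yields the integer sample rate.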
if (!mDataSource->getUInt16(*offset, sampleRate)) {
ALOGE("MPEG4Extractor: error while reading sample entry box: cannot read sample rate");
return ERROR_MALFORMED;
}
// skip 4 bytes:
// + 2-byte sampleRate,
// + 2-byte reserved
*offset += 4;
return OK;
}
status_t MPEG4Extractor::parseAC4SpecificBox(off64_t offset) {
if (mLastTrack == NULL) {
return ERROR_MALFORMED;
}
uint16_t sampleRate, channelCount;
status_t status;
if ((status = parseChannelCountSampleRate(&offset, &channelCount, &sampleRate)) != OK) {
return status;
}
uint32_t size;
// + 4-byte size
// + 4-byte type
// + 3-byte payload
const uint32_t kAC4MinimumBoxSize = 4 + 4 + 3;
if (!mDataSource->getUInt32(offset, &size) || size < kAC4MinimumBoxSize) {
ALOGE("MPEG4Extractor: error while reading ac-4 block: cannot read specific box size");