samples/RhythmGame/src/main/cpp/audio/FFMpegExtractor.cpp - platform/external/oboe - Git at Google

 /*
  * Copyright 2018 The Android Open Source Project
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
  * You may obtain a copy of the License at
  *
  *      http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */

 #include <memory>
 #include <oboe/Definitions.h>
 #include "FFMpegExtractor.h"
 #include "utils/logging.h"

 // Size, in bytes, of the I/O buffer that decode() allocates and passes to createAVIOContext().
 constexpr int kInternalBufferSize = 1152; // Use MP3 block size. https://wiki.hydrogenaud.io/index.php?title=MP3

 int read(void *opaque, uint8_t *buf, int buf_size) {

     auto asset = (AAsset *) opaque;
     int bytesRead = AAsset_read(asset, buf, (size_t)buf_size);
     return bytesRead;
 }

 int64_t seek(void *opaque, int64_t offset, int whence){

     auto asset = (AAsset*)opaque;

     // See https://www.ffmpeg.org/doxygen/3.0/avio_8h.html#a427ff2a881637b47ee7d7f9e368be63f
     if (whence == AVSEEK_SIZE) return AAsset_getLength(asset);
     if (AAsset_seek(asset, offset, whence) == -1){
         return -1;
     } else {
         return 0;
     }
 }

 /**
  * Allocates a read-only AVIOContext that pulls its data from `asset` through the
  * file-level read/seek callbacks.
  *
  * @param asset passed to the callbacks as their opaque pointer
  * @param buffer internal buffer for FFmpeg to use
  * @param bufferSize size of `buffer` in bytes
  * @param avioContext out-parameter; receives the new context (nullptr on failure)
  * @return true if the context was allocated, false otherwise
  */
 bool FFMpegExtractor::createAVIOContext(AAsset *asset, uint8_t *buffer, uint32_t bufferSize,
                                         AVIOContext **avioContext) {

     // 0 tells FFmpeg the buffer is only ever read from; no write callback is supplied.
     constexpr int kWriteFlag = 0;

     AVIOContext *created = avio_alloc_context(buffer, bufferSize, kWriteFlag, asset,
                                               read, nullptr, seek);
     *avioContext = created;

     if (created != nullptr) return true;

     LOGE("Failed to create AVIO context");
     return false;
 }

 /**
  * Allocates an AVFormatContext and attaches the supplied custom I/O context to it.
  *
  * @param avioContext I/O context stored in the new format context's `pb` field
  * @param avFormatContext out-parameter; receives the new context (nullptr on failure)
  * @return true if the context was allocated, false otherwise
  */
 bool
 FFMpegExtractor::createAVFormatContext(AVIOContext *avioContext, AVFormatContext **avFormatContext) {

     *avFormatContext = avformat_alloc_context();

     // Check the allocation BEFORE touching the context; assigning `pb` first would
     // dereference a null pointer when avformat_alloc_context fails.
     if (*avFormatContext == nullptr){
         LOGE("Failed to create AVFormatContext");
         return false;
     }

     (*avFormatContext)->pb = avioContext;
     return true;
 }

 /**
  * Opens the input described by `avFormatContext` (which already carries custom I/O).
  *
  * The pointer is received by value, so any change avformat_open_input makes to it is
  * not visible to the caller. Per the FFmpeg documentation a user-supplied context is
  * freed by avformat_open_input on failure, so callers must not free it again when this
  * returns false.
  *
  * @param avFormatContext context to open
  * @return true on success, false (after logging the error) otherwise
  */
 bool FFMpegExtractor::openAVFormatContext(AVFormatContext *avFormatContext) {

     // Empty URL because we provide our own I/O; no forced input format, no options.
     const int status = avformat_open_input(&avFormatContext, "", nullptr, nullptr);

     if (status != 0) {
         LOGE("Failed to open file. Error code %s", av_err2str(status));
         return false;
     }
     return true;
 }

 /**
  * Reads packets from the input to populate stream information on the context.
  *
  * @param avFormatContext an opened format context
  * @return true on success, false (after logging the error) otherwise
  */
 bool FFMpegExtractor::getStreamInfo(AVFormatContext *avFormatContext) {

     const int result = avformat_find_stream_info(avFormatContext, nullptr);

     // avformat_find_stream_info is documented to return >= 0 on success, so only
     // negative values are errors; testing for == 0 would reject positive successes.
     if (result < 0){
         LOGE("Failed to find stream info. Error code %s", av_err2str(result));
         return false;
     }
     return true;
 }

 /**
  * Asks FFmpeg to pick the best audio stream in the container.
  *
  * @param avFormatContext context whose streams are searched
  * @return the chosen stream, or nullptr (after logging) if none was found
  */
 AVStream *FFMpegExtractor::getBestAudioStream(AVFormatContext *avFormatContext) {

     // -1/-1: no preferred stream and no related stream; no decoder requested, no flags.
     const int bestIndex = av_find_best_stream(avFormatContext, AVMEDIA_TYPE_AUDIO, -1, -1, nullptr, 0);

     if (bestIndex >= 0) return avFormatContext->streams[bestIndex];

     LOGE("Could not find stream");
     return nullptr;
 }

 /**
  * Decodes the audio contained in `asset` and writes it to `targetData` as interleaved
  * 32-bit float samples, resampled to `targetProperties.sampleRate` and
  * `targetProperties.channelCount` channels.
  *
  * Every FFmpeg object created here is owned by a std::unique_ptr so that all exit paths
  * release it. The decoder and resampler are not flushed at end of stream.
  *
  * @param asset asset holding the compressed audio; read through the custom AVIO callbacks
  * @param targetData destination buffer. Its capacity is not passed in, so this function
  *        cannot bounds-check its writes; the caller must size it for the whole stream.
  * @param targetProperties channel count and sample rate of the output
  * @return number of bytes written to targetData, or -1 on error
  */
 int64_t FFMpegExtractor::decode(
         AAsset *asset,
         uint8_t *targetData,
         AudioProperties targetProperties) {

     constexpr int64_t kError = -1; // -1 indicates error

     // Create a buffer for FFmpeg to use for decoding. Once the AVIOContext exists the
     // buffer is released by the context's deleter below.
     auto buffer = static_cast<uint8_t*>(av_malloc(kInternalBufferSize));
     if (buffer == nullptr){
         LOGE("Could not allocate the AVIO buffer");
         return kError;
     }

     // Create an AVIOContext with a custom deleter
     std::unique_ptr<AVIOContext, void(*)(AVIOContext *)> ioContext {
             nullptr,
             [](AVIOContext *c) {
                 // Free whatever buffer the context currently holds (FFmpeg is allowed to
                 // replace the one it was given), then the context itself.
                 av_free(c->buffer);
                 avio_context_free(&c);
             }
     };
     {
         AVIOContext *tmp = nullptr;
         if (!createAVIOContext(asset, buffer, kInternalBufferSize, &tmp)){
             LOGE("Could not create an AVIOContext");
             av_free(buffer); // no context took ownership, so free it here
             return kError;
         }
         ioContext.reset(tmp);
     }

     // Create an AVFormatContext using the avformat_free_context as the deleter function.
     // Declared after ioContext so it is destroyed first.
     std::unique_ptr<AVFormatContext, decltype(&avformat_free_context)> formatContext {
             nullptr,
             &avformat_free_context
     };
     {
         AVFormatContext *tmp = nullptr;
         if (!createAVFormatContext(ioContext.get(), &tmp)) return kError;
         formatContext.reset(tmp);
     }

     if (!openAVFormatContext(formatContext.get())) {
         // avformat_open_input frees a user-supplied context when it fails; give up
         // ownership so the deleter does not free it a second time.
         formatContext.release();
         return kError;
     }

     if (!getStreamInfo(formatContext.get())) return kError;

     // Obtain the best audio stream to decode
     AVStream *stream = getBestAudioStream(formatContext.get());
     if (stream == nullptr || stream->codecpar == nullptr){
         LOGE("Could not find a suitable audio stream to decode");
         return kError;
     }

     printCodecParameters(stream->codecpar);

     // Find the codec to decode this stream
     const AVCodec *codec = avcodec_find_decoder(stream->codecpar->codec_id);
     if (!codec){
         LOGE("Could not find codec with ID: %d", stream->codecpar->codec_id);
         return kError;
     }

     // Create the codec context, specifying the deleter function
     std::unique_ptr<AVCodecContext, void(*)(AVCodecContext *)> codecContext {
             avcodec_alloc_context3(codec),
             [](AVCodecContext *c) { avcodec_free_context(&c); }
     };
     if (!codecContext){
         LOGE("Failed to allocate codec context");
         return kError;
     }

     // Copy the codec parameters into the context
     if (avcodec_parameters_to_context(codecContext.get(), stream->codecpar) < 0){
         LOGE("Failed to copy codec parameters to codec context");
         return kError;
     }

     // Open the codec
     if (avcodec_open2(codecContext.get(), codec, nullptr) < 0){
         LOGE("Could not open codec");
         return kError;
     }

     // prepare resampler
     // NOTE(review): (1 << n) - 1 yields the stereo mask for n == 2 but FRONT_LEFT rather
     // than the conventional mono layout for n == 1 - confirm this is intended.
     int32_t outChannelLayout = (1 << targetProperties.channelCount) - 1;
     LOGD("Channel layout %d", outChannelLayout);

     std::unique_ptr<SwrContext, void(*)(SwrContext *)> swr {
             swr_alloc(),
             [](SwrContext *s) { swr_free(&s); }
     };
     if (!swr){
         LOGE("Could not allocate the resampler");
         return kError;
     }
     av_opt_set_int(swr.get(), "in_channel_count", stream->codecpar->channels, 0);
     av_opt_set_int(swr.get(), "out_channel_count", targetProperties.channelCount, 0);
     av_opt_set_int(swr.get(), "in_channel_layout", stream->codecpar->channel_layout, 0);
     av_opt_set_int(swr.get(), "out_channel_layout", outChannelLayout, 0);
     av_opt_set_int(swr.get(), "in_sample_rate", stream->codecpar->sample_rate, 0);
     av_opt_set_int(swr.get(), "out_sample_rate", targetProperties.sampleRate, 0);
     av_opt_set_int(swr.get(), "in_sample_fmt", stream->codecpar->format, 0);
     av_opt_set_sample_fmt(swr.get(), "out_sample_fmt", AV_SAMPLE_FMT_FLT, 0);
     av_opt_set_int(swr.get(), "force_resampling", 1, 0);

     // Check that resampler has been inited
     int result = swr_init(swr.get());
     if (result != 0){
         LOGE("swr_init failed. Error: %s", av_err2str(result));
         return kError;
     }
     if (!swr_is_initialized(swr.get())) {
         LOGE("swr_is_initialized is false\n");
         return kError;
     }

     // Prepare to read data
     int64_t bytesWritten = 0; // 64-bit to match the return type
     AVPacket avPacket; // Stores compressed audio data
     av_init_packet(&avPacket);
     avPacket.data = nullptr;
     avPacket.size = 0;

     // Stores raw audio data
     std::unique_ptr<AVFrame, void(*)(AVFrame *)> decodedFrame {
             av_frame_alloc(),
             [](AVFrame *f) { av_frame_free(&f); }
     };
     if (!decodedFrame){
         LOGE("Could not allocate a frame");
         return kError;
     }
     int bytesPerSample = av_get_bytes_per_sample((AVSampleFormat)stream->codecpar->format);

     LOGD("Bytes per sample %d", bytesPerSample);

     LOGD("DECODE START");

     bool failed = false;

     // While there is more data to read, read it into the avPacket
     while (!failed && av_read_frame(formatContext.get(), &avPacket) == 0){

         if (avPacket.stream_index == stream->index && avPacket.size > 0) {

             // Pass our compressed data into the codec
             result = avcodec_send_packet(codecContext.get(), &avPacket);
             if (result != 0) {
                 LOGE("avcodec_send_packet error: %s", av_err2str(result));
                 failed = true;
             }

             // A packet can produce zero, one or several frames: keep receiving until
             // the codec asks for more input.
             bool gotFrame = false;
             while (!failed) {

                 // Retrieve our raw data from the codec
                 result = avcodec_receive_frame(codecContext.get(), decodedFrame.get());
                 if (result == AVERROR(EAGAIN)) {
                     // The codec needs more data before it can decode
                     if (!gotFrame) LOGI("avcodec_receive_frame returned EAGAIN");
                     break;
                 }
                 if (result != 0) {
                     LOGE("avcodec_receive_frame error: %s", av_err2str(result));
                     failed = true;
                     break;
                 }
                 gotFrame = true;

                 // DO RESAMPLING
                 const auto dstSampleCount = static_cast<int32_t>(av_rescale_rnd(
                         swr_get_delay(swr.get(), decodedFrame->sample_rate) + decodedFrame->nb_samples,
                         targetProperties.sampleRate,
                         decodedFrame->sample_rate,
                         AV_ROUND_UP));

                 uint8_t *resampled = nullptr;
                 result = av_samples_alloc(
                         &resampled,
                         nullptr,
                         targetProperties.channelCount,
                         dstSampleCount,
                         AV_SAMPLE_FMT_FLT,
                         0);
                 if (result < 0) {
                     LOGE("av_samples_alloc error: %s", av_err2str(result));
                     failed = true;
                     break;
                 }

                 const int frameCount = swr_convert(
                         swr.get(),
                         &resampled,
                         dstSampleCount,
                         const_cast<const uint8_t **>(decodedFrame->data),
                         decodedFrame->nb_samples);
                 if (frameCount < 0) {
                     // A negative count must not reach memcpy as a (huge) size
                     LOGE("swr_convert error: %s", av_err2str(frameCount));
                     av_freep(&resampled);
                     failed = true;
                     break;
                 }

                 const int64_t bytesToWrite = static_cast<int64_t>(frameCount)
                         * static_cast<int64_t>(sizeof(float))
                         * targetProperties.channelCount;
                 memcpy(targetData + bytesWritten, resampled, static_cast<size_t>(bytesToWrite));
                 bytesWritten += bytesToWrite;
                 av_freep(&resampled);
             }
         }

         // Release the buffer av_read_frame attached to the packet; merely zeroing
         // data/size would leak it on every iteration.
         av_packet_unref(&avPacket);
     }

     if (failed) return kError;

     LOGD("DECODE END");

     return bytesWritten;
 }

 /**
  * Logs the audio-related fields of a stream's codec parameters at debug level.
  *
  * @param params parameters to log; dereferenced without a null check
  */
 void FFMpegExtractor::printCodecParameters(AVCodecParameters *params) {

     const AVCodecParameters &p = *params;
     const char *formatName = av_get_sample_fmt_name((AVSampleFormat)p.format);

     LOGD("Stream properties");
     LOGD("Channels: %d", p.channels);
     LOGD("Channel layout: %" PRId64, p.channel_layout);
     LOGD("Sample rate: %d", p.sample_rate);
     LOGD("Format: %s", formatName);
     LOGD("Frame size: %d", p.frame_size);
 }
	/*
	* Copyright 2018 The Android Open Source Project
	*
	* Licensed under the Apache License, Version 2.0 (the "License");
	* you may not use this file except in compliance with the License.
	* You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/

	#include <memory>
	#include <oboe/Definitions.h>
	#include "FFMpegExtractor.h"
	#include "utils/logging.h"

	// Size, in bytes, of the I/O buffer that decode() allocates and passes to createAVIOContext().
	constexpr int kInternalBufferSize = 1152; // Use MP3 block size. https://wiki.hydrogenaud.io/index.php?title=MP3

	int read(void opaque, uint8_t buf, int buf_size) {

	auto asset = (AAsset *) opaque;
	int bytesRead = AAsset_read(asset, buf, (size_t)buf_size);
	return bytesRead;
	}

	int64_t seek(void *opaque, int64_t offset, int whence){

	auto asset = (AAsset*)opaque;

	// See https://www.ffmpeg.org/doxygen/3.0/avio_8h.html#a427ff2a881637b47ee7d7f9e368be63f
	if (whence == AVSEEK_SIZE) return AAsset_getLength(asset);
	if (AAsset_seek(asset, offset, whence) == -1){
	return -1;
	} else {
	return 0;
	}
	}

	bool FFMpegExtractor::createAVIOContext(AAsset asset, uint8_t buffer, uint32_t bufferSize,
	AVIOContext **avioContext) {

	constexpr int isBufferWriteable = 0;

	*avioContext = avio_alloc_context(
	buffer, // internal buffer for FFmpeg to use
	bufferSize, // For optimal decoding speed this should be the protocol block size
	isBufferWriteable,
	asset, // Will be passed to our callback functions as a (void *)
	read, // Read callback function
	nullptr, // Write callback function (not used)
	seek); // Seek callback function

	if (*avioContext == nullptr){
	LOGE("Failed to create AVIO context");
	return false;
	} else {
	return true;
	}
	}

	bool
	FFMpegExtractor::createAVFormatContext(AVIOContext avioContext, AVFormatContext *avFormatContext) {

	*avFormatContext = avformat_alloc_context();
	(*avFormatContext)->pb = avioContext;

	if (*avFormatContext == nullptr){
	LOGE("Failed to create AVFormatContext");
	return false;
	} else {
	return true;
	}
	}

	/**
	 * Opens the input described by `avFormatContext` (which already carries custom I/O).
	 *
	 * The pointer is received by value, so any change avformat_open_input makes to it is
	 * not visible to the caller. Per the FFmpeg documentation a user-supplied context is
	 * freed by avformat_open_input on failure, so callers must not free it again when this
	 * returns false.
	 *
	 * @param avFormatContext context to open
	 * @return true on success, false (after logging the error) otherwise
	 */
	bool FFMpegExtractor::openAVFormatContext(AVFormatContext *avFormatContext) {

	    // The inline block comments were previously unterminated, which commented out the
	    // remainder of the file; they are written as line comments here.
	    int result = avformat_open_input(&avFormatContext,
	                                     "", // URL is left empty because we're providing our own I/O
	                                     nullptr, // AVInputFormat *fmt
	                                     nullptr  // AVDictionary **options
	    );

	    if (result == 0) {
	        return true;
	    } else {
	        LOGE("Failed to open file. Error code %s", av_err2str(result));
	        return false;
	    }
	}

	/**
	 * Reads packets from the input to populate stream information on the context.
	 *
	 * @param avFormatContext an opened format context
	 * @return true on success, false (after logging the error) otherwise
	 */
	bool FFMpegExtractor::getStreamInfo(AVFormatContext *avFormatContext) {

	    const int result = avformat_find_stream_info(avFormatContext, nullptr);

	    // avformat_find_stream_info is documented to return >= 0 on success, so only
	    // negative values are errors; testing for == 0 would reject positive successes.
	    if (result < 0){
	        LOGE("Failed to find stream info. Error code %s", av_err2str(result));
	        return false;
	    }
	    return true;
	}

	/**
	 * Asks FFmpeg to pick the best audio stream in the container.
	 *
	 * @param avFormatContext context whose streams are searched
	 * @return the chosen stream, or nullptr (after logging) if none was found
	 */
	AVStream *FFMpegExtractor::getBestAudioStream(AVFormatContext *avFormatContext) {

	    // Return and parameter types must be pointers: the body returns nullptr and an
	    // element of `streams`, and accesses the context with `->`.
	    int streamIndex = av_find_best_stream(avFormatContext, AVMEDIA_TYPE_AUDIO, -1, -1, nullptr, 0);

	    if (streamIndex < 0){
	        LOGE("Could not find stream");
	        return nullptr;
	    } else {
	        return avFormatContext->streams[streamIndex];
	    }
	}

	/**
	 * Decodes the audio contained in `asset` and writes it to `targetData` as interleaved
	 * 32-bit float samples, resampled to `targetProperties.sampleRate` and
	 * `targetProperties.channelCount` channels.
	 *
	 * Every FFmpeg object created here is owned by a std::unique_ptr so that all exit paths
	 * release it. The decoder and resampler are not flushed at end of stream.
	 *
	 * @param asset asset holding the compressed audio; read through the custom AVIO callbacks
	 * @param targetData destination buffer. Its capacity is not passed in, so this function
	 *        cannot bounds-check its writes; the caller must size it for the whole stream.
	 * @param targetProperties channel count and sample rate of the output
	 * @return number of bytes written to targetData, or -1 on error
	 */
	int64_t FFMpegExtractor::decode(
	        AAsset *asset,
	        uint8_t *targetData,
	        AudioProperties targetProperties) {

	    constexpr int64_t kError = -1; // -1 indicates error

	    // Create a buffer for FFmpeg to use for decoding. Once the AVIOContext exists the
	    // buffer is released by the context's deleter below.
	    auto buffer = static_cast<uint8_t*>(av_malloc(kInternalBufferSize));
	    if (buffer == nullptr){
	        LOGE("Could not allocate the AVIO buffer");
	        return kError;
	    }

	    // Create an AVIOContext with a custom deleter (a function pointer taking the context)
	    std::unique_ptr<AVIOContext, void(*)(AVIOContext *)> ioContext {
	            nullptr,
	            [](AVIOContext *c) {
	                // Free whatever buffer the context currently holds (FFmpeg is allowed to
	                // replace the one it was given), then the context itself.
	                av_free(c->buffer);
	                avio_context_free(&c);
	            }
	    };
	    {
	        AVIOContext *tmp = nullptr;
	        if (!createAVIOContext(asset, buffer, kInternalBufferSize, &tmp)){
	            LOGE("Could not create an AVIOContext");
	            av_free(buffer); // no context took ownership, so free it here
	            return kError;
	        }
	        ioContext.reset(tmp);
	    }

	    // Create an AVFormatContext using the avformat_free_context as the deleter function.
	    // Declared after ioContext so it is destroyed first.
	    std::unique_ptr<AVFormatContext, decltype(&avformat_free_context)> formatContext {
	            nullptr,
	            &avformat_free_context
	    };
	    {
	        AVFormatContext *tmp = nullptr;
	        if (!createAVFormatContext(ioContext.get(), &tmp)) return kError;
	        formatContext.reset(tmp);
	    }

	    if (!openAVFormatContext(formatContext.get())) {
	        // avformat_open_input frees a user-supplied context when it fails; give up
	        // ownership so the deleter does not free it a second time.
	        formatContext.release();
	        return kError;
	    }

	    if (!getStreamInfo(formatContext.get())) return kError;

	    // Obtain the best audio stream to decode
	    AVStream *stream = getBestAudioStream(formatContext.get());
	    if (stream == nullptr || stream->codecpar == nullptr){
	        LOGE("Could not find a suitable audio stream to decode");
	        return kError;
	    }

	    printCodecParameters(stream->codecpar);

	    // Find the codec to decode this stream
	    const AVCodec *codec = avcodec_find_decoder(stream->codecpar->codec_id);
	    if (!codec){
	        LOGE("Could not find codec with ID: %d", stream->codecpar->codec_id);
	        return kError;
	    }

	    // Create the codec context, specifying the deleter function
	    std::unique_ptr<AVCodecContext, void(*)(AVCodecContext *)> codecContext {
	            avcodec_alloc_context3(codec),
	            [](AVCodecContext *c) { avcodec_free_context(&c); }
	    };
	    if (!codecContext){
	        LOGE("Failed to allocate codec context");
	        return kError;
	    }

	    // Copy the codec parameters into the context
	    if (avcodec_parameters_to_context(codecContext.get(), stream->codecpar) < 0){
	        LOGE("Failed to copy codec parameters to codec context");
	        return kError;
	    }

	    // Open the codec
	    if (avcodec_open2(codecContext.get(), codec, nullptr) < 0){
	        LOGE("Could not open codec");
	        return kError;
	    }

	    // prepare resampler
	    // NOTE(review): (1 << n) - 1 yields the stereo mask for n == 2 but FRONT_LEFT rather
	    // than the conventional mono layout for n == 1 - confirm this is intended.
	    int32_t outChannelLayout = (1 << targetProperties.channelCount) - 1;
	    LOGD("Channel layout %d", outChannelLayout);

	    std::unique_ptr<SwrContext, void(*)(SwrContext *)> swr {
	            swr_alloc(),
	            [](SwrContext *s) { swr_free(&s); }
	    };
	    if (!swr){
	        LOGE("Could not allocate the resampler");
	        return kError;
	    }
	    av_opt_set_int(swr.get(), "in_channel_count", stream->codecpar->channels, 0);
	    av_opt_set_int(swr.get(), "out_channel_count", targetProperties.channelCount, 0);
	    av_opt_set_int(swr.get(), "in_channel_layout", stream->codecpar->channel_layout, 0);
	    av_opt_set_int(swr.get(), "out_channel_layout", outChannelLayout, 0);
	    av_opt_set_int(swr.get(), "in_sample_rate", stream->codecpar->sample_rate, 0);
	    av_opt_set_int(swr.get(), "out_sample_rate", targetProperties.sampleRate, 0);
	    av_opt_set_int(swr.get(), "in_sample_fmt", stream->codecpar->format, 0);
	    av_opt_set_sample_fmt(swr.get(), "out_sample_fmt", AV_SAMPLE_FMT_FLT, 0);
	    av_opt_set_int(swr.get(), "force_resampling", 1, 0);

	    // Check that resampler has been inited
	    int result = swr_init(swr.get());
	    if (result != 0){
	        LOGE("swr_init failed. Error: %s", av_err2str(result));
	        return kError;
	    }
	    if (!swr_is_initialized(swr.get())) {
	        LOGE("swr_is_initialized is false\n");
	        return kError;
	    }

	    // Prepare to read data
	    int64_t bytesWritten = 0; // 64-bit to match the return type
	    AVPacket avPacket; // Stores compressed audio data
	    av_init_packet(&avPacket);
	    avPacket.data = nullptr;
	    avPacket.size = 0;

	    // Stores raw audio data
	    std::unique_ptr<AVFrame, void(*)(AVFrame *)> decodedFrame {
	            av_frame_alloc(),
	            [](AVFrame *f) { av_frame_free(&f); }
	    };
	    if (!decodedFrame){
	        LOGE("Could not allocate a frame");
	        return kError;
	    }
	    int bytesPerSample = av_get_bytes_per_sample((AVSampleFormat)stream->codecpar->format);

	    LOGD("Bytes per sample %d", bytesPerSample);

	    LOGD("DECODE START");

	    bool failed = false;

	    // While there is more data to read, read it into the avPacket
	    while (!failed && av_read_frame(formatContext.get(), &avPacket) == 0){

	        if (avPacket.stream_index == stream->index && avPacket.size > 0) {

	            // Pass our compressed data into the codec
	            result = avcodec_send_packet(codecContext.get(), &avPacket);
	            if (result != 0) {
	                LOGE("avcodec_send_packet error: %s", av_err2str(result));
	                failed = true;
	            }

	            // A packet can produce zero, one or several frames: keep receiving until
	            // the codec asks for more input.
	            bool gotFrame = false;
	            while (!failed) {

	                // Retrieve our raw data from the codec
	                result = avcodec_receive_frame(codecContext.get(), decodedFrame.get());
	                if (result == AVERROR(EAGAIN)) {
	                    // The codec needs more data before it can decode
	                    if (!gotFrame) LOGI("avcodec_receive_frame returned EAGAIN");
	                    break;
	                }
	                if (result != 0) {
	                    LOGE("avcodec_receive_frame error: %s", av_err2str(result));
	                    failed = true;
	                    break;
	                }
	                gotFrame = true;

	                // DO RESAMPLING
	                const auto dstSampleCount = static_cast<int32_t>(av_rescale_rnd(
	                        swr_get_delay(swr.get(), decodedFrame->sample_rate) + decodedFrame->nb_samples,
	                        targetProperties.sampleRate,
	                        decodedFrame->sample_rate,
	                        AV_ROUND_UP));

	                uint8_t *resampled = nullptr;
	                result = av_samples_alloc(
	                        &resampled,
	                        nullptr,
	                        targetProperties.channelCount,
	                        dstSampleCount,
	                        AV_SAMPLE_FMT_FLT,
	                        0);
	                if (result < 0) {
	                    LOGE("av_samples_alloc error: %s", av_err2str(result));
	                    failed = true;
	                    break;
	                }

	                const int frameCount = swr_convert(
	                        swr.get(),
	                        &resampled,
	                        dstSampleCount,
	                        const_cast<const uint8_t **>(decodedFrame->data),
	                        decodedFrame->nb_samples);
	                if (frameCount < 0) {
	                    // A negative count must not reach memcpy as a (huge) size
	                    LOGE("swr_convert error: %s", av_err2str(frameCount));
	                    av_freep(&resampled);
	                    failed = true;
	                    break;
	                }

	                const int64_t bytesToWrite = static_cast<int64_t>(frameCount)
	                        * static_cast<int64_t>(sizeof(float))
	                        * targetProperties.channelCount;
	                memcpy(targetData + bytesWritten, resampled, static_cast<size_t>(bytesToWrite));
	                bytesWritten += bytesToWrite;
	                av_freep(&resampled);
	            }
	        }

	        // Release the buffer av_read_frame attached to the packet; merely zeroing
	        // data/size would leak it on every iteration.
	        av_packet_unref(&avPacket);
	    }

	    if (failed) return kError;

	    LOGD("DECODE END");

	    return bytesWritten;
	}

	/**
	 * Logs the audio-related fields of a stream's codec parameters at debug level.
	 *
	 * @param params parameters to log; dereferenced without a null check
	 */
	void FFMpegExtractor::printCodecParameters(AVCodecParameters *params) {

	    const AVCodecParameters &p = *params;
	    const char *formatName = av_get_sample_fmt_name((AVSampleFormat)p.format);

	    LOGD("Stream properties");
	    LOGD("Channels: %d", p.channels);
	    LOGD("Channel layout: %" PRId64, p.channel_layout);
	    LOGD("Sample rate: %d", p.sample_rate);
	    LOGD("Format: %s", formatName);
	    LOGD("Frame size: %d", p.frame_size);
	}