#include <assert.h>
#include <caffe2/core/logging.h>
#include <caffe2/video/video_decoder.h>
#include <array>
#include <cstring>
#include <memory>
#include <mutex>
#include <random>

namespace caffe2 {

VideoDecoder::VideoDecoder() {
  static bool gInitialized = false;
  static std::mutex gMutex;
  std::unique_lock<std::mutex> lock(gMutex);
  if (!gInitialized) {
    av_register_all();
    avcodec_register_all();
    avformat_network_init();
    gInitialized = true;
  }
}

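// Decode one audio frame from `packet` and resample it to the rate and
// format requested in `params` before handing it to the callback. The
// output sample count is estimated with av_rescale_rnd, which also accounts
// for samples the resampler is still buffering (swr_get_delay); e.g. 1024
// input samples at 48000 Hz resampled to 16000 Hz need at most
// ceil((delay + 1024) * 16000 / 48000) output samples.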
void VideoDecoder::getAudioSample(
    AVPacket& packet,
    AVCodecContext* audioCodecContext_,
    AVFrame* audioStreamFrame_,
    SwrContext* convertCtx_,
    Callback& callback,
    const Params& params) {
  int frame_finished = 0;
  auto result = avcodec_decode_audio4(
      audioCodecContext_, audioStreamFrame_, &frame_finished, &packet);

  if (frame_finished) {
    // from
    // https://www.ffmpeg.org/doxygen/2.3/decoding_encoding_8c-example.html#a57
    auto c = audioCodecContext_;
    int data_size = av_samples_get_buffer_size(
        nullptr, c->channels, audioStreamFrame_->nb_samples, c->sample_fmt, 1);
    if (data_size < 0) {
      // this should never happen; guard against it out of paranoia
      LOG(ERROR) << "Failed to calculate data size";
    }

    // from https://www.ffmpeg.org/doxygen/2.1/group__lswr.html#details
    uint8_t* output = nullptr;
    auto swr = convertCtx_;
    auto inrate = audioCodecContext_->sample_rate;
    auto in_samples = audioStreamFrame_->nb_samples;

    int out_samples = av_rescale_rnd(
        swr_get_delay(swr, inrate) + in_samples,
        params.outrate_,
        inrate,
        AV_ROUND_UP);

    if (out_samples > 0) {
      auto input = (const uint8_t**)&audioStreamFrame_->data[0];
      if (av_samples_alloc(
              &output,
              nullptr,
              c->channels,
              out_samples,
              (AVSampleFormat)params.outfmt_,
              0) < 0) {
        LOG(ERROR) << "Failed to allocate audio resampling buffer";
      } else {
        // resample the audio data
        out_samples = swr_convert(swr, &output, out_samples, input, in_samples);
        if (out_samples < 0) {
          LOG(ERROR) << "Failed to resample audio : "
                     << ffmpegErrorStr(out_samples);
        } else {
          // allocate by sample count; the byte size computation assumes the
          // output is a float sample format, as requested via params.outfmt_
          auto out_sample_count = out_samples * c->channels;
          auto buffer = std::make_unique<float[]>(out_sample_count);
          memcpy(buffer.get(), output, out_sample_count * sizeof(float));

          unique_ptr<DecodedAudio> audio_sample = make_unique<DecodedAudio>();
          audio_sample->dataSize_ = data_size;
          audio_sample->outSampleSize_ = out_sample_count;
          audio_sample->audio_data_ = std::move(buffer);
          callback.audioDecoded(std::move(audio_sample));
        }
        av_freep(&output);
      }
    }
  } else {
    result = packet.size;
  }
  packet.size -= result;
  packet.data += result;
}

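// Compute output dimensions that preserve the input aspect ratio, matching
// either the short edge (when short_edge > 0) or otherwise the long edge to
// the given target; e.g. 1920x1080 with short_edge = 256 yields 455x256.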
void VideoDecoder::ResizeAndKeepAspectRatio(
    const int origWidth,
    const int origHeight,
    const int short_edge,
    const int long_edge,
    int& outWidth,
    int& outHeight) {
  if (origWidth < origHeight) {
    // dominant height
    if (short_edge > 0) {
      // use short_edge for rescale
      float ratio = short_edge / float(origWidth);
      outWidth = short_edge;
      outHeight = (int)round(ratio * origHeight);
    } else {
      // use long_edge for rescale
      float ratio = long_edge / float(origHeight);
      outHeight = long_edge;
      outWidth = (int)round(ratio * origWidth);
    }
  } else {
    // dominant width
    if (short_edge > 0) {
      // use short_edge for rescale
      float ratio = short_edge / float(origHeight);
      outHeight = short_edge;
      outWidth = (int)round(ratio * origWidth);
    } else {
      // use long_edge for rescale
      float ratio = long_edge / float(origWidth);
      outWidth = long_edge;
      outHeight = (int)round(ratio * origHeight);
    }
  }
}

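// Main decoding routine: probe the container format from the custom IO
// context, open the demuxer and codecs, optionally seek to a start point,
// then read packets and decode, sample, and scale frames, reporting the
// results through the callback.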
void VideoDecoder::decodeLoop(
    const string& videoName,
    VideoIOContext& ioctx,
    const Params& params,
    const int start_frm,
    Callback& callback) {
  AVPixelFormat pixFormat = params.pixelFormat_;
  AVFormatContext* inputContext = avformat_alloc_context();
  AVStream* videoStream_ = nullptr;
  AVCodecContext* videoCodecContext_ = nullptr;
  AVCodecContext* audioCodecContext_ = nullptr;
  AVFrame* videoStreamFrame_ = nullptr;
  AVFrame* audioStreamFrame_ = nullptr;
  SwrContext* convertCtx_ = nullptr;
  AVPacket packet;
  av_init_packet(&packet); // init packet
  SwsContext* scaleContext_ = nullptr;

  try {
    inputContext->pb = ioctx.get_avio();
    inputContext->flags |= AVFMT_FLAG_CUSTOM_IO;
    int ret = 0;

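    // FFmpeg's format probing may read up to AVPROBE_PADDING_SIZE bytes past
    // the end of the supplied data, so the probe buffer is over-allocated
    // and zeroed out.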
    // Determining the input format:
    int probeSz = 1 * 1024 + AVPROBE_PADDING_SIZE;
    DecodedFrame::AvDataPtr probe((uint8_t*)av_malloc(probeSz));
    memset(probe.get(), 0, probeSz);
    int len = ioctx.read(probe.get(), probeSz - AVPROBE_PADDING_SIZE);
    if (len < probeSz - AVPROBE_PADDING_SIZE) {
      LOG(ERROR) << "Insufficient data to determine video format";
      return;
    }
    // seek back to start of stream
    ioctx.seek(0, SEEK_SET);

    unique_ptr<AVProbeData> probeData(new AVProbeData());
    probeData->buf = probe.get();
    probeData->buf_size = len;
    probeData->filename = "";
    // Determine the input format:
    inputContext->iformat = av_probe_input_format(probeData.get(), 1);
    // bail out if probing failed; continuing with a null input format could
    // lead to a double-free when avformat_open_input fails below
    if (inputContext->iformat == nullptr) {
      LOG(ERROR) << "inputContext iformat is nullptr!";
      return;
    }

    ret = avformat_open_input(&inputContext, "", nullptr, nullptr);
    if (ret < 0) {
      LOG(ERROR) << "Unable to open stream : " << ffmpegErrorStr(ret);
      return;
    }

    ret = avformat_find_stream_info(inputContext, nullptr);
    if (ret < 0) {
      LOG(ERROR) << "Unable to find stream info in " << videoName << " "
                 << ffmpegErrorStr(ret);
      return;
    }

    // Decode the first video stream, unless a specific stream index was
    // requested in params
    int videoStreamIndex_ = params.streamIndex_;
    int audioStreamIndex_ = params.streamIndex_;
    if (params.streamIndex_ == -1) {
      for (int i = 0; i < (int)inputContext->nb_streams; i++) {
        auto stream = inputContext->streams[i];
        if (stream->codec->codec_type == AVMEDIA_TYPE_VIDEO &&
            videoStreamIndex_ == -1) {
          videoStreamIndex_ = i;
          videoStream_ = stream;
        } else if (
            stream->codec->codec_type == AVMEDIA_TYPE_AUDIO &&
            audioStreamIndex_ == -1) {
          audioStreamIndex_ = i;
        }
        if (videoStreamIndex_ != -1 && audioStreamIndex_ != -1) {
          break;
        }
      }
    } else if (
        videoStreamIndex_ >= 0 &&
        videoStreamIndex_ < (int)inputContext->nb_streams &&
        inputContext->streams[videoStreamIndex_]->codec->codec_type ==
            AVMEDIA_TYPE_VIDEO) {
      // honor an explicitly requested video stream index
      videoStream_ = inputContext->streams[videoStreamIndex_];
    }
    if (videoStream_ == nullptr) {
      LOG(ERROR) << "Unable to find video stream in " << videoName;
      return;
    }

    // Initialize codec
    AVDictionary* opts = nullptr;
    videoCodecContext_ = videoStream_->codec;
    try {
      ret = avcodec_open2(
          videoCodecContext_,
          avcodec_find_decoder(videoCodecContext_->codec_id),
          &opts);
    } catch (const std::exception&) {
      LOG(ERROR) << "Exception during open video codec";
      return;
    }

    if (ret < 0) {
      LOG(ERROR) << "Cannot open video codec : " << ffmpegErrorStr(ret);
      return;
    }

    if (params.getAudio_ && audioStreamIndex_ >= 0) {
      // see e.g. ridge/decoder/StreamDecoder.cpp
      audioCodecContext_ = inputContext->streams[audioStreamIndex_]->codec;
      ret = avcodec_open2(
          audioCodecContext_,
          avcodec_find_decoder(audioCodecContext_->codec_id),
          nullptr);

      if (ret < 0) {
        LOG(ERROR) << "Cannot open audio codec : " << ffmpegErrorStr(ret);
        return;
      }

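      // Configure the resampler: the first layout/format/rate triple below
      // describes the desired output; the second describes the input as
      // reported by the audio codec context.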
      convertCtx_ = swr_alloc_set_opts(
          nullptr,
          params.outlayout_,
          (AVSampleFormat)params.outfmt_,
          params.outrate_,
          audioCodecContext_->channel_layout,
          audioCodecContext_->sample_fmt,
          audioCodecContext_->sample_rate,
          0,
          nullptr);

      if (convertCtx_ == nullptr) {
        LOG(ERROR) << "Cannot setup sample format converter.";
        return;
      }
      if (swr_init(convertCtx_) < 0) {
        LOG(ERROR) << "Cannot init sample format converter.";
        return;
      }
    }

    // Calculate if we need to rescale the frames
    const int origWidth = videoCodecContext_->width;
    const int origHeight = videoCodecContext_->height;
    int outWidth = origWidth;
    int outHeight = origHeight;

    if (params.video_res_type_ == VideoResType::ORIGINAL_RES) {
      // if the original resolution is too low,
      // make it at least the same size as crop_size_
      if (params.crop_size_ > origWidth || params.crop_size_ > origHeight) {
        ResizeAndKeepAspectRatio(
            origWidth, origHeight, params.crop_size_, -1, outWidth, outHeight);
      }
    } else if (params.video_res_type_ == VideoResType::USE_SHORT_EDGE) {
      // resize the image to the predefined
      // short_edge_ resolution while keeping the aspect ratio
      ResizeAndKeepAspectRatio(
          origWidth, origHeight, params.short_edge_, -1, outWidth, outHeight);
    } else if (params.video_res_type_ == VideoResType::USE_WIDTH_HEIGHT) {
      // resize the image to the predefined
      // resolution, ignoring the aspect ratio
      outWidth = params.outputWidth_;
      outHeight = params.outputHeight_;
    } else {
      LOG(ERROR) << "Unknown VideoResType: " << params.video_res_type_;
      return;
    }

    // Make sure that we have a valid format
    if (videoCodecContext_->pix_fmt == AV_PIX_FMT_NONE) {
      LOG(ERROR) << "pixel format is not valid.";
      return;
    }

    // Create a scale context
    scaleContext_ = sws_getContext(
        videoCodecContext_->width,
        videoCodecContext_->height,
        videoCodecContext_->pix_fmt,
        outWidth,
        outHeight,
        pixFormat,
        SWS_FAST_BILINEAR,
        nullptr,
        nullptr,
        nullptr);
    if (scaleContext_ == nullptr) {
      LOG(ERROR) << "Cannot create the scale context for " << videoName;
      return;
    }

    // Getting video meta data
    VideoMeta videoMeta;
    videoMeta.codec_type = videoCodecContext_->codec_type;
    videoMeta.width = outWidth;
    videoMeta.height = outHeight;
    videoMeta.pixFormat = pixFormat;

    // avoid division by zero, code adapted from
    // https://www.ffmpeg.org/doxygen/0.6/rational_8h-source.html
    if (videoStream_->avg_frame_rate.num == 0 ||
        videoStream_->avg_frame_rate.den == 0) {
      LOG(ERROR) << "Invalid frame rate: numerator or denominator is zero.";
      return;
    }

    videoMeta.fps = av_q2d(videoStream_->avg_frame_rate);
    callback.videoDecodingStarted(videoMeta);

    if (params.intervals_.size() == 0) {
      LOG(ERROR) << "Empty sampling intervals.";
      return;
    }

    std::vector<SampleInterval>::const_iterator itvlIter =
        params.intervals_.begin();
    if (itvlIter->timestamp != 0) {
      LOG(ERROR) << "Sampling interval starting timestamp is not zero.";
      return;
    }

    double currFps = itvlIter->fps;
    if (currFps < 0 && currFps != SpecialFps::SAMPLE_ALL_FRAMES &&
        currFps != SpecialFps::SAMPLE_TIMESTAMP_ONLY) {
      // fps must be 0, -1, -2 or > 0
      LOG(ERROR) << "Invalid sampling fps.";
      return;
    }

    double prevTimestamp = itvlIter->timestamp;
    itvlIter++;
    if (itvlIter != params.intervals_.end() &&
        prevTimestamp >= itvlIter->timestamp) {
      LOG(ERROR) << "Sampling interval timestamps must be strictly ascending.";
      return;
    }

    double lastFrameTimestamp = -1.0;
    double timestamp = -1.0;

    // Initialize frame and packet.
    // These will be reused across calls.
    videoStreamFrame_ = av_frame_alloc();
    audioStreamFrame_ = av_frame_alloc();

    // frame index in video stream
    int frameIndex = -1;
    // frame index of the output frames
    int outputFrameIndex = -1;

    /* identify the starting point from where we must start decoding */
    std::mt19937 meta_randgen(time(nullptr));
    long int start_ts = -1;
    bool mustDecodeAll = false;

    if (videoStream_->duration > 0 && videoStream_->nb_frames > 0) {
      /* we have a valid duration and nb_frames. We can safely
       * detect an intermediate timestamp to start decoding from. */

      // leave a margin of 10 frames to take into account the error
      // from av_seek_frame
      long int margin =
          int(ceil((10 * videoStream_->duration) / (videoStream_->nb_frames)));
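      // Note: av_seek_frame with AVSEEK_FLAG_BACKWARD lands on the keyframe
      // at or before the requested timestamp; the margin, together with the
      // frame_ts >= start_ts filter in the decode loop, compensates for that.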
      // if we need to do temporal jittering
      if (params.decode_type_ == DecodeType::DO_TMP_JITTER) {
        /* estimate the average duration for the required # of frames */
        double maxFramesDuration =
            (videoStream_->duration * params.num_of_required_frame_) /
            (videoStream_->nb_frames);
        int ts1 = 0;
        int ts2 = videoStream_->duration - int(ceil(maxFramesDuration));
        ts2 = ts2 > 0 ? ts2 : 0;
        // pick a random timestamp between ts1 and ts2. ts2 is selected such
        // that you have enough frames to satisfy the required # of frames.
        start_ts = std::uniform_int_distribution<>(ts1, ts2)(meta_randgen);
        // seek a frame at start_ts
        ret = av_seek_frame(
            inputContext,
            videoStreamIndex_,
            0 > (start_ts - margin) ? 0 : (start_ts - margin),
            AVSEEK_FLAG_BACKWARD);

        // if we need to decode from the start_frm
      } else if (params.decode_type_ == DecodeType::USE_START_FRM) {
        if (videoStream_ == nullptr) {
          LOG(ERROR) << "Nullptr found at videoStream_";
          return;
        }
        start_ts = int(floor(
            (videoStream_->duration * start_frm) / (videoStream_->nb_frames)));
        // seek a frame at start_ts
        ret = av_seek_frame(
            inputContext,
            videoStreamIndex_,
            0 > (start_ts - margin) ? 0 : (start_ts - margin),
            AVSEEK_FLAG_BACKWARD);
      } else {
        mustDecodeAll = true;
      }

      if (!mustDecodeAll && ret < 0) {
        LOG(INFO) << "Unable to seek to the desired start point;"
                  << " falling back to decoding all frames from the start";
        /* fall back to default decoding of all frames from start */
        av_seek_frame(inputContext, videoStreamIndex_, 0, AVSEEK_FLAG_BACKWARD);
        mustDecodeAll = true;
      }
    } else {
      mustDecodeAll = true;
    }

    int gotPicture = 0;
    int eof = 0;
    int selectiveDecodedFrames = 0;

    int maxFrames = (params.decode_type_ == DecodeType::DO_UNIFORM_SMP)
        ? MAX_DECODING_FRAMES
        : params.num_of_required_frame_;
    // There is a delay between reading packets from the
    // transport and getting decoded frames back.
    // Therefore, after EOF, continue going while
    // the decoder is still giving us frames.
    int ipacket = 0;
    while ((!eof || gotPicture) &&
           /* either you must decode all frames or decode up to maxFrames
            * based on status of the mustDecodeAll flag */
           (mustDecodeAll || (selectiveDecodedFrames < maxFrames)) &&
           /* If on the last interval and not autodecoding keyframes and a
            * SpecialFps indicates no more frames are needed, stop decoding */
           !((itvlIter == params.intervals_.end() &&
              (currFps == SpecialFps::SAMPLE_TIMESTAMP_ONLY ||
               currFps == SpecialFps::SAMPLE_NO_FRAME)) &&
             !params.keyFrames_)) {
      try {
        if (!eof) {
          ret = av_read_frame(inputContext, &packet);
          if (ret == AVERROR_EOF) {
            eof = 1;
            av_free_packet(&packet);
            packet.data = nullptr;
            packet.size = 0;
            // stay in the while loop to flush frames
          } else if (ret == AVERROR(EAGAIN)) {
            av_free_packet(&packet);
            continue;
          } else if (ret < 0) {
            LOG(ERROR) << "Error reading packet : " << ffmpegErrorStr(ret);
            return;
          }
          ipacket++;

          auto si = packet.stream_index;
          if (params.getAudio_ && audioStreamIndex_ >= 0 &&
              si == audioStreamIndex_) {
            // Audio packets can have multiple audio frames in a single packet
            while (packet.size > 0) {
              assert(audioCodecContext_ != nullptr);
              assert(convertCtx_ != nullptr);
              getAudioSample(
                  packet,
                  audioCodecContext_,
                  audioStreamFrame_,
                  convertCtx_,
                  callback,
                  params);
            }
          }

          if (si != videoStreamIndex_) {
            av_free_packet(&packet);
            continue;
          }
        }

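        // After EOF, keep feeding the now-empty packet to the decoder to
        // drain any frames it is still buffering (e.g. due to reordering).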
        ret = avcodec_decode_video2(
            videoCodecContext_, videoStreamFrame_, &gotPicture, &packet);
        if (ret < 0) {
          LOG(ERROR) << "Error decoding video frame : " << ffmpegErrorStr(ret);
          return;
        }
        try {
          // Nothing to do without a picture
          if (!gotPicture) {
            av_free_packet(&packet);
            continue;
          }
          frameIndex++;

          long int frame_ts =
              av_frame_get_best_effort_timestamp(videoStreamFrame_);
          timestamp = frame_ts * av_q2d(videoStream_->time_base);
          /* process the current frame either if we must decode all frames
           * or, when decoding selectively, once the frame timestamp has
           * reached start_ts, where selective decoding begins */
          if (mustDecodeAll || frame_ts >= start_ts) {
            // if reaching the next interval, update the current fps
            // and reset lastFrameTimestamp so the current frame could be
            // sampled (unless fps == SpecialFps::SAMPLE_NO_FRAME)
            if (itvlIter != params.intervals_.end() &&
                timestamp >= itvlIter->timestamp) {
              lastFrameTimestamp = -1.0;
              currFps = itvlIter->fps;
              prevTimestamp = itvlIter->timestamp;
              itvlIter++;
              if (itvlIter != params.intervals_.end() &&
                  prevTimestamp >= itvlIter->timestamp) {
                LOG(ERROR)
                    << "Sampling interval timestamps must be strictly ascending.";
                return;
              }
            }

            // keyFrame will bypass all checks on fps sampling settings
            bool keyFrame = params.keyFrames_ && videoStreamFrame_->key_frame;
            if (!keyFrame) {
              // if fps == SpecialFps::SAMPLE_NO_FRAME (0), don't sample at all
              if (currFps == SpecialFps::SAMPLE_NO_FRAME) {
                av_free_packet(&packet);
                continue;
              }

              // fps is considered reached in the following cases:
              // 1. lastFrameTimestamp < 0 - start of a new interval
              //    (or first frame)
              // 2. currFps == SpecialFps::SAMPLE_ALL_FRAMES (-1) - sample every
              //    frame
              // 3. timestamp - lastFrameTimestamp has reached target fps and
              //    currFps > 0 (not special fps setting)
              // different modes for fps:
              // SpecialFps::SAMPLE_NO_FRAME (0):
              //    disable fps sampling, no frame sampled at all
              // SpecialFps::SAMPLE_ALL_FRAMES (-1):
              //    unlimited fps sampling, will sample at native video fps
              // SpecialFps::SAMPLE_TIMESTAMP_ONLY (-2):
              //    disable fps sampling, but will get the frame at specific
              //    timestamp
              // others (> 0): decoding at the specified fps
              bool fpsReached = lastFrameTimestamp < 0 ||
                  currFps == SpecialFps::SAMPLE_ALL_FRAMES ||
                  (currFps > 0 &&
                   timestamp >= lastFrameTimestamp + (1 / currFps));

              if (!fpsReached) {
                av_free_packet(&packet);
                continue;
              }
            }

            lastFrameTimestamp = timestamp;

            outputFrameIndex++;
            if (params.maximumOutputFrames_ != -1 &&
                outputFrameIndex >= params.maximumOutputFrames_) {
              // enough frames
              av_free_packet(&packet);
              break;
            }

            AVFrame* rgbFrame = av_frame_alloc();
            if (!rgbFrame) {
              LOG(ERROR) << "Error allocating AVframe";
              return;
            }

            try {
              // Determine required buffer size and allocate buffer
              int numBytes = avpicture_get_size(pixFormat, outWidth, outHeight);
              DecodedFrame::AvDataPtr buffer(
                  (uint8_t*)av_malloc(numBytes * sizeof(uint8_t)));

              int size = avpicture_fill(
                  (AVPicture*)rgbFrame,
                  buffer.get(),
                  pixFormat,
                  outWidth,
                  outHeight);

              sws_scale(
                  scaleContext_,
                  videoStreamFrame_->data,
                  videoStreamFrame_->linesize,
                  0,
                  videoCodecContext_->height,
                  rgbFrame->data,
                  rgbFrame->linesize);

              unique_ptr<DecodedFrame> frame = make_unique<DecodedFrame>();
              frame->width_ = outWidth;
              frame->height_ = outHeight;
              frame->data_ = move(buffer);
              frame->size_ = size;
              frame->index_ = frameIndex;
              frame->outputFrameIndex_ = outputFrameIndex;
              frame->timestamp_ = timestamp;
              frame->keyFrame_ = videoStreamFrame_->key_frame;

              callback.frameDecoded(std::move(frame));

              selectiveDecodedFrames++;
              av_frame_free(&rgbFrame);
            } catch (const std::exception&) {
              av_frame_free(&rgbFrame);
            }
          }
          av_frame_unref(videoStreamFrame_);
          av_frame_unref(audioStreamFrame_);
        } catch (const std::exception&) {
          av_frame_unref(videoStreamFrame_);
          av_frame_unref(audioStreamFrame_);
        }

        av_free_packet(&packet);
      } catch (const std::exception&) {
        av_free_packet(&packet);
      }
    } // end of the decoding while loop
    callback.videoDecodingEnded(timestamp);

    // free all allocated resources
    sws_freeContext(scaleContext_);
    swr_free(&convertCtx_);
    av_packet_unref(&packet);
    av_frame_free(&videoStreamFrame_);
    av_frame_free(&audioStreamFrame_);
    avcodec_close(videoCodecContext_);
    if (audioCodecContext_ != nullptr) {
      avcodec_close(audioCodecContext_);
    }
    avformat_close_input(&inputContext);
    avformat_free_context(inputContext);
  } catch (const std::exception&) {
    // In case of a decoding error, free all allocated resources; the codec
    // contexts may not have been opened yet, so guard against nullptr
    sws_freeContext(scaleContext_);
    swr_free(&convertCtx_);
    av_packet_unref(&packet);
    av_frame_free(&videoStreamFrame_);
    av_frame_free(&audioStreamFrame_);
    if (videoCodecContext_ != nullptr) {
      avcodec_close(videoCodecContext_);
    }
    if (audioCodecContext_ != nullptr) {
      avcodec_close(audioCodecContext_);
    }
    avformat_close_input(&inputContext);
    avformat_free_context(inputContext);
  }
}

void VideoDecoder::decodeMemory(
    const string& videoName,
    const char* buffer,
    const int size,
    const Params& params,
    const int start_frm,
    Callback& callback) {
  VideoIOContext ioctx(buffer, size);
  decodeLoop(videoName, ioctx, params, start_frm, callback);
}

void VideoDecoder::decodeFile(
    const string& file,
    const Params& params,
    const int start_frm,
    Callback& callback) {
  VideoIOContext ioctx(file);
  decodeLoop(file, ioctx, params, start_frm, callback);
}

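// Convert an FFmpeg error code into a printable message. av_strerror fills
// the buffer with a generic description even for unknown error codes, so
// the result is always a null-terminated string.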
string VideoDecoder::ffmpegErrorStr(int result) {
  std::array<char, 128> buf;
  av_strerror(result, buf.data(), buf.size());
  return string(buf.data());
}

void FreeDecodedData(
    std::vector<std::unique_ptr<DecodedFrame>>& sampledFrames,
    std::vector<std::unique_ptr<DecodedAudio>>& sampledAudio) {
  // clearing the vectors is sufficient: the unique_ptr destructors free the
  // decoded frames and audio samples
  sampledFrames.clear();
  sampledAudio.clear();
}

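// Decode a video from a memory buffer or a local file and extract
// clip_per_video clips of num_of_required_frame_ RGB frames each. Clip
// start frames either come from clip_start_positions or are spaced
// uniformly; e.g. with 300 decoded frames, 16 required frames, and 10
// clips, the step is (300 - 16) / 9 ~= 31.6 and clip i starts at frame
// floor(i * 31.6).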
bool DecodeMultipleClipsFromVideo(
    const char* video_buffer,
    const std::string& video_filename,
    const int encoded_size,
    const Params& params,
    const int start_frm,
    const int clip_per_video,
    const std::vector<int>& clip_start_positions,
    const bool use_local_file,
    int& height,
    int& width,
    std::vector<unsigned char*>& buffer_rgb) {
  std::vector<std::unique_ptr<DecodedFrame>> sampledFrames;
  std::vector<std::unique_ptr<DecodedAudio>> sampledAudio;
  VideoDecoder decoder;

  CallbackImpl callback;
  // decoding from buffer or file
  if (!use_local_file) {
    decoder.decodeMemory(
        string("Memory Buffer"),
        video_buffer,
        encoded_size,
        params,
        start_frm,
        callback);
  } else {
    decoder.decodeFile(video_filename, params, start_frm, callback);
  }

  for (auto& frame : callback.frames) {
    sampledFrames.push_back(move(frame));
  }
  for (auto& audio_sample : callback.audio_samples) {
    sampledAudio.push_back(move(audio_sample));
  }

  // release any clip buffers left over from a previous call
  for (auto* buff : buffer_rgb) {
    delete[] buff;
  }
  buffer_rgb.clear();

  if ((int)sampledFrames.size() < params.num_of_required_frame_) {
    LOG(ERROR)
        << "The video seems faulty; decoded only " << sampledFrames.size()
        << " of the " << params.num_of_required_frame_ << " required frames";
    FreeDecodedData(sampledFrames, sampledAudio);
    return true;
  }
  if (sampledFrames.size() == 0) {
    LOG(ERROR) << "No frames were decoded; nothing to process";
    FreeDecodedData(sampledFrames, sampledAudio);
    return true;
  }
  height = sampledFrames[0]->height_;
  width = sampledFrames[0]->width_;
  float sample_stepsz = (clip_per_video <= 1)
      ? 0
      : (float(sampledFrames.size() - params.num_of_required_frame_) /
         (clip_per_video - 1));

  int image_size = 3 * height * width;
  int clip_size = params.num_of_required_frame_ * image_size;
  // get the RGB frames for each clip
  if (clip_start_positions.size() > 0) {
    for (int i = 0; i < (int)clip_start_positions.size(); i++) {
      unsigned char* buffer_rgb_ptr = new unsigned char[clip_size];
      int clip_start = clip_start_positions[i];
      for (int j = 0; j < params.num_of_required_frame_; j++) {
        memcpy(
            buffer_rgb_ptr + j * image_size,
            (unsigned char*)sampledFrames[j + clip_start]->data_.get(),
            image_size * sizeof(unsigned char));
      }
      buffer_rgb.push_back(buffer_rgb_ptr);
    }
  } else {
    for (int i = 0; i < clip_per_video; i++) {
      unsigned char* buffer_rgb_ptr = new unsigned char[clip_size];
      int clip_start = floor(i * sample_stepsz);
      for (int j = 0; j < params.num_of_required_frame_; j++) {
        memcpy(
            buffer_rgb_ptr + j * image_size,
            (unsigned char*)sampledFrames[j + clip_start]->data_.get(),
            image_size * sizeof(unsigned char));
      }
      buffer_rgb.push_back(buffer_rgb_ptr);
    }
  }
  FreeDecodedData(sampledFrames, sampledAudio);

  return true;
}

} // namespace caffe2