Snap for 5447620 from 24298e143028dbae661488c604e537d3556d2267 to pi-qpr3-b-release

Change-Id: I6a603608a8dbe11ab0ccb9589bb18c43d63033da
diff --git a/codec2/hidl/1.0/utils/InputSurfaceConnection.cpp b/codec2/hidl/1.0/utils/InputSurfaceConnection.cpp
index 4f4af6a..eea4e9d 100644
--- a/codec2/hidl/1.0/utils/InputSurfaceConnection.cpp
+++ b/codec2/hidl/1.0/utils/InputSurfaceConnection.cpp
@@ -168,7 +168,7 @@
 
         std::shared_ptr<C2GraphicAllocation> alloc;
         C2Handle* handle = WrapNativeCodec2GrallocHandle(
-                buffer->handle,
+                native_handle_clone(buffer->handle),
                 buffer->width, buffer->height,
                 buffer->format, buffer->usage, buffer->stride);
         mAllocatorMutex.lock();
diff --git a/codec2/vndk/C2AllocatorGralloc.cpp b/codec2/vndk/C2AllocatorGralloc.cpp
index 360dec8..b0ec5f1 100644
--- a/codec2/vndk/C2AllocatorGralloc.cpp
+++ b/codec2/vndk/C2AllocatorGralloc.cpp
@@ -159,7 +159,7 @@
         return xd != nullptr && xd->magic == MAGIC;
     }
 
-    static C2HandleGralloc* WrapAndMoveNativeHandle(
+    static C2HandleGralloc* WrapNativeHandle(
             const native_handle_t *const handle,
             uint32_t width, uint32_t height, uint32_t format, uint64_t usage,
             uint32_t stride, uint32_t generation, uint64_t igbp_id = 0, uint32_t igbp_slot = 0) {
@@ -181,26 +181,6 @@
         return reinterpret_cast<C2HandleGralloc *>(res);
     }
 
-    static C2HandleGralloc* WrapNativeHandle(
-            const native_handle_t *const handle,
-            uint32_t width, uint32_t height, uint32_t format, uint64_t usage,
-            uint32_t stride, uint32_t generation, uint64_t igbp_id = 0, uint32_t igbp_slot = 0) {
-        if (handle == nullptr) {
-            return nullptr;
-        }
-        native_handle_t *clone = native_handle_clone(handle);
-        if (clone == nullptr) {
-            return nullptr;
-        }
-        C2HandleGralloc *res = WrapAndMoveNativeHandle(
-                clone, width, height, format, usage, stride, generation, igbp_id, igbp_slot);
-        if (res == nullptr) {
-            native_handle_close(clone);
-        }
-        native_handle_delete(clone);
-        return res;
-    }
-
     static native_handle_t* UnwrapNativeHandle(
             const C2Handle *const handle) {
         const ExtraData *xd = getExtraData(handle);
@@ -335,11 +315,6 @@
         native_handle_delete(
                 const_cast<native_handle_t *>(reinterpret_cast<const native_handle_t *>(mHandle)));
     }
-    if (mLockedHandle) {
-        native_handle_delete(
-                const_cast<native_handle_t *>(
-                        reinterpret_cast<const native_handle_t *>(mLockedHandle)));
-    }
 }
 
 c2_status_t C2AllocationGralloc::map(
@@ -384,7 +359,7 @@
         if (mHandle) {
             mHandle->getIgbpData(&generation, &igbp_id, &igbp_slot);
         }
-        mLockedHandle = C2HandleGralloc::WrapAndMoveNativeHandle(
+        mLockedHandle = C2HandleGralloc::WrapNativeHandle(
                 mBuffer, mInfo.mapperInfo.width, mInfo.mapperInfo.height,
                 (uint32_t)mInfo.mapperInfo.format, mInfo.mapperInfo.usage, mInfo.stride,
                 generation, igbp_id, igbp_slot);
@@ -683,7 +658,7 @@
                     return;
                 }
                 info.stride = stride;
-                buffer = buffers[0];
+                buffer = std::move(buffers[0]);
             });
     if (err != C2_OK) {
         return err;
@@ -692,7 +667,7 @@
 
     allocation->reset(new C2AllocationGralloc(
             info, mMapper, buffer,
-            C2HandleGralloc::WrapAndMoveNativeHandle(
+            C2HandleGralloc::WrapNativeHandle(
                     buffer.getNativeHandle(),
                     info.mapperInfo.width, info.mapperInfo.height,
                     (uint32_t)info.mapperInfo.format, info.mapperInfo.usage, info.stride,
diff --git a/codec2/vndk/platform/C2BqBuffer.cpp b/codec2/vndk/platform/C2BqBuffer.cpp
index 7bf3d64..cbacd97 100644
--- a/codec2/vndk/platform/C2BqBuffer.cpp
+++ b/codec2/vndk/platform/C2BqBuffer.cpp
@@ -269,28 +269,36 @@
             }
         }
         if (slotBuffer) {
-            ALOGV("buffer wraps %llu %d", (unsigned long long)mProducerId, slot);
-            C2Handle *c2Handle = android::WrapNativeCodec2GrallocHandle(
-                    slotBuffer->handle,
-                    slotBuffer->width,
-                    slotBuffer->height,
-                    slotBuffer->format,
-                    slotBuffer->usage,
-                    slotBuffer->stride,
-                    slotBuffer->getGenerationNumber(),
-                    mProducerId, slot);
-            if (c2Handle) {
-                std::shared_ptr<C2GraphicAllocation> alloc;
-                c2_status_t err = mAllocator->priorGraphicAllocation(c2Handle, &alloc);
-                if (err != C2_OK) {
-                    return err;
+            native_handle_t *grallocHandle = native_handle_clone(slotBuffer->handle);
+
+            if (grallocHandle) {
+                ALOGV("buffer wraps %llu %d", (unsigned long long)mProducerId, slot);
+                C2Handle *c2Handle = android::WrapNativeCodec2GrallocHandle(
+                        grallocHandle,
+                        slotBuffer->width,
+                        slotBuffer->height,
+                        slotBuffer->format,
+                        slotBuffer->usage,
+                        slotBuffer->stride,
+                        slotBuffer->getGenerationNumber(),
+                        mProducerId, slot);
+                if (c2Handle) {
+                    // Moved everything to c2Handle.
+                    native_handle_delete(grallocHandle);
+                    std::shared_ptr<C2GraphicAllocation> alloc;
+                    c2_status_t err = mAllocator->priorGraphicAllocation(c2Handle, &alloc);
+                    if (err != C2_OK) {
+                        return err;
+                    }
+                    std::shared_ptr<C2BufferQueueBlockPoolData> poolData =
+                            std::make_shared<C2BufferQueueBlockPoolData>(
+                                    slotBuffer->getGenerationNumber(),
+                                    mProducerId, slot, shared_from_this());
+                    *block = _C2BlockFactory::CreateGraphicBlock(alloc, poolData);
+                    return C2_OK;
                 }
-                std::shared_ptr<C2BufferQueueBlockPoolData> poolData =
-                        std::make_shared<C2BufferQueueBlockPoolData>(
-                                slotBuffer->getGenerationNumber(),
-                                mProducerId, slot, shared_from_this());
-                *block = _C2BlockFactory::CreateGraphicBlock(alloc, poolData);
-                return C2_OK;
+                native_handle_close(grallocHandle);
+                native_handle_delete(grallocHandle);
             }
             // Block was not created. call requestBuffer# again next time.
             slotBuffer.clear();
diff --git a/media/codecs/aac/C2SoftAacEnc.cpp b/media/codecs/aac/C2SoftAacEnc.cpp
index 889016a..ea72aad 100644
--- a/media/codecs/aac/C2SoftAacEnc.cpp
+++ b/media/codecs/aac/C2SoftAacEnc.cpp
@@ -153,8 +153,9 @@
       mSentCodecSpecificData(false),
       mInputTimeSet(false),
       mInputSize(0),
-      mInputTimeUs(0),
-      mSignalledError(false) {
+      mInputTimeUs(-1ll),
+      mSignalledError(false),
+      mOutIndex(0u) {
 }
 
 C2SoftAacEnc::~C2SoftAacEnc() {
@@ -178,7 +179,7 @@
     mSentCodecSpecificData = false;
     mInputTimeSet = false;
     mInputSize = 0u;
-    mInputTimeUs = 0;
+    mInputTimeUs = -1ll;
     mSignalledError = false;
     return C2_OK;
 }
@@ -196,7 +197,6 @@
     mSentCodecSpecificData = false;
     mInputTimeSet = false;
     mInputSize = 0u;
-    mInputTimeUs = 0;
     return C2_OK;
 }
 
@@ -350,7 +350,6 @@
         mInputTimeUs = work->input.ordinal.timestamp;
         mInputTimeSet = true;
     }
-    uint64_t timestamp = mInputTimeUs.peeku();
 
     size_t numFrames = (capacity + mInputSize + (eos ? mNumBytesPerInputFrame - 1 : 0))
             / mNumBytesPerInputFrame;
@@ -358,22 +357,11 @@
           capacity, mInputSize, numFrames, mNumBytesPerInputFrame);
 
     std::shared_ptr<C2LinearBlock> block;
+    std::shared_ptr<C2Buffer> buffer;
     std::unique_ptr<C2WriteView> wView;
     uint8_t *outPtr = temp;
     size_t outAvailable = 0u;
-
-    if (numFrames) {
-        C2MemoryUsage usage = { C2MemoryUsage::CPU_READ, C2MemoryUsage::CPU_WRITE };
-        // TODO: error handling, proper usage, etc.
-        c2_status_t err = pool->fetchLinearBlock(mOutBufferSize * numFrames, usage, &block);
-        if (err != C2_OK) {
-            ALOGE("err = %d", err);
-        }
-
-        wView.reset(new C2WriteView(block->map().get()));
-        outPtr = wView->data();
-        outAvailable = wView->size();
-    }
+    uint64_t inputIndex = work->input.ordinal.frameIndex.peeku();
 
     AACENC_InArgs inargs;
     AACENC_OutArgs outargs;
@@ -405,15 +393,58 @@
     outBufDesc.bufSizes          = outBufferSize;
     outBufDesc.bufElSizes        = outBufferElSize;
 
-    // Encode the mInputFrame, which is treated as a modulo buffer
     AACENC_ERROR encoderErr = AACENC_OK;
-    size_t nOutputBytes = 0;
+
+    class FillWork {
+    public:
+        FillWork(uint32_t flags, C2WorkOrdinalStruct ordinal,
+                 const std::shared_ptr<C2Buffer> &buffer)
+            : mFlags(flags), mOrdinal(ordinal), mBuffer(buffer) {
+        }
+        ~FillWork() = default;
+
+        void operator()(const std::unique_ptr<C2Work> &work) {
+            work->worklets.front()->output.flags = (C2FrameData::flags_t)mFlags;
+            work->worklets.front()->output.buffers.clear();
+            work->worklets.front()->output.ordinal = mOrdinal;
+            work->workletsProcessed = 1u;
+            work->result = C2_OK;
+            if (mBuffer) {
+                work->worklets.front()->output.buffers.push_back(mBuffer);
+            }
+            ALOGV("timestamp = %lld, index = %lld, w/%s buffer",
+                  mOrdinal.timestamp.peekll(),
+                  mOrdinal.frameIndex.peekll(),
+                  mBuffer ? "" : "o");
+        }
+
+    private:
+        const uint32_t mFlags;
+        const C2WorkOrdinalStruct mOrdinal;
+        const std::shared_ptr<C2Buffer> mBuffer;
+    };
+
+    C2WorkOrdinalStruct outOrdinal = work->input.ordinal;
 
     while (encoderErr == AACENC_OK && inargs.numInSamples > 0) {
+        if (numFrames && !block) {
+            C2MemoryUsage usage = { C2MemoryUsage::CPU_READ, C2MemoryUsage::CPU_WRITE };
+            // TODO: error handling, proper usage, etc.
+            c2_status_t err = pool->fetchLinearBlock(mOutBufferSize, usage, &block);
+            if (err != C2_OK) {
+                ALOGE("err = %d", err);
+            }
+
+            wView.reset(new C2WriteView(block->map().get()));
+            outPtr = wView->data();
+            outAvailable = wView->size();
+            --numFrames;
+        }
+
         memset(&outargs, 0, sizeof(outargs));
 
         outBuffer[0] = outPtr;
-        outBufferSize[0] = outAvailable - nOutputBytes;
+        outBufferSize[0] = outAvailable;
 
         encoderErr = aacEncEncode(mAACEncoder,
                                   &inBufDesc,
@@ -422,17 +453,32 @@
                                   &outargs);
 
         if (encoderErr == AACENC_OK) {
+            if (buffer) {
+                outOrdinal.frameIndex = mOutIndex++;
+                outOrdinal.timestamp = mInputTimeUs;
+                cloneAndSend(
+                        inputIndex,
+                        work,
+                        FillWork(C2FrameData::FLAG_INCOMPLETE, outOrdinal, buffer));
+                buffer.reset();
+            }
+
             if (outargs.numOutBytes > 0) {
                 mInputSize = 0;
                 int consumed = (capacity / sizeof(int16_t)) - inargs.numInSamples
                         + outargs.numInSamples;
                 mInputTimeUs = work->input.ordinal.timestamp
                         + (consumed * 1000000ll / channelCount / sampleRate);
+                buffer = createLinearBuffer(block, 0, outargs.numOutBytes);
+#if defined(LOG_NDEBUG) && !LOG_NDEBUG
+                hexdump(outPtr, std::min(outargs.numOutBytes, 256));
+#endif
+                outPtr = temp;
+                outAvailable = 0;
+                block.reset();
             } else {
                 mInputSize += outargs.numInSamples * sizeof(int16_t);
             }
-            outPtr += outargs.numOutBytes;
-            nOutputBytes += outargs.numOutBytes;
 
             if (outargs.numInSamples > 0) {
                 inBuffer[0] = (int16_t *)inBuffer[0] + outargs.numInSamples;
@@ -440,15 +486,29 @@
                 inargs.numInSamples -= outargs.numInSamples;
             }
         }
-        ALOGV("encoderErr = %d nOutputBytes = %zu; mInputSize = %zu inargs.numInSamples = %d",
-              encoderErr, nOutputBytes, mInputSize, inargs.numInSamples);
+        ALOGV("encoderErr = %d mInputSize = %zu inargs.numInSamples = %d, mInputTimeUs = %lld",
+              encoderErr, mInputSize, inargs.numInSamples, mInputTimeUs.peekll());
     }
 
     if (eos && inBufferSize[0] > 0) {
+        if (numFrames && !block) {
+            C2MemoryUsage usage = { C2MemoryUsage::CPU_READ, C2MemoryUsage::CPU_WRITE };
+            // TODO: error handling, proper usage, etc.
+            c2_status_t err = pool->fetchLinearBlock(mOutBufferSize, usage, &block);
+            if (err != C2_OK) {
+                ALOGE("err = %d", err);
+            }
+
+            wView.reset(new C2WriteView(block->map().get()));
+            outPtr = wView->data();
+            outAvailable = wView->size();
+            --numFrames;
+        }
+
         memset(&outargs, 0, sizeof(outargs));
 
         outBuffer[0] = outPtr;
-        outBufferSize[0] = outAvailable - nOutputBytes;
+        outBufferSize[0] = outAvailable;
 
         // Flush
         inargs.numInSamples = -1;
@@ -458,27 +518,12 @@
                            &outBufDesc,
                            &inargs,
                            &outargs);
-
-        nOutputBytes += outargs.numOutBytes;
     }
 
-    work->worklets.front()->output.flags =
-        (C2FrameData::flags_t)(eos ? C2FrameData::FLAG_END_OF_STREAM : 0);
-    work->worklets.front()->output.buffers.clear();
-    work->worklets.front()->output.ordinal = work->input.ordinal;
-    work->worklets.front()->output.ordinal.timestamp = timestamp;
-    work->workletsProcessed = 1u;
-    if (nOutputBytes) {
-        work->worklets.front()->output.buffers.push_back(
-                createLinearBuffer(block, 0, nOutputBytes));
-    }
-
-#if 0
-    ALOGI("sending %d bytes of data (time = %lld us, flags = 0x%08lx)",
-          nOutputBytes, mInputTimeUs.peekll(), outHeader->nFlags);
-
-    hexdump(outHeader->pBuffer + outHeader->nOffset, outHeader->nFilledLen);
-#endif
+    outOrdinal.frameIndex = mOutIndex++;
+    outOrdinal.timestamp = mInputTimeUs;
+    FillWork((C2FrameData::flags_t)(eos ? C2FrameData::FLAG_END_OF_STREAM : 0),
+             outOrdinal, buffer)(work);
 }
 
 c2_status_t C2SoftAacEnc::drain(
@@ -501,7 +546,6 @@
     mSentCodecSpecificData = false;
     mInputTimeSet = false;
     mInputSize = 0u;
-    mInputTimeUs = 0;
 
     // TODO: we don't have any pending work at this time to drain.
     return C2_OK;
diff --git a/media/codecs/aac/C2SoftAacEnc.h b/media/codecs/aac/C2SoftAacEnc.h
index e6e7cf0..779365b 100644
--- a/media/codecs/aac/C2SoftAacEnc.h
+++ b/media/codecs/aac/C2SoftAacEnc.h
@@ -17,6 +17,8 @@
 #ifndef ANDROID_C2_SOFT_AAC_ENC_H_
 #define ANDROID_C2_SOFT_AAC_ENC_H_
 
+#include <atomic>
+
 #include <SimpleC2Component.h>
 
 #include "aacenc_lib.h"
@@ -60,6 +62,7 @@
     c2_cntr64_t mInputTimeUs;
 
     bool mSignalledError;
+    std::atomic_uint64_t mOutIndex;
 
     status_t initEncoder();
 
diff --git a/media/codecs/base/SimpleC2Component.cpp b/media/codecs/base/SimpleC2Component.cpp
index d6a62a7..54f094c 100644
--- a/media/codecs/base/SimpleC2Component.cpp
+++ b/media/codecs/base/SimpleC2Component.cpp
@@ -132,6 +132,56 @@
     }
 }
 
+class SimpleC2Component::BlockingBlockPool : public C2BlockPool {
+public:
+    BlockingBlockPool(const std::shared_ptr<C2BlockPool>& base): mBase{base} {}
+
+    virtual local_id_t getLocalId() const override {
+        return mBase->getLocalId();
+    }
+
+    virtual C2Allocator::id_t getAllocatorId() const override {
+        return mBase->getAllocatorId();
+    }
+
+    virtual c2_status_t fetchLinearBlock(
+            uint32_t capacity,
+            C2MemoryUsage usage,
+            std::shared_ptr<C2LinearBlock>* block) {
+        c2_status_t status;
+        do {
+            status = mBase->fetchLinearBlock(capacity, usage, block);
+        } while (status == C2_TIMED_OUT);
+        return status;
+    }
+
+    virtual c2_status_t fetchCircularBlock(
+            uint32_t capacity,
+            C2MemoryUsage usage,
+            std::shared_ptr<C2CircularBlock>* block) {
+        c2_status_t status;
+        do {
+            status = mBase->fetchCircularBlock(capacity, usage, block);
+        } while (status == C2_TIMED_OUT);
+        return status;
+    }
+
+    virtual c2_status_t fetchGraphicBlock(
+            uint32_t width, uint32_t height, uint32_t format,
+            C2MemoryUsage usage,
+            std::shared_ptr<C2GraphicBlock>* block) {
+        c2_status_t status;
+        do {
+            status = mBase->fetchGraphicBlock(width, height, format, usage,
+                                              block);
+        } while (status == C2_TIMED_OUT);
+        return status;
+    }
+
+private:
+    std::shared_ptr<C2BlockPool> mBase;
+};
+
 ////////////////////////////////////////////////////////////////////////////////
 
 namespace {
@@ -361,14 +411,38 @@
     }
     if (work) {
         fillWork(work);
-        Mutexed<ExecState>::Locked state(mExecState);
-        std::shared_ptr<C2Component::Listener> listener = state->mListener;
-        state.unlock();
+        std::shared_ptr<C2Component::Listener> listener = mExecState.lock()->mListener;
         listener->onWorkDone_nb(shared_from_this(), vec(work));
         ALOGV("returning pending work");
     }
 }
 
+void SimpleC2Component::cloneAndSend(
+        uint64_t frameIndex,
+        const std::unique_ptr<C2Work> &currentWork,
+        std::function<void(const std::unique_ptr<C2Work> &)> fillWork) {
+    std::unique_ptr<C2Work> work(new C2Work);
+    if (currentWork->input.ordinal.frameIndex == frameIndex) {
+        work->input.flags = currentWork->input.flags;
+        work->input.ordinal = currentWork->input.ordinal;
+    } else {
+        Mutexed<PendingWork>::Locked pending(mPendingWork);
+        if (pending->count(frameIndex) == 0) {
+            ALOGW("unknown frame index: %" PRIu64, frameIndex);
+            return;
+        }
+        work->input.flags = pending->at(frameIndex)->input.flags;
+        work->input.ordinal = pending->at(frameIndex)->input.ordinal;
+    }
+    work->worklets.emplace_back(new C2Worklet);
+    if (work) {
+        fillWork(work);
+        std::shared_ptr<C2Component::Listener> listener = mExecState.lock()->mListener;
+        listener->onWorkDone_nb(shared_from_this(), vec(work));
+        ALOGV("cloned and sending work");
+    }
+}
+
 bool SimpleC2Component::processQueue() {
     std::unique_ptr<C2Work> work;
     uint64_t generation;
@@ -422,12 +496,16 @@
                 }
             }
 
-            err = GetCodec2BlockPool(poolId, shared_from_this(), &mOutputBlockPool);
+            std::shared_ptr<C2BlockPool> blockPool;
+            err = GetCodec2BlockPool(poolId, shared_from_this(), &blockPool);
             ALOGD("Using output block pool with poolID %llu => got %llu - %d",
                     (unsigned long long)poolId,
                     (unsigned long long)(
-                            mOutputBlockPool ? mOutputBlockPool->getLocalId() : 111000111),
+                            blockPool ? blockPool->getLocalId() : 111000111),
                     err);
+            if (err == C2_OK) {
+                mOutputBlockPool = std::make_shared<BlockingBlockPool>(blockPool);
+            }
             return err;
         }();
         if (err != C2_OK) {
diff --git a/media/codecs/base/include/SimpleC2Component.h b/media/codecs/base/include/SimpleC2Component.h
index e745dc5..43029a9 100644
--- a/media/codecs/base/include/SimpleC2Component.h
+++ b/media/codecs/base/include/SimpleC2Component.h
@@ -121,6 +121,24 @@
      */
     void finish(uint64_t frameIndex, std::function<void(const std::unique_ptr<C2Work> &)> fillWork);
 
+    /**
+     * Clone pending or current work and send the work back to client.
+     *
+     * This method will retrieve and clone the pending or current work according
+     * to |frameIndex| and feed the work into |fillWork| function. |fillWork|
+     * must be "non-blocking". Once |fillWork| returns the filled work will be
+     * returned to the client.
+     *
+     * \param[in]   frameIndex    the index of the work
+     * \param[in]   currentWork   the current work under processing
+     * \param[in]   fillWork      the function to fill the retrieved work.
+     */
+    void cloneAndSend(
+            uint64_t frameIndex,
+            const std::unique_ptr<C2Work> &currentWork,
+            std::function<void(const std::unique_ptr<C2Work> &)> fillWork);
+
+
     std::shared_ptr<C2Buffer> createLinearBuffer(
             const std::shared_ptr<C2LinearBlock> &block);
 
@@ -216,7 +234,8 @@
     typedef std::unordered_map<uint64_t, std::unique_ptr<C2Work>> PendingWork;
     Mutexed<PendingWork> mPendingWork;
 
-    std::shared_ptr<C2BlockPool> mOutputBlockPool;
+    class BlockingBlockPool;
+    std::shared_ptr<BlockingBlockPool> mOutputBlockPool;
 
     SimpleC2Component() = delete;
 };
diff --git a/media/sfplugin/C2OMXNode.cpp b/media/sfplugin/C2OMXNode.cpp
index 73cbbc6..5b9aebb 100644
--- a/media/sfplugin/C2OMXNode.cpp
+++ b/media/sfplugin/C2OMXNode.cpp
@@ -226,7 +226,7 @@
             && omxBuf.mGraphicBuffer != nullptr) {
         std::shared_ptr<C2GraphicAllocation> alloc;
         handle = WrapNativeCodec2GrallocHandle(
-                omxBuf.mGraphicBuffer->handle,
+                native_handle_clone(omxBuf.mGraphicBuffer->handle),
                 omxBuf.mGraphicBuffer->width,
                 omxBuf.mGraphicBuffer->height,
                 omxBuf.mGraphicBuffer->format,
diff --git a/media/sfplugin/CCodec.cpp b/media/sfplugin/CCodec.cpp
index 20c3de3..1e74658 100644
--- a/media/sfplugin/CCodec.cpp
+++ b/media/sfplugin/CCodec.cpp
@@ -533,6 +533,10 @@
         mCodec->onWorkQueued(eos);
     }
 
+    void onOutputBuffersChanged() override {
+        mCodec->mCallback->onOutputBuffersChanged();
+    }
+
 private:
     CCodec *mCodec;
 };
@@ -1325,6 +1329,10 @@
 
     std::list<std::unique_ptr<C2Work>> flushedWork;
     c2_status_t err = comp->flush(C2Component::FLUSH_COMPONENT, &flushedWork);
+    {
+        Mutexed<std::list<std::unique_ptr<C2Work>>>::Locked queue(mWorkDoneQueue);
+        flushedWork.splice(flushedWork.end(), *queue);
+    }
     if (err != C2_OK) {
         // TODO: convert err into status_t
         mCallback->onError(UNKNOWN_ERROR, ACTION_CODE_FATAL);
@@ -1572,7 +1580,10 @@
                 (new AMessage(kWhatWorkDone, this))->post();
             }
 
-            subQueuedWorkCount(1);
+            if (work->worklets.empty()
+                    || !(work->worklets.front()->output.flags & C2FrameData::FLAG_INCOMPLETE)) {
+                subQueuedWorkCount(1);
+            }
             // handle configuration changes in work done
             Mutexed<Config>::Locked config(mConfig);
             bool changed = false;
@@ -1697,7 +1708,7 @@
         deadline->set(std::chrono::steady_clock::now() + 3s, "eos");
     }
     // TODO: query and use input/pipeline/output delay combined
-    if (count >= 8) {
+    if (count >= 4) {
         CCodecWatchdog::getInstance()->watch(this);
         Mutexed<NamedTimePoint>::Locked deadline(mQueueDeadline);
         deadline->set(std::chrono::steady_clock::now() + 3s, "queue");
diff --git a/media/sfplugin/CCodecBufferChannel.cpp b/media/sfplugin/CCodecBufferChannel.cpp
index 2cdea6e..69de800 100644
--- a/media/sfplugin/CCodecBufferChannel.cpp
+++ b/media/sfplugin/CCodecBufferChannel.cpp
@@ -171,7 +171,7 @@
      * index and MediaCodecBuffer object. Returns false if registration
      * fails.
      */
-    virtual bool registerBuffer(
+    virtual status_t registerBuffer(
             const std::shared_ptr<C2Buffer> &buffer,
             size_t *index,
             sp<MediaCodecBuffer> *clientBuffer) = 0;
@@ -180,7 +180,7 @@
      * Register codec specific data as a buffer to be consistent with
      * MediaCodec behavior.
      */
-    virtual bool registerCsd(
+    virtual status_t registerCsd(
             const C2StreamCsdInfo::output * /* csd */,
             size_t * /* index */,
             sp<MediaCodecBuffer> * /* clientBuffer */) = 0;
@@ -274,7 +274,7 @@
 namespace {
 
 // TODO: get this info from component
-const static size_t kMinInputBufferArraySize = 8;
+const static size_t kMinInputBufferArraySize = 4;
 const static size_t kMaxPipelineCapacity = 18;
 const static size_t kChannelOutputDelay = 0;
 const static size_t kMinOutputBufferArraySize = kMaxPipelineCapacity +
@@ -571,25 +571,33 @@
      * \param match[in]     a function to test whether the buffer matches the
      *                      criteria or not.
      * \return OK           if successful,
-     *         NO_MEMORY    if there's no available slot meets the criteria.
+     *         WOULD_BLOCK  if slots are being used,
+     *         NO_MEMORY    if no slot matches the criteria, even though it's
+     *                      available
      */
     status_t grabBuffer(
             size_t *index,
             sp<Codec2Buffer> *buffer,
             std::function<bool(const sp<Codec2Buffer> &)> match =
                 [](const sp<Codec2Buffer> &) { return true; }) {
+        // allBuffersDontMatch remains true if all buffers are available but
+        // match() returns false for every buffer.
+        bool allBuffersDontMatch = true;
         for (size_t i = 0; i < mBuffers.size(); ++i) {
-            if (!mBuffers[i].ownedByClient && mBuffers[i].compBuffer.expired()
-                    && match(mBuffers[i].clientBuffer)) {
-                mBuffers[i].ownedByClient = true;
-                *buffer = mBuffers[i].clientBuffer;
-                (*buffer)->meta()->clear();
-                (*buffer)->setRange(0, (*buffer)->capacity());
-                *index = i;
-                return OK;
+            if (!mBuffers[i].ownedByClient && mBuffers[i].compBuffer.expired()) {
+                if (match(mBuffers[i].clientBuffer)) {
+                    mBuffers[i].ownedByClient = true;
+                    *buffer = mBuffers[i].clientBuffer;
+                    (*buffer)->meta()->clear();
+                    (*buffer)->setRange(0, (*buffer)->capacity());
+                    *index = i;
+                    return OK;
+                }
+            } else {
+                allBuffersDontMatch = false;
             }
         }
-        return NO_MEMORY;
+        return allBuffersDontMatch ? NO_MEMORY : WOULD_BLOCK;
     }
 
     /**
@@ -681,6 +689,14 @@
         }
     }
 
+    void realloc(std::function<sp<Codec2Buffer>()> alloc) {
+        size_t size = mBuffers.size();
+        mBuffers.clear();
+        for (size_t i = 0; i < size; ++i) {
+            mBuffers.push_back({ alloc(), std::weak_ptr<C2Buffer>(), false });
+        }
+    }
+
 private:
     std::string mImplName; ///< name for debugging
     const char *mName; ///< C-string version of name
@@ -1073,7 +1089,7 @@
         return nullptr;
     }
 
-    bool registerBuffer(
+    status_t registerBuffer(
             const std::shared_ptr<C2Buffer> &buffer,
             size_t *index,
             sp<MediaCodecBuffer> *clientBuffer) final {
@@ -1084,22 +1100,25 @@
                 [buffer](const sp<Codec2Buffer> &clientBuffer) {
                     return clientBuffer->canCopy(buffer);
                 });
-        if (err != OK) {
+        if (err == WOULD_BLOCK) {
+            ALOGV("[%s] buffers temporarily not available", mName);
+            return err;
+        } else if (err != OK) {
             ALOGD("[%s] grabBuffer failed: %d", mName, err);
-            return false;
+            return err;
         }
         c2Buffer->setFormat(mFormat);
         if (!c2Buffer->copy(buffer)) {
             ALOGD("[%s] copy buffer failed", mName);
-            return false;
+            return WOULD_BLOCK;
         }
         submit(c2Buffer);
         *clientBuffer = c2Buffer;
         ALOGV("[%s] grabbed buffer %zu", mName, *index);
-        return true;
+        return OK;
     }
 
-    bool registerCsd(
+    status_t registerCsd(
             const C2StreamCsdInfo::output *csd,
             size_t *index,
             sp<MediaCodecBuffer> *clientBuffer) final {
@@ -1112,13 +1131,13 @@
                             && clientBuffer->capacity() >= csd->flexCount();
                 });
         if (err != OK) {
-            return false;
+            return err;
         }
         memcpy(c2Buffer->base(), csd->m.value, csd->flexCount());
         c2Buffer->setRange(0, csd->flexCount());
         c2Buffer->setFormat(mFormat);
         *clientBuffer = c2Buffer;
-        return true;
+        return OK;
     }
 
     bool releaseBuffer(
@@ -1138,6 +1157,36 @@
         mImpl.getArray(array);
     }
 
+    void realloc(const std::shared_ptr<C2Buffer> &c2buffer) {
+        std::function<sp<Codec2Buffer>()> alloc;
+        switch (c2buffer->data().type()) {
+            case C2BufferData::LINEAR: {
+                uint32_t size = kLinearBufferSize;
+                const C2ConstLinearBlock &block = c2buffer->data().linearBlocks().front();
+                if (block.size() < kMaxLinearBufferSize / 2) {
+                    size = block.size() * 2;
+                } else {
+                    size = kMaxLinearBufferSize;
+                }
+                alloc = [format = mFormat, size] {
+                    return new LocalLinearBuffer(format, new ABuffer(size));
+                };
+                break;
+            }
+
+            // TODO: add support
+            case C2BufferData::GRAPHIC:         FALLTHROUGH_INTENDED;
+
+            case C2BufferData::INVALID:         FALLTHROUGH_INTENDED;
+            case C2BufferData::LINEAR_CHUNKS:   FALLTHROUGH_INTENDED;
+            case C2BufferData::GRAPHIC_CHUNKS:  FALLTHROUGH_INTENDED;
+            default:
+                ALOGD("Unsupported type: %d", (int)c2buffer->data().type());
+                return;
+        }
+        mImpl.realloc(alloc);
+    }
+
 private:
     BuffersArrayImpl mImpl;
 };
@@ -1148,7 +1197,7 @@
         : OutputBuffers(componentName, name),
           mImpl(mName) { }
 
-    bool registerBuffer(
+    status_t registerBuffer(
             const std::shared_ptr<C2Buffer> &buffer,
             size_t *index,
             sp<MediaCodecBuffer> *clientBuffer) override {
@@ -1157,10 +1206,10 @@
         *index = mImpl.assignSlot(newBuffer);
         *clientBuffer = newBuffer;
         ALOGV("[%s] registered buffer %zu", mName, *index);
-        return true;
+        return OK;
     }
 
-    bool registerCsd(
+    status_t registerCsd(
             const C2StreamCsdInfo::output *csd,
             size_t *index,
             sp<MediaCodecBuffer> *clientBuffer) final {
@@ -1168,7 +1217,7 @@
                 mFormat, ABuffer::CreateAsCopy(csd->m.value, csd->flexCount()));
         *index = mImpl.assignSlot(newBuffer);
         *clientBuffer = newBuffer;
-        return true;
+        return OK;
     }
 
     bool releaseBuffer(
@@ -1380,65 +1429,57 @@
 // CCodecBufferChannel::PipelineCapacity
 
 CCodecBufferChannel::PipelineCapacity::PipelineCapacity()
-      : input(0), component(0), output(0),
+      : input(0), component(0),
         mName("<UNKNOWN COMPONENT>") {
 }
 
 void CCodecBufferChannel::PipelineCapacity::initialize(
         int newInput,
         int newComponent,
-        int newOutput,
         const char* newName,
         const char* callerTag) {
     input.store(newInput, std::memory_order_relaxed);
     component.store(newComponent, std::memory_order_relaxed);
-    output.store(newOutput, std::memory_order_relaxed);
     mName = newName;
     ALOGV("[%s] %s -- PipelineCapacity::initialize(): "
           "pipeline availability initialized ==> "
-          "input = %d, component = %d, output = %d",
+          "input = %d, component = %d",
             mName, callerTag ? callerTag : "*",
-            newInput, newComponent, newOutput);
+            newInput, newComponent);
 }
 
 bool CCodecBufferChannel::PipelineCapacity::allocate(const char* callerTag) {
     int prevInput = input.fetch_sub(1, std::memory_order_relaxed);
     int prevComponent = component.fetch_sub(1, std::memory_order_relaxed);
-    int prevOutput = output.fetch_sub(1, std::memory_order_relaxed);
-    if (prevInput > 0 && prevComponent > 0 && prevOutput > 0) {
+    if (prevInput > 0 && prevComponent > 0) {
         ALOGV("[%s] %s -- PipelineCapacity::allocate() returns true: "
               "pipeline availability -1 all ==> "
-              "input = %d, component = %d, output = %d",
+              "input = %d, component = %d",
                 mName, callerTag ? callerTag : "*",
                 prevInput - 1,
-                prevComponent - 1,
-                prevOutput - 1);
+                prevComponent - 1);
         return true;
     }
     input.fetch_add(1, std::memory_order_relaxed);
     component.fetch_add(1, std::memory_order_relaxed);
-    output.fetch_add(1, std::memory_order_relaxed);
     ALOGV("[%s] %s -- PipelineCapacity::allocate() returns false: "
           "pipeline availability unchanged ==> "
-          "input = %d, component = %d, output = %d",
+          "input = %d, component = %d",
             mName, callerTag ? callerTag : "*",
             prevInput,
-            prevComponent,
-            prevOutput);
+            prevComponent);
     return false;
 }
 
 void CCodecBufferChannel::PipelineCapacity::free(const char* callerTag) {
     int prevInput = input.fetch_add(1, std::memory_order_relaxed);
     int prevComponent = component.fetch_add(1, std::memory_order_relaxed);
-    int prevOutput = output.fetch_add(1, std::memory_order_relaxed);
     ALOGV("[%s] %s -- PipelineCapacity::free(): "
           "pipeline availability +1 all ==> "
-          "input = %d, component = %d, output = %d",
+          "input = %d, component = %d",
             mName, callerTag ? callerTag : "*",
             prevInput + 1,
-            prevComponent + 1,
-            prevOutput + 1);
+            prevComponent + 1);
 }
 
 int CCodecBufferChannel::PipelineCapacity::freeInputSlots(
@@ -1448,13 +1489,12 @@
                                     std::memory_order_relaxed);
     ALOGV("[%s] %s -- PipelineCapacity::freeInputSlots(%zu): "
           "pipeline availability +%zu input ==> "
-          "input = %d, component = %d, output = %d",
+          "input = %d, component = %d",
             mName, callerTag ? callerTag : "*",
             numDiscardedInputBuffers,
             numDiscardedInputBuffers,
             prevInput + static_cast<int>(numDiscardedInputBuffers),
-            component.load(std::memory_order_relaxed),
-            output.load(std::memory_order_relaxed));
+            component.load(std::memory_order_relaxed));
     return prevInput + static_cast<int>(numDiscardedInputBuffers);
 }
 
@@ -1463,25 +1503,84 @@
     int prevComponent = component.fetch_add(1, std::memory_order_relaxed);
     ALOGV("[%s] %s -- PipelineCapacity::freeComponentSlot(): "
           "pipeline availability +1 component ==> "
-          "input = %d, component = %d, output = %d",
+          "input = %d, component = %d",
             mName, callerTag ? callerTag : "*",
             input.load(std::memory_order_relaxed),
-            prevComponent + 1,
-            output.load(std::memory_order_relaxed));
+            prevComponent + 1);
     return prevComponent + 1;
 }
 
-int CCodecBufferChannel::PipelineCapacity::freeOutputSlot(
-        const char* callerTag) {
-    int prevOutput = output.fetch_add(1, std::memory_order_relaxed);
-    ALOGV("[%s] %s -- PipelineCapacity::freeOutputSlot(): "
-          "pipeline availability +1 output ==> "
-          "input = %d, component = %d, output = %d",
-            mName, callerTag ? callerTag : "*",
-            input.load(std::memory_order_relaxed),
-            component.load(std::memory_order_relaxed),
-            prevOutput + 1);
-    return prevOutput + 1;
+// CCodecBufferChannel::ReorderStash
+
+CCodecBufferChannel::ReorderStash::ReorderStash() {
+    clear();
+}
+
+void CCodecBufferChannel::ReorderStash::clear() {
+    mPending.clear();
+    mStash.clear();
+    mDepth = 0;
+    mKey = C2Config::ORDINAL;
+}
+
+void CCodecBufferChannel::ReorderStash::setDepth(uint32_t depth) {
+    mPending.splice(mPending.end(), mStash);
+    mDepth = depth;
+}
+void CCodecBufferChannel::ReorderStash::setKey(C2Config::ordinal_key_t key) {
+    mPending.splice(mPending.end(), mStash);
+    mKey = key;
+}
+
+bool CCodecBufferChannel::ReorderStash::pop(Entry *entry) {
+    if (mPending.empty()) {
+        return false;
+    }
+    entry->buffer     = mPending.front().buffer;
+    entry->timestamp  = mPending.front().timestamp;
+    entry->flags      = mPending.front().flags;
+    entry->ordinal    = mPending.front().ordinal;
+    mPending.pop_front();
+    return true;
+}
+
+void CCodecBufferChannel::ReorderStash::emplace(
+        const std::shared_ptr<C2Buffer> &buffer,
+        int64_t timestamp,
+        int32_t flags,
+        const C2WorkOrdinalStruct &ordinal) {
+    for (auto it = mStash.begin(); it != mStash.end(); ++it) {
+        if (less(ordinal, it->ordinal)) {
+            mStash.emplace(it, buffer, timestamp, flags, ordinal);
+            return;
+        }
+    }
+    mStash.emplace_back(buffer, timestamp, flags, ordinal);
+    while (!mStash.empty() && mStash.size() > mDepth) {
+        mPending.push_back(mStash.front());
+        mStash.pop_front();
+    }
+}
+
+void CCodecBufferChannel::ReorderStash::defer(
+        const CCodecBufferChannel::ReorderStash::Entry &entry) {
+    mPending.push_front(entry);
+}
+
+bool CCodecBufferChannel::ReorderStash::hasPending() const {
+    return !mPending.empty();
+}
+
+bool CCodecBufferChannel::ReorderStash::less(
+        const C2WorkOrdinalStruct &o1, const C2WorkOrdinalStruct &o2) {
+    switch (mKey) {
+        case C2Config::ORDINAL:   return o1.frameIndex < o2.frameIndex;
+        case C2Config::TIMESTAMP: return o1.timestamp < o2.timestamp;
+        case C2Config::CUSTOM:    return o1.customOrdinal < o2.customOrdinal;
+        default:
+            ALOGD("Unrecognized key; default to timestamp");
+            return o1.frameIndex < o2.frameIndex;
+    }
 }
 
 // CCodecBufferChannel
@@ -1720,6 +1819,7 @@
 
 void CCodecBufferChannel::feedInputBufferIfAvailableInternal() {
     while ((!mInputMetEos || mPendingEosTimestamp != INT64_MIN) &&
+           !mReorderStash.lock()->hasPending() &&
            mAvailablePipelineCapacity.allocate("feedInputBufferIfAvailable")) {
         int64_t pendingEosTimestamp = mPendingEosTimestamp.exchange(INT64_MIN);
         if (pendingEosTimestamp != INT64_MIN) {
@@ -1760,16 +1860,27 @@
 
 status_t CCodecBufferChannel::renderOutputBuffer(
         const sp<MediaCodecBuffer> &buffer, int64_t timestampNs) {
-    mAvailablePipelineCapacity.freeOutputSlot("renderOutputBuffer");
-    feedInputBufferIfAvailable();
+    ALOGV("[%s] renderOutputBuffer: %p", mName, buffer.get());
     std::shared_ptr<C2Buffer> c2Buffer;
+    bool released = false;
     {
         Mutexed<std::unique_ptr<OutputBuffers>>::Locked buffers(mOutputBuffers);
         if (*buffers) {
-            (*buffers)->releaseBuffer(buffer, &c2Buffer);
+            released = (*buffers)->releaseBuffer(buffer, &c2Buffer);
         }
     }
+    // NOTE: some apps try to releaseOutputBuffer() with timestamp and/or render
+    //       set to true.
+    sendOutputBuffers();
+    // input buffer feeding may have been gated by pending output buffers
+    feedInputBufferIfAvailable();
     if (!c2Buffer) {
+        if (released) {
+            ALOGD("[%s] The app is calling releaseOutputBuffer() with "
+                  "timestamp or render=true with non-video buffers. Apps should "
+                  "call releaseOutputBuffer() with render=false for those.",
+                  mName);
+        }
         return INVALID_OPERATION;
     }
 
@@ -1914,11 +2025,12 @@
         if (*buffers && (*buffers)->releaseBuffer(buffer, nullptr)) {
             buffers.unlock();
             released = true;
-            mAvailablePipelineCapacity.freeOutputSlot("discardBuffer");
         }
     }
-    feedInputBufferIfAvailable();
-    if (!released) {
+    if (released) {
+        sendOutputBuffers();
+        feedInputBufferIfAvailable();
+    } else {
         ALOGD("[%s] MediaCodec discarded an unknown buffer", mName);
     }
     return OK;
@@ -1950,38 +2062,34 @@
         const sp<AMessage> &inputFormat, const sp<AMessage> &outputFormat) {
     C2StreamBufferTypeSetting::input iStreamFormat(0u);
     C2StreamBufferTypeSetting::output oStreamFormat(0u);
+    C2PortReorderBufferDepthTuning::output reorderDepth;
+    C2PortReorderKeySetting::output reorderKey;
     c2_status_t err = mComponent->query(
-            { &iStreamFormat, &oStreamFormat },
+            {
+                &iStreamFormat,
+                &oStreamFormat,
+                &reorderDepth,
+                &reorderKey,
+            },
             {},
             C2_DONT_BLOCK,
             nullptr);
-    if (err != C2_OK) {
+    if (err == C2_BAD_INDEX) {
+        if (!iStreamFormat || !oStreamFormat) {
+            return UNKNOWN_ERROR;
+        }
+        Mutexed<ReorderStash>::Locked reorder(mReorderStash);
+        reorder->clear();
+        if (reorderDepth) {
+            reorder->setDepth(reorderDepth.value);
+        }
+        if (reorderKey) {
+            reorder->setKey(reorderKey.value);
+        }
+    } else if (err != C2_OK) {
         return UNKNOWN_ERROR;
     }
 
-    // Query delays
-    C2PortRequestedDelayTuning::input inputDelay;
-    C2PortRequestedDelayTuning::output outputDelay;
-    C2RequestedPipelineDelayTuning pipelineDelay;
-#if 0
-    err = mComponent->query(
-            { &inputDelay, &pipelineDelay, &outputDelay },
-            {},
-            C2_DONT_BLOCK,
-            nullptr);
-    mAvailablePipelineCapacity.initialize(
-            inputDelay,
-            inputDelay + pipelineDelay,
-            inputDelay + pipelineDelay + outputDelay,
-            mName);
-#else
-    mAvailablePipelineCapacity.initialize(
-            kMinInputBufferArraySize,
-            kMaxPipelineCapacity,
-            kMinOutputBufferArraySize,
-            mName);
-#endif
-
     // TODO: get this from input format
     bool secure = mComponent->getName().find(".secure") != std::string::npos;
 
@@ -2205,11 +2313,11 @@
                     outputGeneration);
         }
 
+        (*buffers) = (*buffers)->toArrayMode(kMinOutputBufferArraySize);
         if (oStreamFormat.value == C2BufferData::LINEAR) {
             // WORKAROUND: if we're using early CSD workaround we convert to
             //             array mode, to appease apps assuming the output
             //             buffers to be of the same size.
-            (*buffers) = (*buffers)->toArrayMode(kMinOutputBufferArraySize);
 
             int32_t channelCount;
             int32_t sampleRate;
@@ -2232,6 +2340,33 @@
         }
     }
 
+    // Set up pipeline control. This has to be done after mInputBuffers and
+    // mOutputBuffers are initialized to make sure that lingering callbacks
+    // about buffers from the previous generation do not interfere with the
+    // newly initialized pipeline capacity.
+
+    // Query delays
+    C2PortRequestedDelayTuning::input inputDelay;
+    C2PortRequestedDelayTuning::output outputDelay;
+    C2RequestedPipelineDelayTuning pipelineDelay;
+#if 0
+    err = mComponent->query(
+            { &inputDelay, &pipelineDelay, &outputDelay },
+            {},
+            C2_DONT_BLOCK,
+            nullptr);
+    mAvailablePipelineCapacity.initialize(
+            inputDelay,
+            inputDelay + pipelineDelay,
+            inputDelay + pipelineDelay + outputDelay,
+            mName);
+#else
+    mAvailablePipelineCapacity.initialize(
+            kMinInputBufferArraySize,
+            kMaxPipelineCapacity,
+            mName);
+#endif
+
     mInputMetEos = false;
     mPendingEosTimestamp = INT64_MIN;
     mSync.start();
@@ -2306,7 +2441,6 @@
     for (const sp<MediaCodecBuffer> &buffer : toBeQueued) {
         if (queueInputBufferInternal(buffer) != OK) {
             mAvailablePipelineCapacity.freeComponentSlot("requestInitialInputBuffers");
-            mAvailablePipelineCapacity.freeOutputSlot("requestInitialInputBuffers");
         }
     }
     return OK;
@@ -2314,7 +2448,7 @@
 
 void CCodecBufferChannel::stop() {
     mSync.stop();
-    mFirstValidFrameIndex = mFrameIndex.load();
+    mFirstValidFrameIndex = mFrameIndex.load(std::memory_order_relaxed);
     if (mInputSurface != nullptr) {
         mInputSurface.reset();
     }
@@ -2357,23 +2491,11 @@
         std::unique_ptr<C2Work> work, const sp<AMessage> &outputFormat,
         const C2StreamInitDataInfo::output *initData,
         size_t numDiscardedInputBuffers) {
-    if (work->result == C2_NOT_FOUND) {
-        // TODO: Define what flushed work's result is.
-        ALOGD("[%s] flushed work; ignored.", mName);
-        return;
-    }
-    if ((work->input.ordinal.frameIndex - mFirstValidFrameIndex.load()).peek() < 0) {
-        // Discard frames from previous generation.
-        ALOGD("[%s] Discard frames from previous generation.", mName);
-        return;
-    }
-
-    mAvailablePipelineCapacity.freeInputSlots(numDiscardedInputBuffers, "onWorkDone");
-    mAvailablePipelineCapacity.freeComponentSlot("onWorkDone");
     if (handleWork(std::move(work), outputFormat, initData)) {
-        mAvailablePipelineCapacity.freeOutputSlot("onWorkDone");
+        mAvailablePipelineCapacity.freeInputSlots(numDiscardedInputBuffers,
+                                                  "onWorkDone");
+        feedInputBufferIfAvailable();
     }
-    feedInputBufferIfAvailable();
 }
 
 void CCodecBufferChannel::onInputBufferDone(
@@ -2382,9 +2504,11 @@
     {
         Mutexed<std::unique_ptr<InputBuffers>>::Locked buffers(mInputBuffers);
         newInputSlotAvailable = (*buffers)->expireComponentBuffer(buffer);
+        if (newInputSlotAvailable) {
+            mAvailablePipelineCapacity.freeInputSlots(1, "onInputBufferDone");
+        }
     }
     if (newInputSlotAvailable) {
-        mAvailablePipelineCapacity.freeInputSlots(1, "onInputBufferDone");
         feedInputBufferIfAvailable();
     }
 }
@@ -2393,6 +2517,23 @@
         std::unique_ptr<C2Work> work,
         const sp<AMessage> &outputFormat,
         const C2StreamInitDataInfo::output *initData) {
+    if ((work->input.ordinal.frameIndex - mFirstValidFrameIndex.load()).peek() < 0) {
+        // Discard frames from previous generation.
+        ALOGD("[%s] Discard frames from previous generation.", mName);
+        return false;
+    }
+
+    if (work->worklets.size() != 1u
+            || !work->worklets.front()
+            || !(work->worklets.front()->output.flags & C2FrameData::FLAG_INCOMPLETE)) {
+        mAvailablePipelineCapacity.freeComponentSlot("handleWork");
+    }
+
+    if (work->result == C2_NOT_FOUND) {
+        ALOGD("[%s] flushed work; ignored.", mName);
+        return true;
+    }
+
     if (work->result != C2_OK) {
         ALOGD("[%s] work failed to complete: %d", mName, work->result);
         mCCodecCallback->onError(work->result, ACTION_CODE_FATAL);
@@ -2422,6 +2563,40 @@
         }
     }
 
+    while (!worklet->output.configUpdate.empty()) {
+        std::unique_ptr<C2Param> param;
+        worklet->output.configUpdate.back().swap(param);
+        worklet->output.configUpdate.pop_back();
+        switch (param->coreIndex().coreIndex()) {
+            case C2PortReorderBufferDepthTuning::CORE_INDEX: {
+                C2PortReorderBufferDepthTuning::output reorderDepth;
+                if (reorderDepth.updateFrom(*param)) {
+                    mReorderStash.lock()->setDepth(reorderDepth.value);
+                    ALOGV("[%s] onWorkDone: updated reorder depth to %u",
+                          mName, reorderDepth.value);
+                } else {
+                    ALOGD("[%s] onWorkDone: failed to read reorder depth", mName);
+                }
+                break;
+            }
+            case C2PortReorderKeySetting::CORE_INDEX: {
+                C2PortReorderKeySetting::output reorderKey;
+                if (reorderKey.updateFrom(*param)) {
+                    mReorderStash.lock()->setKey(reorderKey.value);
+                    ALOGV("[%s] onWorkDone: updated reorder key to %u",
+                          mName, reorderKey.value);
+                } else {
+                    ALOGD("[%s] onWorkDone: failed to read reorder key", mName);
+                }
+                break;
+            }
+            default:
+                ALOGV("[%s] onWorkDone: unrecognized config update (%08X)",
+                      mName, param->index());
+                break;
+        }
+    }
+
     if (outputFormat != nullptr) {
         Mutexed<std::unique_ptr<OutputBuffers>>::Locked buffers(mOutputBuffers);
         ALOGD("[%s] onWorkDone: output format changed to %s",
@@ -2446,7 +2621,6 @@
         ALOGV("[%s] onWorkDone: output EOS", mName);
     }
 
-    bool feedNeeded = true;
     sp<MediaCodecBuffer> outBuffer;
     size_t index;
 
@@ -2468,7 +2642,7 @@
 
     if (initData != nullptr) {
         Mutexed<std::unique_ptr<OutputBuffers>>::Locked buffers(mOutputBuffers);
-        if ((*buffers)->registerCsd(initData, &index, &outBuffer)) {
+        if ((*buffers)->registerCsd(initData, &index, &outBuffer) == OK) {
             outBuffer->meta()->setInt64("timeUs", timestamp.peek());
             outBuffer->meta()->setInt32("flags", MediaCodec::BUFFER_FLAG_CODECCONFIG);
             ALOGV("[%s] onWorkDone: csd index = %zu [%p]", mName, index, outBuffer.get());
@@ -2476,7 +2650,6 @@
             buffers.unlock();
             mCallback->onOutputBufferAvailable(index, outBuffer);
             buffers.lock();
-            feedNeeded = false;
         } else {
             ALOGD("[%s] onWorkDone: unable to register csd", mName);
             buffers.unlock();
@@ -2489,7 +2662,7 @@
     if (!buffer && !flags) {
         ALOGV("[%s] onWorkDone: Not reporting output buffer (%lld)",
               mName, work->input.ordinal.frameIndex.peekull());
-        return feedNeeded;
+        return true;
     }
 
     if (buffer) {
@@ -2508,23 +2681,53 @@
     }
 
     {
-        Mutexed<std::unique_ptr<OutputBuffers>>::Locked buffers(mOutputBuffers);
-        if (!(*buffers)->registerBuffer(buffer, &index, &outBuffer)) {
-            ALOGD("[%s] onWorkDone: unable to register output buffer", mName);
-            // TODO
-            // buffers.unlock();
-            // mCCodecCallback->onError(UNKNOWN_ERROR, ACTION_CODE_FATAL);
-            // buffers.lock();
-            return false;
+        Mutexed<ReorderStash>::Locked reorder(mReorderStash);
+        reorder->emplace(buffer, timestamp.peek(), flags, worklet->output.ordinal);
+        if (flags & MediaCodec::BUFFER_FLAG_EOS) {
+            // Flush reorder stash
+            reorder->setDepth(0);
         }
     }
+    sendOutputBuffers();
+    return true;
+}
 
-    outBuffer->meta()->setInt64("timeUs", timestamp.peek());
-    outBuffer->meta()->setInt32("flags", flags);
-    ALOGV("[%s] onWorkDone: out buffer index = %zu [%p] => %p + %zu",
-            mName, index, outBuffer.get(), outBuffer->data(), outBuffer->size());
-    mCallback->onOutputBufferAvailable(index, outBuffer);
-    return false;
+void CCodecBufferChannel::sendOutputBuffers() {
+    ReorderStash::Entry entry;
+    sp<MediaCodecBuffer> outBuffer;
+    size_t index;
+
+    while (true) {
+        {
+            Mutexed<ReorderStash>::Locked reorder(mReorderStash);
+            if (!reorder->hasPending()) {
+                break;
+            }
+            if (!reorder->pop(&entry)) {
+                break;
+            }
+        }
+        Mutexed<std::unique_ptr<OutputBuffers>>::Locked buffers(mOutputBuffers);
+        status_t err = (*buffers)->registerBuffer(entry.buffer, &index, &outBuffer);
+        if (err != OK) {
+            if (err != WOULD_BLOCK) {
+                OutputBuffersArray *array = (OutputBuffersArray *)buffers->get();
+                array->realloc(entry.buffer);
+                mCCodecCallback->onOutputBuffersChanged();
+            }
+            buffers.unlock();
+            ALOGV("[%s] sendOutputBuffers: unable to register output buffer", mName);
+            mReorderStash.lock()->defer(entry);
+            return;
+        }
+        buffers.unlock();
+
+        outBuffer->meta()->setInt64("timeUs", entry.timestamp);
+        outBuffer->meta()->setInt32("flags", entry.flags);
+        ALOGV("[%s] sendOutputBuffers: out buffer index = %zu [%p] => %p + %zu",
+                mName, index, outBuffer.get(), outBuffer->data(), outBuffer->size());
+        mCallback->onOutputBufferAvailable(index, outBuffer);
+    }
 }
 
 status_t CCodecBufferChannel::setSurface(const sp<Surface> &newSurface) {
diff --git a/media/sfplugin/CCodecBufferChannel.h b/media/sfplugin/CCodecBufferChannel.h
index 86739ab..e354fd0 100644
--- a/media/sfplugin/CCodecBufferChannel.h
+++ b/media/sfplugin/CCodecBufferChannel.h
@@ -43,6 +43,7 @@
     virtual void onError(status_t err, enum ActionCode actionCode) = 0;
     virtual void onOutputFramesRendered(int64_t mediaTimeUs, nsecs_t renderTimeNs) = 0;
     virtual void onWorkQueued(bool eos) = 0;
+    virtual void onOutputBuffersChanged() = 0;
 };
 
 /**
@@ -217,6 +218,7 @@
     bool handleWork(
             std::unique_ptr<C2Work> work, const sp<AMessage> &outputFormat,
             const C2StreamInitDataInfo::output *initData);
+    void sendOutputBuffers();
 
     QueueSync mSync;
     sp<MemoryDealer> mDealer;
@@ -271,44 +273,36 @@
     //    CCodecBufferChannel whose outputs have not been returned from the
     //    component (by calling onWorkDone()) does not exceed a certain limit.
     //    (Let us call this the "component" capacity.)
-    // 3. The number of work items that have been received by
-    //    CCodecBufferChannel whose outputs have not been released by the app
-    //    (either by calling discardBuffer() on an output buffer or calling
-    //    renderOutputBuffer()) does not exceed a certain limit. (Let us call
-    //    this the "output" capacity.)
     //
     // These three criteria guarantee that a new input buffer that arrives from
     // the invocation of onInputBufferAvailable() will not
     // 1. overload CCodecBufferChannel's input buffers;
     // 2. overload the component; or
-    // 3. overload CCodecBufferChannel's output buffers if the component
-    //    finishes all the pending work right away.
     //
     struct PipelineCapacity {
         // The number of available input capacity.
         std::atomic_int input;
         // The number of available component capacity.
         std::atomic_int component;
-        // The number of available output capacity.
-        std::atomic_int output;
 
         PipelineCapacity();
-        // Set the values of #component and #output.
-        void initialize(int newInput, int newComponent, int newOutput,
+        // Set the values of #input and #component.
+        void initialize(int newInput, int newComponent,
                         const char* newName = "<UNKNOWN COMPONENT>",
                         const char* callerTag = nullptr);
 
-        // Return true and decrease #input, #component and #output by one if
+        // Return true and decrease #input and #component by one if
         // they are all greater than zero; return false otherwise.
         //
         // callerTag is used for logging only.
         //
         // allocate() is called by CCodecBufferChannel to check whether it can
         // receive another input buffer. If the return value is true,
-        // onInputBufferAvailable() can (and will) be called afterwards.
+        // onInputBufferAvailable() and onOutputBufferAvailable() can be called
+        // afterwards.
         bool allocate(const char* callerTag = nullptr);
 
-        // Increase #input, #component and #output by one.
+        // Increase #input and #component by one.
         //
         // callerTag is used for logging only.
         //
@@ -335,21 +329,52 @@
         // onWorkDone() is called.
         int freeComponentSlot(const char* callerTag = nullptr);
 
-        // Increase #output by one and return the updated value.
-        //
-        // callerTag is used for logging only.
-        //
-        // freeOutputSlot() is called by CCodecBufferChannel when
-        // discardBuffer() is called on an output buffer or when
-        // renderOutputBuffer() is called.
-        int freeOutputSlot(const char* callerTag = nullptr);
-
     private:
         // Component name. Used for logging.
         const char* mName;
     };
     PipelineCapacity mAvailablePipelineCapacity;
 
+    class ReorderStash {
+    public:
+        struct Entry {
+            inline Entry() : buffer(nullptr), timestamp(0), flags(0), ordinal({0, 0, 0}) {}
+            inline Entry(
+                    const std::shared_ptr<C2Buffer> &b,
+                    int64_t t,
+                    int32_t f,
+                    const C2WorkOrdinalStruct &o)
+                : buffer(b), timestamp(t), flags(f), ordinal(o) {}
+            std::shared_ptr<C2Buffer> buffer;
+            int64_t timestamp;
+            int32_t flags;
+            C2WorkOrdinalStruct ordinal;
+        };
+
+        ReorderStash();
+
+        void clear();
+        void setDepth(uint32_t depth);
+        void setKey(C2Config::ordinal_key_t key);
+        bool pop(Entry *entry);
+        void emplace(
+                const std::shared_ptr<C2Buffer> &buffer,
+                int64_t timestamp,
+                int32_t flags,
+                const C2WorkOrdinalStruct &ordinal);
+        void defer(const Entry &entry);
+        bool hasPending() const;
+
+    private:
+        std::list<Entry> mPending;
+        std::list<Entry> mStash;
+        uint32_t mDepth;
+        C2Config::ordinal_key_t mKey;
+
+        bool less(const C2WorkOrdinalStruct &o1, const C2WorkOrdinalStruct &o2);
+    };
+    Mutexed<ReorderStash> mReorderStash;
+
     std::atomic_bool mInputMetEos;
     std::atomic_int64_t mPendingEosTimestamp;
 
diff --git a/media/sfplugin/Codec2Buffer.cpp b/media/sfplugin/Codec2Buffer.cpp
index df87a67..64a48f5 100644
--- a/media/sfplugin/Codec2Buffer.cpp
+++ b/media/sfplugin/Codec2Buffer.cpp
@@ -576,7 +576,7 @@
 
     ALOGV("VideoNativeMetadata: %dx%d", buffer->width, buffer->height);
     C2Handle *handle = WrapNativeCodec2GrallocHandle(
-            buffer->handle,
+            native_handle_clone(buffer->handle),
             buffer->width,
             buffer->height,
             buffer->format,