Merge "CCodec: create an empty ByteBuffer for null output buffer" into pi-dev
diff --git a/codec2/hidl/1.0/utils/Android.bp b/codec2/hidl/1.0/utils/Android.bp
index aa1f18b..1641e51 100644
--- a/codec2/hidl/1.0/utils/Android.bp
+++ b/codec2/hidl/1.0/utils/Android.bp
@@ -49,7 +49,9 @@
         "hardware.google.media.c2@1.0",
         "libhidlbase",
         "libstagefright_bufferpool@1.0",
+        "libstagefright_bufferqueue_helper",
         "libstagefright_codec2",
+        "libui",
     ],
 }
 
diff --git a/codec2/hidl/1.0/utils/Component.cpp b/codec2/hidl/1.0/utils/Component.cpp
index f109d44..8f04e55 100644
--- a/codec2/hidl/1.0/utils/Component.cpp
+++ b/codec2/hidl/1.0/utils/Component.cpp
@@ -162,9 +162,9 @@
             Return<void> transStatus = listener->onWorkDone(workBundle);
             if (!transStatus.isOk()) {
                 ALOGE("onWorkDone -- transaction failed.");
+                return;
             }
-
-            // Finish buffer transfers: nothing else to do
+            yieldBufferQueueBlocks(c2workItems, true);
         }
     }
 
@@ -220,11 +220,11 @@
 
     Status res = static_cast<Status>(c2res);
     if (c2res == C2_OK) {
-        // TODO: Connect with bufferpool API for buffer transfers
         ALOGV("flush -- converting output");
         res = objcpy(&flushedWorkBundle, c2flushedWorks, &mBufferPoolSender);
     }
     _hidl_cb(res, flushedWorkBundle);
+    yieldBufferQueueBlocks(c2flushedWorks, true);
     return Void();
 }
 
diff --git a/codec2/hidl/1.0/utils/include/codec2/hidl/1.0/Component.h b/codec2/hidl/1.0/utils/include/codec2/hidl/1.0/Component.h
index 534d342..a6e4333 100644
--- a/codec2/hidl/1.0/utils/include/codec2/hidl/1.0/Component.h
+++ b/codec2/hidl/1.0/utils/include/codec2/hidl/1.0/Component.h
@@ -78,7 +78,7 @@
     typedef ::android::hardware::graphics::bufferqueue::V1_0::
             IGraphicBufferProducer HGraphicBufferProducer;
 
-    // Methods from gIComponent follow.
+    // Methods from IComponent follow.
     virtual Return<Status> queue(const WorkBundle& workBundle) override;
     virtual Return<void> flush(flush_cb _hidl_cb) override;
     virtual Return<Status> drain(bool withEos) override;
diff --git a/codec2/hidl/1.0/utils/include/codec2/hidl/1.0/types.h b/codec2/hidl/1.0/utils/include/codec2/hidl/1.0/types.h
index 72c991e..46dee38 100644
--- a/codec2/hidl/1.0/utils/include/codec2/hidl/1.0/types.h
+++ b/codec2/hidl/1.0/utils/include/codec2/hidl/1.0/types.h
@@ -22,6 +22,7 @@
 #include <android/hardware/media/bufferpool/1.0/types.h>
 #include <hardware/google/media/c2/1.0/IComponentStore.h>
 #include <hardware/google/media/c2/1.0/types.h>
+#include <gui/IGraphicBufferProducer.h>
 
 #include <C2Component.h>
 #include <C2Param.h>
@@ -39,9 +40,11 @@
 using ::android::hardware::hidl_handle;
 using ::android::hardware::hidl_string;
 using ::android::hardware::hidl_vec;
+using ::android::status_t;
 using ::android::sp;
 using ::android::hardware::media::bufferpool::V1_0::implementation::
         ConnectionId;
+using ::android::IGraphicBufferProducer;
 
 // Types of metadata for Blocks.
 struct C2Hidl_Range {
@@ -262,6 +265,82 @@
 c2_status_t toC2Status(::android::hardware::media::bufferpool::V1_0::
         ResultStatus rs);
 
+// BufferQueue-Based Block Operations
+// ==================================
+
+// Create a GraphicBuffer object from a graphic block and attach it to an
+// IGraphicBufferProducer.
+status_t attachToBufferQueue(const C2ConstGraphicBlock& block,
+                             const sp<IGraphicBufferProducer>& igbp,
+                             uint32_t generation,
+                             int32_t* bqSlot);
+
+// Return false if block does not come from a bufferqueue-based blockpool.
+// Otherwise, extract bqId and bqSlot and return true.
+bool getBufferQueueAssignment(const C2ConstGraphicBlock& block,
+                              uint64_t* bqId,
+                              int32_t* bqSlot);
+
+// Disassociate the given block from its designated bufferqueue so that
+// cancelBuffer() will not be called when the block is destroyed. If the block
+// does not have a designated bufferqueue, the function returns false.
+// Otherwise, it returns true.
+//
+// Note: This function should be called after attachBuffer() or queueBuffer() is
+// called manually.
+bool yieldBufferQueueBlock(const C2ConstGraphicBlock& block);
+
+// Call yieldBufferQueueBlock() on blocks in the given workList. processInput
+// determines whether input blocks are yielded. processOutput works similarly on
+// output blocks. (The default value of processInput is false while the default
+// value of processOutput is true. This implies that in most cases, only output
+// buffers contain bufferqueue-based blocks.)
+//
+// Note: This function should be called after WorkBundle has been successfully
+// sent over the Treble boundary to another process.
+void yieldBufferQueueBlocks(const std::list<std::unique_ptr<C2Work>>& workList,
+                            bool processInput = false,
+                            bool processOutput = true);
+
+// Assign the given block to a bufferqueue so that when the block is destroyed,
+// cancelBuffer() will be called.
+//
+// If the block does not come from a bufferqueue-based blockpool, this function
+// returns false.
+//
+// If the block already has a bufferqueue assignment that matches the given one,
+// the function returns true.
+//
+// If the block already has a bufferqueue assignment that does not match the
+// given one, the block will be reassigned to the given bufferqueue. This
+// will call attachBuffer() on the given igbp. The function then returns true on
+// success or false on any failure during the operation.
+//
+// Note: This function should be called after detachBuffer() or dequeueBuffer()
+// is called manually.
+bool holdBufferQueueBlock(const C2ConstGraphicBlock& block,
+                          const sp<IGraphicBufferProducer>& igbp,
+                          uint64_t bqId,
+                          uint32_t generation);
+
+// Call holdBufferQueueBlock() on input or output blocks in the given workList.
+// Since the bufferqueue assignment for input and output buffers can be
+// different, this function takes forInput to determine whether the given
+// bufferqueue is for input buffers or output buffers. (The default value of
+// forInput is false.)
+//
+// In the (rare) case that both input and output buffers are bufferqueue-based,
+// this function must be called twice, once for the input buffers and once for
+// the output buffers.
+//
+// Note: This function should be called after WorkBundle has been received from
+// another process.
+void holdBufferQueueBlocks(const std::list<std::unique_ptr<C2Work>>& workList,
+                           const sp<IGraphicBufferProducer>& igbp,
+                           uint64_t bqId,
+                           uint32_t generation,
+                           bool forInput = false);
+
 }  // namespace utils
 }  // namespace V1_0
 }  // namespace c2
diff --git a/codec2/hidl/1.0/utils/types.cpp b/codec2/hidl/1.0/utils/types.cpp
index dd05056..cad7605 100644
--- a/codec2/hidl/1.0/utils/types.cpp
+++ b/codec2/hidl/1.0/utils/types.cpp
@@ -20,6 +20,8 @@
 
 #include <codec2/hidl/1.0/types.h>
 
+#include <media/stagefright/bqhelper/WGraphicBufferProducer.h>
+
 #include <C2AllocatorIon.h>
 #include <C2AllocatorGralloc.h>
 #include <C2BlockInternal.h>
@@ -31,8 +33,9 @@
 #include <C2Work.h>
 #include <util/C2ParamUtils.h>
 
-#include <unordered_map>
 #include <algorithm>
+#include <functional>
+#include <unordered_map>
 
 #include <media/stagefright/foundation/AUtils.h>
 
@@ -52,6 +55,7 @@
         ClientManager;
 using ::android::hardware::media::bufferpool::V1_0::implementation::
         TransactionId;
+using ::android::TWGraphicBufferProducer;
 
 namespace /* unnamed */ {
 
@@ -926,7 +930,9 @@
 }
 
 // std::list<std::unique_ptr<C2Work>> -> WorkBundle
-Status objcpy(WorkBundle* d, const std::list<std::unique_ptr<C2Work>>& s,
+Status objcpy(
+        WorkBundle* d,
+        const std::list<std::unique_ptr<C2Work>>& s,
         BufferPoolSender* bufferPoolSender) {
     Status status = Status::OK;
 
@@ -1573,6 +1579,201 @@
     }
 }
 
+namespace /* unnamed */ {
+
+// Build a GraphicBuffer from the gralloc handle and metadata inside a block.
+sp<GraphicBuffer> createGraphicBuffer(const C2ConstGraphicBlock& block) {
+    uint32_t w;
+    uint32_t h;
+    uint32_t pixelFormat;
+    uint64_t usageFlags;
+    uint32_t rowStride;
+    uint64_t bqId;   // extracted with the rest, not used below
+    int32_t bqSlot;  // extracted with the rest, not used below
+    _UnwrapNativeCodec2GrallocMetadata(
+            block.handle(),
+            &w, &h, &pixelFormat, &usageFlags, &rowStride,
+            &bqId, reinterpret_cast<uint32_t*>(&bqSlot));
+    native_handle_t* const unwrapped =
+            UnwrapNativeCodec2GrallocHandle(block.handle());
+    // Created with CLONE_HANDLE; the unwrapped handle is deleted afterwards.
+    sp<GraphicBuffer> result = new GraphicBuffer(
+            unwrapped, GraphicBuffer::CLONE_HANDLE,
+            w, h, pixelFormat, 1, usageFlags, rowStride);
+    native_handle_delete(unwrapped);
+    return result;
+}
+
+// Invoke process on each graphic block of each non-null buffer in frameData.
+template <typename BlockProcessor>
+void forEachBlock(C2FrameData& frameData, BlockProcessor process) {
+    for (const std::shared_ptr<C2Buffer>& buf : frameData.buffers) {
+        if (!buf) {
+            continue;
+        }
+        for (const C2ConstGraphicBlock& blk : buf->data().graphicBlocks()) {
+            process(blk);
+        }
+    }
+}
+
+// Apply process to the input and/or worklet output blocks of each non-null work.
+template <typename BlockProcessor>
+void forEachBlock(const std::list<std::unique_ptr<C2Work>>& workList,
+                  BlockProcessor process, bool processInput, bool processOutput) {
+    for (const std::unique_ptr<C2Work>& item : workList) {
+        if (!item) {
+            continue;
+        }
+        if (processInput) {
+            forEachBlock(item->input, process);
+        }
+        if (!processOutput) {
+            continue;
+        }
+        for (const std::unique_ptr<C2Worklet>& wl : item->worklets) {
+            if (wl) {
+                forEachBlock(wl->output, process);
+            }
+        }
+    }
+}
+
+// Use igbp's own HAL interface if it has one; else wrap igbp in a TW producer.
+sp<HGraphicBufferProducer> getHgbp(const sp<IGraphicBufferProducer>& igbp) {
+    sp<HGraphicBufferProducer> hal = igbp->getHalInterface();
+    return hal ? hal : new TWGraphicBufferProducer<HGraphicBufferProducer>(igbp);
+}
+
+} // unnamed namespace
+
+status_t attachToBufferQueue(const C2ConstGraphicBlock& block,
+                             const sp<IGraphicBufferProducer>& igbp,
+                             uint32_t generation,
+                             int32_t* bqSlot) {
+    if (!igbp) {
+        ALOGW("attachToBufferQueue -- null producer.");
+        return NO_INIT;
+    }
+    // Wrap the block in a GraphicBuffer carrying the requested generation.
+    sp<GraphicBuffer> graphicBuffer = createGraphicBuffer(block);
+    graphicBuffer->setGenerationNumber(generation);
+
+    ALOGV("attachToBufferQueue -- attaching buffer: "
+            "block dimension %ux%u, "
+            "graphicBuffer dimension %ux%u, "
+            "format %#x, usage %#llx, stride %u, generation %u.",
+            static_cast<unsigned>(block.width()),
+            static_cast<unsigned>(block.height()),
+            static_cast<unsigned>(graphicBuffer->getWidth()),
+            static_cast<unsigned>(graphicBuffer->getHeight()),
+            static_cast<unsigned>(graphicBuffer->getPixelFormat()),
+            static_cast<unsigned long long>(graphicBuffer->getUsage()),
+            static_cast<unsigned>(graphicBuffer->getStride()),
+            static_cast<unsigned>(graphicBuffer->getGenerationNumber()));
+    // Attach the buffer; on success the producer's slot is left in *bqSlot.
+    status_t result = igbp->attachBuffer(bqSlot, graphicBuffer);
+    if (result != OK) {
+        ALOGW("attachToBufferQueue -- attachBuffer failed. Error code = %d",
+                static_cast<int>(result));
+        return false;  // NOTE(review): converts to 0 (presumably == OK) -- confirm.
+    }
+    ALOGV("attachToBufferQueue -- attachBuffer returned slot %d",
+            static_cast<int>(*bqSlot));
+    return true;  // NOTE(review): converts to 1, which is not OK; callers negate.
+}
+
+bool getBufferQueueAssignment(const C2ConstGraphicBlock& block,
+                              uint64_t* bqId,
+                              int32_t* bqSlot) {
+    // Look up the block's pool data, then read the bufferqueue id and slot.
+    auto poolData = _C2BlockFactory::GetGraphicBlockPoolData(block);
+    return _C2BlockFactory::GetBufferQueueData(poolData, bqId, bqSlot);
+}
+
+bool yieldBufferQueueBlock(const C2ConstGraphicBlock& block) {
+    auto poolData = _C2BlockFactory::GetGraphicBlockPoolData(block);
+    // Only blocks whose pool data carries bufferqueue data can be yielded.
+    if (!poolData || !_C2BlockFactory::GetBufferQueueData(poolData)) {
+        return false;
+    }
+    _C2BlockFactory::YieldBlockToBufferQueue(poolData);
+    return true;
+}
+
+void yieldBufferQueueBlocks(const std::list<std::unique_ptr<C2Work>>& workList,
+                            bool processInput, bool processOutput) {
+    // Yield every selected block; the per-block return values are ignored.
+    forEachBlock(workList, &yieldBufferQueueBlock, processInput, processOutput);
+}
+
+// Tie the block to the given bufferqueue, attaching it first if its bqId differs.
+bool holdBufferQueueBlock(const C2ConstGraphicBlock& block,
+                          const sp<IGraphicBufferProducer>& igbp,
+                          uint64_t bqId,
+                          uint32_t generation) {
+    std::shared_ptr<_C2BlockPoolData> data =
+            _C2BlockFactory::GetGraphicBlockPoolData(block);
+    // Both paths below hand igbp to getHgbp(), which dereferences it, so a
+    // null producer is rejected up front together with missing pool data.
+    if (!data || !igbp) {
+        return false;
+    }
+
+    uint64_t oldId;
+    int32_t oldSlot;
+    // If the block is not bufferqueue-based, do nothing.
+    if (!_C2BlockFactory::GetBufferQueueData(data, &oldId, &oldSlot) ||
+            (oldId == 0)) {
+        return false;
+    }
+
+    // If the block's bqId is the same as the desired bqId, just hold.
+    if (oldId == bqId) {
+        ALOGV("holdBufferQueueBlock -- import without attaching: "
+                "bqId %llu, bqSlot %d, generation %u.",
+                static_cast<long long unsigned>(oldId),
+                static_cast<int>(oldSlot),
+                static_cast<unsigned>(generation));
+        _C2BlockFactory::HoldBlockFromBufferQueue(data, getHgbp(igbp));
+        return true;
+    }
+
+    // Otherwise, attach the block to the given igbp. For a non-null producer,
+    // attachToBufferQueue() returns true (1) on success and false (0) on
+    // failure rather than OK / an error code, so the result is negated before
+    // it is compared with OK (queueToOutputSurface() does the same).
+    int32_t bqSlot;
+    status_t result = !attachToBufferQueue(block, igbp, generation, &bqSlot);
+    if (result != OK) {
+        ALOGE("holdBufferQueueBlock -- fail to attach: "
+                "target bqId %llu, generation %u.",
+                static_cast<long long unsigned>(bqId),
+                static_cast<unsigned>(generation));
+        return false;
+    }
+
+    ALOGV("holdBufferQueueBlock -- attached: "
+            "bqId %llu, bqSlot %d, generation %u.",
+            static_cast<long long unsigned>(bqId),
+            static_cast<int>(bqSlot),
+            static_cast<unsigned>(generation));
+    _C2BlockFactory::AssignBlockToBufferQueue(
+            data, getHgbp(igbp), bqId, bqSlot, true);
+    return true;
+}
+
+void holdBufferQueueBlocks(const std::list<std::unique_ptr<C2Work>>& workList,
+                           const sp<IGraphicBufferProducer>& igbp,
+                           uint64_t bqId, uint32_t generation, bool forInput) {
+    // Hold input blocks when forInput is set, output blocks otherwise.
+    const auto holdBlock = [igbp, bqId, generation](
+            const C2ConstGraphicBlock& block) {
+        return holdBufferQueueBlock(block, igbp, bqId, generation);
+    };
+    forEachBlock(workList, holdBlock, forInput, !forInput);
+}
+
 }  // namespace utils
 }  // namespace V1_0
 }  // namespace c2
diff --git a/codec2/hidl/client/Android.bp b/codec2/hidl/client/Android.bp
index c98de21..0819055 100644
--- a/codec2/hidl/client/Android.bp
+++ b/codec2/hidl/client/Android.bp
@@ -6,8 +6,10 @@
     ],
 
     shared_libs: [
+        "android.hardware.graphics.bufferqueue@1.0",
         "android.hardware.media.bufferpool@1.0",
         "hardware.google.media.c2@1.0",
+        "libbase",
         "libbinder",
         "libcodec2_hidl_utils@1.0",
         "libcutils",
@@ -16,8 +18,10 @@
         "libhidltransport",
         "liblog",
         "libstagefright_bufferpool@1.0",
+        "libstagefright_bufferqueue_helper",
         "libstagefright_codec2",
         "libstagefright_codec2_vndk",
+        "libui",
         "libutils",
     ],
 
diff --git a/codec2/hidl/client/client.cpp b/codec2/hidl/client/client.cpp
index 7164916..8751601 100644
--- a/codec2/hidl/client/client.cpp
+++ b/codec2/hidl/client/client.cpp
@@ -20,33 +20,37 @@
 
 #include <codec2/hidl/client.h>
 
-#include <hardware/google/media/c2/1.0/IComponentListener.h>
-#include <hardware/google/media/c2/1.0/IConfigurable.h>
-#include <hardware/google/media/c2/1.0/IComponentInterface.h>
-#include <hardware/google/media/c2/1.0/IComponent.h>
-#include <hardware/google/media/c2/1.0/IComponentStore.h>
-#include <android/hardware/media/bufferpool/1.0/IClientManager.h>
-
-#include <C2PlatformSupport.h>
-#include <C2BufferPriv.h>
-#include <C2Debug.h>
-#include <bufferpool/ClientManager.h>
-#include <gui/bufferqueue/1.0/H2BGraphicBufferProducer.h>
-#include <hidl/HidlSupport.h>
-#include <cutils/properties.h>
-
 #include <deque>
 #include <limits>
 #include <map>
 #include <type_traits>
 #include <vector>
 
+#include <bufferpool/ClientManager.h>
+#include <cutils/properties.h>
+#include <gui/bufferqueue/1.0/H2BGraphicBufferProducer.h>
+#include <hidl/HidlSupport.h>
+#include <media/stagefright/bqhelper/WGraphicBufferProducer.h>
+#undef LOG
+
+#include <android/hardware/media/bufferpool/1.0/IClientManager.h>
+#include <hardware/google/media/c2/1.0/IComponent.h>
+#include <hardware/google/media/c2/1.0/IComponentInterface.h>
+#include <hardware/google/media/c2/1.0/IComponentListener.h>
+#include <hardware/google/media/c2/1.0/IComponentStore.h>
+#include <hardware/google/media/c2/1.0/IConfigurable.h>
+
+#include <C2Debug.h>
+#include <C2BufferPriv.h>
+#include <C2PlatformSupport.h>
+
 namespace android {
 
 using ::android::hardware::hidl_vec;
 using ::android::hardware::hidl_string;
 using ::android::hardware::Return;
 using ::android::hardware::Void;
+using ::android::TWGraphicBufferProducer;
 
 using namespace ::hardware::google::media::c2::V1_0;
 using namespace ::hardware::google::media::c2::V1_0::utils;
@@ -364,13 +368,7 @@
             // release input buffers potentially held by the component from queue
             std::shared_ptr<Codec2Client::Component> strongComponent = component.lock();
             if (strongComponent) {
-                std::vector<uint64_t> inputDone;
-                for (const std::unique_ptr<C2Work> &work : workItems) {
-                    if (work) {
-                        inputDone.emplace_back(work->input.ordinal.frameIndex.peeku());
-                    }
-                }
-                strongComponent->handleOnWorkDone(inputDone);
+                strongComponent->handleOnWorkDone(workItems);
             }
             if (std::shared_ptr<Codec2Client::Listener> listener = base.lock()) {
                 listener->onWorkDone(component, workItems);
@@ -802,18 +800,40 @@
     return static_cast<c2_status_t>(static_cast<Status>(transResult));
 }
 
-void Codec2Client::Component::handleOnWorkDone(const std::vector<uint64_t> &inputDone) {
-    std::lock_guard<std::mutex> lock(mInputBuffersMutex);
-    for (uint64_t inputIndex : inputDone) {
-        auto it = mInputBuffers.find(inputIndex);
-        if (it == mInputBuffers.end()) {
-            ALOGI("unknown input index %llu in onWorkDone", (long long)inputIndex);
-        } else {
-            ALOGV("done with input index %llu with %zu buffers",
-                    (long long)inputIndex, it->second.size());
-            mInputBuffers.erase(it);
+void Codec2Client::Component::handleOnWorkDone(
+        const std::list<std::unique_ptr<C2Work>> &workItems) {
+    // Input buffers' lifetime management: collect each work's frame index...
+    std::vector<uint64_t> inputDone;
+    for (const std::unique_ptr<C2Work> &work : workItems) {
+        if (work) {
+            inputDone.emplace_back(work->input.ordinal.frameIndex.peeku());
         }
     }
+    // ...then erase the mInputBuffers entry stored under each index.
+    {
+        std::lock_guard<std::mutex> lock(mInputBuffersMutex);
+        for (uint64_t inputIndex : inputDone) {
+            auto it = mInputBuffers.find(inputIndex);
+            if (it == mInputBuffers.end()) {
+                ALOGI("unknown input index %llu in onWorkDone", (long long)inputIndex);
+            } else {
+                ALOGV("done with input index %llu with %zu buffers",
+                        (long long)inputIndex, it->second.size());
+                mInputBuffers.erase(it);
+            }
+        }
+    }
+
+    // Output bufferqueue-based blocks' lifetime management
+    mOutputBufferQueueMutex.lock();  // copy the state, then call out unlocked
+    sp<IGraphicBufferProducer> igbp = mOutputIgbp;
+    uint64_t bqId = mOutputBqId;
+    uint32_t generation = mOutputGeneration;
+    mOutputBufferQueueMutex.unlock();
+
+    if (igbp) {  // nothing to hold while no output producer is recorded
+        holdBufferQueueBlocks(workItems, igbp, bqId, generation);
+    }
 }
 
 c2_status_t Codec2Client::Component::queue(
@@ -886,6 +906,8 @@
             flushedIndices.emplace_back(work->input.ordinal.frameIndex.peeku());
         }
     }
+
+    // Input buffers' lifetime management
     for (uint64_t flushedIndex : flushedIndices) {
         std::lock_guard<std::mutex> lock(mInputBuffersMutex);
         auto it = mInputBuffers.find(flushedIndex);
@@ -898,6 +920,17 @@
         }
     }
 
+    // Output bufferqueue-based blocks' lifetime management
+    mOutputBufferQueueMutex.lock();
+    sp<IGraphicBufferProducer> igbp = mOutputIgbp;
+    uint64_t bqId = mOutputBqId;
+    uint32_t generation = mOutputGeneration;
+    mOutputBufferQueueMutex.unlock();
+
+    if (igbp) {
+        holdBufferQueueBlocks(*flushedWork, igbp, bqId, generation);
+    }
+
     return status;
 }
 
@@ -988,9 +1021,15 @@
 
 c2_status_t Codec2Client::Component::setOutputSurface(
         C2BlockPool::local_id_t blockPoolId,
-        const sp<IGraphicBufferProducer>& surface) {
+        const sp<IGraphicBufferProducer>& surface,
+        uint32_t generation) {
+    sp<HGraphicBufferProducer> igbp = surface->getHalInterface();
+    if (!igbp) {
+        igbp = new TWGraphicBufferProducer<HGraphicBufferProducer>(surface);
+    }
+
     Return<Status> transStatus = base()->setOutputSurface(
-            static_cast<uint64_t>(blockPoolId), surface);
+            static_cast<uint64_t>(blockPoolId), igbp);
     if (!transStatus.isOk()) {
         ALOGE("setOutputSurface -- transaction failed.");
         return C2_TRANSACTION_FAILED;
@@ -1000,13 +1039,91 @@
     if (status != C2_OK) {
         ALOGE("setOutputSurface -- call failed. "
                 "Error code = %d", static_cast<int>(status));
+    } else {
+        std::lock_guard<std::mutex> lock(mOutputBufferQueueMutex);
+        if (mOutputIgbp != surface) {
+            mOutputIgbp = surface;
+            if (!surface) {
+                mOutputBqId = 0;
+            } else if (surface->getUniqueId(&mOutputBqId) != OK) {
+                ALOGE("setOutputSurface -- cannot obtain bufferqueue id.");
+            }
+        }
+        mOutputGeneration = generation;
     }
     return status;
 }
 
+status_t Codec2Client::Component::queueToOutputSurface(
+        const C2ConstGraphicBlock& block,
+        const QueueBufferInput& input,
+        QueueBufferOutput* output) {
+    // Snapshot the output surface state; producer calls are made unlocked.
+    sp<IGraphicBufferProducer> outputIgbp;
+    uint64_t outputBqId;
+    uint32_t outputGeneration;
+    {
+        std::lock_guard<std::mutex> lock(mOutputBufferQueueMutex);
+        outputIgbp = mOutputIgbp;
+        outputBqId = mOutputBqId;
+        outputGeneration = mOutputGeneration;
+    }
+
+    // A null surface must be rejected here: attachToBufferQueue() would report
+    // it as NO_INIT, which the negation below turns into OK.
+    if (!outputIgbp) {
+        ALOGE("queueToOutputSurface -- output surface is null.");
+        return NO_INIT;
+    }
+
+    uint64_t bqId;
+    int32_t bqSlot;
+    if (!getBufferQueueAssignment(block, &bqId, &bqSlot) || bqId == 0) {
+        // Block not from bufferqueue -- it must be attached before queuing.
+        status_t status = !attachToBufferQueue(
+                block, outputIgbp, outputGeneration, &bqSlot);
+        if (status != OK) {
+            ALOGW("queueToOutputSurface -- attaching failed.");
+            return INVALID_OPERATION;
+        }
+
+        status = outputIgbp->queueBuffer(static_cast<int>(bqSlot),
+                                         input, output);
+        if (status != OK) {
+            ALOGE("queueToOutputSurface -- queueBuffer() failed "
+                    "on non-bufferqueue-based block. "
+                    "Error code = %d.",
+                    static_cast<int>(status));
+            return status;
+        }
+        return OK;
+    }
+
+    if (bqId != outputBqId) {
+        ALOGE("queueToOutputSurface -- bufferqueue ids mismatch.");
+        return DEAD_OBJECT;
+    }
+
+    status_t status = outputIgbp->queueBuffer(static_cast<int>(bqSlot),
+                                              input, output);
+    if (status != OK) {
+        ALOGE("queueToOutputSurface -- queueBuffer() failed "
+                "on bufferqueue-based block. "
+                "Error code = %d.",
+                static_cast<int>(status));
+        return status;
+    }
+    if (!yieldBufferQueueBlock(block)) {
+        ALOGE("queueToOutputSurface -- cannot yield bufferqueue-based block "
+                "to the bufferqueue.");
+        return UNKNOWN_ERROR;
+    }
+    return OK;
+}
+
 c2_status_t Codec2Client::Component::connectToOmxInputSurface(
-        const sp<IGraphicBufferProducer>& producer,
-        const sp<IGraphicBufferSource>& source) {
+        const sp<HGraphicBufferProducer>& producer,
+        const sp<HGraphicBufferSource>& source) {
     Return<Status> transStatus = base()->connectToOmxInputSurface(
             producer, source);
     if (!transStatus.isOk()) {
diff --git a/codec2/hidl/client/include/codec2/hidl/client.h b/codec2/hidl/client/include/codec2/hidl/client.h
index 0433921..24b6f7e 100644
--- a/codec2/hidl/client/include/codec2/hidl/client.h
+++ b/codec2/hidl/client/include/codec2/hidl/client.h
@@ -17,6 +17,7 @@
 #ifndef CODEC2_HIDL_CLIENT_H_
 #define CODEC2_HIDL_CLIENT_H_
 
+#include <gui/IGraphicBufferProducer.h>
 #include <codec2/hidl/1.0/types.h>
 
 #include <C2PlatformSupport.h>
@@ -94,7 +95,6 @@
 
 // Forward declarations of other classes
 namespace android {
-class IGraphicBufferProducer;
 namespace hardware {
 namespace graphics {
 namespace bufferqueue {
@@ -298,24 +298,53 @@
 
     c2_status_t release();
 
-    typedef ::android::hardware::graphics::bufferqueue::V1_0::
+    typedef ::android::
             IGraphicBufferProducer IGraphicBufferProducer;
-    typedef ::android::hardware::media::omx::V1_0::
-            IGraphicBufferSource IGraphicBufferSource;
+    typedef IGraphicBufferProducer::
+            QueueBufferInput QueueBufferInput;
+    typedef IGraphicBufferProducer::
+            QueueBufferOutput QueueBufferOutput;
 
-    // Output surface
+    typedef ::android::hardware::graphics::bufferqueue::V1_0::
+            IGraphicBufferProducer HGraphicBufferProducer;
+    typedef ::android::hardware::media::omx::V1_0::
+            IGraphicBufferSource HGraphicBufferSource;
+
+    // Set the output surface to be used with a blockpool previously created by
+    // createBlockPool().
     c2_status_t setOutputSurface(
             C2BlockPool::local_id_t blockPoolId,
-            const sp<IGraphicBufferProducer>& surface);
+            const sp<IGraphicBufferProducer>& surface,
+            uint32_t generation);
+
+    // Extract a slot number from the block, then call
+    // IGraphicBufferProducer::queueBuffer().
+    //
+    // If the output surface has not been set, NO_INIT will be returned.
+    //
+    // If the block does not come from a bufferqueue-based blockpool,
+    // attachBuffer() will be called, followed by queueBuffer().
+    //
+    // If the block has a bqId that does not match the id of the output surface,
+    // DEAD_OBJECT will be returned.
+    //
+    // If the call to queueBuffer() is successful but the block cannot be
+    // yielded to the bufferqueue afterwards (see yieldBufferQueueBlock()),
+    // UNKNOWN_ERROR will be returned.
+    //
+    // Otherwise, the return value from queueBuffer() will be returned.
+    status_t queueToOutputSurface(
+            const C2ConstGraphicBlock& block,
+            const QueueBufferInput& input,
+            QueueBufferOutput* output);
 
     c2_status_t connectToOmxInputSurface(
-            const sp<IGraphicBufferProducer>& producer,
-            const sp<IGraphicBufferSource>& source);
+            const sp<HGraphicBufferProducer>& producer,
+            const sp<HGraphicBufferSource>& source);
 
     c2_status_t disconnectFromInputSurface();
 
-    // Input buffer lifetime management
-    void handleOnWorkDone(const std::vector<uint64_t> &inputDone);
+    void handleOnWorkDone(const std::list<std::unique_ptr<C2Work>> &workItems);
 
     // base cannot be null.
     Component(const sp<Base>& base);
@@ -332,6 +361,11 @@
     ::hardware::google::media::c2::V1_0::utils::DefaultBufferPoolSender
             mBufferPoolSender;
 
+    std::mutex mOutputBufferQueueMutex;
+    sp<IGraphicBufferProducer> mOutputIgbp;
+    uint64_t mOutputBqId;
+    uint32_t mOutputGeneration;
+
     static c2_status_t setDeathListener(
             const std::shared_ptr<Component>& component,
             const std::shared_ptr<Listener>& listener);
diff --git a/codec2/vndk/C2Buffer.cpp b/codec2/vndk/C2Buffer.cpp
index 6288777..47366ca 100644
--- a/codec2/vndk/C2Buffer.cpp
+++ b/codec2/vndk/C2Buffer.cpp
@@ -129,8 +129,8 @@
           mAllocation(other.mAllocation),
           mPoolData(other.mPoolData) { }
 
-    /** returns const pool data  */
-    std::shared_ptr<const _C2BlockPoolData> poolData() const {
+    /** returns pool data  */
+    std::shared_ptr<_C2BlockPoolData> poolData() const {
         return mPoolData;
     }
 
@@ -382,7 +382,7 @@
     return std::shared_ptr<C2LinearBlock>(new C2LinearBlock(impl, *impl));
 }
 
-std::shared_ptr<const _C2BlockPoolData> _C2BlockFactory::GetLinearBlockPoolData(
+std::shared_ptr<_C2BlockPoolData> _C2BlockFactory::GetLinearBlockPoolData(
         const C2Block1D &block) {
     if (block.mImpl) {
         return block.mImpl->poolData();
@@ -804,8 +804,8 @@
 
     virtual ~_C2Block2DImpl() = default;
 
-    /** returns const pool data  */
-    std::shared_ptr<const _C2BlockPoolData> poolData() const {
+    /** returns pool data  */
+    std::shared_ptr<_C2BlockPoolData> poolData() const {
         return mPoolData;
     }
 
@@ -1101,7 +1101,7 @@
     return std::shared_ptr<C2GraphicBlock>(new C2GraphicBlock(impl, *impl));
 }
 
-std::shared_ptr<const _C2BlockPoolData> _C2BlockFactory::GetGraphicBlockPoolData(
+std::shared_ptr<_C2BlockPoolData> _C2BlockFactory::GetGraphicBlockPoolData(
         const C2Block2D &block) {
     if (block.mImpl) {
         return block.mImpl->poolData();
diff --git a/codec2/vndk/include/C2BqBufferPriv.h b/codec2/vndk/include/C2BqBufferPriv.h
index d666e59..e1baad4 100644
--- a/codec2/vndk/include/C2BqBufferPriv.h
+++ b/codec2/vndk/include/C2BqBufferPriv.h
@@ -64,12 +64,14 @@
      */
     virtual void configureProducer(const android::sp<android::HGraphicBufferProducer> &producer);
 
-    class Impl;
 private:
     const std::shared_ptr<C2Allocator> mAllocator;
     const local_id_t mLocalId;
 
+    class Impl;
     std::shared_ptr<Impl> mImpl;
+
+    friend struct C2BufferQueueBlockPoolData;
 };
 
 #endif // STAGEFRIGHT_CODEC2_BUFFER_PRIV_H_
diff --git a/codec2/vndk/internal/C2BlockInternal.h b/codec2/vndk/internal/C2BlockInternal.h
index 812bcea..7c1e0e7 100644
--- a/codec2/vndk/internal/C2BlockInternal.h
+++ b/codec2/vndk/internal/C2BlockInternal.h
@@ -17,6 +17,8 @@
 #ifndef ANDROID_STAGEFRIGHT_C2BLOCK_INTERNAL_H_
 #define ANDROID_STAGEFRIGHT_C2BLOCK_INTERNAL_H_
 
+#include <android/hardware/graphics/bufferqueue/1.0/IGraphicBufferProducer.h>
+
 #include <C2Buffer.h>
 
 namespace android {
@@ -48,6 +50,8 @@
     virtual ~_C2BlockPoolData() = default;
 };
 
+struct C2BufferQueueBlockPoolData;
+
 /**
  * Internal only interface for creating blocks by block pool/buffer passing implementations.
  *
@@ -57,12 +61,12 @@
     /**
      * Create a linear block from an allocation for an allotted range.
      *
-     * @param alloc parent allocation
-     * @param data  blockpool data
-     * @param offset allotted range offset
-     * @param size  allotted size
+     * \param alloc parent allocation
+     * \param data  blockpool data
+     * \param offset allotted range offset
+     * \param size  allotted size
      *
-     * @return shared pointer to the linear block. nullptr if there was not enough memory to
+     * \return shared pointer to the linear block. nullptr if there was not enough memory to
      *         create this block.
      */
     static
@@ -75,11 +79,11 @@
     /**
      * Create a graphic block from an allocation for an allotted section.
      *
-     * @param alloc parent allocation
-     * @param data  blockpool data
-     * @param crop  allotted crop region
+     * \param alloc parent allocation
+     * \param data  blockpool data
+     * \param crop  allotted crop region
      *
-     * @return shared pointer to the graphic block. nullptr if there was not enough memory to
+     * \return shared pointer to the graphic block. nullptr if there was not enough memory to
      *         create this block.
      */
     static
@@ -91,27 +95,27 @@
     /**
      * Return a block pool data from 1D block.
      *
-     * @param shared pointer to the 1D block which is already created.
+     * \param block shared pointer to the 1D block which is already created.
      */
     static
-    std::shared_ptr<const _C2BlockPoolData> GetLinearBlockPoolData(
+    std::shared_ptr<_C2BlockPoolData> GetLinearBlockPoolData(
             const C2Block1D& block);
 
     /**
      * Return a block pool data from 2D block.
      *
-     * @param shared pointer to the 2D block which is already created.
+     * \param block shared pointer to the 2D block which is already created.
      */
     static
-    std::shared_ptr<const _C2BlockPoolData> GetGraphicBlockPoolData(
+    std::shared_ptr<_C2BlockPoolData> GetGraphicBlockPoolData(
             const C2Block2D& block);
 
     /**
      * Create a linear block from the received native handle.
      *
-     * @param handle    native handle to a linear block
+     * \param handle    native handle to a linear block
      *
-     * @return shared pointer to the linear block. nullptr if there was not enough memory to
+     * \return shared pointer to the linear block. nullptr if there was not enough memory to
      *         create this block.
      */
     static
@@ -121,9 +125,9 @@
     /**
      * Create a graphic block from the received native handle.
      *
-     * @param handle    native handle to a graphic block
+     * \param handle    native handle to a graphic block
      *
-     * @return shared pointer to the graphic block. nullptr if there was not enough memory to
+     * \return shared pointer to the graphic block. nullptr if there was not enough memory to
      *         create this block.
      */
     static
@@ -133,9 +137,9 @@
     /**
      * Create a linear block from the received bufferpool data.
      *
-     * @param data  bufferpool data to a linear block
+     * \param data  bufferpool data to a linear block
      *
-     * @return shared pointer to the linear block. nullptr if there was not enough memory to
+     * \return shared pointer to the linear block. nullptr if there was not enough memory to
      *         create this block.
      */
     static
@@ -146,9 +150,9 @@
     /**
      * Create a graphic block from the received bufferpool data.
      *
-     * @param data  bufferpool data to a graphic block
+     * \param data  bufferpool data to a graphic block
      *
-     * @return shared pointer to the graphic block. nullptr if there was not enough memory to
+     * \return shared pointer to the graphic block. nullptr if there was not enough memory to
      *         create this block.
      */
     static
@@ -159,31 +163,165 @@
     /**
      * Get bufferpool data from the blockpool data.
      *
-     * @param poolData          blockpool data
-     * @param bufferPoolData    pointer to bufferpool data where the bufferpool
+     * \param poolData          blockpool data
+     * \param bufferPoolData    pointer to bufferpool data where the bufferpool
      *                          data is stored.
      *
-     * @return {@code true} when there is valid bufferpool data, {@code false} otherwise.
+     * \return \c true when there is valid bufferpool data, \c false otherwise.
      */
     static
     bool GetBufferPoolData(
             const std::shared_ptr<const _C2BlockPoolData> &poolData,
             std::shared_ptr<android::hardware::media::bufferpool::BufferPoolData> *bufferPoolData);
 
+    /*
+     * Life Cycle Management of BufferQueue-Based Blocks
+     * =================================================
+     *
+     * A block that is created by a bufferqueue-based blockpool requires some
+     * special treatment when it is destroyed. In particular, if the block
+     * corresponds to a held (dequeued/attached) GraphicBuffer in a slot of a
+     * bufferqueue, its destruction should trigger a call to
+     * IGraphicBufferProducer::cancelBuffer(). On the other hand, if the
+     * GraphicBuffer is not held, i.e., if it has been queued or detached,
+     * cancelBuffer() should not be called upon the destruction of the block.
+     *
+     * _C2BlockPoolData created by a bufferqueue-based blockpool includes two
+     * main pieces of information:
+     *   - "held" status: Whether cancelBuffer() should be called upon
+     *     destruction of the block.
+     *   - bufferqueue assignment: The triple (igbp, bqId, bqSlot), where igbp
+     *     is the IGraphicBufferProducer instance of the bufferqueue, bqId is
+     *     the globally unique id of the bufferqueue, and bqSlot is the slot in
+     *     the bufferqueue.
+     *
+     * igbp is the instance of IGraphicBufferProducer on which cancelBuffer()
+     * will be called if "held" status is true when the block is destroyed.
+     * (bqSlot is an input to cancelBuffer().) However, only bqId and bqSlot
+     * are retained when a block is transferred from one process to another. It
+     * is the responsibility of both the sending and receiving processes to
+     * maintain consistency of "held" status and igbp. Below are functions
+     * provided for this purpose:
+     *
+     *   - GetBufferQueueData(): Returns bqId and bqSlot.
+     *   - HoldBlockFromBufferQueue(): Sets "held" status to true.
+     *   - YieldBlockToBufferQueue(): Sets "held" status to false.
+     *   - AssignBlockToBufferQueue(): Sets the bufferqueue assignment and
+     *     "held" status.
+     *
+     * All these functions operate on _C2BlockPoolData, which can be obtained by
+     * calling GetGraphicBlockPoolData().
+     *
+     * HoldBlockFromBufferQueue() will mark the block as held, while
+     * YieldBlockToBufferQueue() will do the opposite. These two functions do
+     * not modify the bufferqueue assignment, so it is not wrong to call
+     * HoldBlockFromBufferQueue() after YieldBlockToBufferQueue() if it can be
+     * guaranteed that the block is not destroyed during the period between the
+     * two calls.
+     *
+     * AssignBlockToBufferQueue() has a "held" status as an optional argument.
+     * The default value is true.
+     *
+     * Maintaining Consistency with IGraphicBufferProducer Operations
+     * ==============================================================
+     *
+     * dequeueBuffer()
+     *   - This function is called by the blockpool. It should not be called
+     *     manually. The blockpool will automatically generate the correct
+     *     information for _C2BlockPoolData, with "held" status set to true.
+     *
+     * queueBuffer()
+     *   - After queueBuffer() is called, YieldBlockToBufferQueue() should be
+     *     called.
+     *
+     * attachBuffer()
+     *   - After attachBuffer() is called, AssignBlockToBufferQueue() should be
+     *     called with "held" status set to true.
+     *
+     * detachBuffer()
+     *   - After detachBuffer() is called, YieldBlockToBufferQueue() should be
+     *     called, since a detached buffer is no longer held (see above).
+     */
+
     /**
      * Get bufferqueue data from the blockpool data.
      *
-     * @param poolData  blockpool data
-     * @param igbp_id   pointer to id of igbp to be stored
-     * @param igbp_slot pointer to slot of igbp to be stored
+     * Calling this function with \p bqId set to nullptr will return whether the
+     * block comes from a bufferqueue-based blockpool.
      *
-     * @return {@code true} when there is valid bufferqueue data, {@code false} otherwise.
+     * \param[in]  poolData blockpool data
+     * \param[out] bqId     Id of the bufferqueue owning the buffer (block)
+     * \param[out] bqSlot   Slot number of the buffer
+     *
+     * \return \c true when there is valid bufferqueue data;
+     *         \c false otherwise.
      */
     static
     bool GetBufferQueueData(
-            const std::shared_ptr<const _C2BlockPoolData> &poolData,
-            uint64_t *igbp_id,
-            int32_t *igbp_slot);
+            const std::shared_ptr<_C2BlockPoolData>& poolData,
+            uint64_t* bqId = nullptr, int32_t* bqSlot = nullptr);
+
+    /**
+     * Set bufferqueue assignment and "held" status to a block created by a
+     * bufferqueue-based blockpool.
+     *
+     * \param poolData blockpool data associated to the block.
+     * \param igbp     \c IGraphicBufferProducer instance from the designated
+     *                 bufferqueue.
+     * \param bqId     Id of the bufferqueue that will own the buffer (block).
+     * \param bqSlot   Slot number of the buffer.
+     * \param held     Whether the block is held. This "held" status can be
+     *                 changed later by calling YieldBlockToBufferQueue() or
+     *                 HoldBlockFromBufferQueue().
+     *
+     * \return \c true if \p poolData is valid bufferqueue data;
+     *         \c false otherwise.
+     *
+     * Note: bqId should match the unique id obtained from igbp->getUniqueId().
+     */
+    static
+    bool AssignBlockToBufferQueue(
+            const std::shared_ptr<_C2BlockPoolData>& poolData,
+            const ::android::sp<::android::hardware::graphics::bufferqueue::
+                                V1_0::IGraphicBufferProducer>& igbp,
+            uint64_t bqId,
+            int32_t bqSlot,
+            bool held = true);
+
+    /**
+     * Hold a block from the designated bufferqueue. This causes the destruction
+     * of the block to trigger a call to cancelBuffer().
+     *
+     * This function assumes that \p poolData comes from a bufferqueue-based
+     * block. It does not check if that is the case.
+     *
+     * \param poolData blockpool data associated to the block.
+     * \param igbp     \c IGraphicBufferProducer instance to be assigned to the
+     *                 block. This is not needed when the block is local.
+     *
+     * \return \c true if the block was not held before this call; \c false otherwise.
+     */
+    static
+    bool HoldBlockFromBufferQueue(
+            const std::shared_ptr<_C2BlockPoolData>& poolData,
+            const ::android::sp<::android::hardware::graphics::bufferqueue::
+                                V1_0::IGraphicBufferProducer>& igbp = nullptr);
+
+    /**
+     * Yield a block to the designated bufferqueue. This causes the destruction
+     * of the block not to trigger a call to cancelBuffer().
+     *
+     * This function assumes that \p poolData comes from a bufferqueue-based
+     * block. It does not check if that is the case.
+     *
+     * \param poolData blockpool data associated to the block.
+     *
+     * \return The previous held status.
+     */
+    static
+    bool YieldBlockToBufferQueue(
+            const std::shared_ptr<_C2BlockPoolData>& poolData);
+
 };
 
 #endif // ANDROID_STAGEFRIGHT_C2BLOCK_INTERNAL_H_
diff --git a/codec2/vndk/platform/C2BqBuffer.cpp b/codec2/vndk/platform/C2BqBuffer.cpp
index a1788ed..3d13e85 100644
--- a/codec2/vndk/platform/C2BqBuffer.cpp
+++ b/codec2/vndk/platform/C2BqBuffer.cpp
@@ -41,75 +41,96 @@
 using ::android::status_t;
 using ::android::wp;
 
-struct C2_HIDE C2BufferQueueBlockPoolData : public _C2BlockPoolData {
+struct C2BufferQueueBlockPoolData : public _C2BlockPoolData {
+
+    bool held;
+    bool local;
+    uint64_t bqId;
+    int32_t bqSlot;
+    sp<HGraphicBufferProducer> igbp;
+    std::shared_ptr<C2BufferQueueBlockPool::Impl> localPool;
 
     virtual type_t getType() const override {
         return TYPE_BUFFERQUEUE;
     }
 
-    void getBufferQueueData(uint64_t *igbp_id, int32_t *igbp_slot) const {
-        *igbp_id = mIgbpId;
-        *igbp_slot = mIgbpSlot;
-    }
-
-    void remove() {
-        mConnected = false;
-    }
-
+    // Create a remote BlockPoolData.
     C2BufferQueueBlockPoolData(
-            uint64_t igbp_id, int32_t igbp_slot)
-            : mLocal(false), mConnected(true), mIgbpId(igbp_id), mIgbpSlot(igbp_slot) {
+            uint64_t bqId, int32_t bqSlot,
+            const sp<HGraphicBufferProducer>& producer = nullptr);
 
-    }
-
-    // in case of connect without attaching from remote side
-    void setOwner(sp<HGraphicBufferProducer> producer) {
-        if (!mLocal && producer) {
-            mProducer = producer;
-        }
-    }
-
-    // in case of attach and connnect from remote side
-    void setNewOwner(sp<HGraphicBufferProducer> producer, uint64_t igbp_id, int32_t igbp_slot) {
-        if (!mLocal && producer) {
-            mProducer = producer;
-            mIgbpId = igbp_id;
-            mIgbpSlot = igbp_slot;
-        }
-    }
-
+    // Create a local BlockPoolData.
     C2BufferQueueBlockPoolData(
-            uint64_t igbp_id, int32_t igbp_slot,
-            std::shared_ptr<C2BufferQueueBlockPool::Impl> pool)
-            : mLocal(true), mConnected(true), mIgbpId(igbp_id), mIgbpSlot(igbp_slot), mPool(pool) {
-        // TODO: use remove when a block is being disconnected. And remove
-        // clearing mConnected.
-        mConnected = false;
-    }
+            uint64_t bqId, int32_t bqSlot,
+            const std::shared_ptr<C2BufferQueueBlockPool::Impl>& pool);
 
     virtual ~C2BufferQueueBlockPoolData() override;
 
-private:
-    bool mLocal;
-    bool mConnected;
-    uint64_t mIgbpId;
-    int32_t mIgbpSlot;
-    std::weak_ptr<C2BufferQueueBlockPool::Impl> mPool;
-    wp<HGraphicBufferProducer> mProducer;
 };
 
 bool _C2BlockFactory::GetBufferQueueData(
-        const std::shared_ptr<const _C2BlockPoolData> &data,
-        uint64_t *igbp_id, int32_t *igbp_slot) {
+        const std::shared_ptr<_C2BlockPoolData>& data,
+        uint64_t* bqId, int32_t* bqSlot) {  // both outputs are optional (may be nullptr)
     if (data && data->getType() == _C2BlockPoolData::TYPE_BUFFERQUEUE) {
-        const std::shared_ptr<const C2BufferQueueBlockPoolData> poolData =
-                std::static_pointer_cast<const C2BufferQueueBlockPoolData>(data);
-        poolData->getBufferQueueData(igbp_id, igbp_slot);
+        // Fill each requested output independently of the other one.
+        const auto poolData = std::static_pointer_cast<C2BufferQueueBlockPoolData>(data);
+        if (bqId) {
+            *bqId = poolData->bqId;
+        }
+        if (bqSlot) {
+            *bqSlot = poolData->bqSlot;
+        }
         return true;
     }
     return false;
 }
 
+bool _C2BlockFactory::AssignBlockToBufferQueue(
+        const std::shared_ptr<_C2BlockPoolData>& data,
+        const sp<HGraphicBufferProducer>& igbp,
+        uint64_t bqId,
+        int32_t bqSlot,
+        bool held) {
+    // Reject null data and data that is not bufferqueue-based.
+    if (!data || data->getType() != _C2BlockPoolData::TYPE_BUFFERQUEUE) {
+        return false;
+    }
+    const auto target = std::static_pointer_cast<C2BufferQueueBlockPoolData>(data);
+    target->igbp = igbp;
+    target->bqId = bqId;
+    target->bqSlot = bqSlot;
+    target->held = held;
+    return true;
+}
+
+bool _C2BlockFactory::HoldBlockFromBufferQueue(
+        const std::shared_ptr<_C2BlockPoolData>& data,
+        const sp<HGraphicBufferProducer>& igbp) {
+    // No type check is made: data must be bufferqueue-based pool data.
+    const auto target = std::static_pointer_cast<C2BufferQueueBlockPoolData>(data);
+    if (!target->local) {
+        // Only non-local data takes the supplied producer.
+        target->igbp = igbp;
+    }
+    // Returns true only when this call changed the status from not-held
+    // to held; false when the block was already held.
+    const bool newlyHeld = !target->held;
+    target->held = true;
+    return newlyHeld;
+}
+
+bool _C2BlockFactory::YieldBlockToBufferQueue(
+        const std::shared_ptr<_C2BlockPoolData>& data) {
+    // No type check is made: data must be bufferqueue-based pool data.
+    const auto target =
+            std::static_pointer_cast<C2BufferQueueBlockPoolData>(data);
+    // Clear the flag and report the status it had before this call:
+    // true if the block was held, false if it was already yielded.
+    const bool wasHeld = target->held;
+    target->held = false;
+    return wasHeld;
+}
+
 std::shared_ptr<C2GraphicBlock> _C2BlockFactory::CreateGraphicBlock(
         const C2Handle *handle) {
     // TODO: get proper allocator? and mutex?
@@ -393,23 +414,34 @@
     sp<GraphicBuffer> mBuffers[NUM_BUFFER_SLOTS];
 };
 
-C2BufferQueueBlockPoolData::~C2BufferQueueBlockPoolData() {
-    if (mIgbpId == 0 || mConnected == false) {
-        return;
-    }
-    if (mLocal) {
-        auto lockedPool = mPool.lock();
-        if (lockedPool) {
-            lockedPool->cancel(mIgbpId, mIgbpSlot);
-        }
-    } else {
-        sp<HGraphicBufferProducer> producer = mProducer.promote();
-        if (producer) {
-            producer->cancelBuffer(mIgbpSlot, nullptr);
-        }
-    }
+C2BufferQueueBlockPoolData::C2BufferQueueBlockPoolData(
+        uint64_t bqId, int32_t bqSlot,
+        const sp<HGraphicBufferProducer>& producer) :
+        held(producer && bqId != 0), local(false),  // held only with a producer and non-zero bqId
+        bqId(bqId), bqSlot(bqSlot),
+        igbp(producer),
+        localPool() {  // remote pool data has no local pool
 }
 
+C2BufferQueueBlockPoolData::C2BufferQueueBlockPoolData(
+        uint64_t bqId, int32_t bqSlot,
+        const std::shared_ptr<C2BufferQueueBlockPool::Impl>& pool) :
+        held(true), local(true),  // local pool data always starts out held
+        bqId(bqId), bqSlot(bqSlot),
+        igbp(pool ? pool->mProducer : nullptr),  // no producer when pool is null
+        localPool(pool) {
+}
+
+C2BufferQueueBlockPoolData::~C2BufferQueueBlockPoolData() {
+    // Nothing is cancelled unless the block is held and bqId is non-zero.
+    const bool mustCancel = held && bqId != 0;
+    if (mustCancel && local && localPool) {
+        localPool->cancel(bqId, bqSlot);
+    } else if (mustCancel && igbp) {
+        // No usable local pool: cancel directly on the producer.
+        igbp->cancelBuffer(bqSlot, nullptr);
+    }
+}
 
 C2BufferQueueBlockPool::C2BufferQueueBlockPool(
         const std::shared_ptr<C2Allocator> &allocator, const local_id_t localId)
diff --git a/media/sfplugin/CCodecBufferChannel.cpp b/media/sfplugin/CCodecBufferChannel.cpp
index bcfd3ad..c834f36 100644
--- a/media/sfplugin/CCodecBufferChannel.cpp
+++ b/media/sfplugin/CCodecBufferChannel.cpp
@@ -252,26 +252,6 @@
     DISALLOW_EVIL_CONSTRUCTORS(OutputBuffers);
 };
 
-class CCodecBufferChannel::OutputBufferQueue {
-public:
-    OutputBufferQueue() : mIgbp(nullptr), mIgbpId(0) {}
-    bool isNewIgbp(sp<IGraphicBufferProducer> igbp) {
-        if (!igbp || igbp == mIgbp) return false;
-        mIgbp = igbp;
-        mIgbp->getUniqueId(&mIgbpId);
-        mIgbp->setMaxDequeuedBufferCount(16); // TODO: tune
-        return true;
-    }
-
-    bool isNewIgbpId(uint64_t igbpId) {
-        return igbpId != mIgbpId;
-    }
-
-private:
-    sp<IGraphicBufferProducer> mIgbp;
-    uint64_t mIgbpId;
-};
-
 namespace {
 
 // TODO: get this info from component
@@ -1206,7 +1186,6 @@
       mCCodecCallback(callback),
       mFrameIndex(0u),
       mFirstValidFrameIndex(0u),
-      mOutputBufferQueue(new OutputBufferQueue()),
       mMetaMode(MODE_NONE),
       mPendingFeed(0) {
 }
@@ -1562,23 +1541,11 @@
         std::static_pointer_cast<const C2StreamHdrStaticInfo::output>(
                 c2Buffer->getInfo(C2StreamHdrStaticInfo::output::PARAM_TYPE));
 
-    Mutexed<OutputSurface>::Locked output(mOutputSurface);
-    if (output->surface == nullptr) {
-        ALOGE("no surface");
-        return OK;
-    }
-    sp<IGraphicBufferProducer> igbp = output->surface->getIGraphicBufferProducer();
-    if (mOutputBufferQueue->isNewIgbp(igbp)) {
-        sp<HGraphicBufferProducer> higbp = igbp->getHalInterface();
-        if (!higbp) {
-            higbp = new TWGraphicBufferProducer<HGraphicBufferProducer>(igbp);
-        }
-        Mutexed<BlockPools>::Locked pools(mBlockPools);
-        // set output surface for managed pools (other than ion or gralloc-backed pool)
-        if (pools->outputPoolId >= C2BlockPool::PLATFORM_START
-                && pools->outputAllocatorId != C2PlatformAllocatorStore::GRALLOC
-                && pools->outputAllocatorId != C2PlatformAllocatorStore::ION) {
-            mComponent->setOutputSurface(pools->outputPoolId, higbp);
+    {
+        Mutexed<OutputSurface>::Locked output(mOutputSurface);
+        if (output->surface == nullptr) {
+            ALOGE("no surface");
+            return OK;
         }
     }
 
@@ -1589,42 +1556,6 @@
     }
     const C2ConstGraphicBlock &block = blocks.front();
 
-    native_handle_t *grallocHandle = UnwrapNativeCodec2GrallocHandle(block.handle());
-    uint32_t width;
-    uint32_t height;
-    uint32_t format;
-    uint64_t usage;
-    uint32_t stride;
-    uint64_t igbp_id;
-    int32_t igbp_slot;
-    _UnwrapNativeCodec2GrallocMetadata(
-            block.handle(), &width, &height, &format, &usage, &stride,
-            &igbp_id, (uint32_t *)&igbp_slot);
-    ALOGV("attaching buffer (%u*%u): (%u*%u, fmt %#x, usage %#llx, stride %u)",
-            block.width(),
-            block.height(),
-            width, height, format, (long long)usage, stride);
-
-    status_t result = OK;
-    if (mOutputBufferQueue->isNewIgbpId(igbp_id)) {
-        sp<GraphicBuffer> graphicBuffer(new GraphicBuffer(
-                grallocHandle,
-                GraphicBuffer::CLONE_HANDLE,
-                width, height, format, 1, usage, stride));
-        // TODO: detach?
-        graphicBuffer->setGenerationNumber(output->generation);
-        result = igbp->attachBuffer(&igbp_slot, graphicBuffer);
-        if (result != OK) {
-            native_handle_delete(grallocHandle);
-            ALOGI("attachBuffer failed: %d", result);
-            return result;
-        }
-        ALOGV("attach buffer from %" PRIu64 " : %d", igbp_id, igbp_slot);
-    } else {
-        ALOGV("dequeued buffer arrived %" PRIu64 " %d", igbp_id, igbp_slot);
-    }
-    native_handle_delete(grallocHandle);
-
     // TODO: revisit this after C2Fence implementation.
     android::IGraphicBufferProducer::QueueBufferInput qbi(
             timestampNs,
@@ -1667,7 +1598,7 @@
         qbi.setHdrMetadata(hdr);
     }
     android::IGraphicBufferProducer::QueueBufferOutput qbo;
-    result = igbp->queueBuffer(igbp_slot, qbi, &qbo);
+    status_t result = mComponent->queueToOutputSurface(block, qbi, &qbo);
     if (result != OK) {
         ALOGE("queueBuffer failed: %d", result);
         return result;
@@ -1746,7 +1677,8 @@
     std::shared_ptr<C2AllocatorStore> allocatorStore = GetCodec2PlatformAllocatorStore();
     int poolMask = property_get_int32(
             "debug.stagefright.c2-poolmask",
-            1 << C2PlatformAllocatorStore::ION);
+            1 << C2PlatformAllocatorStore::ION |
+            1 << C2PlatformAllocatorStore::BUFFERQUEUE);
 
     if (inputFormat != nullptr) {
         bool graphic = (iStreamFormat.value == C2FormatVideo);
@@ -1848,14 +1780,18 @@
     }
 
     if (outputFormat != nullptr) {
-        bool hasOutputSurface = false;
+        sp<IGraphicBufferProducer> outputSurface;
+        uint32_t outputGeneration;
         {
             Mutexed<OutputSurface>::Locked output(mOutputSurface);
-            hasOutputSurface = (output->surface != nullptr);
+            outputSurface = output->surface ?
+                    output->surface->getIGraphicBufferProducer() : nullptr;
+            outputGeneration = output->generation;
         }
 
         bool graphic = (oStreamFormat.value == C2FormatVideo);
         C2BlockPool::local_id_t outputPoolId_;
+
         {
             Mutexed<BlockPools>::Locked pools(mBlockPools);
 
@@ -1891,7 +1827,7 @@
 
             // use bufferqueue if outputting to a surface
             if (pools->outputAllocatorId == C2PlatformAllocatorStore::GRALLOC
-                    && hasOutputSurface
+                    && outputSurface
                     && ((poolMask >> C2PlatformAllocatorStore::BUFFERQUEUE) & 1)) {
                 pools->outputAllocatorId = C2PlatformAllocatorStore::BUFFERQUEUE;
             }
@@ -1927,7 +1863,7 @@
         Mutexed<std::unique_ptr<OutputBuffers>>::Locked buffers(mOutputBuffers);
 
         if (graphic) {
-            if (hasOutputSurface) {
+            if (outputSurface) {
                 buffers->reset(new GraphicOutputBuffers);
             } else {
                 buffers->reset(new RawGraphicOutputBuffers);
@@ -1939,16 +1875,11 @@
 
 
         // Try to set output surface to created block pool if given.
-        if (hasOutputSurface) {
-            Mutexed<OutputSurface>::Locked output(mOutputSurface);
-            sp<IGraphicBufferProducer> igbp = output->surface->getIGraphicBufferProducer();
-            if (mOutputBufferQueue->isNewIgbp(igbp)) {
-                sp<HGraphicBufferProducer> higbp = igbp->getHalInterface();
-                if (!higbp) {
-                    higbp = new TWGraphicBufferProducer<HGraphicBufferProducer>(igbp);
-                }
-                mComponent->setOutputSurface(outputPoolId_, higbp);
-            }
+        if (outputSurface) {
+            mComponent->setOutputSurface(
+                    outputPoolId_,
+                    outputSurface,
+                    outputGeneration);
         }
 
         if (oStreamFormat.value == C2FormatAudio) {
@@ -2167,7 +2098,6 @@
         newSurface->setMaxDequeuedBufferCount(kMinOutputBufferArraySize);
     }
 
-    Mutexed<OutputSurface>::Locked output(mOutputSurface);
 //    if (newSurface == nullptr) {
 //        if (*surface != nullptr) {
 //            ALOGW("cannot unset a surface");
@@ -2181,15 +2111,41 @@
 //        return INVALID_OPERATION;
 //    }
 
-    output->surface = newSurface;
+    uint32_t generation;
+
     ANativeWindowBuffer *buf;
-    ANativeWindow *window = output->surface.get();
+    ANativeWindow *window = newSurface.get();
     int fenceFd;
     window->dequeueBuffer(window, &buf, &fenceFd);
     sp<GraphicBuffer> gbuf = GraphicBuffer::from(buf);
-    output->generation = gbuf->getGenerationNumber();
+    generation = gbuf->getGenerationNumber();
     window->cancelBuffer(window, buf, fenceFd);
 
+    std::shared_ptr<Codec2Client::Configurable> outputPoolIntf;
+    C2BlockPool::local_id_t outputPoolId;
+    {
+        Mutexed<BlockPools>::Locked pools(mBlockPools);
+        outputPoolId = pools->outputPoolId;
+        outputPoolIntf = pools->outputPoolIntf;
+    }
+
+    if (outputPoolIntf) {
+        if (mComponent->setOutputSurface(
+                outputPoolId,
+                newSurface->getIGraphicBufferProducer(),
+                generation) != C2_OK) {
+            ALOGW("setSurface -- setOutputSurface() failed to configure "
+                    "new surface to the component's output block pool.");
+            return INVALID_OPERATION;
+        }
+    }
+
+    {
+        Mutexed<OutputSurface>::Locked output(mOutputSurface);
+        output->surface = newSurface;
+        output->generation = generation = gbuf->getGenerationNumber();
+    }
+
     return OK;
 }
 
diff --git a/media/sfplugin/CCodecBufferChannel.h b/media/sfplugin/CCodecBufferChannel.h
index 4762c33..2127e2b 100644
--- a/media/sfplugin/CCodecBufferChannel.h
+++ b/media/sfplugin/CCodecBufferChannel.h
@@ -130,7 +130,6 @@
     class Buffers;
     class InputBuffers;
     class OutputBuffers;
-    class OutputBufferQueue;
 
 private:
     class QueueGuard;
@@ -213,7 +212,6 @@
         uint32_t generation;
     };
     Mutexed<OutputSurface> mOutputSurface;
-    std::unique_ptr<OutputBufferQueue> mOutputBufferQueue;
 
     struct BlockPools {
         C2Allocator::id_t inputAllocatorId;