Use implementation-defined format when HWC writes to output buffer

When GLES isn't writing to the output buffer directly, request an
implementation-defined format with minimal usage flags, leaving the
format choice up to gralloc. On some hardware this allows HWC to do
format conversions during composition that would otherwise need to be
done (with worse power and/or performance) by the consumer.

Bug: 8316155
Change-Id: Iee6ee8404282036f9fd1833067cfe11dbadbf0bf
diff --git a/services/surfaceflinger/DisplayHardware/VirtualDisplaySurface.cpp b/services/surfaceflinger/DisplayHardware/VirtualDisplaySurface.cpp
index c5a14b0..29e9d40 100644
--- a/services/surfaceflinger/DisplayHardware/VirtualDisplaySurface.cpp
+++ b/services/surfaceflinger/DisplayHardware/VirtualDisplaySurface.cpp
@@ -47,7 +47,8 @@
     mHwc(hwc),
     mDisplayId(dispId),
     mDisplayName(name),
-    mProducerUsage(GRALLOC_USAGE_HW_COMPOSER),
+    mOutputFormat(HAL_PIXEL_FORMAT_IMPLEMENTATION_DEFINED),
+    mOutputUsage(GRALLOC_USAGE_HW_COMPOSER),
     mProducerSlotSource(0),
     mDbgState(DBG_STATE_IDLE),
     mDbgLastCompositionType(COMPOSITION_UNKNOWN)
@@ -95,13 +96,30 @@
     mDbgState = DBG_STATE_PREPARED;
 
     mCompositionType = compositionType;
-
     if (mCompositionType != mDbgLastCompositionType) {
         VDS_LOGV("prepareFrame: composition type changed to %s",
                 dbgCompositionTypeStr(mCompositionType));
         mDbgLastCompositionType = mCompositionType;
     }
 
+    if (mCompositionType != COMPOSITION_GLES &&
+            (mOutputFormat != HAL_PIXEL_FORMAT_IMPLEMENTATION_DEFINED ||
+             mOutputUsage != GRALLOC_USAGE_HW_COMPOSER)) {
+        // We must have just switched from GLES-only to MIXED or HWC
+        // composition. Stop using the format and usage requested by the GLES
+        // driver; they may be suboptimal when HWC is writing to the output
+        // buffer. For example, if the output is going to a video encoder, and
+        // HWC can write directly to YUV, some hardware can skip a
+        // memory-to-memory RGB-to-YUV conversion step.
+        //
+        // If we just switched *to* GLES-only mode, we'll change the
+        // format/usage and get a new buffer when the GLES driver calls
+        // dequeueBuffer().
+        mOutputFormat = HAL_PIXEL_FORMAT_IMPLEMENTATION_DEFINED;
+        mOutputUsage = GRALLOC_USAGE_HW_COMPOSER;
+        refreshOutputBuffer();
+    }
+
     return NO_ERROR;
 }
 
@@ -212,12 +230,12 @@
 }
 
 status_t VirtualDisplaySurface::dequeueBuffer(Source source,
-        uint32_t format, int* sslot, sp<Fence>* fence) {
+        uint32_t format, uint32_t usage, int* sslot, sp<Fence>* fence) {
     // Don't let a slow consumer block us
     bool async = (source == SOURCE_SINK);
 
     status_t result = mSource[source]->dequeueBuffer(sslot, fence, async,
-            mSinkBufferWidth, mSinkBufferHeight, format, mProducerUsage);
+            mSinkBufferWidth, mSinkBufferHeight, format, usage);
     if (result < 0)
         return result;
     int pslot = mapSource2ProducerSlot(source, *sslot);
@@ -258,7 +276,6 @@
     VDS_LOGV("dequeueBuffer %dx%d fmt=%d usage=%#x", w, h, format, usage);
 
     status_t result = NO_ERROR;
-    mProducerUsage = usage | GRALLOC_USAGE_HW_COMPOSER;
     Source source = fbSourceForCompositionType(mCompositionType);
 
     if (source == SOURCE_SINK) {
@@ -279,13 +296,20 @@
         // prepare and set, but since we're in GLES-only mode already it
         // shouldn't matter.
 
+        usage |= GRALLOC_USAGE_HW_COMPOSER;
         const sp<GraphicBuffer>& buf = mProducerBuffers[mOutputProducerSlot];
-        if ((mProducerUsage & ~buf->getUsage()) != 0 ||
+        if ((usage & ~buf->getUsage()) != 0 ||
                 (format != 0 && format != (uint32_t)buf->getPixelFormat()) ||
                 (w != 0 && w != mSinkBufferWidth) ||
                 (h != 0 && h != mSinkBufferHeight)) {
-            VDS_LOGV("dequeueBuffer: output buffer doesn't satisfy GLES "
-                    "request, getting a new buffer");
+            VDS_LOGV("dequeueBuffer: dequeueing new output buffer: "
+                    "want %dx%d fmt=%d use=%#x, "
+                    "have %dx%d fmt=%d use=%#x",
+                    w, h, format, usage,
+                    mSinkBufferWidth, mSinkBufferHeight,
+                    buf->getPixelFormat(), buf->getUsage());
+            mOutputFormat = format;
+            mOutputUsage = usage;
             result = refreshOutputBuffer();
             if (result < 0)
                 return result;
@@ -297,7 +321,7 @@
         *fence = mOutputFence;
     } else {
         int sslot;
-        result = dequeueBuffer(source, format, &sslot, fence);
+        result = dequeueBuffer(source, format, usage, &sslot, fence);
         if (result >= 0) {
             *pslot = mapSource2ProducerSlot(source, sslot);
         }
@@ -414,7 +438,8 @@
     }
 
     int sslot;
-    status_t result = dequeueBuffer(SOURCE_SINK, 0, &sslot, &mOutputFence);
+    status_t result = dequeueBuffer(SOURCE_SINK, mOutputFormat, mOutputUsage,
+            &sslot, &mOutputFence);
     if (result < 0)
         return result;
     mOutputProducerSlot = mapSource2ProducerSlot(SOURCE_SINK, sslot);
diff --git a/services/surfaceflinger/DisplayHardware/VirtualDisplaySurface.h b/services/surfaceflinger/DisplayHardware/VirtualDisplaySurface.h
index 18fb5a7..57b5554 100644
--- a/services/surfaceflinger/DisplayHardware/VirtualDisplaySurface.h
+++ b/services/surfaceflinger/DisplayHardware/VirtualDisplaySurface.h
@@ -110,7 +110,7 @@
     // Utility methods
     //
     static Source fbSourceForCompositionType(CompositionType type);
-    status_t dequeueBuffer(Source source, uint32_t format,
+    status_t dequeueBuffer(Source source, uint32_t format, uint32_t usage,
             int* sslot, sp<Fence>* fence);
     void updateQueueBufferOutput(const QueueBufferOutput& qbo);
     void resetPerFrameState();
@@ -137,10 +137,12 @@
     // Inter-frame state
     //
 
-    // To avoid buffer reallocations, we track the buffer usage requested by
-    // the GLES driver in dequeueBuffer so we can use the same flags on
-    // HWC-only frames.
-    uint32_t mProducerUsage;
+    // To avoid buffer reallocations, we track the buffer usage and format
+    // we used on the previous frame and use it again on the new frame. If
+    // the composition type changes or the GLES driver starts requesting
+    // different usage/format, we'll get a new buffer.
+    uint32_t mOutputFormat;
+    uint32_t mOutputUsage;
 
     // Since we present a single producer interface to the GLES driver, but
     // are internally muxing between the sink and scratch producers, we have