Optimize dynamic buffers.

In D3D11, we previously always used a staging buffer to proxy
data to the GPU. This change allows users who specify
GL_DYNAMIC_DRAW to skip the staging buffer as long as the buffer
is only used as an index or vertex buffer.

This improves performance on all tested GPU vendors, but in D3D11
on AMD and Intel our SubData calls are still significantly slower
than in D3D9.

BUG=angle:705
BUG=365078

Change-Id: I4f83164176d67ff00119bdd0a6a80d7c84fd0f03
Reviewed-on: https://chromium-review.googlesource.com/213813
Reviewed-by: Geoff Lang <geofflang@chromium.org>
Tested-by: Jamie Madill <jmadill@chromium.org>
diff --git a/src/common/mathutil.h b/src/common/mathutil.h
index ffcb908..9e7a8de 100644
--- a/src/common/mathutil.h
+++ b/src/common/mathutil.h
@@ -507,12 +507,12 @@
 struct Range
 {
     Range() {}
-    Range(T lo, T hi) : start(lo), end(hi) { ASSERT(lo <= hi); }
+    Range(T lo, T hi) : start(lo), end(hi) { }  // lo > hi is allowed and denotes an empty range (see length())
 
     T start;
     T end;
 
-    T length() const { return end - start; }
+    T length() const { return (end > start ? (end - start) : 0); }  // clamps to 0 when end <= start instead of going negative / wrapping
 };
 
 typedef Range<int> RangeI;
diff --git a/src/libGLESv2/renderer/d3d/MemoryBuffer.cpp b/src/libGLESv2/renderer/d3d/MemoryBuffer.cpp
index 301bbe8..4634a34 100644
--- a/src/libGLESv2/renderer/d3d/MemoryBuffer.cpp
+++ b/src/libGLESv2/renderer/d3d/MemoryBuffer.cpp
@@ -28,9 +28,7 @@
 {
     if (size == 0)
     {
-        free(mData);
-        mData = NULL;
-        mSize = 0;
+        clear();
     }
     else
     {
@@ -69,4 +67,11 @@
     return mData;
 }
 
+void MemoryBuffer::clear()  // Frees the storage and resets the buffer to size 0.
+{
+    free(mData);
+    mData = NULL;  // leaves a repeated clear() as a harmless free(NULL)
+    mSize = 0;
+}
+
 }
diff --git a/src/libGLESv2/renderer/d3d/MemoryBuffer.h b/src/libGLESv2/renderer/d3d/MemoryBuffer.h
index 2484c07..47b5772 100644
--- a/src/libGLESv2/renderer/d3d/MemoryBuffer.h
+++ b/src/libGLESv2/renderer/d3d/MemoryBuffer.h
@@ -21,10 +21,13 @@
 
     bool resize(size_t size);
     size_t size() const;
+    void clear();  // frees the storage, nulls mData and zeroes mSize
 
     const uint8_t *data() const;
     uint8_t *data();
 
+    bool empty() const { return (mSize == 0); }  // true when the recorded size is zero, e.g. right after clear()
+
   private:
     size_t mSize;
     uint8_t *mData;
diff --git a/src/libGLESv2/renderer/d3d/d3d11/Buffer11.cpp b/src/libGLESv2/renderer/d3d/d3d11/Buffer11.cpp
index 86d77c4..828a5cd 100644
--- a/src/libGLESv2/renderer/d3d/d3d11/Buffer11.cpp
+++ b/src/libGLESv2/renderer/d3d/d3d11/Buffer11.cpp
@@ -163,7 +163,9 @@
       mSize(0),
       mMappedStorage(NULL),
       mResolvedDataRevision(0),
-      mReadUsageCount(0)
+      mReadUsageCount(0),
+      mDynamicUsage(0),
+      mDynamicDirtyRange(std::numeric_limits<size_t>::max(), std::numeric_limits<size_t>::min())
 {}
 
 Buffer11::~Buffer11()
@@ -182,6 +184,16 @@
 
 void Buffer11::setData(const void *data, size_t size, GLenum usage)
 {
+    mDynamicUsage = (usage == GL_DYNAMIC_DRAW);
+
+    if (mDynamicUsage)
+    {
+        if (!mDynamicData.resize(size))
+        {
+            return gl::error(GL_OUT_OF_MEMORY);
+        }
+    }
+
     setSubData(data, size, 0);
 
     if (usage == GL_STATIC_DRAW)
@@ -240,6 +252,14 @@
 
     if (data && size > 0)
     {
+        if (mDynamicUsage)  // GL_DYNAMIC_DRAW path: record the write in the CPU-side copy only
+        {
+            mDynamicDirtyRange.start = std::min(mDynamicDirtyRange.start, offset);  // grow the dirty range to cover this write
+            mDynamicDirtyRange.end = std::max(mDynamicDirtyRange.end, size + offset);
+            memcpy(mDynamicData.data() + offset, data, size);  // NOTE(review): no check here that offset + size <= mDynamicData.size() -- confirm callers validate
+            return;  // skips the staging-buffer path that follows
+        }
+
         NativeBuffer11 *stagingBuffer = getStagingBuffer();
 
         if (!stagingBuffer)
@@ -297,7 +317,7 @@
             {
                 if (source->getUsage() == BUFFER_USAGE_STAGING)
                 {
-                    source = getBufferStorage(BUFFER_USAGE_VERTEX_OR_TRANSFORM_FEEDBACK);
+                    source = getBufferStorage(BUFFER_USAGE_VERTEX);
                 }
                 else
                 {
@@ -358,7 +378,7 @@
 
 void Buffer11::markTransformFeedbackUsage()
 {
-    BufferStorage11 *transformFeedbackStorage = getBufferStorage(BUFFER_USAGE_VERTEX_OR_TRANSFORM_FEEDBACK);
+    BufferStorage11 *transformFeedbackStorage = getBufferStorage(BUFFER_USAGE_TRANSFORM_FEEDBACK);
 
     if (transformFeedbackStorage)
     {
@@ -464,10 +484,38 @@
     }
 }
 
-Buffer11::BufferStorage11 *Buffer11::getBufferStorage(BufferUsage usage)
+Buffer11::BufferStorage11 *Buffer11::getBufferStorage(BufferUsage requestedUsage)
 {
+    ASSERT(requestedUsage != BUFFER_USAGE_VERTEX_DYNAMIC);
+    ASSERT(requestedUsage != BUFFER_USAGE_INDEX_DYNAMIC);
+
+    BufferUsage internalUsage = requestedUsage;
+
+    if (mDynamicUsage)
+    {
+        if (requestedUsage == BUFFER_USAGE_VERTEX)
+        {
+            internalUsage = BUFFER_USAGE_VERTEX_DYNAMIC;
+        }
+        else if (requestedUsage == BUFFER_USAGE_INDEX)
+        {
+            internalUsage = BUFFER_USAGE_INDEX_DYNAMIC;
+        }
+        else
+        {
+            // Convert out of dynamic usage
+            setData(mDynamicData.data(), mDynamicData.size(), GL_STATIC_DRAW);
+        }
+    }
+
+    // Internally we share the same NativeBuffer11 for stream out and vertex data
+    if (requestedUsage == BUFFER_USAGE_TRANSFORM_FEEDBACK)
+    {
+        internalUsage = BUFFER_USAGE_VERTEX;
+    }
+
     BufferStorage11 *directBuffer = NULL;
-    auto directBufferIt = mBufferStorages.find(usage);
+    auto directBufferIt = mBufferStorages.find(internalUsage);
     if (directBufferIt != mBufferStorages.end())
     {
         directBuffer = directBufferIt->second;
@@ -475,17 +523,17 @@
 
     if (!directBuffer)
     {
-        if (usage == BUFFER_USAGE_PIXEL_PACK)
+        if (internalUsage == BUFFER_USAGE_PIXEL_PACK)
         {
             directBuffer = new PackStorage11(mRenderer);
         }
         else
         {
             // buffer is not allocated, create it
-            directBuffer = new NativeBuffer11(mRenderer, usage);
+            directBuffer = new NativeBuffer11(mRenderer, internalUsage);
         }
 
-        mBufferStorages.insert(std::make_pair(usage, directBuffer));
+        mBufferStorages.insert(std::make_pair(internalUsage, directBuffer));
     }
 
     // resize buffer
@@ -498,6 +546,18 @@
         }
     }
 
+    if (mDynamicUsage)  // dynamic path: push pending CPU-side writes straight into the native buffer
+    {
+        if (!mDynamicData.empty() && mDynamicDirtyRange.length() > 0)  // nothing to upload when no write has been recorded
+        {   // NOTE(review): the source below is mDynamicData.data() with no "+ start", while the destination offset is start -- confirm NativeBuffer11::setData's offset semantics
+            ASSERT(HAS_DYNAMIC_TYPE(NativeBuffer11*, directBuffer));
+            NativeBuffer11 *dynamicBuffer = static_cast<NativeBuffer11*>(directBuffer);
+            dynamicBuffer->setData(D3D11_MAP_WRITE_NO_OVERWRITE, mDynamicData.data(), mDynamicDirtyRange.length(), mDynamicDirtyRange.start);
+        }   // NOTE(review): mDynamicDirtyRange is not reset in the visible hunks -- confirm it is cleared after the upload
+
+        return directBuffer;  // returns before the data-revision comparison against the latest storage
+    }
+
     BufferStorage11 *latestBuffer = getLatestBufferStorage();
     if (latestBuffer && latestBuffer->getDataRevision() > directBuffer->getDataRevision())
     {
@@ -701,7 +761,8 @@
         bufferDesc->CPUAccessFlags = D3D11_CPU_ACCESS_READ | D3D11_CPU_ACCESS_WRITE;
         break;
 
-      case BUFFER_USAGE_VERTEX_OR_TRANSFORM_FEEDBACK:
+      case BUFFER_USAGE_VERTEX:
+      case BUFFER_USAGE_TRANSFORM_FEEDBACK:
         bufferDesc->Usage = D3D11_USAGE_DEFAULT;
         bufferDesc->BindFlags = D3D11_BIND_VERTEX_BUFFER | D3D11_BIND_STREAM_OUTPUT;
         bufferDesc->CPUAccessFlags = 0;
@@ -730,6 +791,18 @@
         bufferDesc->ByteWidth = std::min(bufferDesc->ByteWidth, renderer->getMaxUniformBufferSize());
         break;
 
+      case BUFFER_USAGE_VERTEX_DYNAMIC:
+        bufferDesc->Usage = D3D11_USAGE_DYNAMIC;
+        bufferDesc->BindFlags = D3D11_BIND_VERTEX_BUFFER;
+        bufferDesc->CPUAccessFlags = D3D11_CPU_ACCESS_WRITE;
+        break;
+
+      case BUFFER_USAGE_INDEX_DYNAMIC:
+        bufferDesc->Usage = D3D11_USAGE_DYNAMIC;
+        bufferDesc->BindFlags = D3D11_BIND_INDEX_BUFFER;
+        bufferDesc->CPUAccessFlags = D3D11_CPU_ACCESS_WRITE;
+        break;
+
     default:
         UNREACHABLE();
     }
diff --git a/src/libGLESv2/renderer/d3d/d3d11/Buffer11.h b/src/libGLESv2/renderer/d3d/d3d11/Buffer11.h
index 51d47ec..41ab780 100644
--- a/src/libGLESv2/renderer/d3d/d3d11/Buffer11.h
+++ b/src/libGLESv2/renderer/d3d/d3d11/Buffer11.h
@@ -20,11 +20,16 @@
 enum BufferUsage
 {
     BUFFER_USAGE_STAGING,
-    BUFFER_USAGE_VERTEX_OR_TRANSFORM_FEEDBACK,
+    BUFFER_USAGE_VERTEX,
+    BUFFER_USAGE_TRANSFORM_FEEDBACK,  // separate request value; Buffer11::getBufferStorage resolves it to the BUFFER_USAGE_VERTEX storage
     BUFFER_USAGE_INDEX,
     BUFFER_USAGE_PIXEL_UNPACK,
     BUFFER_USAGE_PIXEL_PACK,
     BUFFER_USAGE_UNIFORM,
+
+    // Internal flags: selected inside Buffer11::getBufferStorage for GL_DYNAMIC_DRAW buffers; callers must not request them
+    BUFFER_USAGE_VERTEX_DYNAMIC,
+    BUFFER_USAGE_INDEX_DYNAMIC
 };
 
 struct PackPixelsParams
@@ -78,6 +83,13 @@
     class NativeBuffer11;
     class PackStorage11;
 
+    void markBufferUsage();
+    NativeBuffer11 *getStagingBuffer();
+    PackStorage11 *getPackStorage();
+
+    BufferStorage11 *getBufferStorage(BufferUsage usage);
+    BufferStorage11 *getLatestBufferStorage() const;
+
     rx::Renderer11 *mRenderer;
     size_t mSize;
 
@@ -92,12 +104,9 @@
     DataRevision mResolvedDataRevision;
     unsigned int mReadUsageCount;
 
-    void markBufferUsage();
-    NativeBuffer11 *getStagingBuffer();
-    PackStorage11 *getPackStorage();
-
-    BufferStorage11 *getBufferStorage(BufferUsage usage);
-    BufferStorage11 *getLatestBufferStorage() const;
+    MemoryBuffer mDynamicData;
+    bool mDynamicUsage;
+    Range<size_t> mDynamicDirtyRange;
 };
 
 }
diff --git a/src/libGLESv2/renderer/d3d/d3d11/InputLayoutCache.cpp b/src/libGLESv2/renderer/d3d/d3d11/InputLayoutCache.cpp
index 2b9a7e5..780bfa0 100644
--- a/src/libGLESv2/renderer/d3d/d3d11/InputLayoutCache.cpp
+++ b/src/libGLESv2/renderer/d3d/d3d11/InputLayoutCache.cpp
@@ -196,7 +196,7 @@
             VertexBuffer11 *vertexBuffer = VertexBuffer11::makeVertexBuffer11(attributes[i].vertexBuffer);
             Buffer11 *bufferStorage = attributes[i].storage ? Buffer11::makeBuffer11(attributes[i].storage) : NULL;
 
-            buffer = bufferStorage ? bufferStorage->getBuffer(BUFFER_USAGE_VERTEX_OR_TRANSFORM_FEEDBACK)
+            buffer = bufferStorage ? bufferStorage->getBuffer(BUFFER_USAGE_VERTEX)
                                    : vertexBuffer->getBuffer();
         }
 
diff --git a/src/libGLESv2/renderer/d3d/d3d11/Renderer11.cpp b/src/libGLESv2/renderer/d3d/d3d11/Renderer11.cpp
index f6098a4..ed60209 100644
--- a/src/libGLESv2/renderer/d3d/d3d11/Renderer11.cpp
+++ b/src/libGLESv2/renderer/d3d/d3d11/Renderer11.cpp
@@ -968,7 +968,7 @@
         if (transformFeedbackBuffers[i])
         {
             Buffer11 *storage = Buffer11::makeBuffer11(transformFeedbackBuffers[i]->getImplementation());
-            ID3D11Buffer *buffer = storage->getBuffer(BUFFER_USAGE_VERTEX_OR_TRANSFORM_FEEDBACK);
+            ID3D11Buffer *buffer = storage->getBuffer(BUFFER_USAGE_TRANSFORM_FEEDBACK);
 
             d3dBuffers[i] = buffer;
             d3dOffsets[i] = (mAppliedTFBuffers[i] != buffer) ? static_cast<UINT>(offsets[i]) : -1;