Improve TranslatedVertexBuffer::map performance
TRAC #11392
- For small draws, load stride 0 streams into regular streaming buffer
Signed-off-by: Shannon Woods
Signed-off-by: Daniel Koch
Author: Andrew Lewycky
git-svn-id: https://angleproject.googlecode.com/svn/trunk@265 736b8ea6-26fd-11df-bfd4-992fa37f6226
diff --git a/src/libGLESv2/geometry/VertexDataManager.cpp b/src/libGLESv2/geometry/VertexDataManager.cpp
index 25e63a8..acac7dc 100644
--- a/src/libGLESv2/geometry/VertexDataManager.cpp
+++ b/src/libGLESv2/geometry/VertexDataManager.cpp
@@ -20,6 +20,8 @@
namespace
{
enum { INITIAL_STREAM_BUFFER_SIZE = 1024*1024 };
+ enum { MAX_CURRENT_VALUE_EXPANSION = 16 };
+ enum { CURRENT_VALUES_REQUIRED_SPACE = 4 * sizeof(float) * gl::MAX_VERTEX_ATTRIBS * MAX_CURRENT_VALUE_EXPANSION };
}
namespace gl
@@ -31,7 +33,7 @@
mStreamBuffer = mBackend->createVertexBuffer(INITIAL_STREAM_BUFFER_SIZE);
try
{
- mCurrentValueBuffer = mBackend->createVertexBufferForStrideZero(4*sizeof(float)*MAX_VERTEX_ATTRIBS);
+ mCurrentValueBuffer = mBackend->createVertexBufferForStrideZero(4 * sizeof(float) * MAX_VERTEX_ATTRIBS);
}
catch (...)
{
@@ -77,8 +79,18 @@
translated[i].enabled = activeAttribs[i];
}
- processNonArrayAttributes(attribs, activeAttribs, translated);
+ bool usesCurrentValues = false;
+ for (int i = 0; i < MAX_VERTEX_ATTRIBS; i++)
+ {
+ if (activeAttribs[i] && !attribs[i].mEnabled)
+ {
+ usesCurrentValues = true;
+ break;
+ }
+ }
+
+ // Handle the identity-mapped attributes.
// Process array attributes.
std::size_t requiredSpace = 0;
@@ -91,6 +103,11 @@
}
}
+ if (usesCurrentValues)
+ {
+ requiredSpace += CURRENT_VALUES_REQUIRED_SPACE;
+ }
+
if (requiredSpace > mStreamBuffer->size())
{
std::size_t newSize = std::max(requiredSpace, 3 * mStreamBuffer->size() / 2); // 1.5 x mStreamBuffer->size() is arbitrary and should be checked to see we don't have too many reallocations.
@@ -145,30 +162,12 @@
}
}
- return GL_NO_ERROR;
-}
-
-void VertexDataManager::reloadCurrentValues(const AttributeState *attribs, std::size_t *offset)
-{
- if (mDirtyCurrentValues)
+ if (usesCurrentValues)
{
- std::size_t totalSize = 4 * sizeof(float) * MAX_VERTEX_ATTRIBS;
-
- mCurrentValueBuffer->reserveSpace(totalSize);
-
- float* p = static_cast<float*>(mCurrentValueBuffer->map(totalSize, &mCurrentValueOffset));
-
- for (int i = 0; i < MAX_VERTEX_ATTRIBS; i++)
- {
- memcpy(&p[i*4], attribs[i].mCurrentValue, sizeof(attribs[i].mCurrentValue)); // FIXME: this should be doing a translation. This assumes that GL_FLOATx4 is supported.
- }
-
- mCurrentValueBuffer->unmap();
-
- mDirtyCurrentValues = false;
+ processNonArrayAttributes(attribs, activeAttribs, translated, count);
}
- *offset = mCurrentValueOffset;
+ return GL_NO_ERROR;
}
std::size_t VertexDataManager::typeSize(GLenum type) const
@@ -214,37 +213,67 @@
return roundUp(size, 4 * sizeof(GLfloat));
}
-void VertexDataManager::processNonArrayAttributes(const AttributeState *attribs, const std::bitset<MAX_VERTEX_ATTRIBS> &activeAttribs, TranslatedAttribute *translated)
+void VertexDataManager::processNonArrayAttributes(const AttributeState *attribs, const std::bitset<MAX_VERTEX_ATTRIBS> &activeAttribs, TranslatedAttribute *translated, std::size_t count)
{
- bool usesCurrentValues = false;
-
- for (int i = 0; i < MAX_VERTEX_ATTRIBS; i++)
+ if (count <= MAX_CURRENT_VALUE_EXPANSION)
{
- if (activeAttribs[i] && !attribs[i].mEnabled)
+ if (mDirtyCurrentValues || mCurrentValueLoadBuffer != mStreamBuffer)
{
- usesCurrentValues = true;
- break;
+ float *p = static_cast<float*>(mStreamBuffer->map(CURRENT_VALUES_REQUIRED_SPACE, &mCurrentValueOffset));
+
+ for (int i = 0; i < MAX_VERTEX_ATTRIBS; i++)
+ {
+ float *out = p + MAX_CURRENT_VALUE_EXPANSION * 4 * i;
+ for (unsigned int j = 0; j < MAX_CURRENT_VALUE_EXPANSION; j++)
+ {
+ *out++ = attribs[i].mCurrentValue[0];
+ *out++ = attribs[i].mCurrentValue[1];
+ *out++ = attribs[i].mCurrentValue[2];
+ *out++ = attribs[i].mCurrentValue[3];
+ }
+ }
+
+ mStreamBuffer->unmap();
+
+ mCurrentValueLoadBuffer = mStreamBuffer;
+ mCurrentValueSize = MAX_CURRENT_VALUE_EXPANSION;
+ mCurrentValueStride = 4 * sizeof(float);
+ }
+ }
+ else
+ {
+ if (mDirtyCurrentValues || mCurrentValueLoadBuffer != mCurrentValueBuffer)
+ {
+ std::size_t totalSize = 4 * sizeof(float) * MAX_VERTEX_ATTRIBS;
+
+ mCurrentValueBuffer->reserveSpace(totalSize);
+
+ float* p = static_cast<float*>(mCurrentValueBuffer->map(totalSize, &mCurrentValueOffset));
+
+ for (int i = 0; i < MAX_VERTEX_ATTRIBS; i++)
+ {
+ memcpy(&p[i*4], attribs[i].mCurrentValue, sizeof(attribs[i].mCurrentValue)); // FIXME: this should be doing a translation. This assumes that GL_FLOATx4 is supported.
+ }
+
+ mCurrentValueBuffer->unmap();
+
+ mCurrentValueLoadBuffer = mCurrentValueBuffer;
+ mCurrentValueSize = 1;
+ mCurrentValueStride = 0;
}
}
- if (usesCurrentValues)
+ for (std::size_t i = 0; i < MAX_VERTEX_ATTRIBS; i++)
{
- std::size_t currentValueOffset;
-
- reloadCurrentValues(attribs, &currentValueOffset);
-
- for (std::size_t i = 0; i < MAX_VERTEX_ATTRIBS; i++)
+ if (activeAttribs[i] && !attribs[i].mEnabled)
{
- if (activeAttribs[i] && !attribs[i].mEnabled)
- {
- translated[i].buffer = mCurrentValueBuffer;
+ translated[i].buffer = mCurrentValueLoadBuffer;
- translated[i].type = GL_FLOAT;
- translated[i].size = 4;
- translated[i].normalized = false;
- translated[i].stride = 0;
- translated[i].offset = currentValueOffset + 4 * sizeof(float) * i;
- }
+ translated[i].type = GL_FLOAT;
+ translated[i].size = 4;
+ translated[i].normalized = false;
+ translated[i].stride = mCurrentValueStride;
+ translated[i].offset = mCurrentValueOffset + 4 * sizeof(float) * i * mCurrentValueSize;
}
}
}
diff --git a/src/libGLESv2/geometry/VertexDataManager.h b/src/libGLESv2/geometry/VertexDataManager.h
index 7f5e72c..b5583bb 100644
--- a/src/libGLESv2/geometry/VertexDataManager.h
+++ b/src/libGLESv2/geometry/VertexDataManager.h
@@ -43,9 +43,7 @@
private:
std::bitset<MAX_VERTEX_ATTRIBS> getActiveAttribs() const;
- void reloadCurrentValues(const AttributeState *attribs, std::size_t *offset);
-
- void processNonArrayAttributes(const AttributeState *attribs, const std::bitset<MAX_VERTEX_ATTRIBS> &activeAttribs, TranslatedAttribute *translated);
+ void processNonArrayAttributes(const AttributeState *attribs, const std::bitset<MAX_VERTEX_ATTRIBS> &activeAttribs, TranslatedAttribute *translated, std::size_t count);
std::size_t typeSize(GLenum type) const;
std::size_t interpretGlStride(const AttributeState &attrib) const;
@@ -61,6 +59,9 @@
bool mDirtyCurrentValues;
std::size_t mCurrentValueOffset; // Offset within mCurrentValueBuffer that the current attribute values were last loaded at.
TranslatedVertexBuffer *mCurrentValueBuffer;
+ TranslatedVertexBuffer *mCurrentValueLoadBuffer;
+ std::size_t mCurrentValueStride;
+ std::size_t mCurrentValueSize;
};
}