Reland "Tightly pack LinkedUniform by using int16_t"
This is a reland of commit 152cf62b38874238095a91307e4ea9bcdedf8f46
Original change's description:
> Tightly pack LinkedUniform by using int16_t
>
> There is a check of vector size when we link uniforms and the maximum
> vector size is 4096 due to we clamp the maxUniformBlockSize to 64KB. In
> reality, if we exceeds this number, program link will take really long
> time and then hit failure. So there is no real need to keep all the
> variables in 32 bit integer. This CL changes to 16 bit integer. Further,
> sh::BlockMemberInfo and ActiveVariable data members are embeded into
> LinkedUniform struct as well so that the unused variables can be removed
> and data can be tightly packed. This also makes LinkedUniform easier to
> maintain as a simple struct with basic data types. With this change,
> LinkedUniform size is reduced from 108 bytes down to 60 bytes, 48 bytes
> reduction. Given some apps has 200-ish uniforms, this CL reduces 48
> bytes x 200 = ~9K memory just for uniforms per program (which goes
> through hash compute and decompression and file reads).
>
> Bug: b/275102061
> Change-Id: I7fae20f5b75f3239305e2094a992e3040b8c8e4c
> Reviewed-on: https://chromium-review.googlesource.com/c/angle/angle/+/4754133
> Reviewed-by: Shahbaz Youssefi <syoussefi@chromium.org>
> Commit-Queue: Charlie Lao <cclao@google.com>
Bug: b/275102061
Change-Id: I1cdec9407e930608d3239a104dcbf77c8d8e2113
Reviewed-on: https://chromium-review.googlesource.com/c/angle/angle/+/4791661
Reviewed-by: Shahbaz Youssefi <syoussefi@chromium.org>
Commit-Queue: Charlie Lao <cclao@google.com>
diff --git a/src/common/angleutils.h b/src/common/angleutils.h
index 529f442..8a6ecec 100644
--- a/src/common/angleutils.h
+++ b/src/common/angleutils.h
@@ -60,7 +60,7 @@
class KeyEqual = std::equal_to<Key>>
using HashMap = std::unordered_map<Key, T, Hash, KeyEqual>;
template <typename Key, class Hash = std::hash<Key>, class KeyEqual = std::equal_to<Key>>
-using HashSet = std::unordered_set<Key, Hash, KeyEqual>;
+using HashSet = std::unordered_set<Key, Hash, KeyEqual>;
# if __cpp_lib_generic_unordered_lookup >= 201811L
# define ANGLE_HAS_HASH_MAP_GENERIC_LOOKUP 1
# else
@@ -401,6 +401,15 @@
bool mUseMutex;
};
+// Helper macro that casts to a bitfield type then verifies no bits were dropped.
+#define SetBitField(lhs, rhs) \
+ do \
+ { \
+ auto ANGLE_LOCAL_VAR = rhs; \
+ lhs = static_cast<typename std::decay<decltype(lhs)>::type>(ANGLE_LOCAL_VAR); \
+ ASSERT(static_cast<decltype(ANGLE_LOCAL_VAR)>(lhs) == ANGLE_LOCAL_VAR); \
+ } while (0)
+
// snprintf is not defined with MSVC prior to to msvc14
#if defined(_MSC_VER) && _MSC_VER < 1900
# define snprintf _snprintf
diff --git a/src/libANGLE/ProgramExecutable.cpp b/src/libANGLE/ProgramExecutable.cpp
index 9711550..2f87992 100644
--- a/src/libANGLE/ProgramExecutable.cpp
+++ b/src/libANGLE/ProgramExecutable.cpp
@@ -1672,9 +1672,9 @@
{
// The arrays of arrays are flattened to arrays, it needs to record the array offset for
// the correct binding image unit.
- mImageBindings.emplace_back(ImageBinding(
- imageUniform.getBinding() + imageUniform.parentArrayIndex() * arraySize,
- imageUniform.getBasicTypeElementCount(), textureType));
+ mImageBindings.emplace_back(
+ ImageBinding(imageUniform.getBinding() + imageUniform.parentArrayIndex * arraySize,
+ imageUniform.getBasicTypeElementCount(), textureType));
}
*combinedImageUniforms += imageUniform.activeShaderCount() * arraySize;
@@ -1711,13 +1711,14 @@
{
auto &uniform = mUniforms[index];
- uniform.blockInfo.offset = uniform.getOffset();
- uniform.blockInfo.arrayStride = uniform.isArray() ? 4 : 0;
- uniform.blockInfo.matrixStride = 0;
- uniform.blockInfo.isRowMajorMatrix = false;
+ uniform.blockOffset = uniform.getOffset();
+ uniform.blockArrayStride = uniform.isArray() ? 4 : 0;
+ uniform.blockMatrixStride = 0;
+ uniform.flagBits.blockIsRowMajorMatrix = false;
+ uniform.flagBits.isBlock = true;
bool found = false;
- for (unsigned int bufferIndex = 0; bufferIndex < getActiveAtomicCounterBufferCount();
+ for (uint16_t bufferIndex = 0; bufferIndex < getActiveAtomicCounterBufferCount();
++bufferIndex)
{
auto &buffer = mAtomicCounterBuffers[bufferIndex];
@@ -1726,7 +1727,7 @@
buffer.memberIndexes.push_back(index);
uniform.bufferIndex = bufferIndex;
found = true;
- buffer.unionReferencesWith(uniform.activeVariable);
+ buffer.unionReferencesWith(uniform);
break;
}
}
@@ -1735,9 +1736,9 @@
AtomicCounterBuffer atomicCounterBuffer;
atomicCounterBuffer.binding = uniform.getBinding();
atomicCounterBuffer.memberIndexes.push_back(index);
- atomicCounterBuffer.unionReferencesWith(uniform.activeVariable);
+ atomicCounterBuffer.unionReferencesWith(uniform);
mAtomicCounterBuffers.push_back(atomicCounterBuffer);
- uniform.bufferIndex = static_cast<int>(getActiveAtomicCounterBufferCount() - 1);
+ uniform.bufferIndex = static_cast<uint16_t>(getActiveAtomicCounterBufferCount() - 1);
}
}
diff --git a/src/libANGLE/Uniform.cpp b/src/libANGLE/Uniform.cpp
index 5a8e386..187b4ec 100644
--- a/src/libANGLE/Uniform.cpp
+++ b/src/libANGLE/Uniform.cpp
@@ -30,26 +30,21 @@
mIds[shaderType] = id;
}
-void ActiveVariable::unionReferencesWith(const ActiveVariable &other)
+void ActiveVariable::unionReferencesWith(const LinkedUniform &other)
{
mActiveUseBits |= other.mActiveUseBits;
for (const ShaderType shaderType : AllShaderTypes())
{
- ASSERT(mIds[shaderType] == 0 || other.mIds[shaderType] == 0 ||
- mIds[shaderType] == other.mIds[shaderType]);
+ ASSERT(mIds[shaderType] == 0 || other.getId(shaderType) == 0 ||
+ mIds[shaderType] == other.getId(shaderType));
if (mIds[shaderType] == 0)
{
- mIds[shaderType] = other.mIds[shaderType];
+ mIds[shaderType] = other.getId(shaderType);
}
}
}
-LinkedUniform::LinkedUniform()
-{
- memset(this, 0, sizeof(*this));
- blockInfo = sh::BlockMemberInfo();
- activeVariable = ActiveVariable();
-}
+LinkedUniform::LinkedUniform() {}
LinkedUniform::LinkedUniform(GLenum typeIn,
GLenum precisionIn,
@@ -60,26 +55,27 @@
const int bufferIndexIn,
const sh::BlockMemberInfo &blockInfoIn)
{
- memset(this, 0, sizeof(*this));
- // Note: Ensure every data member is initialized.
- type = typeIn;
- precision = precisionIn;
- imageUnitFormat = GL_NONE;
- location = locationIn;
- binding = bindingIn;
- offset = offsetIn;
- bufferIndex = bufferIndexIn;
- blockInfo = blockInfoIn;
- activeVariable = ActiveVariable();
- id = 0;
- flattenedOffsetInParentArrays = -1;
- outerArraySizeProduct = 1;
- outerArrayOffset = 0;
- arraySize = arraySizesIn.empty() ? 1 : arraySizesIn[0];
-
- flagBitsAsUInt = 0;
- flagBits.isArray = !arraySizesIn.empty();
+ // arrays are always flattened, which means at most 1D array
ASSERT(arraySizesIn.size() <= 1);
+
+ memset(this, 0, sizeof(*this));
+ SetBitField(type, typeIn);
+ SetBitField(precision, precisionIn);
+ location = locationIn;
+ SetBitField(binding, bindingIn);
+ SetBitField(offset, offsetIn);
+ SetBitField(bufferIndex, bufferIndexIn);
+ outerArraySizeProduct = 1;
+ SetBitField(arraySize, arraySizesIn.empty() ? 1u : arraySizesIn[0]);
+ SetBitField(flagBits.isArray, !arraySizesIn.empty());
+ if (!(blockInfoIn == sh::kDefaultBlockMemberInfo))
+ {
+ flagBits.isBlock = 1;
+ flagBits.blockIsRowMajorMatrix = blockInfoIn.isRowMajorMatrix;
+ SetBitField(blockOffset, blockInfoIn.offset);
+ SetBitField(blockArrayStride, blockInfoIn.arrayStride);
+ SetBitField(blockMatrixStride, blockInfoIn.matrixStride);
+ }
}
LinkedUniform::LinkedUniform(const LinkedUniform &other)
@@ -89,32 +85,37 @@
LinkedUniform::LinkedUniform(const UsedUniform &usedUniform)
{
- memset(this, 0, sizeof(*this));
-
ASSERT(!usedUniform.isArrayOfArrays());
ASSERT(!usedUniform.isStruct());
ASSERT(usedUniform.active);
+ ASSERT(usedUniform.blockInfo == sh::kDefaultBlockMemberInfo);
// Note: Ensure every data member is initialized.
- type = usedUniform.type;
- precision = usedUniform.precision;
- imageUnitFormat = usedUniform.imageUnitFormat;
- location = usedUniform.location;
- binding = usedUniform.binding;
- offset = usedUniform.offset;
- bufferIndex = usedUniform.bufferIndex;
- blockInfo = usedUniform.blockInfo;
- activeVariable = usedUniform.activeVariable;
- id = usedUniform.id;
- flattenedOffsetInParentArrays = usedUniform.getFlattenedOffsetInParentArrays();
- outerArraySizeProduct = ArraySizeProduct(usedUniform.outerArraySizes);
- outerArrayOffset = usedUniform.outerArrayOffset;
- arraySize = usedUniform.isArray() ? usedUniform.getArraySizeProduct() : 1u;
+ flagBitsAsUByte = 0;
+ SetBitField(type, usedUniform.type);
+ SetBitField(precision, usedUniform.precision);
+ SetBitField(imageUnitFormat, usedUniform.imageUnitFormat);
+ location = usedUniform.location;
+ blockOffset = 0;
+ blockArrayStride = 0;
+ blockMatrixStride = 0;
+ SetBitField(binding, usedUniform.binding);
+ SetBitField(offset, usedUniform.offset);
- flagBitsAsUInt = 0;
- flagBits.isFragmentInOut = usedUniform.isFragmentInOut;
- flagBits.texelFetchStaticUse = usedUniform.texelFetchStaticUse;
- flagBits.isArray = usedUniform.isArray();
+ SetBitField(bufferIndex, usedUniform.bufferIndex);
+ SetBitField(parentArrayIndex, usedUniform.parentArrayIndex());
+ SetBitField(outerArraySizeProduct, ArraySizeProduct(usedUniform.outerArraySizes));
+ SetBitField(outerArrayOffset, usedUniform.outerArrayOffset);
+ SetBitField(arraySize, usedUniform.isArray() ? usedUniform.getArraySizeProduct() : 1u);
+ SetBitField(flagBits.isArray, usedUniform.isArray());
+
+ id = usedUniform.id;
+ mActiveUseBits = usedUniform.activeVariable.activeShaders();
+ mIds = usedUniform.activeVariable.getIds();
+
+ SetBitField(flagBits.isFragmentInOut, usedUniform.isFragmentInOut);
+ SetBitField(flagBits.texelFetchStaticUse, usedUniform.texelFetchStaticUse);
+ ASSERT(!usedUniform.isArray() || arraySize == usedUniform.getArraySizeProduct());
}
LinkedUniform::~LinkedUniform() {}
diff --git a/src/libANGLE/Uniform.h b/src/libANGLE/Uniform.h
index 7a5ff1e..40fc82c 100644
--- a/src/libANGLE/Uniform.h
+++ b/src/libANGLE/Uniform.h
@@ -23,6 +23,7 @@
class BinaryOutputStream;
struct UniformTypeInfo;
struct UsedUniform;
+struct LinkedUniform;
// Note: keep this struct memcpy-able: i.e, a simple struct with basic types only and no virtual
// functions. LinkedUniform relies on this so that it can use memcpy to initialize uniform for
@@ -40,7 +41,7 @@
return static_cast<ShaderType>(ScanForward(mActiveUseBits.bits()));
}
void setActive(ShaderType shaderType, bool used, uint32_t id);
- void unionReferencesWith(const ActiveVariable &other);
+ void unionReferencesWith(const LinkedUniform &otherUniform);
bool isActive(ShaderType shaderType) const
{
ASSERT(shaderType != ShaderType::InvalidEnum);
@@ -49,7 +50,6 @@
const ShaderMap<uint32_t> &getIds() const { return mIds; }
uint32_t getId(ShaderType shaderType) const { return mIds[shaderType]; }
ShaderBitSet activeShaders() const { return mActiveUseBits; }
- GLuint activeShaderCount() const { return static_cast<GLuint>(mActiveUseBits.count()); }
private:
ShaderBitSet mActiveUseBits;
@@ -62,6 +62,7 @@
// not put any std::vector or objects with virtual functions in it.
// Helper struct representing a single shader uniform. Most of this structure's data member and
// access functions mirrors ShaderVariable; See ShaderVars.h for more info.
+ANGLE_ENABLE_STRUCT_PADDING_WARNINGS
struct LinkedUniform
{
LinkedUniform();
@@ -88,70 +89,82 @@
bool isFragmentInOut() const { return flagBits.isFragmentInOut; }
bool isArray() const { return flagBits.isArray; }
- unsigned int getBasicTypeElementCount() const
+ uint16_t getBasicTypeElementCount() const
{
ASSERT(flagBits.isArray || arraySize == 1u);
return arraySize;
}
GLenum getType() const { return type; }
- unsigned int getOuterArrayOffset() const { return outerArrayOffset; }
- unsigned int getOuterArraySizeProduct() const { return outerArraySizeProduct; }
- int getBinding() const { return binding; }
- int getOffset() const { return offset; }
- const sh::BlockMemberInfo &getBlockInfo() const { return blockInfo; }
+ uint16_t getOuterArrayOffset() const { return outerArrayOffset; }
+ uint16_t getOuterArraySizeProduct() const { return outerArraySizeProduct; }
+ int16_t getBinding() const { return binding; }
+ int16_t getOffset() const { return offset; }
int getBufferIndex() const { return bufferIndex; }
int getLocation() const { return location; }
GLenum getImageUnitFormat() const { return imageUnitFormat; }
- int parentArrayIndex() const
- {
- return flattenedOffsetInParentArrays != -1 ? flattenedOffsetInParentArrays : 0;
- }
-
ShaderType getFirstActiveShaderType() const
{
- return activeVariable.getFirstActiveShaderType();
+ return static_cast<ShaderType>(ScanForward(mActiveUseBits.bits()));
}
void setActive(ShaderType shaderType, bool used, uint32_t _id)
{
- activeVariable.setActive(shaderType, used, _id);
+ mActiveUseBits.set(shaderType, used);
+ mIds[shaderType] = id;
}
- bool isActive(ShaderType shaderType) const { return activeVariable.isActive(shaderType); }
- const ShaderMap<uint32_t> &getIds() const { return activeVariable.getIds(); }
- uint32_t getId(ShaderType shaderType) const { return activeVariable.getId(shaderType); }
- ShaderBitSet activeShaders() const { return activeVariable.activeShaders(); }
- GLuint activeShaderCount() const { return activeVariable.activeShaderCount(); }
+ bool isActive(ShaderType shaderType) const { return mActiveUseBits[shaderType]; }
+ const ShaderMap<uint32_t> &getIds() const { return mIds; }
+ uint32_t getId(ShaderType shaderType) const { return mIds[shaderType]; }
+ ShaderBitSet activeShaders() const { return mActiveUseBits; }
+ GLuint activeShaderCount() const { return static_cast<GLuint>(mActiveUseBits.count()); }
- sh::BlockMemberInfo blockInfo;
- ActiveVariable activeVariable;
+ uint16_t type;
+ uint16_t precision;
- GLenum type;
- GLenum precision;
- GLenum imageUnitFormat;
int location;
- int binding;
- int offset;
- uint32_t id;
- int flattenedOffsetInParentArrays;
- int bufferIndex;
- unsigned int outerArraySizeProduct;
- unsigned int outerArrayOffset;
- unsigned int arraySize;
+ // These are from sh::struct BlockMemberInfo struct. See locklayout.h for detail.
+ uint16_t blockOffset;
+ uint16_t blockArrayStride;
+
+ uint16_t blockMatrixStride;
+ uint16_t imageUnitFormat;
+
+ // maxUniformVectorsCount is 4K due to we clamp maxUniformBlockSize to 64KB. All of these
+ // variable should be enough to pack into 16 bits to reduce the size of mUniforms.
+ int16_t binding;
+ int16_t bufferIndex;
+
+ int16_t offset;
+ uint16_t arraySize;
+
+ uint16_t outerArraySizeProduct;
+ uint16_t outerArrayOffset;
+
+ uint16_t parentArrayIndex;
union
{
struct
{
- uint32_t isFragmentInOut : 1;
- uint32_t texelFetchStaticUse : 1;
- uint32_t isArray : 1;
- uint32_t padding : 29;
+ uint8_t isFragmentInOut : 1;
+ uint8_t texelFetchStaticUse : 1;
+ uint8_t isArray : 1;
+ uint8_t blockIsRowMajorMatrix : 1;
+ uint8_t isBlock : 1;
+ uint8_t padding : 3;
} flagBits;
-
- uint32_t flagBitsAsUInt;
+ uint8_t flagBitsAsUByte;
};
+ ShaderBitSet mActiveUseBits;
+
+ uint32_t id;
+
+ // The id of a linked variable in each shader stage. This id originates from
+ // sh::ShaderVariable::id or sh::InterfaceBlock::id
+ ShaderMap<uint32_t> mIds;
};
+ANGLE_DISABLE_STRUCT_PADDING_WARNINGS
struct BufferVariable : public sh::ShaderVariable
{
@@ -195,9 +208,9 @@
{
activeVariable.setActive(shaderType, used, _id);
}
- void unionReferencesWith(const ActiveVariable &other)
+ void unionReferencesWith(const LinkedUniform &otherUniform)
{
- activeVariable.unionReferencesWith(other);
+ activeVariable.unionReferencesWith(otherUniform);
}
bool isActive(ShaderType shaderType) const { return activeVariable.isActive(shaderType); }
const ShaderMap<uint32_t> &getIds() const { return activeVariable.getIds(); }
diff --git a/src/libANGLE/queryutils.cpp b/src/libANGLE/queryutils.cpp
index 8db8bb7..3ff9535 100644
--- a/src/libANGLE/queryutils.cpp
+++ b/src/libANGLE/queryutils.cpp
@@ -1926,16 +1926,16 @@
return (uniform.isAtomicCounter() ? -1 : uniform.getBufferIndex());
case GL_OFFSET:
- return uniform.getBlockInfo().offset;
+ return uniform.flagBits.isBlock ? uniform.blockOffset : -1;
case GL_ARRAY_STRIDE:
- return uniform.getBlockInfo().arrayStride;
+ return uniform.flagBits.isBlock ? uniform.blockArrayStride : -1;
case GL_MATRIX_STRIDE:
- return uniform.getBlockInfo().matrixStride;
+ return uniform.flagBits.isBlock ? uniform.blockMatrixStride : -1;
case GL_IS_ROW_MAJOR:
- return static_cast<GLint>(uniform.getBlockInfo().isRowMajorMatrix);
+ return uniform.flagBits.blockIsRowMajorMatrix ? 1 : 0;
case GL_REFERENCED_BY_VERTEX_SHADER:
return uniform.isActive(ShaderType::Vertex);
diff --git a/src/libANGLE/renderer/d3d/ProgramD3D.cpp b/src/libANGLE/renderer/d3d/ProgramD3D.cpp
index f76e625..60b718a 100644
--- a/src/libANGLE/renderer/d3d/ProgramD3D.cpp
+++ b/src/libANGLE/renderer/d3d/ProgramD3D.cpp
@@ -2874,7 +2874,7 @@
std::vector<unsigned int> subscripts;
const std::string baseName = gl::ParseResourceName(d3dUniform->name, &subscripts);
unsigned int registerOffset =
- mState.getUniforms()[uniformIndex].parentArrayIndex() * d3dUniform->getArraySizeProduct();
+ mState.getUniforms()[uniformIndex].parentArrayIndex * d3dUniform->getArraySizeProduct();
bool hasUniform = false;
for (gl::ShaderType shaderType : gl::AllShaderTypes())
@@ -2976,7 +2976,7 @@
std::vector<unsigned int> subscripts;
const std::string baseName = gl::ParseResourceName(d3dUniform->name, &subscripts);
unsigned int registerOffset =
- mState.getUniforms()[uniformIndex].parentArrayIndex() * d3dUniform->getArraySizeProduct();
+ mState.getUniforms()[uniformIndex].parentArrayIndex * d3dUniform->getArraySizeProduct();
const gl::Shader *computeShader = mState.getAttachedShader(gl::ShaderType::Compute);
if (computeShader)
diff --git a/src/libANGLE/renderer/renderer_utils.h b/src/libANGLE/renderer/renderer_utils.h
index 65e3c3c..c795c3c 100644
--- a/src/libANGLE/renderer/renderer_utils.h
+++ b/src/libANGLE/renderer/renderer_utils.h
@@ -494,13 +494,4 @@
#define ANGLE_MARK_TRANSFORM_FEEDBACK_USAGE(instanced) \
ANGLE_MARK_TRANSFORM_FEEDBACK_USAGE##instanced
-// Helper macro that casts to a bitfield type then verifies no bits were dropped.
-#define SetBitField(lhs, rhs) \
- do \
- { \
- auto ANGLE_LOCAL_VAR = rhs; \
- lhs = static_cast<typename std::decay<decltype(lhs)>::type>(ANGLE_LOCAL_VAR); \
- ASSERT(static_cast<decltype(ANGLE_LOCAL_VAR)>(lhs) == ANGLE_LOCAL_VAR); \
- } while (0)
-
#endif // LIBANGLE_RENDERER_RENDERER_UTILS_H_