Reland "Workaround for Mac Intel drawArraysInstanced with first > 0"

This is a reland of 027bc47ca5b7b291fbda907173eefa05ad3d45a8

Original change's description:
> Workaround for Mac Intel drawArraysInstanced with first > 0
>
> Work around the bug by forcing instanced arrays (divisor > 0) to be
> streamed as attributes with an extra offset applied at the front.
> Restore the original attribute bindings when first == 0 and for
> other draw calls (drawElementsInstanced).
>
> Bug: chromium:1144207, chromium:1144247, chromium:1144373
> Change-Id: Ie7836cc71b45a290513f34f90d49bd15b14ddba8
> Reviewed-on: https://chromium-review.googlesource.com/c/angle/angle/+/2661095
> Commit-Queue: Shrek Shao <shrekshao@google.com>
> Reviewed-by: Geoff Lang <geofflang@chromium.org>

Bug: chromium:1144207
Bug: chromium:1144247
Bug: chromium:1144373
Bug: angleproject:5271
Change-Id: Id0b818b25a605376c98c2366c1f2029e2490c6cb
Reviewed-on: https://chromium-review.googlesource.com/c/angle/angle/+/2704799
Commit-Queue: Shrek Shao <shrekshao@google.com>
Reviewed-by: Geoff Lang <geofflang@chromium.org>
diff --git a/include/platform/FeaturesGL.h b/include/platform/FeaturesGL.h
index 8e2145b..91c9cd1 100644
--- a/include/platform/FeaturesGL.h
+++ b/include/platform/FeaturesGL.h
@@ -522,6 +522,13 @@
     Feature initFragmentOutputVariables = {
         "init_fragment_output_variables", FeatureCategory::OpenGLWorkarounds,
         "No init gl_FragColor causes context lost", &members, "http://crbug.com/1171371"};
+
+    // On macOS with Intel GPUs, instanced arrays with divisor > 0 are buggy when first > 0 in
+    // drawArraysInstanced. Shift the attributes with an extra offset to work around it.
+    Feature shiftInstancedArrayDataWithExtraOffset = {
+        "shift_instanced_array_data_with_offset", FeatureCategory::OpenGLWorkarounds,
+        "glDrawArraysInstanced is buggy on certain new Mac Intel GPUs", &members,
+        "http://crbug.com/1144207"};
 };
 
 inline FeaturesGL::FeaturesGL()  = default;
diff --git a/src/libANGLE/renderer/gl/ContextGL.cpp b/src/libANGLE/renderer/gl/ContextGL.cpp
index 39475fe..e6dc913 100644
--- a/src/libANGLE/renderer/gl/ContextGL.cpp
+++ b/src/libANGLE/renderer/gl/ContextGL.cpp
@@ -210,7 +210,9 @@
                                                          GLsizei count,
                                                          GLsizei instanceCount)
 {
-    if (context->getStateCache().hasAnyActiveClientAttrib())
+    const angle::FeaturesGL &features = getFeaturesGL();
+    if (context->getStateCache().hasAnyActiveClientAttrib() ||
+        (features.shiftInstancedArrayDataWithExtraOffset.enabled && first > 0))
     {
         const gl::State &glState                = context->getState();
         const gl::ProgramExecutable *executable = getState().getProgramExecutable();
@@ -224,8 +226,16 @@
         vaoGL->validateState(context);
 #endif  // ANGLE_STATE_VALIDATION_ENABLED
     }
+    else if (features.shiftInstancedArrayDataWithExtraOffset.enabled && first == 0)
+    {
+        // A previous draw call may have modified the attributes.
+        // Instead of forcefully streaming attributes, we just rebind the original ones.
+        const gl::State &glState   = context->getState();
+        const gl::VertexArray *vao = glState.getVertexArray();
+        const VertexArrayGL *vaoGL = GetImplAs<VertexArrayGL>(vao);
+        vaoGL->recoverForcedStreamingAttributesForDrawArraysInstanced(context);
+    }
 
-    const angle::FeaturesGL &features = getFeaturesGL();
     if (features.setPrimitiveRestartFixedIndexForDrawArrays.enabled)
     {
         StateManagerGL *stateManager           = getStateManager();
@@ -248,6 +258,15 @@
     const gl::VertexArray *vao              = glState.getVertexArray();
     const gl::StateCache &stateCache        = context->getStateCache();
 
+    const angle::FeaturesGL &features = getFeaturesGL();
+    if (features.shiftInstancedArrayDataWithExtraOffset.enabled)
+    {
+        // There might be instanced arrays that were forced to stream for drawArraysInstanced.
+        // They cannot be ELEMENT_ARRAY_BUFFER.
+        const VertexArrayGL *vaoGL = GetImplAs<VertexArrayGL>(vao);
+        vaoGL->recoverForcedStreamingAttributesForDrawArraysInstanced(context);
+    }
+
     if (stateCache.hasAnyActiveClientAttrib() || vao->getElementArrayBuffer() == nullptr)
     {
         const VertexArrayGL *vaoGL = GetImplAs<VertexArrayGL>(vao);
@@ -260,7 +279,6 @@
         *outIndices = indices;
     }
 
-    const angle::FeaturesGL &features = getFeaturesGL();
     if (glState.isPrimitiveRestartEnabled() && features.emulatePrimitiveRestartFixedIndex.enabled)
     {
         StateManagerGL *stateManager = getStateManager();
diff --git a/src/libANGLE/renderer/gl/RendererGL.cpp b/src/libANGLE/renderer/gl/RendererGL.cpp
index c11ec9c..53ba7f8 100644
--- a/src/libANGLE/renderer/gl/RendererGL.cpp
+++ b/src/libANGLE/renderer/gl/RendererGL.cpp
@@ -457,4 +457,9 @@
     return mValid;
 }
 
+void RendererGL::handleGPUSwitch()
+{
+    nativegl_gl::ReInitializeFeaturesAtGPUSwitch(mFunctions.get(), &mFeatures);
+}
+
 }  // namespace rx
diff --git a/src/libANGLE/renderer/gl/RendererGL.h b/src/libANGLE/renderer/gl/RendererGL.h
index 983361e..692bca6 100644
--- a/src/libANGLE/renderer/gl/RendererGL.h
+++ b/src/libANGLE/renderer/gl/RendererGL.h
@@ -135,6 +135,8 @@
     void setNeedsFlushBeforeDeleteTextures();
     void flushIfNecessaryBeforeDeleteTextures();
 
+    void handleGPUSwitch();
+
   protected:
     virtual WorkerContext *createWorkerContext(std::string *infoLog) = 0;
 
diff --git a/src/libANGLE/renderer/gl/VertexArrayGL.cpp b/src/libANGLE/renderer/gl/VertexArrayGL.cpp
index f04c010..15b84e8 100644
--- a/src/libANGLE/renderer/gl/VertexArrayGL.cpp
+++ b/src/libANGLE/renderer/gl/VertexArrayGL.cpp
@@ -103,6 +103,7 @@
     {
         mAppliedAttributes.emplace_back(i);
     }
+    mForcedStreamingAttributesFirstOffsets.fill(0);
 }
 
 VertexArrayGL::~VertexArrayGL() {}
@@ -170,6 +171,7 @@
     // Determine if an index buffer needs to be streamed and the range of vertices that need to be
     // copied
     IndexRange indexRange;
+    const angle::FeaturesGL &features = GetFeaturesGL(context);
     if (type != gl::DrawElementsType::InvalidEnum)
     {
         ANGLE_TRY(syncIndexData(context, count, type, indices, primitiveRestartEnabled,
@@ -180,11 +182,47 @@
         // Not an indexed call, set the range to [first, first + count - 1]
         indexRange.start = first;
         indexRange.end   = first + count - 1;
+
+        if (features.shiftInstancedArrayDataWithExtraOffset.enabled && first > 0)
+        {
+            gl::AttributesMask updatedStreamingAttribsMask = needsStreamingAttribs;
+            auto candidateAttributesMask =
+                mInstancedAttributesMask & mProgramActiveAttribLocationsMask;
+            for (auto attribIndex : candidateAttributesMask)
+            {
+
+                if (mForcedStreamingAttributesFirstOffsets[attribIndex] != first)
+                {
+                    updatedStreamingAttribsMask.set(attribIndex);
+                    mForcedStreamingAttributesForDrawArraysInstancedMask.set(attribIndex);
+                    mForcedStreamingAttributesFirstOffsets[attribIndex] = first;
+                }
+            }
+
+            // We need to recover any attributes whose divisor used to be > 0 but has since been
+            // reset to 0.
+            auto forcedStreamingAttributesNeedRecoverMask =
+                candidateAttributesMask ^ mForcedStreamingAttributesForDrawArraysInstancedMask;
+            if (forcedStreamingAttributesNeedRecoverMask.any())
+            {
+                recoverForcedStreamingAttributesForDrawArraysInstanced(
+                    context, &forcedStreamingAttributesNeedRecoverMask);
+                mForcedStreamingAttributesForDrawArraysInstancedMask = candidateAttributesMask;
+            }
+
+            if (updatedStreamingAttribsMask.any())
+            {
+                ANGLE_TRY(streamAttributes(context, updatedStreamingAttribsMask, instanceCount,
+                                           indexRange, true));
+            }
+            return angle::Result::Continue;
+        }
     }
 
     if (needsStreamingAttribs.any())
     {
-        ANGLE_TRY(streamAttributes(context, needsStreamingAttribs, instanceCount, indexRange));
+        ANGLE_TRY(
+            streamAttributes(context, needsStreamingAttribs, instanceCount, indexRange, false));
     }
 
     return angle::Result::Continue;
@@ -302,10 +340,12 @@
     }
 }
 
-angle::Result VertexArrayGL::streamAttributes(const gl::Context *context,
-                                              const gl::AttributesMask &attribsToStream,
-                                              GLsizei instanceCount,
-                                              const gl::IndexRange &indexRange) const
+angle::Result VertexArrayGL::streamAttributes(
+    const gl::Context *context,
+    const gl::AttributesMask &attribsToStream,
+    GLsizei instanceCount,
+    const gl::IndexRange &indexRange,
+    bool applyExtraOffsetWorkaroundForInstancedAttributes) const
 {
     const FunctionsGL *functions = GetFunctionsGL(context);
     StateManagerGL *stateManager = GetStateManagerGL(context);
@@ -329,8 +369,9 @@
     }
 
     // If first is greater than zero, a slack space needs to be left at the beginning of the buffer
-    // so that the same 'first' argument can be passed into the draw call.
-    const size_t bufferEmptySpace   = maxAttributeDataSize * indexRange.start;
+    // for each attribute so that the same 'first' argument can be passed into the draw call.
+    const size_t bufferEmptySpace =
+        attribsToStream.count() * maxAttributeDataSize * indexRange.start;
     const size_t requiredBufferSize = streamingDataSize + bufferEmptySpace;
 
     stateManager->bindBuffer(gl::BufferBinding::Array, mStreamingArrayBuffer);
@@ -351,7 +392,7 @@
     {
         uint8_t *bufferPointer = MapBufferRangeWithFallback(functions, GL_ARRAY_BUFFER, 0,
                                                             requiredBufferSize, GL_MAP_WRITE_BIT);
-        size_t curBufferOffset = bufferEmptySpace;
+        size_t curBufferOffset = maxAttributeDataSize * indexRange.start;
 
         const auto &attribs  = mState.getVertexAttributes();
         const auto &bindings = mState.getVertexBindings();
@@ -364,7 +405,9 @@
             const auto &binding = bindings[attrib.bindingIndex];
 
             GLuint adjustedDivisor = GetAdjustedDivisor(mAppliedNumViews, binding.getDivisor());
-            const size_t streamedVertexCount = ComputeVertexBindingElementCount(
+            // streamedVertexCount is only going to be modified by
+            // shiftInstancedArrayDataWithExtraOffset workaround, otherwise it's const
+            size_t streamedVertexCount = ComputeVertexBindingElementCount(
                 adjustedDivisor, indexRange.vertexCount(), instanceCount);
 
             const size_t sourceStride = ComputeVertexAttributeStride(attrib, binding);
@@ -372,31 +415,83 @@
 
             // Vertices do not apply the 'start' offset when the divisor is non-zero even when doing
             // a non-instanced draw call
-            const size_t firstIndex = adjustedDivisor == 0 ? indexRange.start : 0;
+            const size_t firstIndex =
+                (adjustedDivisor == 0 || applyExtraOffsetWorkaroundForInstancedAttributes)
+                    ? indexRange.start
+                    : 0;
 
             // Attributes using client memory ignore the VERTEX_ATTRIB_BINDING state.
             // https://www.opengl.org/registry/specs/ARB/vertex_attrib_binding.txt
             const uint8_t *inputPointer = static_cast<const uint8_t *>(attrib.pointer);
+            // store batchMemcpySize since streamedVertexCount could be changed by workaround
+            const size_t batchMemcpySize = destStride * streamedVertexCount;
+
+            size_t batchMemcpyInputOffset                    = sourceStride * firstIndex;
+            bool needsUnmapAndRebindStreamingAttributeBuffer = false;
+            size_t firstIndexForSeparateCopy                 = firstIndex;
+
+            if (applyExtraOffsetWorkaroundForInstancedAttributes && adjustedDivisor > 0)
+            {
+                const size_t originalStreamedVertexCount = streamedVertexCount;
+                streamedVertexCount =
+                    (instanceCount + indexRange.start + adjustedDivisor - 1u) / adjustedDivisor;
+
+                const size_t copySize =
+                    sourceStride *
+                    originalStreamedVertexCount;  // the real data in the buffer we are streaming
+
+                const gl::Buffer *bindingBufferPointer = binding.getBuffer().get();
+                if (!bindingBufferPointer)
+                {
+                    if (!inputPointer)
+                    {
+                        continue;
+                    }
+                    inputPointer = static_cast<const uint8_t *>(attrib.pointer);
+                }
+                else
+                {
+                    needsUnmapAndRebindStreamingAttributeBuffer = true;
+                    const auto buffer = GetImplAs<BufferGL>(bindingBufferPointer);
+                    stateManager->bindBuffer(gl::BufferBinding::Array, buffer->getBufferID());
+                    // The workaround is only for latest Mac Intel so glMapBufferRange should be
+                    // supported
+                    ASSERT(CanMapBufferForRead(functions));
+                    uint8_t *inputBufferPointer = MapBufferRangeWithFallback(
+                        functions, GL_ARRAY_BUFFER, binding.getOffset(), copySize, GL_MAP_READ_BIT);
+                    ASSERT(inputBufferPointer);
+                    inputPointer = inputBufferPointer;
+                }
+
+                batchMemcpyInputOffset    = 0;
+                firstIndexForSeparateCopy = 0;
+            }
 
             // Pack the data when copying it, user could have supplied a very large stride that
             // would cause the buffer to be much larger than needed.
             if (destStride == sourceStride)
             {
                 // Can copy in one go, the data is packed
-                memcpy(bufferPointer + curBufferOffset, inputPointer + (sourceStride * firstIndex),
-                       destStride * streamedVertexCount);
+                memcpy(bufferPointer + curBufferOffset, inputPointer + batchMemcpyInputOffset,
+                       batchMemcpySize);
             }
             else
             {
-                // Copy each vertex individually
                 for (size_t vertexIdx = 0; vertexIdx < streamedVertexCount; vertexIdx++)
                 {
-                    uint8_t *out      = bufferPointer + curBufferOffset + (destStride * vertexIdx);
-                    const uint8_t *in = inputPointer + sourceStride * (vertexIdx + firstIndex);
+                    uint8_t *out = bufferPointer + curBufferOffset + (destStride * vertexIdx);
+                    const uint8_t *in =
+                        inputPointer + sourceStride * (vertexIdx + firstIndexForSeparateCopy);
                     memcpy(out, in, destStride);
                 }
             }
 
+            if (needsUnmapAndRebindStreamingAttributeBuffer)
+            {
+                ANGLE_GL_TRY(context, functions->unmapBuffer(GL_ARRAY_BUFFER));
+                stateManager->bindBuffer(gl::BufferBinding::Array, mStreamingArrayBuffer);
+            }
+
             // Compute where the 0-index vertex would be.
             const size_t vertexStartOffset = curBufferOffset - (firstIndex * destStride);
 
@@ -414,7 +509,10 @@
             mAppliedBindings[idx].setOffset(static_cast<GLintptr>(vertexStartOffset));
             mAppliedBindings[idx].setBuffer(context, nullptr);
 
-            curBufferOffset += destStride * streamedVertexCount;
+            // There's maxAttributeDataSize * indexRange.start of empty space allocated for each
+            // streaming attribute
+            curBufferOffset +=
+                destStride * streamedVertexCount + maxAttributeDataSize * indexRange.start;
         }
 
         unmapResult = functions->unmapBuffer(GL_ARRAY_BUFFER);
@@ -425,6 +523,56 @@
     return angle::Result::Continue;
 }
 
+void VertexArrayGL::recoverForcedStreamingAttributesForDrawArraysInstanced(
+    const gl::Context *context) const
+{
+    recoverForcedStreamingAttributesForDrawArraysInstanced(
+        context, &mForcedStreamingAttributesForDrawArraysInstancedMask);
+}
+
+void VertexArrayGL::recoverForcedStreamingAttributesForDrawArraysInstanced(
+    const gl::Context *context,
+    gl::AttributesMask *attributeMask) const
+{
+    if (attributeMask->none())
+    {
+        return;
+    }
+
+    StateManagerGL *stateManager = GetStateManagerGL(context);
+
+    stateManager->bindVertexArray(mVertexArrayID, getAppliedElementArrayBufferID());
+
+    const auto &attribs  = mState.getVertexAttributes();
+    const auto &bindings = mState.getVertexBindings();
+    for (auto idx : *attributeMask)
+    {
+        const auto &attrib = attribs[idx];
+        ASSERT(IsVertexAttribPointerSupported(idx, attrib));
+
+        const auto &binding = bindings[attrib.bindingIndex];
+        const auto buffer   = GetImplAs<BufferGL>(binding.getBuffer().get());
+        stateManager->bindBuffer(gl::BufferBinding::Array, buffer->getBufferID());
+
+        callVertexAttribPointer(context, static_cast<GLuint>(idx), attrib,
+                                static_cast<GLsizei>(binding.getStride()),
+                                static_cast<GLintptr>(binding.getOffset()));
+
+        // Restore the state to track their original buffers
+        mAppliedAttributes[idx].format = attrib.format;
+
+        mAppliedAttributes[idx].relativeOffset = 0;
+        mAppliedAttributes[idx].bindingIndex   = static_cast<GLuint>(attrib.bindingIndex);
+
+        mAppliedBindings[idx].setStride(binding.getStride());
+        mAppliedBindings[idx].setOffset(binding.getOffset());
+        mAppliedBindings[idx].setBuffer(context, binding.getBuffer().get());
+    }
+
+    attributeMask->reset();
+    mForcedStreamingAttributesFirstOffsets.fill(0);
+}
+
 GLuint VertexArrayGL::getVertexArrayID() const
 {
     return mVertexArrayID;
@@ -648,6 +796,16 @@
     }
 
     mAppliedBindings[bindingIndex].setDivisor(adjustedDivisor);
+
+    if (adjustedDivisor > 0)
+    {
+        mInstancedAttributesMask.set(bindingIndex);
+    }
+    else if (mInstancedAttributesMask.test(bindingIndex))
+    {
+        // divisor is reset to 0
+        mInstancedAttributesMask.reset(bindingIndex);
+    }
 }
 
 void VertexArrayGL::syncDirtyAttrib(const gl::Context *context,
diff --git a/src/libANGLE/renderer/gl/VertexArrayGL.h b/src/libANGLE/renderer/gl/VertexArrayGL.h
index 7c35122..a038612 100644
--- a/src/libANGLE/renderer/gl/VertexArrayGL.h
+++ b/src/libANGLE/renderer/gl/VertexArrayGL.h
@@ -57,6 +57,8 @@
 
     void validateState(const gl::Context *context) const;
 
+    void recoverForcedStreamingAttributesForDrawArraysInstanced(const gl::Context *context) const;
+
   private:
     angle::Result syncDrawState(const gl::Context *context,
                                 const gl::AttributesMask &activeAttributesMask,
@@ -90,7 +92,8 @@
     angle::Result streamAttributes(const gl::Context *context,
                                    const gl::AttributesMask &attribsToStream,
                                    GLsizei instanceCount,
-                                   const gl::IndexRange &indexRange) const;
+                                   const gl::IndexRange &indexRange,
+                                   bool applyExtraOffsetWorkaroundForInstancedAttributes) const;
     void syncDirtyAttrib(const gl::Context *context,
                          size_t attribIndex,
                          const gl::VertexArray::DirtyAttribBits &dirtyAttribBits);
@@ -116,6 +119,10 @@
                                  GLsizei stride,
                                  GLintptr offset) const;
 
+    void recoverForcedStreamingAttributesForDrawArraysInstanced(
+        const gl::Context *context,
+        gl::AttributesMask *attributeMask) const;
+
     GLuint mVertexArrayID;
     int mAppliedNumViews;
 
@@ -133,6 +140,11 @@
 
     mutable size_t mStreamingArrayBufferSize;
     mutable GLuint mStreamingArrayBuffer;
+
+    // Used for Mac Intel instanced draw workaround
+    mutable gl::AttributesMask mForcedStreamingAttributesForDrawArraysInstancedMask;
+    mutable gl::AttributesMask mInstancedAttributesMask;
+    mutable std::array<GLint, gl::MAX_VERTEX_ATTRIBS> mForcedStreamingAttributesFirstOffsets;
 };
 
 ANGLE_INLINE angle::Result VertexArrayGL::syncDrawElementsState(
diff --git a/src/libANGLE/renderer/gl/cgl/DisplayCGL.mm b/src/libANGLE/renderer/gl/cgl/DisplayCGL.mm
index 811fadd..e818a65 100644
--- a/src/libANGLE/renderer/gl/cgl/DisplayCGL.mm
+++ b/src/libANGLE/renderer/gl/cgl/DisplayCGL.mm
@@ -658,6 +658,8 @@
             CGLSetCurrentContext(mContext);
             onStateChange(angle::SubjectMessage::SubjectChanged);
             mCurrentGPUID = gpuID;
+
+            mRenderer->handleGPUSwitch();
         }
     }
 
diff --git a/src/libANGLE/renderer/gl/renderergl_utils.cpp b/src/libANGLE/renderer/gl/renderergl_utils.cpp
index ddcad4a..f23f191 100644
--- a/src/libANGLE/renderer/gl/renderergl_utils.cpp
+++ b/src/libANGLE/renderer/gl/renderergl_utils.cpp
@@ -1647,23 +1647,33 @@
     extensions->yuvTargetEXT = functions->hasGLESExtension("GL_EXT_YUV_target");
 }
 
+bool GetSystemInfoVendorIDAndDeviceID(const FunctionsGL *functions,
+                                      angle::SystemInfo *outSystemInfo,
+                                      angle::VendorID *outVendor,
+                                      angle::DeviceID *outDevice)
+{
+    bool isGetSystemInfoSuccess = angle::GetSystemInfo(outSystemInfo);
+    if (isGetSystemInfoSuccess && !outSystemInfo->gpus.empty())
+    {
+        *outVendor = outSystemInfo->gpus[outSystemInfo->activeGPUIndex].vendorId;
+        *outDevice = outSystemInfo->gpus[outSystemInfo->activeGPUIndex].deviceId;
+    }
+    else
+    {
+        *outVendor = GetVendorID(functions);
+        *outDevice = GetDeviceID(functions);
+    }
+    return isGetSystemInfoSuccess;
+}
+
 void InitializeFeatures(const FunctionsGL *functions, angle::FeaturesGL *features)
 {
     angle::VendorID vendor;
     angle::DeviceID device;
-
     angle::SystemInfo systemInfo;
-    bool isGetSystemInfoSuccess = angle::GetSystemInfo(&systemInfo);
-    if (isGetSystemInfoSuccess && !systemInfo.gpus.empty())
-    {
-        vendor = systemInfo.gpus[systemInfo.activeGPUIndex].vendorId;
-        device = systemInfo.gpus[systemInfo.activeGPUIndex].deviceId;
-    }
-    else
-    {
-        vendor = GetVendorID(functions);
-        device = GetDeviceID(functions);
-    }
+
+    bool isGetSystemInfoSuccess =
+        GetSystemInfoVendorIDAndDeviceID(functions, &systemInfo, &vendor, &device);
 
     bool isAMD      = IsAMD(vendor);
     bool isIntel    = IsIntel(vendor);
@@ -1931,6 +1941,12 @@
     // If output variable gl_FragColor is written by fragment shader, it may cause context lost with
     // Adreno 42x and 3xx.
     ANGLE_FEATURE_CONDITION(features, initFragmentOutputVariables, IsAdreno42xOr3xx(functions));
+
+    // http://crbug.com/1144207
+    // The Mac bot with Intel Iris GPU seems unaffected by this bug. Exclude the Haswell family for
+    // now.
+    ANGLE_FEATURE_CONDITION(features, shiftInstancedArrayDataWithExtraOffset,
+                            IsApple() && IsIntel(vendor) && !IsHaswell(device));
 }
 
 void InitializeFrontendFeatures(const FunctionsGL *functions, angle::FrontendFeatures *features)
@@ -1943,6 +1959,22 @@
     ANGLE_FEATURE_CONDITION(features, syncFramebufferBindingsOnTexImage, false);
 }
 
+void ReInitializeFeaturesAtGPUSwitch(const FunctionsGL *functions, angle::FeaturesGL *features)
+{
+    angle::VendorID vendor;
+    angle::DeviceID device;
+    angle::SystemInfo systemInfo;
+
+    GetSystemInfoVendorIDAndDeviceID(functions, &systemInfo, &vendor, &device);
+
+    // http://crbug.com/1144207
+    // The Mac bot with Intel Iris GPU seems unaffected by this bug. Exclude the Haswell family for
+    // now.
+    // We need to reinitialize this feature when switching between buggy and non-buggy GPUs.
+    ANGLE_FEATURE_CONDITION(features, shiftInstancedArrayDataWithExtraOffset,
+                            IsApple() && IsIntel(vendor) && !IsHaswell(device));
+}
+
 }  // namespace nativegl_gl
 
 namespace nativegl
diff --git a/src/libANGLE/renderer/gl/renderergl_utils.h b/src/libANGLE/renderer/gl/renderergl_utils.h
index 1a2d87a..7fad4dc 100644
--- a/src/libANGLE/renderer/gl/renderergl_utils.h
+++ b/src/libANGLE/renderer/gl/renderergl_utils.h
@@ -106,6 +106,7 @@
 
 void InitializeFeatures(const FunctionsGL *functions, angle::FeaturesGL *features);
 void InitializeFrontendFeatures(const FunctionsGL *functions, angle::FrontendFeatures *features);
+void ReInitializeFeaturesAtGPUSwitch(const FunctionsGL *functions, angle::FeaturesGL *features);
 }  // namespace nativegl_gl
 
 namespace nativegl
diff --git a/src/tests/gl_tests/InstancingTest.cpp b/src/tests/gl_tests/InstancingTest.cpp
index a9b5639..9aa18aa 100644
--- a/src/tests/gl_tests/InstancingTest.cpp
+++ b/src/tests/gl_tests/InstancingTest.cpp
@@ -122,10 +122,6 @@
         // Unknown problem.  FL9_3 is not officially supported anyway.
         ANGLE_SKIP_TEST_IF(IsD3D11_FL93() && geometry == Quad && draw == NonIndexed);
 
-        // http://anglebug.com/5271
-        ANGLE_SKIP_TEST_IF(IsOSX() && IsIntelUHD630Mobile() && IsDesktopOpenGL() &&
-                           draw == NonIndexed && offset != 0);
-
         // The window is divided into kMaxDrawn slices of size kDrawSize.
         // The slice drawn into is determined by the instance datum.
         // The instance data array selects all the slices in order.