diff --git a/.appveyor.yml b/.appveyor.yml
index e7c4282..32e8f2b 100644
--- a/.appveyor.yml
+++ b/.appveyor.yml
@@ -66,7 +66,6 @@
     bin\glslangValidator.exe
     bin\spirv-remap.exe
     include\glslang\*
-    include\SPIRV\*
     lib\glslang%SUFFIX%.lib
     lib\HLSL%SUFFIX%.lib
     lib\OGLCompiler%SUFFIX%.lib
diff --git a/.travis.yml b/.travis.yml
index 2478912..1fa3fc0 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -99,7 +99,6 @@
       zip ${TARBALL}
         bin/glslangValidator
         include/glslang/*
-        include/SPIRV/*
         lib/libglslang${SUFFIX}.a
         lib/libHLSL${SUFFIX}.a
         lib/libOGLCompiler${SUFFIX}.a
diff --git a/BUILD.bazel b/BUILD.bazel
new file mode 100644
index 0000000..b1a1967
--- /dev/null
+++ b/BUILD.bazel
@@ -0,0 +1,245 @@
+package(
+    default_visibility = ["//visibility:public"],
+)
+
+# Description:
+#
+# Khronos reference front-end for GLSL and ESSL, and sample SPIR-V generator.
+
+licenses(["notice"])  # Mixed: BSD, MIT, Khronos, Apache 2.0
+
+exports_files(["LICENSE"])
+
+COMMON_COPTS = select({  # Compiler flags shared by every C++ target in this package.
+    "@bazel_tools//src/conditions:windows": [],  # No extra flags on Windows (empty list, not an empty-string flag).
+    "//conditions:default": [
+        "-Wall",
+        "-Wuninitialized",
+        "-Wunused",
+        "-Wunused-local-typedefs",
+        "-Wunused-parameter",
+        "-Wunused-value",
+        "-Wunused-variable",
+        "-Wno-reorder",
+        "-std=c++11",
+        "-fvisibility=hidden",
+        "-fvisibility-inlines-hidden",
+        "-fno-exceptions",
+        "-fno-rtti",
+    ],
+})
+
+cc_library(  # Core glslang front-end sources, including hlsl/ and one OS abstraction source.
+    name = "glslang",
+    srcs = glob(
+        [
+            "glslang/GenericCodeGen/*.cpp",
+            "glslang/MachineIndependent/*.cpp",
+            "glslang/MachineIndependent/preprocessor/*.cpp",
+            "hlsl/*.cpp",
+        ],
+        exclude = [  # Precompiled-header stubs are not part of the Bazel build.
+            "glslang/MachineIndependent/pch.cpp",
+            "glslang/MachineIndependent/pch.h",
+            "hlsl/pch.cpp",
+            "hlsl/pch.h",
+        ],
+    ) + [
+        "OGLCompilersDLL/InitializeDll.cpp",
+    ] + select({  # Exactly one OS-specific ossource.cpp is compiled.
+        "@bazel_tools//src/conditions:windows":
+            ["glslang/OSDependent/Windows/ossource.cpp"],
+        "//conditions:default":
+            ["glslang/OSDependent/Unix/ossource.cpp"],
+    }),
+    hdrs = glob([
+        "glslang/Include/*.h",
+        "glslang/MachineIndependent/*.h",
+        "glslang/MachineIndependent/preprocessor/*.h",
+        "hlsl/*.h",
+    ]) + [
+        "OGLCompilersDLL/InitializeDll.h",
+        "StandAlone/DirStackFileIncluder.h",
+        "glslang/OSDependent/osinclude.h",
+        "glslang/Public/ShaderLang.h",
+    ],
+    copts = COMMON_COPTS,
+    defines = [
+        "AMD_EXTENSIONS",
+        "ENABLE_HLSL=0",  # NOTE(review): hlsl/*.cpp is still compiled above; confirm "=0" is the intended value.
+        "ENABLE_OPT=0",
+        "NV_EXTENSIONS",
+    ],
+    linkopts = select({
+        "@bazel_tools//src/conditions:windows": [],  # No extra link flags on Windows.
+        "//conditions:default": ["-lm", "-lpthread"],
+    }),
+    linkstatic = 1,
+)
+
+genrule(  # Copies the listed SPIRV/ headers to include/SPIRV/ in the output tree.
+    name = "export_spirv_headers",
+    srcs = [
+        "SPIRV/GLSL.ext.AMD.h",
+        "SPIRV/GLSL.ext.EXT.h",
+        "SPIRV/GLSL.ext.KHR.h",
+        "SPIRV/GLSL.ext.NV.h",
+        "SPIRV/GLSL.std.450.h",
+        "SPIRV/spirv.hpp",
+    ],
+    outs = [  # Same base names as srcs, relocated under include/SPIRV/.
+        "include/SPIRV/GLSL.ext.AMD.h",
+        "include/SPIRV/GLSL.ext.EXT.h",
+        "include/SPIRV/GLSL.ext.KHR.h",
+        "include/SPIRV/GLSL.ext.NV.h",
+        "include/SPIRV/GLSL.std.450.h",
+        "include/SPIRV/spirv.hpp",
+    ],
+    cmd = "mkdir -p $(@D)/include/SPIRV && cp $(SRCS) $(@D)/include/SPIRV/",  # cp keeps base names, so outs must list every src.
+)
+
+cc_library(  # Header-only target exposing the copies produced by :export_spirv_headers.
+    name = "SPIRV_headers",
+    hdrs = [":export_spirv_headers"],
+    copts = COMMON_COPTS,
+    includes = [  # Dependents can include the headers as "SPIRV/<name>" or as plain "<name>".
+        "include",
+        "include/SPIRV",
+    ],
+    linkstatic = 1,
+)
+
+cc_library(  # SPIR-V generation library built from SPIRV/*.cpp on top of :glslang.
+    name = "SPIRV",
+    srcs = glob(
+        ["SPIRV/*.cpp"],
+        exclude = [
+            "SPIRV/SpvTools.cpp",  # NOTE(review): presumably omitted because SPIRV-Tools is not part of this build; confirm.
+        ],
+    ),
+    hdrs = [
+        "SPIRV/GlslangToSpv.h",
+        "SPIRV/Logger.h",
+        "SPIRV/SPVRemapper.h",
+        "SPIRV/SpvBuilder.h",
+        "SPIRV/SpvTools.h",
+        "SPIRV/bitutils.h",
+        "SPIRV/disassemble.h",
+        "SPIRV/doc.h",
+        "SPIRV/hex_float.h",
+        "SPIRV/spvIR.h",
+    ],
+    copts = COMMON_COPTS,
+    includes = ["SPIRV"],
+    linkopts = select({
+        "@bazel_tools//src/conditions:windows": [],  # No extra link flags on Windows.
+        "//conditions:default": ["-lm"],
+    }),
+    linkstatic = 1,
+    deps = [
+        ":SPIRV_headers",
+        ":glslang",
+    ],
+)
+
+cc_library(  # StandAlone/ResourceLimits.{cpp,h} packaged as a library on top of :glslang.
+    name = "glslang-default-resource-limits",
+    srcs = ["StandAlone/ResourceLimits.cpp"],
+    hdrs = ["StandAlone/ResourceLimits.h"],
+    copts = COMMON_COPTS,
+    linkstatic = 1,
+    deps = [":glslang"],
+)
+
+cc_binary(  # Command-line tool built from StandAlone/StandAlone.cpp.
+    name = "glslangValidator",
+    srcs = [
+        "StandAlone/StandAlone.cpp",
+        "StandAlone/Worklist.h",  # Private header, listed in srcs so it is not exported to dependents.
+    ],
+    copts = COMMON_COPTS,
+    deps = [
+        ":SPIRV",
+        ":glslang",
+        ":glslang-default-resource-limits",
+    ],
+)
+
+cc_binary(  # Command-line tool built from StandAlone/spirv-remap.cpp.
+    name = "spirv-remap",
+    srcs = ["StandAlone/spirv-remap.cpp"],
+    copts = COMMON_COPTS,
+    deps = [
+        ":SPIRV",
+        ":glslang",
+        ":glslang-default-resource-limits",  # NOTE(review): confirm the remapper actually needs the resource-limit library.
+    ],
+)
+
+filegroup(  # Everything under Test/ except the three listed entries; used as `data` by the test targets.
+    name = "test_files",
+    srcs = glob(
+        ["Test/**"],
+        exclude = [
+            "Test/bump",
+            "Test/glslangValidator",
+            "Test/runtests",
+        ],
+    ),
+)
+
+cc_library(  # gtest fixture, settings and main() shared by the generated *_test targets.
+    name = "glslang_test_lib",
+    testonly = 1,
+    srcs = [
+        "gtests/HexFloat.cpp",
+        "gtests/Initializer.h",
+        "gtests/Settings.cpp",
+        "gtests/Settings.h",
+        "gtests/TestFixture.cpp",
+        "gtests/TestFixture.h",
+        "gtests/main.cpp",
+    ],
+    copts = COMMON_COPTS,
+    data = [":test_files"],
+    defines = select({  # GLSLANG_TEST_DIRECTORY is passed as a quoted string literal.
+        # Unfortunately we can't use $(location) in cc_library at the moment.
+        # See https://github.com/bazelbuild/bazel/issues/1023
+        # So we'll specify the path manually.
+        "@bazel_tools//src/conditions:windows":
+            ["GLSLANG_TEST_DIRECTORY='\"../../../../../Test\"'"],  # NOTE(review): hop count presumably tracks the Windows test working directory; verify.
+        "//conditions:default":
+            ["GLSLANG_TEST_DIRECTORY='\"Test\"'"],
+    }),
+    linkstatic = 1,
+    deps = [
+        ":SPIRV",
+        ":glslang",
+        ":glslang-default-resource-limits",
+        "@com_google_googletest//:gtest",
+    ],
+)
+
+GLSLANG_TESTS = glob(
+    include = ["gtests/*.FromFile.cpp"],
+    # The Hlsl and Spv suites cannot run here because the SPIRV-Tools
+    # dependency is not built, so both files are left out.
+    exclude = [
+        "gtests/Hlsl.FromFile.cpp",
+        "gtests/Spv.FromFile.cpp",
+    ],
+)
+
+# One cc_test per remaining gtests/<Suite>.FromFile.cpp file, named
+# "<Suite>_test".
+[cc_test(
+    name = "%s_test" % src.replace(".FromFile.cpp", "").replace("gtests/", ""),
+    srcs = [src],
+    copts = COMMON_COPTS,
+    data = [":test_files"],
+    deps = [
+        ":SPIRV",
+        ":glslang",
+        ":glslang_test_lib",
+    ],
+) for src in GLSLANG_TESTS]
diff --git a/BUILD.gn b/BUILD.gn
index be9e1ab..77d596e 100644
--- a/BUILD.gn
+++ b/BUILD.gn
@@ -159,7 +159,7 @@
   }
 
   if (is_clang) {
-    cflags_cc = [
+    cflags = [
       "-Wno-extra-semi",
       "-Wno-ignored-qualifiers",
       "-Wno-implicit-fallthrough",
@@ -167,6 +167,7 @@
       "-Wno-sign-compare",
       "-Wno-unused-variable",
       "-Wno-missing-field-initializers",
+      "-Wno-newline-eof",
     ]
   }
   if (is_win && !is_clang) {
@@ -180,6 +181,9 @@
     "${spirv_tools_dir}:spvtools_opt",
     "${spirv_tools_dir}:spvtools_val",
   ]
+
+  configs -= [ "//build/config/compiler:chromium_code" ]
+  configs += [ "//build/config/compiler:no_chromium_code" ]
 }
 
 source_set("glslang_default_resource_limits_sources") {
@@ -191,6 +195,9 @@
     ":glslang_sources",
   ]
   public_configs = [ ":glslang_public" ]
+
+  configs -= [ "//build/config/compiler:chromium_code" ]
+  configs += [ "//build/config/compiler:no_chromium_code" ]
 }
 
 executable("glslang_validator") {
@@ -206,6 +213,9 @@
     ":glslang_default_resource_limits_sources",
     ":glslang_sources",
   ]
+
+  configs -= [ "//build/config/compiler:chromium_code" ]
+  configs += [ "//build/config/compiler:no_chromium_code" ]
 }
 
 executable("spirv-remap") {
@@ -216,4 +226,7 @@
   deps = [
     ":glslang_sources",
   ]
+
+  configs -= [ "//build/config/compiler:chromium_code" ]
+  configs += [ "//build/config/compiler:no_chromium_code" ]
 }
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 196194d..dcb2d0d 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -6,6 +6,9 @@
 endif()
 set_property(GLOBAL PROPERTY USE_FOLDERS ON)
 
+# Enable compile commands database
+set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
+
 # Adhere to GNU filesystem layout conventions
 include(GNUInstallDirs)
 
@@ -13,6 +16,7 @@
 include(CMakeDependentOption)
 
 option(BUILD_SHARED_LIBS "Build Shared Libraries" OFF)
+option(BUILD_EXTERNAL "Build external dependencies in /External" ON)
 
 set(LIB_TYPE STATIC)
 
@@ -28,13 +32,15 @@
 
 option(ENABLE_GLSLANG_BINARIES "Builds glslangValidator and spirv-remap" ON)
 
-option(ENABLE_GLSLANG_WEB "Reduces glslang to minumum needed for web use" OFF)
-option(ENABLE_EMSCRIPTEN_SINGLE_FILE "If using emscripten, enables SINGLE_FILE build" OFF)
-option(ENABLE_EMSCRIPTEN_ENVIRONMENT_NODE "If using emscripten, builds to run on Node instead of Web" OFF)
+option(ENABLE_GLSLANG_WEB "Reduces glslang to minimum needed for web use" OFF)
+option(ENABLE_GLSLANG_WEB_DEVEL "For ENABLE_GLSLANG_WEB builds, enables compilation error messages" OFF)
+option(ENABLE_EMSCRIPTEN_SINGLE_FILE "If using Emscripten, enables SINGLE_FILE build" OFF)
+option(ENABLE_EMSCRIPTEN_ENVIRONMENT_NODE "If using Emscripten, builds to run on Node instead of Web" OFF)
 
 CMAKE_DEPENDENT_OPTION(ENABLE_HLSL "Enables HLSL input support" ON "NOT ENABLE_GLSLANG_WEB" OFF)
 
 option(ENABLE_OPT "Enables spirv-opt capability if present" ON)
+option(ENABLE_PCH "Enables Precompiled header" ON)
 
 if(CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT AND WIN32)
     set(CMAKE_INSTALL_PREFIX "install" CACHE STRING "..." FORCE)
@@ -50,7 +56,7 @@
 
 # Precompiled header macro. Parameters are source file list and filename for pch cpp file.
 macro(glslang_pch SRCS PCHCPP)
-  if(MSVC AND CMAKE_GENERATOR MATCHES "^Visual Studio")
+  if(MSVC AND CMAKE_GENERATOR MATCHES "^Visual Studio" AND ENABLE_PCH)
     set(PCH_NAME "$(IntDir)\\pch.pch")
     # make source files use/depend on PCH_NAME
     set_source_files_properties(${${SRCS}} PROPERTIES COMPILE_FLAGS "/Yupch.h /FIpch.h /Fp${PCH_NAME} /Zm300" OBJECT_DEPENDS "${PCH_NAME}")
@@ -70,6 +76,9 @@
 
 if(ENABLE_GLSLANG_WEB)
     add_definitions(-DGLSLANG_WEB)
+    if(ENABLE_GLSLANG_WEB_DEVEL)
+        add_definitions(-DGLSLANG_WEB_DEVEL)
+    endif(ENABLE_GLSLANG_WEB_DEVEL)
 endif(ENABLE_GLSLANG_WEB)
 
 if(WIN32)
@@ -98,38 +107,29 @@
     add_compile_options(/GR-) # Disable RTTI
 endif()
 
-if(ENABLE_GLSLANG_WEB)
-    if(EMSCRIPTEN)
-        add_compile_options(-Os -fno-exceptions)
-        add_compile_options("SHELL: -s WASM=1")
-        add_compile_options("SHELL: -s WASM_OBJECT_FILES=0")
-        add_link_options(-Os)
-        add_link_options("SHELL: -s FILESYSTEM=0")
-        add_link_options("SHELL: --llvm-lto 1")
-        add_link_options("SHELL: --closure 1")
-        add_link_options("SHELL: -s ALLOW_MEMORY_GROWTH=1")
+if(EMSCRIPTEN)  # Emscripten builds get these flags whether or not ENABLE_GLSLANG_WEB is set.
+    add_compile_options(-Os -fno-exceptions)
+    add_compile_options("SHELL: -s WASM=1")
+    add_compile_options("SHELL: -s WASM_OBJECT_FILES=0")
+    add_link_options(-Os)
+    add_link_options("SHELL: -s FILESYSTEM=0")
+    add_link_options("SHELL: --llvm-lto 1")
+    add_link_options("SHELL: --closure 1")
+    add_link_options("SHELL: -s ALLOW_MEMORY_GROWTH=1")
 
-        add_link_options("SHELL: -s MODULARIZE=1")
-        if(ENABLE_EMSCRIPTEN_SINGLE_FILE)
-            add_link_options("SHELL: -s SINGLE_FILE=1")
-        endif(ENABLE_EMSCRIPTEN_SINGLE_FILE)
-
-        if(ENABLE_EMSCRIPTEN_ENVIRONMENT_NODE)
-            add_link_options("SHELL: -s ENVIRONMENT=node")
-            add_link_options("SHELL: -s BINARYEN_ASYNC_COMPILATION=0")
+    if(ENABLE_EMSCRIPTEN_SINGLE_FILE)
+        add_link_options("SHELL: -s SINGLE_FILE=1")
+    endif(ENABLE_EMSCRIPTEN_SINGLE_FILE)
+else()  # Non-Emscripten toolchains: only the web subset is size-optimized.
+    if(ENABLE_GLSLANG_WEB)
+        if(MSVC)
+            add_compile_options(/Os /GR-)  # NOTE(review): /GR- turns off RTTI, while the other branch turns off exceptions; confirm intent.
         else()
-            add_link_options("SHELL: -s ENVIRONMENT=web,worker")
-            add_link_options("SHELL: -s EXPORT_ES6=1")
+            add_compile_options(-Os -fno-exceptions)
+            add_link_options(-Os)
         endif()
-    else()
-      if(MSVC)
-        add_compile_options(/Os /GR-)
-      else()
-        add_compile_options(-Os -fno-exceptions)
-        add_link_options(-Os)
-      endif()
-    endif(EMSCRIPTEN)
-endif(ENABLE_GLSLANG_WEB)
+    endif(ENABLE_GLSLANG_WEB)
+endif(EMSCRIPTEN)
 
 # Request C++11
 if(${CMAKE_VERSION} VERSION_LESS 3.1)
@@ -153,12 +153,12 @@
 
 # CMake needs to find the right version of python, right from the beginning,
 # otherwise, it will find the wrong version and fail later
-if(IS_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/External)
+if(BUILD_EXTERNAL AND IS_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/External")  # skip External/ when disabled or absent
     find_package(PythonInterp 3 REQUIRED)
-endif()
 
-# We depend on these for later projects, so they should come first.
-add_subdirectory(External)
+    # We depend on these for later projects, so they should come first.
+    add_subdirectory(External)
+endif()
 
 if(NOT TARGET SPIRV-Tools-opt)
     set(ENABLE_OPT OFF)
@@ -184,3 +184,26 @@
     add_subdirectory(hlsl)
 endif(ENABLE_HLSL)
 add_subdirectory(gtests)
+
+if(BUILD_TESTING)
+    # glslang-testsuite runs a bash script on Windows.
+    # Make sure to use '-o igncr' flag to ignore carriage returns (\r).
+    set(IGNORE_CR_FLAG "")
+    if(WIN32)
+        set(IGNORE_CR_FLAG -o igncr)  # two-element list, so bash receives "-o" and "igncr" as separate arguments
+    endif()
+
+    if (CMAKE_CONFIGURATION_TYPES)  # multi-config generators: paths include a per-configuration subdirectory
+        set(RESULTS_PATH ${CMAKE_CURRENT_BINARY_DIR}/$<CONFIGURATION>/localResults)  # NOTE(review): $<CONFIGURATION> is the deprecated spelling of $<CONFIG>
+        set(VALIDATOR_PATH ${CMAKE_CURRENT_BINARY_DIR}/StandAlone/$<CONFIGURATION>/glslangValidator)
+        set(REMAP_PATH ${CMAKE_CURRENT_BINARY_DIR}/StandAlone/$<CONFIGURATION>/spirv-remap)
+    else(CMAKE_CONFIGURATION_TYPES)  # single-config generators: binaries sit directly under StandAlone/
+        set(RESULTS_PATH ${CMAKE_CURRENT_BINARY_DIR}/localResults)
+        set(VALIDATOR_PATH ${CMAKE_CURRENT_BINARY_DIR}/StandAlone/glslangValidator)
+        set(REMAP_PATH ${CMAKE_CURRENT_BINARY_DIR}/StandAlone/spirv-remap)
+    endif(CMAKE_CONFIGURATION_TYPES)
+
+    add_test(NAME glslang-testsuite  # runs Test/runtests with the results directory and both tool paths
+        COMMAND bash ${IGNORE_CR_FLAG} runtests ${RESULTS_PATH} ${VALIDATOR_PATH} ${REMAP_PATH}
+        WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/Test/)
+endif(BUILD_TESTING)
diff --git a/OGLCompilersDLL/CMakeLists.txt b/OGLCompilersDLL/CMakeLists.txt
index 5bb3f0e..af4ab58 100644
--- a/OGLCompilersDLL/CMakeLists.txt
+++ b/OGLCompilersDLL/CMakeLists.txt
@@ -9,6 +9,7 @@
 endif(WIN32)
 
 if(ENABLE_GLSLANG_INSTALL)
-    install(TARGETS OGLCompiler
+    install(TARGETS OGLCompiler EXPORT OGLCompilerTargets
             ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR})
+    install(EXPORT OGLCompilerTargets DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake)  # export file goes under the same libdir as the archive
 endif(ENABLE_GLSLANG_INSTALL)
diff --git a/README.md b/README.md
index 427cf5a..f5651e1 100755
--- a/README.md
+++ b/README.md
@@ -166,26 +166,30 @@
 With no arguments it builds the full grammar, and with a "web" argument,
 the web grammar subset (see more about the web subset in the next section).
 
-### WASM for the the Web
+### Building to WASM for the Web and Node
 
-Use the steps in [Build Steps](#build-steps), which following notes/exceptions:
+Use the steps in [Build Steps](#build-steps), with the following notes/exceptions:
 * For building the web subset of core glslang:
-  + `m4` also needs a `-DGLSLANG_WEB` argument, or simply execute `updateGrammar web` from the glslang subdirectory
-  + turn off the CMAKE options for `BUILD_TESTING`, `ENABLE_OPT`, and `INSTALL_GTEST`,
-    while turning on `ENABLE_GLSLANG_WEB`
+  + execute `updateGrammar web` from the glslang subdirectory
+    (or if using your own scripts, `m4` needs a `-DGLSLANG_WEB` argument)
+  + set `-DENABLE_HLSL=OFF -DBUILD_TESTING=OFF -DENABLE_OPT=OFF -DINSTALL_GTEST=OFF`
+  + turn on `-DENABLE_GLSLANG_WEB=ON`
+  + optionally, for GLSL compilation error messages, turn on `-DENABLE_GLSLANG_WEB_DEVEL=ON`
 * `emsdk` needs to be present in your executable search path, *PATH* for
   Bash-like enivironments
-  + Instructions located
-    [here](https://emscripten.org/docs/getting_started/downloads.html#sdk-download-and-install)
-* Do not checkout SPIRV-Tools into `External`
-  + Does not work correctly with emscripten out of the box and we don't want it
-    in the build anyway. *TBD* Have build ignore SPIRV-Tools for web build
-* Wrap call to `cmake` using `emconfigure` with ENABLE_GLSLANG_WEB=ON:
-  + e.g. For Linux, `emconfigure cmake -DCMAKE_BUILD_TYPE=Release
-    -DENABLE_GLSLANG_WEB=ON -DCMAKE_INSTALL_PREFIX="$(pwd)/install" ..`
-* To get a 'true' minimized build, make sure to use `brotli` to compress the .js
+  + [Instructions located
+    here](https://emscripten.org/docs/getting_started/downloads.html#sdk-download-and-install)
+* Wrap cmake call: `emcmake cmake`
+* To get a fully minimized build, make sure to use `brotli` to compress the .js
   and .wasm files
 
+Example:
+
+```sh
+emcmake cmake -DCMAKE_BUILD_TYPE=Release -DENABLE_GLSLANG_WEB=ON \
+    -DENABLE_HLSL=OFF -DBUILD_TESTING=OFF -DENABLE_OPT=OFF -DINSTALL_GTEST=OFF ..
+```
+
 Testing
 -------
 
@@ -303,7 +307,7 @@
 details. There is a block comment giving more detail above the calls for
 `setEnvInput, setEnvClient, and setEnvTarget`.
 
-### C Functional Interface (orignal)
+### C Functional Interface (original)
 
 This interface is in roughly the first 2/3 of `ShaderLang.h`, and referred to
 as the `Sh*()` interface, as all the entry points start `Sh`.
diff --git a/SPIRV/CMakeLists.txt b/SPIRV/CMakeLists.txt
index 94d2ebe..e25ec0a 100644
--- a/SPIRV/CMakeLists.txt
+++ b/SPIRV/CMakeLists.txt
@@ -36,7 +36,9 @@
 add_library(SPIRV ${LIB_TYPE} ${SOURCES} ${HEADERS})
 set_property(TARGET SPIRV PROPERTY FOLDER glslang)
 set_property(TARGET SPIRV PROPERTY POSITION_INDEPENDENT_CODE ON)
-target_include_directories(SPIRV PUBLIC ..)
+target_include_directories(SPIRV PUBLIC  # build tree: parent of this directory; installed package: the include dir
+    $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/..>
+    $<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}>)
 
 if (ENABLE_SPVREMAPPER)
     add_library(SPVRemapper ${LIB_TYPE} ${SPVREMAP_SOURCES} ${SPVREMAP_HEADERS})
@@ -57,7 +59,9 @@
         PRIVATE ${spirv-tools_SOURCE_DIR}/source
     )
     target_link_libraries(SPIRV glslang SPIRV-Tools-opt)
-    target_include_directories(SPIRV PUBLIC ../External)
+    target_include_directories(SPIRV PUBLIC  # External/ headers, split by build tree vs. installed package
+        $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/../External>
+        $<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}/External>)
 else()
     target_link_libraries(SPIRV glslang)
 endif(ENABLE_OPT)
@@ -70,22 +74,29 @@
 if(ENABLE_GLSLANG_INSTALL)
     if(BUILD_SHARED_LIBS)
         if (ENABLE_SPVREMAPPER)
-            install(TARGETS SPVRemapper
+            install(TARGETS SPVRemapper EXPORT SPVRemapperTargets
                     ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
                     LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR})
         endif()
-        install(TARGETS SPIRV
+        install(TARGETS SPIRV EXPORT SPIRVTargets
                 ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
                 LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
                 RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR})
     else()
         if (ENABLE_SPVREMAPPER)
-            install(TARGETS SPVRemapper
+            install(TARGETS SPVRemapper EXPORT SPVRemapperTargets
                     ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR})
         endif()
-        install(TARGETS SPIRV
+        install(TARGETS SPIRV EXPORT SPIRVTargets
                 ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR})
     endif()
+
+    # The SPVRemapper export set only exists when its target was installed above;
+    # install(EXPORT) on an unknown export set is a CMake error.
+    if (ENABLE_SPVREMAPPER)
+        install(EXPORT SPVRemapperTargets DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake)
+    endif()
+    install(EXPORT SPIRVTargets DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake)
 
-    install(FILES ${HEADERS} ${SPVREMAP_HEADERS} DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/SPIRV/)
+    install(FILES ${HEADERS} ${SPVREMAP_HEADERS} DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/glslang/SPIRV/)
 endif(ENABLE_GLSLANG_INSTALL)
diff --git a/SPIRV/GLSL.ext.KHR.h b/SPIRV/GLSL.ext.KHR.h
index 1140bef..e58e836 100644
--- a/SPIRV/GLSL.ext.KHR.h
+++ b/SPIRV/GLSL.ext.KHR.h
@@ -41,6 +41,7 @@
 static const char* const E_SPV_KHR_post_depth_coverage          = "SPV_KHR_post_depth_coverage";
 static const char* const E_SPV_KHR_vulkan_memory_model          = "SPV_KHR_vulkan_memory_model";
 static const char* const E_SPV_EXT_physical_storage_buffer      = "SPV_EXT_physical_storage_buffer";
+static const char* const E_SPV_KHR_physical_storage_buffer      = "SPV_KHR_physical_storage_buffer";
 static const char* const E_SPV_EXT_fragment_shader_interlock    = "SPV_EXT_fragment_shader_interlock";
 static const char* const E_SPV_KHR_shader_clock                 = "SPV_KHR_shader_clock";
 
diff --git a/SPIRV/GlslangToSpv.cpp b/SPIRV/GlslangToSpv.cpp
index 2c9fcd8..ae5f24d 100644
--- a/SPIRV/GlslangToSpv.cpp
+++ b/SPIRV/GlslangToSpv.cpp
@@ -100,11 +100,11 @@
     spv::Decoration precision;
 
 #ifdef GLSLANG_WEB
-        void addNoContraction(spv::Builder&, spv::Id) const { };
-        void addNonUniform(spv::Builder&, spv::Id) const { };
+        void addNoContraction(spv::Builder&, spv::Id) const { }
+        void addNonUniform(spv::Builder&, spv::Id) const { }
 #else
-        void addNoContraction(spv::Builder& builder, spv::Id t) { builder.addDecoration(t, noContraction); };
-        void addNonUniform(spv::Builder& builder, spv::Id t)  { builder.addDecoration(t, nonUniform); };
+        void addNoContraction(spv::Builder& builder, spv::Id t) { builder.addDecoration(t, noContraction); }
+        void addNonUniform(spv::Builder& builder, spv::Id t)  { builder.addDecoration(t, nonUniform); }
     protected:
         spv::Decoration noContraction;
         spv::Decoration nonUniform;
@@ -217,11 +217,6 @@
     bool isTrivial(const glslang::TIntermTyped* node);
     spv::Id createShortCircuit(glslang::TOperator, glslang::TIntermTyped& left, glslang::TIntermTyped& right);
     spv::Id getExtBuiltins(const char* name);
-    void addPre13Extension(const char* ext)
-    {
-        if (builder.getSpvVersion() < glslang::EShTargetSpv_1_3)
-            builder.addExtension(ext);
-    }
     std::pair<spv::Id, spv::Id> getForcedType(spv::BuiltIn, const glslang::TType&);
     spv::Id translateForcedType(spv::Id object);
     spv::Id createCompositeConstruct(spv::Id typeId, std::vector<spv::Id> constituents);
@@ -517,7 +512,7 @@
 {
 #ifndef GLSLANG_WEB
     if (qualifier.isNonUniform()) {
-        builder.addExtension("SPV_EXT_descriptor_indexing");
+        builder.addIncorporatedExtension("SPV_EXT_descriptor_indexing", spv::Spv_1_5);
         builder.addCapability(spv::CapabilityShaderNonUniformEXT);
         return spv::DecorationNonUniformEXT;
     } else
@@ -701,7 +696,7 @@
             glslangIntermediate->getStage() == EShLangTessControl ||
             glslangIntermediate->getStage() == EShLangTessEvaluation) {
 
-            builder.addExtension(spv::E_SPV_EXT_shader_viewport_index_layer);
+            builder.addIncorporatedExtension(spv::E_SPV_EXT_shader_viewport_index_layer, spv::Spv_1_5);
             builder.addCapability(spv::CapabilityShaderViewportIndexLayerEXT);
         }
         return spv::BuiltInViewportIndex;
@@ -726,23 +721,23 @@
             glslangIntermediate->getStage() == EShLangTessControl ||
             glslangIntermediate->getStage() == EShLangTessEvaluation) {
 
-            builder.addExtension(spv::E_SPV_EXT_shader_viewport_index_layer);
+            builder.addIncorporatedExtension(spv::E_SPV_EXT_shader_viewport_index_layer, spv::Spv_1_5);
             builder.addCapability(spv::CapabilityShaderViewportIndexLayerEXT);
         }
         return spv::BuiltInLayer;
 
     case glslang::EbvBaseVertex:
-        addPre13Extension(spv::E_SPV_KHR_shader_draw_parameters);
+        builder.addIncorporatedExtension(spv::E_SPV_KHR_shader_draw_parameters, spv::Spv_1_3);
         builder.addCapability(spv::CapabilityDrawParameters);
         return spv::BuiltInBaseVertex;
 
     case glslang::EbvBaseInstance:
-        addPre13Extension(spv::E_SPV_KHR_shader_draw_parameters);
+        builder.addIncorporatedExtension(spv::E_SPV_KHR_shader_draw_parameters, spv::Spv_1_3);
         builder.addCapability(spv::CapabilityDrawParameters);
         return spv::BuiltInBaseInstance;
 
     case glslang::EbvDrawId:
-        addPre13Extension(spv::E_SPV_KHR_shader_draw_parameters);
+        builder.addIncorporatedExtension(spv::E_SPV_KHR_shader_draw_parameters, spv::Spv_1_3);
         builder.addCapability(spv::CapabilityDrawParameters);
         return spv::BuiltInDrawIndex;
 
@@ -874,12 +869,12 @@
         return spv::BuiltInBaryCoordPullModelAMD;
 
     case glslang::EbvDeviceIndex:
-        addPre13Extension(spv::E_SPV_KHR_device_group);
+        builder.addIncorporatedExtension(spv::E_SPV_KHR_device_group, spv::Spv_1_3);
         builder.addCapability(spv::CapabilityDeviceGroup);
         return spv::BuiltInDeviceIndex;
 
     case glslang::EbvViewIndex:
-        addPre13Extension(spv::E_SPV_KHR_multiview);
+        builder.addIncorporatedExtension(spv::E_SPV_KHR_multiview, spv::Spv_1_3);
         builder.addCapability(spv::CapabilityMultiView);
         return spv::BuiltInViewIndex;
 
@@ -1192,7 +1187,7 @@
     }
 
     if (glslangIntermediate->usingStorageBuffer() && type.getQualifier().storage == glslang::EvqBuffer) {
-        addPre13Extension(spv::E_SPV_KHR_storage_buffer_storage_class);
+        builder.addIncorporatedExtension(spv::E_SPV_KHR_storage_buffer_storage_class, spv::Spv_1_3);
         return spv::StorageClassStorageBuffer;
     }
 
@@ -1253,13 +1248,13 @@
         // assume a dynamically uniform index
         if (baseType.getBasicType() == glslang::EbtSampler) {
             if (baseType.getQualifier().hasAttachment()) {
-                builder.addExtension("SPV_EXT_descriptor_indexing");
+                builder.addIncorporatedExtension("SPV_EXT_descriptor_indexing", spv::Spv_1_5);
                 builder.addCapability(spv::CapabilityInputAttachmentArrayDynamicIndexingEXT);
             } else if (baseType.isImage() && baseType.getSampler().isBuffer()) {
-                builder.addExtension("SPV_EXT_descriptor_indexing");
+                builder.addIncorporatedExtension("SPV_EXT_descriptor_indexing", spv::Spv_1_5);
                 builder.addCapability(spv::CapabilityStorageTexelBufferArrayDynamicIndexingEXT);
             } else if (baseType.isTexture() && baseType.getSampler().isBuffer()) {
-                builder.addExtension("SPV_EXT_descriptor_indexing");
+                builder.addIncorporatedExtension("SPV_EXT_descriptor_indexing", spv::Spv_1_5);
                 builder.addCapability(spv::CapabilityUniformTexelBufferArrayDynamicIndexingEXT);
             }
         }
@@ -1404,13 +1399,13 @@
 
     if (glslangIntermediate->usingPhysicalStorageBuffer()) {
         addressingModel = spv::AddressingModelPhysicalStorageBuffer64EXT;
-        builder.addExtension(spv::E_SPV_EXT_physical_storage_buffer);
+        builder.addIncorporatedExtension(spv::E_SPV_EXT_physical_storage_buffer, spv::Spv_1_5);
         builder.addCapability(spv::CapabilityPhysicalStorageBufferAddressesEXT);
     };
     if (glslangIntermediate->usingVulkanMemoryModel()) {
         memoryModel = spv::MemoryModelVulkanKHR;
         builder.addCapability(spv::CapabilityVulkanMemoryModelKHR);
-        builder.addExtension(spv::E_SPV_KHR_vulkan_memory_model);
+        builder.addIncorporatedExtension(spv::E_SPV_KHR_vulkan_memory_model, spv::Spv_1_5);
     }
     builder.setMemoryModel(addressingModel, memoryModel);
 
@@ -3242,11 +3237,11 @@
         switch (storageClass) {
         case spv::StorageClassInput:
         case spv::StorageClassOutput:
-            addPre13Extension(spv::E_SPV_KHR_16bit_storage);
+            builder.addIncorporatedExtension(spv::E_SPV_KHR_16bit_storage, spv::Spv_1_3);
             builder.addCapability(spv::CapabilityStorageInputOutput16);
             break;
         case spv::StorageClassUniform:
-            addPre13Extension(spv::E_SPV_KHR_16bit_storage);
+            builder.addIncorporatedExtension(spv::E_SPV_KHR_16bit_storage, spv::Spv_1_3);
             if (node->getType().getQualifier().storage == glslang::EvqBuffer)
                 builder.addCapability(spv::CapabilityStorageUniformBufferBlock16);
             else
@@ -3254,12 +3249,12 @@
             break;
 #ifndef GLSLANG_WEB
         case spv::StorageClassPushConstant:
-            addPre13Extension(spv::E_SPV_KHR_16bit_storage);
+            builder.addIncorporatedExtension(spv::E_SPV_KHR_16bit_storage, spv::Spv_1_3);
             builder.addCapability(spv::CapabilityStoragePushConstant16);
             break;
         case spv::StorageClassStorageBuffer:
         case spv::StorageClassPhysicalStorageBufferEXT:
-            addPre13Extension(spv::E_SPV_KHR_16bit_storage);
+            builder.addIncorporatedExtension(spv::E_SPV_KHR_16bit_storage, spv::Spv_1_3);
             builder.addCapability(spv::CapabilityStorageUniformBufferBlock16);
             break;
 #endif
@@ -3274,13 +3269,13 @@
 
     if (node->getType().contains8BitInt()) {
         if (storageClass == spv::StorageClassPushConstant) {
-            builder.addExtension(spv::E_SPV_KHR_8bit_storage);
+            builder.addIncorporatedExtension(spv::E_SPV_KHR_8bit_storage, spv::Spv_1_5);
             builder.addCapability(spv::CapabilityStoragePushConstant8);
         } else if (storageClass == spv::StorageClassUniform) {
-            builder.addExtension(spv::E_SPV_KHR_8bit_storage);
+            builder.addIncorporatedExtension(spv::E_SPV_KHR_8bit_storage, spv::Spv_1_5);
             builder.addCapability(spv::CapabilityUniformAndStorageBuffer8BitAccess);
         } else if (storageClass == spv::StorageClassStorageBuffer) {
-            builder.addExtension(spv::E_SPV_KHR_8bit_storage);
+            builder.addIncorporatedExtension(spv::E_SPV_KHR_8bit_storage, spv::Spv_1_5);
             builder.addCapability(spv::CapabilityStorageBuffer8BitAccess);
         } else {
             builder.addCapability(spv::CapabilityInt8);
@@ -3537,7 +3532,7 @@
         else {
 #ifndef GLSLANG_WEB
             if (!lastBufferBlockMember) {
-                builder.addExtension("SPV_EXT_descriptor_indexing");
+                builder.addIncorporatedExtension("SPV_EXT_descriptor_indexing", spv::Spv_1_5);
                 builder.addCapability(spv::CapabilityRuntimeDescriptorArrayEXT);
             }
             spvType = builder.makeRuntimeArray(spvType);
@@ -6278,6 +6273,13 @@
     case glslang::EOpConvPtrToUint64:
         convOp = spv::OpConvertPtrToU;
         break;
+    case glslang::EOpConvPtrToUvec2:
+    case glslang::EOpConvUvec2ToPtr:
+        if (builder.isVector(operand))
+            builder.promoteIncorporatedExtension(spv::E_SPV_EXT_physical_storage_buffer,
+                                                 spv::E_SPV_KHR_physical_storage_buffer, spv::Spv_1_5);
+        convOp = spv::OpBitcast;
+        break;
 #endif
 
     default:
@@ -6399,7 +6401,7 @@
         scopeId = builder.makeUintConstant(spv::ScopeDevice);
     }
     // semantics default to relaxed 
-    spv::Id semanticsId = builder.makeUintConstant(lvalueCoherentFlags.isVolatile() ? 
+    spv::Id semanticsId = builder.makeUintConstant(lvalueCoherentFlags.isVolatile() && glslangIntermediate->usingVulkanMemoryModel() ?
                                                     spv::MemorySemanticsVolatileMask :
                                                     spv::MemorySemanticsMaskNone);
     spv::Id semanticsId2 = semanticsId;
@@ -6837,8 +6839,9 @@
     default: assert(0 && "Unhandled subgroup operation!");
     }
 
-    const bool isUnsigned = typeProxy == glslang::EbtUint || typeProxy == glslang::EbtUint64;
-    const bool isFloat = typeProxy == glslang::EbtFloat || typeProxy == glslang::EbtDouble;
+
+    const bool isUnsigned = isTypeUnsignedInt(typeProxy);
+    const bool isFloat = isTypeFloat(typeProxy);
     const bool isBool = typeProxy == glslang::EbtBool;
 
     spv::Op opCode = spv::OpNop;
diff --git a/SPIRV/SpvBuilder.h b/SPIRV/SpvBuilder.h
index a99a0c3..55754f6 100644
--- a/SPIRV/SpvBuilder.h
+++ b/SPIRV/SpvBuilder.h
@@ -67,6 +67,7 @@
     Spv_1_2 = (1 << 16) | (2 << 8),
     Spv_1_3 = (1 << 16) | (3 << 8),
     Spv_1_4 = (1 << 16) | (4 << 8),
+    Spv_1_5 = (1 << 16) | (5 << 8),
 } SpvVersion;
 
 class Builder {
@@ -105,6 +106,20 @@
     void addModuleProcessed(const std::string& p) { moduleProcesses.push_back(p.c_str()); }
     void setEmitOpLines() { emitOpLines = true; }
     void addExtension(const char* ext) { extensions.insert(ext); }
+    void removeExtension(const char* ext)
+    {
+        extensions.erase(ext);
+    }
+    void addIncorporatedExtension(const char* ext, SpvVersion incorporatedVersion)
+    {
+        if (getSpvVersion() < static_cast<unsigned>(incorporatedVersion))
+            addExtension(ext);
+    }
+    void promoteIncorporatedExtension(const char* baseExt, const char* promoExt, SpvVersion incorporatedVersion)
+    {
+        removeExtension(baseExt);
+        addIncorporatedExtension(promoExt, incorporatedVersion);
+    }
     void addInclude(const std::string& name, const std::string& text)
     {
         spv::Id incId = getStringId(name);
diff --git a/SPIRV/SpvPostProcess.cpp b/SPIRV/SpvPostProcess.cpp
index 18765a3..832ee3e 100644
--- a/SPIRV/SpvPostProcess.cpp
+++ b/SPIRV/SpvPostProcess.cpp
@@ -363,12 +363,12 @@
         Instruction* type = groupedTypes[OpTypePointer][t];
         if (type->getImmediateOperand(0) == (unsigned)StorageClassPhysicalStorageBufferEXT) {
             if (containsType(type->getIdOperand(1), OpTypeInt, 8)) {
-                addExtension(spv::E_SPV_KHR_8bit_storage);
+                addIncorporatedExtension(spv::E_SPV_KHR_8bit_storage, spv::Spv_1_5);
                 addCapability(spv::CapabilityStorageBuffer8BitAccess);
             }
             if (containsType(type->getIdOperand(1), OpTypeInt, 16) ||
                 containsType(type->getIdOperand(1), OpTypeFloat, 16)) {
-                addExtension(spv::E_SPV_KHR_16bit_storage);
+                addIncorporatedExtension(spv::E_SPV_KHR_16bit_storage, spv::Spv_1_3);
                 addCapability(spv::CapabilityStorageBuffer16BitAccess);
             }
         }
diff --git a/SPIRV/spirv.hpp b/SPIRV/spirv.hpp
index 59fdece..1e96f7b 100644
--- a/SPIRV/spirv.hpp
+++ b/SPIRV/spirv.hpp
@@ -91,6 +91,7 @@
     AddressingModelLogical = 0,
     AddressingModelPhysical32 = 1,
     AddressingModelPhysical64 = 2,
+    AddressingModelPhysicalStorageBuffer64 = 5348,
     AddressingModelPhysicalStorageBuffer64EXT = 5348,
     AddressingModelMax = 0x7fffffff,
 };
@@ -99,6 +100,7 @@
     MemoryModelSimple = 0,
     MemoryModelGLSL450 = 1,
     MemoryModelOpenCL = 2,
+    MemoryModelVulkan = 3,
     MemoryModelVulkanKHR = 3,
     MemoryModelMax = 0x7fffffff,
 };
@@ -183,6 +185,7 @@
     StorageClassHitAttributeNV = 5339,
     StorageClassIncomingRayPayloadNV = 5342,
     StorageClassShaderRecordBufferNV = 5343,
+    StorageClassPhysicalStorageBuffer = 5349,
     StorageClassPhysicalStorageBufferEXT = 5349,
     StorageClassMax = 0x7fffffff,
 };
@@ -311,9 +314,13 @@
     ImageOperandsConstOffsetsShift = 5,
     ImageOperandsSampleShift = 6,
     ImageOperandsMinLodShift = 7,
+    ImageOperandsMakeTexelAvailableShift = 8,
     ImageOperandsMakeTexelAvailableKHRShift = 8,
+    ImageOperandsMakeTexelVisibleShift = 9,
     ImageOperandsMakeTexelVisibleKHRShift = 9,
+    ImageOperandsNonPrivateTexelShift = 10,
     ImageOperandsNonPrivateTexelKHRShift = 10,
+    ImageOperandsVolatileTexelShift = 11,
     ImageOperandsVolatileTexelKHRShift = 11,
     ImageOperandsSignExtendShift = 12,
     ImageOperandsZeroExtendShift = 13,
@@ -330,9 +337,13 @@
     ImageOperandsConstOffsetsMask = 0x00000020,
     ImageOperandsSampleMask = 0x00000040,
     ImageOperandsMinLodMask = 0x00000080,
+    ImageOperandsMakeTexelAvailableMask = 0x00000100,
     ImageOperandsMakeTexelAvailableKHRMask = 0x00000100,
+    ImageOperandsMakeTexelVisibleMask = 0x00000200,
     ImageOperandsMakeTexelVisibleKHRMask = 0x00000200,
+    ImageOperandsNonPrivateTexelMask = 0x00000400,
     ImageOperandsNonPrivateTexelKHRMask = 0x00000400,
+    ImageOperandsVolatileTexelMask = 0x00000800,
     ImageOperandsVolatileTexelKHRMask = 0x00000800,
     ImageOperandsSignExtendMask = 0x00001000,
     ImageOperandsZeroExtendMask = 0x00002000,
@@ -448,8 +459,11 @@
     DecorationPerViewNV = 5272,
     DecorationPerTaskNV = 5273,
     DecorationPerVertexNV = 5285,
+    DecorationNonUniform = 5300,
     DecorationNonUniformEXT = 5300,
+    DecorationRestrictPointer = 5355,
     DecorationRestrictPointerEXT = 5355,
+    DecorationAliasedPointer = 5356,
     DecorationAliasedPointerEXT = 5356,
     DecorationCounterBuffer = 5634,
     DecorationHlslCounterBufferGOOGLE = 5634,
@@ -630,8 +644,11 @@
     MemorySemanticsCrossWorkgroupMemoryShift = 9,
     MemorySemanticsAtomicCounterMemoryShift = 10,
     MemorySemanticsImageMemoryShift = 11,
+    MemorySemanticsOutputMemoryShift = 12,
     MemorySemanticsOutputMemoryKHRShift = 12,
+    MemorySemanticsMakeAvailableShift = 13,
     MemorySemanticsMakeAvailableKHRShift = 13,
+    MemorySemanticsMakeVisibleShift = 14,
     MemorySemanticsMakeVisibleKHRShift = 14,
     MemorySemanticsVolatileShift = 15,
     MemorySemanticsMax = 0x7fffffff,
@@ -649,8 +666,11 @@
     MemorySemanticsCrossWorkgroupMemoryMask = 0x00000200,
     MemorySemanticsAtomicCounterMemoryMask = 0x00000400,
     MemorySemanticsImageMemoryMask = 0x00000800,
+    MemorySemanticsOutputMemoryMask = 0x00001000,
     MemorySemanticsOutputMemoryKHRMask = 0x00001000,
+    MemorySemanticsMakeAvailableMask = 0x00002000,
     MemorySemanticsMakeAvailableKHRMask = 0x00002000,
+    MemorySemanticsMakeVisibleMask = 0x00004000,
     MemorySemanticsMakeVisibleKHRMask = 0x00004000,
     MemorySemanticsVolatileMask = 0x00008000,
 };
@@ -659,8 +679,11 @@
     MemoryAccessVolatileShift = 0,
     MemoryAccessAlignedShift = 1,
     MemoryAccessNontemporalShift = 2,
+    MemoryAccessMakePointerAvailableShift = 3,
     MemoryAccessMakePointerAvailableKHRShift = 3,
+    MemoryAccessMakePointerVisibleShift = 4,
     MemoryAccessMakePointerVisibleKHRShift = 4,
+    MemoryAccessNonPrivatePointerShift = 5,
     MemoryAccessNonPrivatePointerKHRShift = 5,
     MemoryAccessMax = 0x7fffffff,
 };
@@ -670,8 +693,11 @@
     MemoryAccessVolatileMask = 0x00000001,
     MemoryAccessAlignedMask = 0x00000002,
     MemoryAccessNontemporalMask = 0x00000004,
+    MemoryAccessMakePointerAvailableMask = 0x00000008,
     MemoryAccessMakePointerAvailableKHRMask = 0x00000008,
+    MemoryAccessMakePointerVisibleMask = 0x00000010,
     MemoryAccessMakePointerVisibleKHRMask = 0x00000010,
+    MemoryAccessNonPrivatePointerMask = 0x00000020,
     MemoryAccessNonPrivatePointerKHRMask = 0x00000020,
 };
 
@@ -681,6 +707,7 @@
     ScopeWorkgroup = 2,
     ScopeSubgroup = 3,
     ScopeInvocation = 4,
+    ScopeQueueFamily = 5,
     ScopeQueueFamilyKHR = 5,
     ScopeMax = 0x7fffffff,
 };
@@ -781,6 +808,8 @@
     CapabilityGroupNonUniformShuffleRelative = 66,
     CapabilityGroupNonUniformClustered = 67,
     CapabilityGroupNonUniformQuad = 68,
+    CapabilityShaderLayer = 69,
+    CapabilityShaderViewportIndex = 70,
     CapabilitySubgroupBallotKHR = 4423,
     CapabilityDrawParameters = 4427,
     CapabilitySubgroupVoteKHR = 4431,
@@ -825,21 +854,36 @@
     CapabilityFragmentDensityEXT = 5291,
     CapabilityShadingRateNV = 5291,
     CapabilityGroupNonUniformPartitionedNV = 5297,
+    CapabilityShaderNonUniform = 5301,
     CapabilityShaderNonUniformEXT = 5301,
+    CapabilityRuntimeDescriptorArray = 5302,
     CapabilityRuntimeDescriptorArrayEXT = 5302,
+    CapabilityInputAttachmentArrayDynamicIndexing = 5303,
     CapabilityInputAttachmentArrayDynamicIndexingEXT = 5303,
+    CapabilityUniformTexelBufferArrayDynamicIndexing = 5304,
     CapabilityUniformTexelBufferArrayDynamicIndexingEXT = 5304,
+    CapabilityStorageTexelBufferArrayDynamicIndexing = 5305,
     CapabilityStorageTexelBufferArrayDynamicIndexingEXT = 5305,
+    CapabilityUniformBufferArrayNonUniformIndexing = 5306,
     CapabilityUniformBufferArrayNonUniformIndexingEXT = 5306,
+    CapabilitySampledImageArrayNonUniformIndexing = 5307,
     CapabilitySampledImageArrayNonUniformIndexingEXT = 5307,
+    CapabilityStorageBufferArrayNonUniformIndexing = 5308,
     CapabilityStorageBufferArrayNonUniformIndexingEXT = 5308,
+    CapabilityStorageImageArrayNonUniformIndexing = 5309,
     CapabilityStorageImageArrayNonUniformIndexingEXT = 5309,
+    CapabilityInputAttachmentArrayNonUniformIndexing = 5310,
     CapabilityInputAttachmentArrayNonUniformIndexingEXT = 5310,
+    CapabilityUniformTexelBufferArrayNonUniformIndexing = 5311,
     CapabilityUniformTexelBufferArrayNonUniformIndexingEXT = 5311,
+    CapabilityStorageTexelBufferArrayNonUniformIndexing = 5312,
     CapabilityStorageTexelBufferArrayNonUniformIndexingEXT = 5312,
     CapabilityRayTracingNV = 5340,
+    CapabilityVulkanMemoryModel = 5345,
     CapabilityVulkanMemoryModelKHR = 5345,
+    CapabilityVulkanMemoryModelDeviceScope = 5346,
     CapabilityVulkanMemoryModelDeviceScopeKHR = 5346,
+    CapabilityPhysicalStorageBufferAddresses = 5347,
     CapabilityPhysicalStorageBufferAddressesEXT = 5347,
     CapabilityComputeDerivativeGroupLinearNV = 5350,
     CapabilityCooperativeMatrixNV = 5357,
diff --git a/StandAlone/CMakeLists.txt b/StandAlone/CMakeLists.txt
index 5cea53d..0878965 100644
--- a/StandAlone/CMakeLists.txt
+++ b/StandAlone/CMakeLists.txt
@@ -4,8 +4,9 @@
 set_property(TARGET glslang-default-resource-limits PROPERTY POSITION_INDEPENDENT_CODE ON)
 
 target_include_directories(glslang-default-resource-limits
-                           PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}
-                           PUBLIC ${PROJECT_SOURCE_DIR})
+                           PUBLIC $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}>
+                           PUBLIC $<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}>)
+
 
 set(SOURCES StandAlone.cpp DirStackFileIncluder.h)
 set(REMAPPER_SOURCES spirv-remap.cpp)
@@ -33,21 +34,27 @@
 
 target_link_libraries(glslangValidator ${LIBRARIES})
 target_link_libraries(spirv-remap ${LIBRARIES})
-target_include_directories(glslangValidator PUBLIC ../External)
+target_include_directories(glslangValidator PUBLIC
+    $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/../External>
+    $<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}/External>)
 
 if(WIN32)
     source_group("Source" FILES ${SOURCES})
 endif(WIN32)
 
 if(ENABLE_GLSLANG_INSTALL)
-    install(TARGETS glslangValidator
+    install(TARGETS glslangValidator EXPORT glslangValidatorTargets
             RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR})
 
-    install(TARGETS spirv-remap
+    install(TARGETS spirv-remap EXPORT spirv-remapTargets
             RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR})
+    # Install the export sets next to the libraries (honours lib64/multiarch layouts).
+    install(EXPORT glslangValidatorTargets DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake)
+    install(EXPORT spirv-remapTargets DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake)
             
     if(BUILD_SHARED_LIBS)
-        install(TARGETS glslang-default-resource-limits
+        install(TARGETS glslang-default-resource-limits EXPORT glslang-default-resource-limitsTargets
                 LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR})
+        install(EXPORT glslang-default-resource-limitsTargets DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake)
     endif()
 endif(ENABLE_GLSLANG_INSTALL)
diff --git a/StandAlone/StandAlone.cpp b/StandAlone/StandAlone.cpp
index 9fa311b..4be3a4f 100644
--- a/StandAlone/StandAlone.cpp
+++ b/StandAlone/StandAlone.cpp
@@ -615,8 +615,12 @@
                             } else if (strcmp(argv[1], "spirv1.4") == 0) {
                                 TargetLanguage = glslang::EShTargetSpv;
                                 TargetVersion = glslang::EShTargetSpv_1_4;
+                            } else if (strcmp(argv[1], "spirv1.5") == 0) {
+                                TargetLanguage = glslang::EShTargetSpv;
+                                TargetVersion = glslang::EShTargetSpv_1_5;
                             } else
-                                Error("--target-env expected one of: vulkan1.0, vulkan1.1, opengl, spirv1.0, spirv1.1, spirv1.2, or spirv1.3");
+                                Error("--target-env expected one of: vulkan1.0, vulkan1.1, opengl,\n"
+                                      "spirv1.0, spirv1.1, spirv1.2, spirv1.3, spirv1.4, or spirv1.5");
                         }
                         bumpArg();
                     } else if (lowerword == "variable-name" || // synonyms
@@ -1618,7 +1622,7 @@
            "  --stdin                           read from stdin instead of from a file;\n"
            "                                    requires providing the shader stage using -S\n"
            "  --target-env {vulkan1.0 | vulkan1.1 | opengl | \n"
-           "                spirv1.0 | spirv1.1 | spirv1.2 | spirv1.3}\n"
+           "                spirv1.0 | spirv1.1 | spirv1.2 | spirv1.3 | spirv1.4 | spirv1.5}\n"
            "                                    set execution environment that emitted code\n"
            "                                    will execute in (versus source language\n"
            "                                    semantics selected by --client) defaults:\n"
diff --git a/Test/130.frag b/Test/130.frag
index 3e39411..8a5bfd5 100644
--- a/Test/130.frag
+++ b/Test/130.frag
@@ -167,3 +167,12 @@
 }
 
 layout(early_fragment_tests) out;         // ERROR
+
+#extension GL_ARB_explicit_uniform_location : enable
+
+layout(location = 3) uniform vec4 ucolor0; // ERROR: explicit attrib location is also required for version < 330
+
+#extension GL_ARB_explicit_attrib_location : enable
+
+layout(location = 4) uniform vec4 ucolor1;
+
diff --git a/Test/330.frag b/Test/330.frag
index 9afa8f8..b37d8de 100644
--- a/Test/330.frag
+++ b/Test/330.frag
@@ -149,4 +149,17 @@
     KeyMem.precise;
 }
 
-layout(location=28, index=2) out vec4 outIndex2; // ERROR index out of range
\ No newline at end of file
+layout(location=28, index=2) out vec4 outIndex2; // ERROR index out of range
+
+layout(location=4) uniform vec4 ucolor0; // ERROR: extension is not enabled
+
+#extension GL_ARB_explicit_uniform_location : enable
+
+layout(location=5) uniform vec4 ucolor1;
+
+layout(location=6) uniform ColorsBuffer // ERROR: location cannot be applied in uniform buffer block
+{
+    vec4 colors[128];
+} colorsBuffer;
+
+
diff --git a/Test/430.comp b/Test/430.comp
index 0929432..178b994 100644
--- a/Test/430.comp
+++ b/Test/430.comp
@@ -48,6 +48,9 @@
 layout(location = 2) shared vec4 sl;  // ERROR
 shared float fs = 4.2;                // ERROR
 
+layout(local_size_y = 1) in;
+layout(local_size_y = 2) in;     // ERROR, changing
+layout(local_size_y = 1) in;
 layout(local_size_x = 2, local_size_y = 3, local_size_z = 4) out;  // ERROR
 
 int arrX[gl_WorkGroupSize.x];
diff --git a/Test/baseResults/130.frag.out b/Test/baseResults/130.frag.out
index 81d055b..0af9ab9 100644
--- a/Test/baseResults/130.frag.out
+++ b/Test/baseResults/130.frag.out
@@ -29,10 +29,13 @@
 ERROR: 0:154: 'image load store' : not supported for this version or the enabled extensions 
 ERROR: 0:154: 'iimage2D' : Reserved word. 
 ERROR: 0:169: 'early_fragment_tests' : can only apply to 'in' 
-ERROR: 28 compilation errors.  No code generated.
+ERROR: 0:173: 'location qualifier on uniform or buffer' : not supported for this version or the enabled extensions 
+ERROR: 29 compilation errors.  No code generated.
 
 
 Shader version: 130
+Requested GL_ARB_explicit_attrib_location
+Requested GL_ARB_explicit_uniform_location
 Requested GL_ARB_gpu_shader5
 Requested GL_ARB_separate_shader_objects
 Requested GL_ARB_shader_image_load_store
@@ -402,12 +405,16 @@
 0:?     'gl_FogFragCoord' ( smooth in float)
 0:?     'iimg2Dbad' (layout( r32i) uniform iimage2D)
 0:?     'iimg2D' (layout( r32i) uniform iimage2D)
+0:?     'ucolor0' (layout( location=3) uniform 4-component vector of float)
+0:?     'ucolor1' (layout( location=4) uniform 4-component vector of float)
 
 
 Linked fragment stage:
 
 
 Shader version: 130
+Requested GL_ARB_explicit_attrib_location
+Requested GL_ARB_explicit_uniform_location
 Requested GL_ARB_gpu_shader5
 Requested GL_ARB_separate_shader_objects
 Requested GL_ARB_shader_image_load_store
@@ -457,4 +464,6 @@
 0:?     'gl_FogFragCoord' ( smooth in float)
 0:?     'iimg2Dbad' (layout( r32i) uniform iimage2D)
 0:?     'iimg2D' (layout( r32i) uniform iimage2D)
+0:?     'ucolor0' (layout( location=3) uniform 4-component vector of float)
+0:?     'ucolor1' (layout( location=4) uniform 4-component vector of float)
 
diff --git a/Test/baseResults/330.frag.out b/Test/baseResults/330.frag.out
index 36ba7a2..bb2770f 100644
--- a/Test/baseResults/330.frag.out
+++ b/Test/baseResults/330.frag.out
@@ -40,11 +40,14 @@
 ERROR: 0:141: 'textureQueryLod' : no matching overloaded function found 
 ERROR: 0:141: 'assign' :  cannot convert from ' const float' to ' temp 2-component vector of float'
 ERROR: 0:152: 'index' : value must be 0 or 1 
-ERROR: 41 compilation errors.  No code generated.
+ERROR: 0:154: 'location qualifier on uniform or buffer' : not supported for this version or the enabled extensions 
+ERROR: 0:160: 'location' : cannot apply to uniform or buffer block 
+ERROR: 43 compilation errors.  No code generated.
 
 
 Shader version: 330
 Requested GL_ARB_enhanced_layouts
+Requested GL_ARB_explicit_uniform_location
 Requested GL_ARB_separate_shader_objects
 ERROR: node is still EOpNull!
 0:8  Function Definition: main( ( global void)
@@ -126,6 +129,9 @@
 0:?     'precise' ( global int)
 0:?     'KeyMem' ( global structure{ global int precise})
 0:?     'outIndex2' (layout( location=28 index=0) out 4-component vector of float)
+0:?     'ucolor0' (layout( location=4) uniform 4-component vector of float)
+0:?     'ucolor1' (layout( location=5) uniform 4-component vector of float)
+0:?     'colorsBuffer' (layout( location=6 column_major shared) uniform block{layout( column_major shared) uniform 128-element array of 4-component vector of float colors})
 
 
 Linked fragment stage:
@@ -135,6 +141,7 @@
 
 Shader version: 330
 Requested GL_ARB_enhanced_layouts
+Requested GL_ARB_explicit_uniform_location
 Requested GL_ARB_separate_shader_objects
 ERROR: node is still EOpNull!
 0:8  Function Definition: main( ( global void)
@@ -191,4 +198,7 @@
 0:?     'precise' ( global int)
 0:?     'KeyMem' ( global structure{ global int precise})
 0:?     'outIndex2' (layout( location=28 index=0) out 4-component vector of float)
+0:?     'ucolor0' (layout( location=4) uniform 4-component vector of float)
+0:?     'ucolor1' (layout( location=5) uniform 4-component vector of float)
+0:?     'colorsBuffer' (layout( location=6 column_major shared) uniform block{layout( column_major shared) uniform 128-element array of 4-component vector of float colors})
 
diff --git a/Test/baseResults/430.comp.out b/Test/baseResults/430.comp.out
index ef8d19e..55c8238 100644
--- a/Test/baseResults/430.comp.out
+++ b/Test/baseResults/430.comp.out
@@ -8,14 +8,15 @@
 ERROR: 0:48: 'shared' : cannot apply layout qualifiers to a shared variable 
 ERROR: 0:48: 'location' : can only apply to uniform, buffer, in, or out storage qualifiers 
 ERROR: 0:49: 'shared' :  cannot initialize this type of qualifier  
-ERROR: 0:51: 'local_size' : can only apply to 'in' 
-ERROR: 0:51: 'local_size' : can only apply to 'in' 
-ERROR: 0:51: 'local_size' : can only apply to 'in' 
-ERROR: 0:65: 'assign' :  l-value required "ro" (can't modify a readonly buffer)
-ERROR: 0:77: '=' :  cannot convert from ' temp double' to ' temp int'
-ERROR: 0:81: 'input block' : not supported in this stage: compute
-ERROR: 0:85: 'output block' : not supported in this stage: compute
-ERROR: 16 compilation errors.  No code generated.
+ERROR: 0:52: 'local_size' : cannot change previously set size 
+ERROR: 0:54: 'local_size' : can only apply to 'in' 
+ERROR: 0:54: 'local_size' : can only apply to 'in' 
+ERROR: 0:54: 'local_size' : can only apply to 'in' 
+ERROR: 0:68: 'assign' :  l-value required "ro" (can't modify a readonly buffer)
+ERROR: 0:80: '=' :  cannot convert from ' temp double' to ' temp int'
+ERROR: 0:84: 'input block' : not supported in this stage: compute
+ERROR: 0:88: 'output block' : not supported in this stage: compute
+ERROR: 17 compilation errors.  No code generated.
 
 
 Shader version: 430
@@ -51,77 +52,77 @@
 0:39            10 (const int)
 0:39        true case
 0:40        Barrier ( global void)
-0:63  Function Definition: foo( ( global void)
-0:63    Function Parameters: 
-0:65    Sequence
-0:65      move second child to first child ( temp float)
-0:65        direct index (layout( column_major shared) readonly temp float)
-0:65          values: direct index for structure (layout( column_major shared) readonly buffer unsized 3-element array of float)
-0:65            'ro' (layout( column_major shared) readonly buffer block{layout( column_major shared) readonly buffer int value, layout( column_major shared) readonly buffer unsized 3-element array of float values})
-0:65            Constant:
-0:65              1 (const int)
-0:65          Constant:
-0:65            2 (const int)
-0:65        Constant:
-0:65          4.700000
-0:66      array length ( temp int)
-0:66        values: direct index for structure (layout( column_major shared) readonly buffer unsized 3-element array of float)
-0:66          'ro' (layout( column_major shared) readonly buffer block{layout( column_major shared) readonly buffer int value, layout( column_major shared) readonly buffer unsized 3-element array of float values})
-0:66          Constant:
-0:66            1 (const int)
-0:67      Barrier ( global void)
-0:72  Function Definition: fooaoeu( ( global void)
-0:72    Function Parameters: 
-0:73    Sequence
-0:73      Sequence
-0:73        move second child to first child ( temp 2-component vector of int)
-0:73          'storePos' ( temp 2-component vector of int)
-0:73          Convert uint to int ( temp 2-component vector of int)
-0:73            vector swizzle ( temp 2-component vector of uint)
-0:73              'gl_GlobalInvocationID' ( in 3-component vector of uint GlobalInvocationID)
-0:73              Sequence
-0:73                Constant:
-0:73                  0 (const int)
-0:73                Constant:
-0:73                  1 (const int)
-0:74      Sequence
-0:74        move second child to first child ( temp double)
-0:74          'localCoef' ( temp double)
-0:74          Convert float to double ( temp double)
-0:74            length ( global float)
-0:74              divide ( temp 2-component vector of float)
-0:74                Convert int to float ( temp 2-component vector of float)
-0:74                  subtract ( temp 2-component vector of int)
-0:74                    Convert uint to int ( temp 2-component vector of int)
-0:74                      vector swizzle ( temp 2-component vector of uint)
-0:74                        'gl_LocalInvocationID' ( in 3-component vector of uint LocalInvocationID)
-0:74                        Sequence
-0:74                          Constant:
-0:74                            0 (const int)
-0:74                          Constant:
-0:74                            1 (const int)
-0:74                    Constant:
-0:74                      8 (const int)
-0:74                Constant:
-0:74                  8.000000
-0:75      Sequence
-0:75        move second child to first child ( temp 4-component vector of double)
-0:75          'aa' ( temp 4-component vector of double)
-0:75          Constant:
-0:75            0.400000
-0:75            0.200000
-0:75            0.300000
-0:75            0.400000
+0:66  Function Definition: foo( ( global void)
+0:66    Function Parameters: 
+0:68    Sequence
+0:68      move second child to first child ( temp float)
+0:68        direct index (layout( column_major shared) readonly temp float)
+0:68          values: direct index for structure (layout( column_major shared) readonly buffer unsized 3-element array of float)
+0:68            'ro' (layout( column_major shared) readonly buffer block{layout( column_major shared) readonly buffer int value, layout( column_major shared) readonly buffer unsized 3-element array of float values})
+0:68            Constant:
+0:68              1 (const int)
+0:68          Constant:
+0:68            2 (const int)
+0:68        Constant:
+0:68          4.700000
+0:69      array length ( temp int)
+0:69        values: direct index for structure (layout( column_major shared) readonly buffer unsized 3-element array of float)
+0:69          'ro' (layout( column_major shared) readonly buffer block{layout( column_major shared) readonly buffer int value, layout( column_major shared) readonly buffer unsized 3-element array of float values})
+0:69          Constant:
+0:69            1 (const int)
+0:70      Barrier ( global void)
+0:75  Function Definition: fooaoeu( ( global void)
+0:75    Function Parameters: 
+0:76    Sequence
 0:76      Sequence
-0:76        move second child to first child ( temp double)
-0:76          'globalCoef' ( temp double)
-0:76          Constant:
-0:76            1.000000
+0:76        move second child to first child ( temp 2-component vector of int)
+0:76          'storePos' ( temp 2-component vector of int)
+0:76          Convert uint to int ( temp 2-component vector of int)
+0:76            vector swizzle ( temp 2-component vector of uint)
+0:76              'gl_GlobalInvocationID' ( in 3-component vector of uint GlobalInvocationID)
+0:76              Sequence
+0:76                Constant:
+0:76                  0 (const int)
+0:76                Constant:
+0:76                  1 (const int)
+0:77      Sequence
+0:77        move second child to first child ( temp double)
+0:77          'localCoef' ( temp double)
+0:77          Convert float to double ( temp double)
+0:77            length ( global float)
+0:77              divide ( temp 2-component vector of float)
+0:77                Convert int to float ( temp 2-component vector of float)
+0:77                  subtract ( temp 2-component vector of int)
+0:77                    Convert uint to int ( temp 2-component vector of int)
+0:77                      vector swizzle ( temp 2-component vector of uint)
+0:77                        'gl_LocalInvocationID' ( in 3-component vector of uint LocalInvocationID)
+0:77                        Sequence
+0:77                          Constant:
+0:77                            0 (const int)
+0:77                          Constant:
+0:77                            1 (const int)
+0:77                    Constant:
+0:77                      8 (const int)
+0:77                Constant:
+0:77                  8.000000
 0:78      Sequence
-0:78        move second child to first child ( temp double)
-0:78          'di' ( temp double)
-0:78          Convert int to double ( temp double)
-0:78            'i' ( temp int)
+0:78        move second child to first child ( temp 4-component vector of double)
+0:78          'aa' ( temp 4-component vector of double)
+0:78          Constant:
+0:78            0.400000
+0:78            0.200000
+0:78            0.300000
+0:78            0.400000
+0:79      Sequence
+0:79        move second child to first child ( temp double)
+0:79          'globalCoef' ( temp double)
+0:79          Constant:
+0:79            1.000000
+0:81      Sequence
+0:81        move second child to first child ( temp double)
+0:81          'di' ( temp double)
+0:81          Convert int to double ( temp double)
+0:81            'i' ( temp int)
 0:?   Linker Objects
 0:?     'gl_WorkGroupSize' ( const 3-component vector of uint WorkGroupSize)
 0:?       2 (const uint)
diff --git a/Test/baseResults/glsl.450.subgroup.frag.out b/Test/baseResults/glsl.450.subgroup.frag.out
index e61523e..817abb2 100644
--- a/Test/baseResults/glsl.450.subgroup.frag.out
+++ b/Test/baseResults/glsl.450.subgroup.frag.out
@@ -85,11 +85,13 @@
 ERROR: 0:96: 'subgroupPartitionedExclusiveAndNV' : required extension not requested: GL_NV_shader_subgroup_partitioned
 ERROR: 0:97: 'subgroupPartitionedExclusiveOrNV' : required extension not requested: GL_NV_shader_subgroup_partitioned
 ERROR: 0:98: 'subgroupPartitionedExclusiveXorNV' : required extension not requested: GL_NV_shader_subgroup_partitioned
-ERROR: 0:232: 'gl_WarpsPerSMNV' : required extension not requested: GL_NV_shader_sm_builtins
-ERROR: 0:233: 'gl_SMCountNV' : required extension not requested: GL_NV_shader_sm_builtins
-ERROR: 0:234: 'gl_WarpIDNV' : required extension not requested: GL_NV_shader_sm_builtins
-ERROR: 0:235: 'gl_SMIDNV' : required extension not requested: GL_NV_shader_sm_builtins
-ERROR: 90 compilation errors.  No code generated.
+ERROR: 0:124: 'id' : argument must be compile-time constant 
+ERROR: 0:199: 'id' : argument must be compile-time constant 
+ERROR: 0:236: 'gl_WarpsPerSMNV' : required extension not requested: GL_NV_shader_sm_builtins
+ERROR: 0:237: 'gl_SMCountNV' : required extension not requested: GL_NV_shader_sm_builtins
+ERROR: 0:238: 'gl_WarpIDNV' : required extension not requested: GL_NV_shader_sm_builtins
+ERROR: 0:239: 'gl_SMIDNV' : required extension not requested: GL_NV_shader_sm_builtins
+ERROR: 92 compilation errors.  No code generated.
 
 
 Shader version: 450
@@ -352,270 +354,278 @@
 0:116  Function Definition: ballot_works(vf4; ( global void)
 0:116    Function Parameters: 
 0:116      'f4' ( in 4-component vector of float)
-0:117    Sequence
-0:117      'gl_SubgroupEqMask' ( flat in 4-component vector of uint SubgroupEqMask)
-0:118      'gl_SubgroupGeMask' ( flat in 4-component vector of uint SubgroupGeMask)
-0:119      'gl_SubgroupGtMask' ( flat in 4-component vector of uint SubgroupGtMask)
-0:120      'gl_SubgroupLeMask' ( flat in 4-component vector of uint SubgroupLeMask)
-0:121      'gl_SubgroupLtMask' ( flat in 4-component vector of uint SubgroupLtMask)
-0:122      subgroupBroadcast ( global 4-component vector of float)
-0:122        'f4' ( in 4-component vector of float)
-0:122        Constant:
-0:122          0 (const uint)
-0:123      subgroupBroadcastFirst ( global 4-component vector of float)
+0:?     Sequence
+0:118      'gl_SubgroupEqMask' ( flat in 4-component vector of uint SubgroupEqMask)
+0:119      'gl_SubgroupGeMask' ( flat in 4-component vector of uint SubgroupGeMask)
+0:120      'gl_SubgroupGtMask' ( flat in 4-component vector of uint SubgroupGtMask)
+0:121      'gl_SubgroupLeMask' ( flat in 4-component vector of uint SubgroupLeMask)
+0:122      'gl_SubgroupLtMask' ( flat in 4-component vector of uint SubgroupLtMask)
+0:123      subgroupBroadcast ( global 4-component vector of float)
 0:123        'f4' ( in 4-component vector of float)
-0:124      Sequence
-0:124        move second child to first child ( temp 4-component vector of uint)
-0:124          'ballot' ( temp 4-component vector of uint)
-0:124          subgroupBallot ( global 4-component vector of uint)
-0:124            Constant:
-0:124              false (const bool)
-0:125      subgroupInverseBallot ( global bool)
-0:125        Constant:
-0:125          1 (const uint)
-0:125          1 (const uint)
-0:125          1 (const uint)
-0:125          1 (const uint)
-0:126      subgroupBallotBitExtract ( global bool)
-0:126        'ballot' ( temp 4-component vector of uint)
-0:126        Constant:
-0:126          0 (const uint)
-0:127      subgroupBallotBitCount ( global uint)
-0:127        'ballot' ( temp 4-component vector of uint)
-0:128      subgroupBallotInclusiveBitCount ( global uint)
+0:123        Constant:
+0:123          0 (const uint)
+0:124      subgroupBroadcast ( global 4-component vector of float)
+0:124        'f4' ( in 4-component vector of float)
+0:124        Convert int to uint ( temp uint)
+0:124          'i' ( temp int)
+0:125      subgroupBroadcastFirst ( global 4-component vector of float)
+0:125        'f4' ( in 4-component vector of float)
+0:126      Sequence
+0:126        move second child to first child ( temp 4-component vector of uint)
+0:126          'ballot' ( temp 4-component vector of uint)
+0:126          subgroupBallot ( global 4-component vector of uint)
+0:126            Constant:
+0:126              false (const bool)
+0:127      subgroupInverseBallot ( global bool)
+0:127        Constant:
+0:127          1 (const uint)
+0:127          1 (const uint)
+0:127          1 (const uint)
+0:127          1 (const uint)
+0:128      subgroupBallotBitExtract ( global bool)
 0:128        'ballot' ( temp 4-component vector of uint)
-0:129      subgroupBallotExclusiveBitCount ( global uint)
+0:128        Constant:
+0:128          0 (const uint)
+0:129      subgroupBallotBitCount ( global uint)
 0:129        'ballot' ( temp 4-component vector of uint)
-0:130      subgroupBallotFindLSB ( global uint)
+0:130      subgroupBallotInclusiveBitCount ( global uint)
 0:130        'ballot' ( temp 4-component vector of uint)
-0:131      subgroupBallotFindMSB ( global uint)
+0:131      subgroupBallotExclusiveBitCount ( global uint)
 0:131        'ballot' ( temp 4-component vector of uint)
-0:135  Function Definition: vote_works(vf4; ( global void)
-0:135    Function Parameters: 
-0:135      'f4' ( in 4-component vector of float)
-0:137    Sequence
-0:137      subgroupAll ( global bool)
-0:137        Constant:
-0:137          true (const bool)
-0:138      subgroupAny ( global bool)
-0:138        Constant:
-0:138          false (const bool)
-0:139      subgroupAllEqual ( global bool)
-0:139        'f4' ( in 4-component vector of float)
-0:144  Function Definition: shuffle_works(vf4; ( global void)
-0:144    Function Parameters: 
-0:144      'f4' ( in 4-component vector of float)
-0:146    Sequence
-0:146      subgroupShuffle ( global 4-component vector of float)
-0:146        'f4' ( in 4-component vector of float)
-0:146        Constant:
-0:146          0 (const uint)
-0:147      subgroupShuffleXor ( global 4-component vector of float)
-0:147        'f4' ( in 4-component vector of float)
-0:147        Constant:
-0:147          1 (const uint)
-0:148      subgroupShuffleUp ( global 4-component vector of float)
+0:132      subgroupBallotFindLSB ( global uint)
+0:132        'ballot' ( temp 4-component vector of uint)
+0:133      subgroupBallotFindMSB ( global uint)
+0:133        'ballot' ( temp 4-component vector of uint)
+0:137  Function Definition: vote_works(vf4; ( global void)
+0:137    Function Parameters: 
+0:137      'f4' ( in 4-component vector of float)
+0:139    Sequence
+0:139      subgroupAll ( global bool)
+0:139        Constant:
+0:139          true (const bool)
+0:140      subgroupAny ( global bool)
+0:140        Constant:
+0:140          false (const bool)
+0:141      subgroupAllEqual ( global bool)
+0:141        'f4' ( in 4-component vector of float)
+0:146  Function Definition: shuffle_works(vf4; ( global void)
+0:146    Function Parameters: 
+0:146      'f4' ( in 4-component vector of float)
+0:148    Sequence
+0:148      subgroupShuffle ( global 4-component vector of float)
 0:148        'f4' ( in 4-component vector of float)
 0:148        Constant:
-0:148          1 (const uint)
-0:149      subgroupShuffleDown ( global 4-component vector of float)
+0:148          0 (const uint)
+0:149      subgroupShuffleXor ( global 4-component vector of float)
 0:149        'f4' ( in 4-component vector of float)
 0:149        Constant:
 0:149          1 (const uint)
-0:153  Function Definition: arith_works(vf4; ( global void)
-0:153    Function Parameters: 
-0:153      'f4' ( in 4-component vector of float)
+0:150      subgroupShuffleUp ( global 4-component vector of float)
+0:150        'f4' ( in 4-component vector of float)
+0:150        Constant:
+0:150          1 (const uint)
+0:151      subgroupShuffleDown ( global 4-component vector of float)
+0:151        'f4' ( in 4-component vector of float)
+0:151        Constant:
+0:151          1 (const uint)
+0:155  Function Definition: arith_works(vf4; ( global void)
+0:155    Function Parameters: 
+0:155      'f4' ( in 4-component vector of float)
 0:?     Sequence
-0:156      subgroupAdd ( global 4-component vector of float)
-0:156        'f4' ( in 4-component vector of float)
-0:157      subgroupMul ( global 4-component vector of float)
-0:157        'f4' ( in 4-component vector of float)
-0:158      subgroupMin ( global 4-component vector of float)
+0:158      subgroupAdd ( global 4-component vector of float)
 0:158        'f4' ( in 4-component vector of float)
-0:159      subgroupMax ( global 4-component vector of float)
+0:159      subgroupMul ( global 4-component vector of float)
 0:159        'f4' ( in 4-component vector of float)
-0:160      subgroupAnd ( global 4-component vector of uint)
-0:160        'ballot' ( temp 4-component vector of uint)
-0:161      subgroupOr ( global 4-component vector of uint)
-0:161        'ballot' ( temp 4-component vector of uint)
-0:162      subgroupXor ( global 4-component vector of uint)
+0:160      subgroupMin ( global 4-component vector of float)
+0:160        'f4' ( in 4-component vector of float)
+0:161      subgroupMax ( global 4-component vector of float)
+0:161        'f4' ( in 4-component vector of float)
+0:162      subgroupAnd ( global 4-component vector of uint)
 0:162        'ballot' ( temp 4-component vector of uint)
-0:163      subgroupInclusiveAdd ( global 4-component vector of float)
-0:163        'f4' ( in 4-component vector of float)
-0:164      subgroupInclusiveMul ( global 4-component vector of float)
-0:164        'f4' ( in 4-component vector of float)
-0:165      subgroupInclusiveMin ( global 4-component vector of float)
+0:163      subgroupOr ( global 4-component vector of uint)
+0:163        'ballot' ( temp 4-component vector of uint)
+0:164      subgroupXor ( global 4-component vector of uint)
+0:164        'ballot' ( temp 4-component vector of uint)
+0:165      subgroupInclusiveAdd ( global 4-component vector of float)
 0:165        'f4' ( in 4-component vector of float)
-0:166      subgroupInclusiveMax ( global 4-component vector of float)
+0:166      subgroupInclusiveMul ( global 4-component vector of float)
 0:166        'f4' ( in 4-component vector of float)
-0:167      subgroupInclusiveAnd ( global 4-component vector of uint)
-0:167        'ballot' ( temp 4-component vector of uint)
-0:168      subgroupInclusiveOr ( global 4-component vector of uint)
-0:168        'ballot' ( temp 4-component vector of uint)
-0:169      subgroupInclusiveXor ( global 4-component vector of uint)
+0:167      subgroupInclusiveMin ( global 4-component vector of float)
+0:167        'f4' ( in 4-component vector of float)
+0:168      subgroupInclusiveMax ( global 4-component vector of float)
+0:168        'f4' ( in 4-component vector of float)
+0:169      subgroupInclusiveAnd ( global 4-component vector of uint)
 0:169        'ballot' ( temp 4-component vector of uint)
-0:170      subgroupExclusiveAdd ( global 4-component vector of float)
-0:170        'f4' ( in 4-component vector of float)
-0:171      subgroupExclusiveMul ( global 4-component vector of float)
-0:171        'f4' ( in 4-component vector of float)
-0:172      subgroupExclusiveMin ( global 4-component vector of float)
+0:170      subgroupInclusiveOr ( global 4-component vector of uint)
+0:170        'ballot' ( temp 4-component vector of uint)
+0:171      subgroupInclusiveXor ( global 4-component vector of uint)
+0:171        'ballot' ( temp 4-component vector of uint)
+0:172      subgroupExclusiveAdd ( global 4-component vector of float)
 0:172        'f4' ( in 4-component vector of float)
-0:173      subgroupExclusiveMax ( global 4-component vector of float)
+0:173      subgroupExclusiveMul ( global 4-component vector of float)
 0:173        'f4' ( in 4-component vector of float)
-0:174      subgroupExclusiveAnd ( global 4-component vector of uint)
-0:174        'ballot' ( temp 4-component vector of uint)
-0:175      subgroupExclusiveOr ( global 4-component vector of uint)
-0:175        'ballot' ( temp 4-component vector of uint)
-0:176      subgroupExclusiveXor ( global 4-component vector of uint)
+0:174      subgroupExclusiveMin ( global 4-component vector of float)
+0:174        'f4' ( in 4-component vector of float)
+0:175      subgroupExclusiveMax ( global 4-component vector of float)
+0:175        'f4' ( in 4-component vector of float)
+0:176      subgroupExclusiveAnd ( global 4-component vector of uint)
 0:176        'ballot' ( temp 4-component vector of uint)
-0:180  Function Definition: clustered_works(vf4; ( global void)
-0:180    Function Parameters: 
-0:180      'f4' ( in 4-component vector of float)
-0:182    Sequence
-0:182      Sequence
-0:182        move second child to first child ( temp 4-component vector of uint)
-0:182          'ballot' ( temp 4-component vector of uint)
-0:182          Constant:
-0:182            85 (const uint)
-0:182            0 (const uint)
-0:182            0 (const uint)
-0:182            0 (const uint)
-0:183      subgroupClusteredAdd ( global 4-component vector of float)
-0:183        'f4' ( in 4-component vector of float)
-0:183        Constant:
-0:183          2 (const uint)
-0:184      subgroupClusteredMul ( global 4-component vector of float)
-0:184        'f4' ( in 4-component vector of float)
-0:184        Constant:
-0:184          2 (const uint)
-0:185      subgroupClusteredMin ( global 4-component vector of float)
+0:177      subgroupExclusiveOr ( global 4-component vector of uint)
+0:177        'ballot' ( temp 4-component vector of uint)
+0:178      subgroupExclusiveXor ( global 4-component vector of uint)
+0:178        'ballot' ( temp 4-component vector of uint)
+0:182  Function Definition: clustered_works(vf4; ( global void)
+0:182    Function Parameters: 
+0:182      'f4' ( in 4-component vector of float)
+0:184    Sequence
+0:184      Sequence
+0:184        move second child to first child ( temp 4-component vector of uint)
+0:184          'ballot' ( temp 4-component vector of uint)
+0:184          Constant:
+0:184            85 (const uint)
+0:184            0 (const uint)
+0:184            0 (const uint)
+0:184            0 (const uint)
+0:185      subgroupClusteredAdd ( global 4-component vector of float)
 0:185        'f4' ( in 4-component vector of float)
 0:185        Constant:
 0:185          2 (const uint)
-0:186      subgroupClusteredMax ( global 4-component vector of float)
+0:186      subgroupClusteredMul ( global 4-component vector of float)
 0:186        'f4' ( in 4-component vector of float)
 0:186        Constant:
 0:186          2 (const uint)
-0:187      subgroupClusteredAnd ( global 4-component vector of uint)
-0:187        'ballot' ( temp 4-component vector of uint)
+0:187      subgroupClusteredMin ( global 4-component vector of float)
+0:187        'f4' ( in 4-component vector of float)
 0:187        Constant:
 0:187          2 (const uint)
-0:188      subgroupClusteredOr ( global 4-component vector of uint)
-0:188        'ballot' ( temp 4-component vector of uint)
+0:188      subgroupClusteredMax ( global 4-component vector of float)
+0:188        'f4' ( in 4-component vector of float)
 0:188        Constant:
 0:188          2 (const uint)
-0:189      subgroupClusteredXor ( global 4-component vector of uint)
+0:189      subgroupClusteredAnd ( global 4-component vector of uint)
 0:189        'ballot' ( temp 4-component vector of uint)
 0:189        Constant:
 0:189          2 (const uint)
-0:193  Function Definition: quad_works(vf4; ( global void)
-0:193    Function Parameters: 
-0:193      'f4' ( in 4-component vector of float)
-0:195    Sequence
-0:195      subgroupQuadBroadcast ( global 4-component vector of float)
-0:195        'f4' ( in 4-component vector of float)
-0:195        Constant:
-0:195          0 (const uint)
-0:196      subgroupQuadSwapHorizontal ( global 4-component vector of float)
-0:196        'f4' ( in 4-component vector of float)
-0:197      subgroupQuadSwapVertical ( global 4-component vector of float)
-0:197        'f4' ( in 4-component vector of float)
-0:198      subgroupQuadSwapDiagonal ( global 4-component vector of float)
+0:190      subgroupClusteredOr ( global 4-component vector of uint)
+0:190        'ballot' ( temp 4-component vector of uint)
+0:190        Constant:
+0:190          2 (const uint)
+0:191      subgroupClusteredXor ( global 4-component vector of uint)
+0:191        'ballot' ( temp 4-component vector of uint)
+0:191        Constant:
+0:191          2 (const uint)
+0:195  Function Definition: quad_works(vf4; ( global void)
+0:195    Function Parameters: 
+0:195      'f4' ( in 4-component vector of float)
+0:?     Sequence
+0:198      subgroupQuadBroadcast ( global 4-component vector of float)
 0:198        'f4' ( in 4-component vector of float)
-0:202  Function Definition: partitioned_works(vf4; ( global void)
-0:202    Function Parameters: 
-0:202      'f4' ( in 4-component vector of float)
-0:204    Sequence
-0:204      Sequence
-0:204        move second child to first child ( temp 4-component vector of uint)
-0:204          'parti' ( temp 4-component vector of uint)
-0:204          subgroupPartitionNV ( global 4-component vector of uint)
-0:204            'f4' ( in 4-component vector of float)
-0:205      Sequence
-0:205        move second child to first child ( temp 4-component vector of uint)
-0:205          'ballot' ( temp 4-component vector of uint)
-0:205          Constant:
-0:205            85 (const uint)
-0:205            0 (const uint)
-0:205            0 (const uint)
-0:205            0 (const uint)
-0:206      subgroupPartitionedAddNV ( global 4-component vector of float)
-0:206        'f4' ( in 4-component vector of float)
-0:206        'parti' ( temp 4-component vector of uint)
-0:207      subgroupPartitionedMulNV ( global 4-component vector of float)
-0:207        'f4' ( in 4-component vector of float)
-0:207        'parti' ( temp 4-component vector of uint)
-0:208      subgroupPartitionedMinNV ( global 4-component vector of float)
-0:208        'f4' ( in 4-component vector of float)
-0:208        'parti' ( temp 4-component vector of uint)
-0:209      subgroupPartitionedMaxNV ( global 4-component vector of float)
-0:209        'f4' ( in 4-component vector of float)
-0:209        'parti' ( temp 4-component vector of uint)
-0:210      subgroupPartitionedAndNV ( global 4-component vector of uint)
-0:210        'ballot' ( temp 4-component vector of uint)
+0:198        Constant:
+0:198          0 (const uint)
+0:199      subgroupQuadBroadcast ( global 4-component vector of float)
+0:199        'f4' ( in 4-component vector of float)
+0:199        Convert int to uint ( temp uint)
+0:199          'i' ( temp int)
+0:200      subgroupQuadSwapHorizontal ( global 4-component vector of float)
+0:200        'f4' ( in 4-component vector of float)
+0:201      subgroupQuadSwapVertical ( global 4-component vector of float)
+0:201        'f4' ( in 4-component vector of float)
+0:202      subgroupQuadSwapDiagonal ( global 4-component vector of float)
+0:202        'f4' ( in 4-component vector of float)
+0:206  Function Definition: partitioned_works(vf4; ( global void)
+0:206    Function Parameters: 
+0:206      'f4' ( in 4-component vector of float)
+0:208    Sequence
+0:208      Sequence
+0:208        move second child to first child ( temp 4-component vector of uint)
+0:208          'parti' ( temp 4-component vector of uint)
+0:208          subgroupPartitionNV ( global 4-component vector of uint)
+0:208            'f4' ( in 4-component vector of float)
+0:209      Sequence
+0:209        move second child to first child ( temp 4-component vector of uint)
+0:209          'ballot' ( temp 4-component vector of uint)
+0:209          Constant:
+0:209            85 (const uint)
+0:209            0 (const uint)
+0:209            0 (const uint)
+0:209            0 (const uint)
+0:210      subgroupPartitionedAddNV ( global 4-component vector of float)
+0:210        'f4' ( in 4-component vector of float)
 0:210        'parti' ( temp 4-component vector of uint)
-0:211      subgroupPartitionedOrNV ( global 4-component vector of uint)
-0:211        'ballot' ( temp 4-component vector of uint)
+0:211      subgroupPartitionedMulNV ( global 4-component vector of float)
+0:211        'f4' ( in 4-component vector of float)
 0:211        'parti' ( temp 4-component vector of uint)
-0:212      subgroupPartitionedXorNV ( global 4-component vector of uint)
-0:212        'ballot' ( temp 4-component vector of uint)
+0:212      subgroupPartitionedMinNV ( global 4-component vector of float)
+0:212        'f4' ( in 4-component vector of float)
 0:212        'parti' ( temp 4-component vector of uint)
-0:213      subgroupPartitionedInclusiveAddNV ( global 4-component vector of float)
+0:213      subgroupPartitionedMaxNV ( global 4-component vector of float)
 0:213        'f4' ( in 4-component vector of float)
 0:213        'parti' ( temp 4-component vector of uint)
-0:214      subgroupPartitionedInclusiveMulNV ( global 4-component vector of float)
-0:214        'f4' ( in 4-component vector of float)
+0:214      subgroupPartitionedAndNV ( global 4-component vector of uint)
+0:214        'ballot' ( temp 4-component vector of uint)
 0:214        'parti' ( temp 4-component vector of uint)
-0:215      subgroupPartitionedInclusiveMinNV ( global 4-component vector of float)
-0:215        'f4' ( in 4-component vector of float)
+0:215      subgroupPartitionedOrNV ( global 4-component vector of uint)
+0:215        'ballot' ( temp 4-component vector of uint)
 0:215        'parti' ( temp 4-component vector of uint)
-0:216      subgroupPartitionedInclusiveMaxNV ( global 4-component vector of float)
-0:216        'f4' ( in 4-component vector of float)
+0:216      subgroupPartitionedXorNV ( global 4-component vector of uint)
+0:216        'ballot' ( temp 4-component vector of uint)
 0:216        'parti' ( temp 4-component vector of uint)
-0:217      subgroupPartitionedInclusiveAndNV ( global 4-component vector of uint)
-0:217        'ballot' ( temp 4-component vector of uint)
+0:217      subgroupPartitionedInclusiveAddNV ( global 4-component vector of float)
+0:217        'f4' ( in 4-component vector of float)
 0:217        'parti' ( temp 4-component vector of uint)
-0:218      subgroupPartitionedInclusiveOrNV ( global 4-component vector of uint)
-0:218        'ballot' ( temp 4-component vector of uint)
+0:218      subgroupPartitionedInclusiveMulNV ( global 4-component vector of float)
+0:218        'f4' ( in 4-component vector of float)
 0:218        'parti' ( temp 4-component vector of uint)
-0:219      subgroupPartitionedInclusiveXorNV ( global 4-component vector of uint)
-0:219        'ballot' ( temp 4-component vector of uint)
+0:219      subgroupPartitionedInclusiveMinNV ( global 4-component vector of float)
+0:219        'f4' ( in 4-component vector of float)
 0:219        'parti' ( temp 4-component vector of uint)
-0:220      subgroupPartitionedExclusiveAddNV ( global 4-component vector of float)
+0:220      subgroupPartitionedInclusiveMaxNV ( global 4-component vector of float)
 0:220        'f4' ( in 4-component vector of float)
 0:220        'parti' ( temp 4-component vector of uint)
-0:221      subgroupPartitionedExclusiveMulNV ( global 4-component vector of float)
-0:221        'f4' ( in 4-component vector of float)
+0:221      subgroupPartitionedInclusiveAndNV ( global 4-component vector of uint)
+0:221        'ballot' ( temp 4-component vector of uint)
 0:221        'parti' ( temp 4-component vector of uint)
-0:222      subgroupPartitionedExclusiveMinNV ( global 4-component vector of float)
-0:222        'f4' ( in 4-component vector of float)
+0:222      subgroupPartitionedInclusiveOrNV ( global 4-component vector of uint)
+0:222        'ballot' ( temp 4-component vector of uint)
 0:222        'parti' ( temp 4-component vector of uint)
-0:223      subgroupPartitionedExclusiveMaxNV ( global 4-component vector of float)
-0:223        'f4' ( in 4-component vector of float)
+0:223      subgroupPartitionedInclusiveXorNV ( global 4-component vector of uint)
+0:223        'ballot' ( temp 4-component vector of uint)
 0:223        'parti' ( temp 4-component vector of uint)
-0:224      subgroupPartitionedExclusiveAndNV ( global 4-component vector of uint)
-0:224        'ballot' ( temp 4-component vector of uint)
+0:224      subgroupPartitionedExclusiveAddNV ( global 4-component vector of float)
+0:224        'f4' ( in 4-component vector of float)
 0:224        'parti' ( temp 4-component vector of uint)
-0:225      subgroupPartitionedExclusiveOrNV ( global 4-component vector of uint)
-0:225        'ballot' ( temp 4-component vector of uint)
+0:225      subgroupPartitionedExclusiveMulNV ( global 4-component vector of float)
+0:225        'f4' ( in 4-component vector of float)
 0:225        'parti' ( temp 4-component vector of uint)
-0:226      subgroupPartitionedExclusiveXorNV ( global 4-component vector of uint)
-0:226        'ballot' ( temp 4-component vector of uint)
+0:226      subgroupPartitionedExclusiveMinNV ( global 4-component vector of float)
+0:226        'f4' ( in 4-component vector of float)
 0:226        'parti' ( temp 4-component vector of uint)
-0:230  Function Definition: sm_builtins_err( ( global void)
-0:230    Function Parameters: 
-0:232    Sequence
-0:232      'gl_WarpsPerSMNV' ( flat in uint WarpsPerSMNV)
-0:233      'gl_SMCountNV' ( flat in uint SMCountNV)
-0:234      'gl_WarpIDNV' ( flat in uint WarpIDNV)
-0:235      'gl_SMIDNV' ( flat in uint SMIDNV)
-0:242  Function Definition: sm_builtins( ( global void)
-0:242    Function Parameters: 
-0:244    Sequence
-0:244      'gl_WarpsPerSMNV' ( flat in uint WarpsPerSMNV)
-0:245      'gl_SMCountNV' ( flat in uint SMCountNV)
-0:246      'gl_WarpIDNV' ( flat in uint WarpIDNV)
-0:247      'gl_SMIDNV' ( flat in uint SMIDNV)
+0:227      subgroupPartitionedExclusiveMaxNV ( global 4-component vector of float)
+0:227        'f4' ( in 4-component vector of float)
+0:227        'parti' ( temp 4-component vector of uint)
+0:228      subgroupPartitionedExclusiveAndNV ( global 4-component vector of uint)
+0:228        'ballot' ( temp 4-component vector of uint)
+0:228        'parti' ( temp 4-component vector of uint)
+0:229      subgroupPartitionedExclusiveOrNV ( global 4-component vector of uint)
+0:229        'ballot' ( temp 4-component vector of uint)
+0:229        'parti' ( temp 4-component vector of uint)
+0:230      subgroupPartitionedExclusiveXorNV ( global 4-component vector of uint)
+0:230        'ballot' ( temp 4-component vector of uint)
+0:230        'parti' ( temp 4-component vector of uint)
+0:234  Function Definition: sm_builtins_err( ( global void)
+0:234    Function Parameters: 
+0:236    Sequence
+0:236      'gl_WarpsPerSMNV' ( flat in uint WarpsPerSMNV)
+0:237      'gl_SMCountNV' ( flat in uint SMCountNV)
+0:238      'gl_WarpIDNV' ( flat in uint WarpIDNV)
+0:239      'gl_SMIDNV' ( flat in uint SMIDNV)
+0:246  Function Definition: sm_builtins( ( global void)
+0:246    Function Parameters: 
+0:248    Sequence
+0:248      'gl_WarpsPerSMNV' ( flat in uint WarpsPerSMNV)
+0:249      'gl_SMCountNV' ( flat in uint SMCountNV)
+0:250      'gl_WarpIDNV' ( flat in uint WarpIDNV)
+0:251      'gl_SMIDNV' ( flat in uint SMIDNV)
 0:?   Linker Objects
 0:?     'data' (layout( location=0) out 4-component vector of uint)
 
diff --git a/Test/baseResults/hlsl.format.rwtexture.frag.out b/Test/baseResults/hlsl.format.rwtexture.frag.out
new file mode 100644
index 0000000..7ab5329
--- /dev/null
+++ b/Test/baseResults/hlsl.format.rwtexture.frag.out
@@ -0,0 +1,501 @@
+hlsl.format.rwtexture.frag
+Shader version: 500
+gl_FragCoord origin is upper left
+using depth_any
+0:? Sequence
+0:56  Function Definition: @main( ( temp structure{ temp 4-component vector of float Color,  temp float Depth})
+0:56    Function Parameters: 
+0:?     Sequence
+0:59      move second child to first child ( temp 4-component vector of float)
+0:59        Color: direct index for structure ( temp 4-component vector of float)
+0:59          'psout' ( temp structure{ temp 4-component vector of float Color,  temp float Depth})
+0:59          Constant:
+0:59            0 (const int)
+0:59        Constant:
+0:59          1.000000
+0:59          1.000000
+0:59          1.000000
+0:59          1.000000
+0:60      move second child to first child ( temp float)
+0:60        Depth: direct index for structure ( temp float)
+0:60          'psout' ( temp structure{ temp 4-component vector of float Color,  temp float Depth})
+0:60          Constant:
+0:60            1 (const int)
+0:60        Constant:
+0:60          1.000000
+0:62      Branch: Return with expression
+0:62        'psout' ( temp structure{ temp 4-component vector of float Color,  temp float Depth})
+0:56  Function Definition: main( ( temp void)
+0:56    Function Parameters: 
+0:?     Sequence
+0:56      Sequence
+0:56        move second child to first child ( temp structure{ temp 4-component vector of float Color,  temp float Depth})
+0:56          'flattenTemp' ( temp structure{ temp 4-component vector of float Color,  temp float Depth})
+0:56          Function Call: @main( ( temp structure{ temp 4-component vector of float Color,  temp float Depth})
+0:56        move second child to first child ( temp 4-component vector of float)
+0:?           '@entryPointOutput.Color' (layout( location=0) out 4-component vector of float)
+0:56          Color: direct index for structure ( temp 4-component vector of float)
+0:56            'flattenTemp' ( temp structure{ temp 4-component vector of float Color,  temp float Depth})
+0:56            Constant:
+0:56              0 (const int)
+0:56        move second child to first child ( temp float)
+0:?           '@entryPointOutput.Depth' ( out float FragDepth)
+0:56          Depth: direct index for structure ( temp float)
+0:56            'flattenTemp' ( temp structure{ temp 4-component vector of float Color,  temp float Depth})
+0:56            Constant:
+0:56              1 (const int)
+0:?   Linker Objects
+0:?     'g_sSamp' (layout( binding=0) uniform sampler)
+0:?     'g_tTex1df4' (layout( binding=0 rgba32f) uniform image1D)
+0:?     'g_tTex1di4' (layout( rg32f) uniform iimage1D)
+0:?     'g_tTex1du4' (layout( rgba8_snorm) uniform uimage1D)
+0:?     'g_tTex2df4' (layout( rgba8i) uniform image2D)
+0:?     'g_tTex2di4' (layout( r11f_g11f_b10f) uniform iimage2D)
+0:?     'g_tTex2du4' (layout( r8_snorm) uniform uimage2D)
+0:?     'g_tTex3df4' (layout( rg8) readonly uniform image3D)
+0:?     'g_tTex3di4' (layout( rgba16i) writeonly uniform iimage3D)
+0:?     'g_tTex3du4' (layout( r8i) readonly writeonly uniform uimage3D)
+0:?     'g_tTex1df4a' (layout( rgba8ui) uniform image1DArray)
+0:?     'g_tTex1di4a' (layout( rg32ui) uniform iimage1DArray)
+0:?     'g_tTex1du4a' (layout( r16ui) uniform uimage1DArray)
+0:?     'g_tTex2df4a' (layout( rgb10_a2ui) uniform image2DArray)
+0:?     'g_tTex2di4a' (layout( r8ui) uniform iimage2DArray)
+0:?     'g_tTex2du4a' (layout( rgba16f) uniform uimage2DArray)
+0:?     'g_tTex01' (layout( rgba8) uniform iimage2DArray)
+0:?     'g_tTex02' (layout( rg16f) uniform iimage2DArray)
+0:?     'g_tTex03' (layout( r16f) uniform iimage2DArray)
+0:?     'g_tTex04' (layout( rgb10_a2) uniform iimage2DArray)
+0:?     'g_tTex05' (layout( rg16) uniform iimage2DArray)
+0:?     'g_tTex06' (layout( r32f) uniform iimage2DArray)
+0:?     'g_tTex07' (layout( rgba16) uniform iimage2DArray)
+0:?     'g_tTex08' (layout( r16) uniform iimage2DArray)
+0:?     'g_tTex09' (layout( r8) uniform iimage2DArray)
+0:?     'g_tTex10' (layout( rgba16_snorm) uniform iimage2DArray)
+0:?     'g_tTex11' (layout( rg16_snorm) uniform iimage2DArray)
+0:?     'g_tTex12' (layout( r16_snorm) uniform iimage2DArray)
+0:?     'g_tTex13' (layout( r8_snorm) uniform iimage2DArray)
+0:?     'g_tTex14' (layout( rgba32i) uniform iimage2DArray)
+0:?     'g_tTex15' (layout( r32i) uniform iimage2DArray)
+0:?     'g_tTex16' (layout( r32ui) uniform iimage2DArray)
+0:?     'g_tTex17' (layout( rg16i) uniform iimage2DArray)
+0:?     'g_tTex18' (layout( r16i) uniform iimage2DArray)
+0:?     'g_tTex19' (layout( rg32i) uniform iimage2DArray)
+0:?     'g_tTex20' (layout( rg8i) uniform iimage2DArray)
+0:?     'g_tTex21' (layout( rg8ui) uniform iimage2DArray)
+0:?     'g_tTex22' (layout( rgba32ui) uniform iimage2DArray)
+0:?     'g_tTex23' (layout( rgba16ui) uniform iimage2DArray)
+0:?     'g_tTex24' (layout( rg32ui) uniform iimage2DArray)
+0:?     'g_tTex25' (layout( rg16ui) uniform iimage2DArray)
+0:?     '@entryPointOutput.Depth' ( out float FragDepth)
+0:?     '@entryPointOutput.Color' (layout( location=0) out 4-component vector of float)
+
+
+Linked fragment stage:
+
+
+Shader version: 500
+gl_FragCoord origin is upper left
+using depth_any
+0:? Sequence
+0:56  Function Definition: @main( ( temp structure{ temp 4-component vector of float Color,  temp float Depth})
+0:56    Function Parameters: 
+0:?     Sequence
+0:59      move second child to first child ( temp 4-component vector of float)
+0:59        Color: direct index for structure ( temp 4-component vector of float)
+0:59          'psout' ( temp structure{ temp 4-component vector of float Color,  temp float Depth})
+0:59          Constant:
+0:59            0 (const int)
+0:59        Constant:
+0:59          1.000000
+0:59          1.000000
+0:59          1.000000
+0:59          1.000000
+0:60      move second child to first child ( temp float)
+0:60        Depth: direct index for structure ( temp float)
+0:60          'psout' ( temp structure{ temp 4-component vector of float Color,  temp float Depth})
+0:60          Constant:
+0:60            1 (const int)
+0:60        Constant:
+0:60          1.000000
+0:62      Branch: Return with expression
+0:62        'psout' ( temp structure{ temp 4-component vector of float Color,  temp float Depth})
+0:56  Function Definition: main( ( temp void)
+0:56    Function Parameters: 
+0:?     Sequence
+0:56      Sequence
+0:56        move second child to first child ( temp structure{ temp 4-component vector of float Color,  temp float Depth})
+0:56          'flattenTemp' ( temp structure{ temp 4-component vector of float Color,  temp float Depth})
+0:56          Function Call: @main( ( temp structure{ temp 4-component vector of float Color,  temp float Depth})
+0:56        move second child to first child ( temp 4-component vector of float)
+0:?           '@entryPointOutput.Color' (layout( location=0) out 4-component vector of float)
+0:56          Color: direct index for structure ( temp 4-component vector of float)
+0:56            'flattenTemp' ( temp structure{ temp 4-component vector of float Color,  temp float Depth})
+0:56            Constant:
+0:56              0 (const int)
+0:56        move second child to first child ( temp float)
+0:?           '@entryPointOutput.Depth' ( out float FragDepth)
+0:56          Depth: direct index for structure ( temp float)
+0:56            'flattenTemp' ( temp structure{ temp 4-component vector of float Color,  temp float Depth})
+0:56            Constant:
+0:56              1 (const int)
+0:?   Linker Objects
+0:?     'g_sSamp' (layout( binding=0) uniform sampler)
+0:?     'g_tTex1df4' (layout( binding=0 rgba32f) uniform image1D)
+0:?     'g_tTex1di4' (layout( rg32f) uniform iimage1D)
+0:?     'g_tTex1du4' (layout( rgba8_snorm) uniform uimage1D)
+0:?     'g_tTex2df4' (layout( rgba8i) uniform image2D)
+0:?     'g_tTex2di4' (layout( r11f_g11f_b10f) uniform iimage2D)
+0:?     'g_tTex2du4' (layout( r8_snorm) uniform uimage2D)
+0:?     'g_tTex3df4' (layout( rg8) readonly uniform image3D)
+0:?     'g_tTex3di4' (layout( rgba16i) writeonly uniform iimage3D)
+0:?     'g_tTex3du4' (layout( r8i) readonly writeonly uniform uimage3D)
+0:?     'g_tTex1df4a' (layout( rgba8ui) uniform image1DArray)
+0:?     'g_tTex1di4a' (layout( rg32ui) uniform iimage1DArray)
+0:?     'g_tTex1du4a' (layout( r16ui) uniform uimage1DArray)
+0:?     'g_tTex2df4a' (layout( rgb10_a2ui) uniform image2DArray)
+0:?     'g_tTex2di4a' (layout( r8ui) uniform iimage2DArray)
+0:?     'g_tTex2du4a' (layout( rgba16f) uniform uimage2DArray)
+0:?     'g_tTex01' (layout( rgba8) uniform iimage2DArray)
+0:?     'g_tTex02' (layout( rg16f) uniform iimage2DArray)
+0:?     'g_tTex03' (layout( r16f) uniform iimage2DArray)
+0:?     'g_tTex04' (layout( rgb10_a2) uniform iimage2DArray)
+0:?     'g_tTex05' (layout( rg16) uniform iimage2DArray)
+0:?     'g_tTex06' (layout( r32f) uniform iimage2DArray)
+0:?     'g_tTex07' (layout( rgba16) uniform iimage2DArray)
+0:?     'g_tTex08' (layout( r16) uniform iimage2DArray)
+0:?     'g_tTex09' (layout( r8) uniform iimage2DArray)
+0:?     'g_tTex10' (layout( rgba16_snorm) uniform iimage2DArray)
+0:?     'g_tTex11' (layout( rg16_snorm) uniform iimage2DArray)
+0:?     'g_tTex12' (layout( r16_snorm) uniform iimage2DArray)
+0:?     'g_tTex13' (layout( r8_snorm) uniform iimage2DArray)
+0:?     'g_tTex14' (layout( rgba32i) uniform iimage2DArray)
+0:?     'g_tTex15' (layout( r32i) uniform iimage2DArray)
+0:?     'g_tTex16' (layout( r32ui) uniform iimage2DArray)
+0:?     'g_tTex17' (layout( rg16i) uniform iimage2DArray)
+0:?     'g_tTex18' (layout( r16i) uniform iimage2DArray)
+0:?     'g_tTex19' (layout( rg32i) uniform iimage2DArray)
+0:?     'g_tTex20' (layout( rg8i) uniform iimage2DArray)
+0:?     'g_tTex21' (layout( rg8ui) uniform iimage2DArray)
+0:?     'g_tTex22' (layout( rgba32ui) uniform iimage2DArray)
+0:?     'g_tTex23' (layout( rgba16ui) uniform iimage2DArray)
+0:?     'g_tTex24' (layout( rg32ui) uniform iimage2DArray)
+0:?     'g_tTex25' (layout( rg16ui) uniform iimage2DArray)
+0:?     '@entryPointOutput.Depth' ( out float FragDepth)
+0:?     '@entryPointOutput.Color' (layout( location=0) out 4-component vector of float)
+
+// Module Version 10000
+// Generated by (magic number): 80007
+// Id's are bound by 160
+
+                              Capability Shader
+                              Capability Image1D
+                              Capability StorageImageExtendedFormats
+               1:             ExtInstImport  "GLSL.std.450"
+                              MemoryModel Logical GLSL450
+                              EntryPoint Fragment 4  "main" 29 33
+                              ExecutionMode 4 OriginUpperLeft
+                              ExecutionMode 4 DepthReplacing
+                              Source HLSL 500
+                              Name 4  "main"
+                              Name 8  "PS_OUTPUT"
+                              MemberName 8(PS_OUTPUT) 0  "Color"
+                              MemberName 8(PS_OUTPUT) 1  "Depth"
+                              Name 10  "@main("
+                              Name 13  "psout"
+                              Name 26  "flattenTemp"
+                              Name 29  "@entryPointOutput.Color"
+                              Name 33  "@entryPointOutput.Depth"
+                              Name 38  "g_sSamp"
+                              Name 41  "g_tTex1df4"
+                              Name 44  "g_tTex1di4"
+                              Name 48  "g_tTex1du4"
+                              Name 51  "g_tTex2df4"
+                              Name 54  "g_tTex2di4"
+                              Name 57  "g_tTex2du4"
+                              Name 60  "g_tTex3df4"
+                              Name 63  "g_tTex3di4"
+                              Name 66  "g_tTex3du4"
+                              Name 69  "g_tTex1df4a"
+                              Name 72  "g_tTex1di4a"
+                              Name 75  "g_tTex1du4a"
+                              Name 78  "g_tTex2df4a"
+                              Name 81  "g_tTex2di4a"
+                              Name 84  "g_tTex2du4a"
+                              Name 87  "g_tTex01"
+                              Name 90  "g_tTex02"
+                              Name 93  "g_tTex03"
+                              Name 96  "g_tTex04"
+                              Name 99  "g_tTex05"
+                              Name 102  "g_tTex06"
+                              Name 105  "g_tTex07"
+                              Name 108  "g_tTex08"
+                              Name 111  "g_tTex09"
+                              Name 114  "g_tTex10"
+                              Name 117  "g_tTex11"
+                              Name 120  "g_tTex12"
+                              Name 123  "g_tTex13"
+                              Name 126  "g_tTex14"
+                              Name 129  "g_tTex15"
+                              Name 132  "g_tTex16"
+                              Name 135  "g_tTex17"
+                              Name 138  "g_tTex18"
+                              Name 141  "g_tTex19"
+                              Name 144  "g_tTex20"
+                              Name 147  "g_tTex21"
+                              Name 150  "g_tTex22"
+                              Name 153  "g_tTex23"
+                              Name 156  "g_tTex24"
+                              Name 159  "g_tTex25"
+                              Decorate 29(@entryPointOutput.Color) Location 0
+                              Decorate 33(@entryPointOutput.Depth) BuiltIn FragDepth
+                              Decorate 38(g_sSamp) DescriptorSet 0
+                              Decorate 38(g_sSamp) Binding 0
+                              Decorate 41(g_tTex1df4) DescriptorSet 0
+                              Decorate 41(g_tTex1df4) Binding 0
+                              Decorate 44(g_tTex1di4) DescriptorSet 0
+                              Decorate 44(g_tTex1di4) Binding 0
+                              Decorate 48(g_tTex1du4) DescriptorSet 0
+                              Decorate 48(g_tTex1du4) Binding 0
+                              Decorate 51(g_tTex2df4) DescriptorSet 0
+                              Decorate 51(g_tTex2df4) Binding 0
+                              Decorate 54(g_tTex2di4) DescriptorSet 0
+                              Decorate 54(g_tTex2di4) Binding 0
+                              Decorate 57(g_tTex2du4) DescriptorSet 0
+                              Decorate 57(g_tTex2du4) Binding 0
+                              Decorate 60(g_tTex3df4) DescriptorSet 0
+                              Decorate 60(g_tTex3df4) Binding 0
+                              Decorate 60(g_tTex3df4) NonWritable
+                              Decorate 63(g_tTex3di4) DescriptorSet 0
+                              Decorate 63(g_tTex3di4) Binding 0
+                              Decorate 63(g_tTex3di4) NonReadable
+                              Decorate 66(g_tTex3du4) DescriptorSet 0
+                              Decorate 66(g_tTex3du4) Binding 0
+                              Decorate 66(g_tTex3du4) NonWritable
+                              Decorate 66(g_tTex3du4) NonReadable
+                              Decorate 69(g_tTex1df4a) DescriptorSet 0
+                              Decorate 69(g_tTex1df4a) Binding 0
+                              Decorate 72(g_tTex1di4a) DescriptorSet 0
+                              Decorate 72(g_tTex1di4a) Binding 0
+                              Decorate 75(g_tTex1du4a) DescriptorSet 0
+                              Decorate 75(g_tTex1du4a) Binding 0
+                              Decorate 78(g_tTex2df4a) DescriptorSet 0
+                              Decorate 78(g_tTex2df4a) Binding 0
+                              Decorate 81(g_tTex2di4a) DescriptorSet 0
+                              Decorate 81(g_tTex2di4a) Binding 0
+                              Decorate 84(g_tTex2du4a) DescriptorSet 0
+                              Decorate 84(g_tTex2du4a) Binding 0
+                              Decorate 87(g_tTex01) DescriptorSet 0
+                              Decorate 87(g_tTex01) Binding 0
+                              Decorate 90(g_tTex02) DescriptorSet 0
+                              Decorate 90(g_tTex02) Binding 0
+                              Decorate 93(g_tTex03) DescriptorSet 0
+                              Decorate 93(g_tTex03) Binding 0
+                              Decorate 96(g_tTex04) DescriptorSet 0
+                              Decorate 96(g_tTex04) Binding 0
+                              Decorate 99(g_tTex05) DescriptorSet 0
+                              Decorate 99(g_tTex05) Binding 0
+                              Decorate 102(g_tTex06) DescriptorSet 0
+                              Decorate 102(g_tTex06) Binding 0
+                              Decorate 105(g_tTex07) DescriptorSet 0
+                              Decorate 105(g_tTex07) Binding 0
+                              Decorate 108(g_tTex08) DescriptorSet 0
+                              Decorate 108(g_tTex08) Binding 0
+                              Decorate 111(g_tTex09) DescriptorSet 0
+                              Decorate 111(g_tTex09) Binding 0
+                              Decorate 114(g_tTex10) DescriptorSet 0
+                              Decorate 114(g_tTex10) Binding 0
+                              Decorate 117(g_tTex11) DescriptorSet 0
+                              Decorate 117(g_tTex11) Binding 0
+                              Decorate 120(g_tTex12) DescriptorSet 0
+                              Decorate 120(g_tTex12) Binding 0
+                              Decorate 123(g_tTex13) DescriptorSet 0
+                              Decorate 123(g_tTex13) Binding 0
+                              Decorate 126(g_tTex14) DescriptorSet 0
+                              Decorate 126(g_tTex14) Binding 0
+                              Decorate 129(g_tTex15) DescriptorSet 0
+                              Decorate 129(g_tTex15) Binding 0
+                              Decorate 132(g_tTex16) DescriptorSet 0
+                              Decorate 132(g_tTex16) Binding 0
+                              Decorate 135(g_tTex17) DescriptorSet 0
+                              Decorate 135(g_tTex17) Binding 0
+                              Decorate 138(g_tTex18) DescriptorSet 0
+                              Decorate 138(g_tTex18) Binding 0
+                              Decorate 141(g_tTex19) DescriptorSet 0
+                              Decorate 141(g_tTex19) Binding 0
+                              Decorate 144(g_tTex20) DescriptorSet 0
+                              Decorate 144(g_tTex20) Binding 0
+                              Decorate 147(g_tTex21) DescriptorSet 0
+                              Decorate 147(g_tTex21) Binding 0
+                              Decorate 150(g_tTex22) DescriptorSet 0
+                              Decorate 150(g_tTex22) Binding 0
+                              Decorate 153(g_tTex23) DescriptorSet 0
+                              Decorate 153(g_tTex23) Binding 0
+                              Decorate 156(g_tTex24) DescriptorSet 0
+                              Decorate 156(g_tTex24) Binding 0
+                              Decorate 159(g_tTex25) DescriptorSet 0
+                              Decorate 159(g_tTex25) Binding 0
+               2:             TypeVoid
+               3:             TypeFunction 2
+               6:             TypeFloat 32
+               7:             TypeVector 6(float) 4
+    8(PS_OUTPUT):             TypeStruct 7(fvec4) 6(float)
+               9:             TypeFunction 8(PS_OUTPUT)
+              12:             TypePointer Function 8(PS_OUTPUT)
+              14:             TypeInt 32 1
+              15:     14(int) Constant 0
+              16:    6(float) Constant 1065353216
+              17:    7(fvec4) ConstantComposite 16 16 16 16
+              18:             TypePointer Function 7(fvec4)
+              20:     14(int) Constant 1
+              21:             TypePointer Function 6(float)
+              28:             TypePointer Output 7(fvec4)
+29(@entryPointOutput.Color):     28(ptr) Variable Output
+              32:             TypePointer Output 6(float)
+33(@entryPointOutput.Depth):     32(ptr) Variable Output
+              36:             TypeSampler
+              37:             TypePointer UniformConstant 36
+     38(g_sSamp):     37(ptr) Variable UniformConstant
+              39:             TypeImage 6(float) 1D nonsampled format:Rgba32f
+              40:             TypePointer UniformConstant 39
+  41(g_tTex1df4):     40(ptr) Variable UniformConstant
+              42:             TypeImage 14(int) 1D nonsampled format:Rg32f
+              43:             TypePointer UniformConstant 42
+  44(g_tTex1di4):     43(ptr) Variable UniformConstant
+              45:             TypeInt 32 0
+              46:             TypeImage 45(int) 1D nonsampled format:Rgba8Snorm
+              47:             TypePointer UniformConstant 46
+  48(g_tTex1du4):     47(ptr) Variable UniformConstant
+              49:             TypeImage 6(float) 2D nonsampled format:Rgba8i
+              50:             TypePointer UniformConstant 49
+  51(g_tTex2df4):     50(ptr) Variable UniformConstant
+              52:             TypeImage 14(int) 2D nonsampled format:R11fG11fB10f
+              53:             TypePointer UniformConstant 52
+  54(g_tTex2di4):     53(ptr) Variable UniformConstant
+              55:             TypeImage 45(int) 2D nonsampled format:R8Snorm
+              56:             TypePointer UniformConstant 55
+  57(g_tTex2du4):     56(ptr) Variable UniformConstant
+              58:             TypeImage 6(float) 3D nonsampled format:Rg8
+              59:             TypePointer UniformConstant 58
+  60(g_tTex3df4):     59(ptr) Variable UniformConstant
+              61:             TypeImage 14(int) 3D nonsampled format:Rgba16i
+              62:             TypePointer UniformConstant 61
+  63(g_tTex3di4):     62(ptr) Variable UniformConstant
+              64:             TypeImage 45(int) 3D nonsampled format:R8i
+              65:             TypePointer UniformConstant 64
+  66(g_tTex3du4):     65(ptr) Variable UniformConstant
+              67:             TypeImage 6(float) 1D array nonsampled format:Rgba8ui
+              68:             TypePointer UniformConstant 67
+ 69(g_tTex1df4a):     68(ptr) Variable UniformConstant
+              70:             TypeImage 14(int) 1D array nonsampled format:Rg32ui
+              71:             TypePointer UniformConstant 70
+ 72(g_tTex1di4a):     71(ptr) Variable UniformConstant
+              73:             TypeImage 45(int) 1D array nonsampled format:R16ui
+              74:             TypePointer UniformConstant 73
+ 75(g_tTex1du4a):     74(ptr) Variable UniformConstant
+              76:             TypeImage 6(float) 2D array nonsampled format:Rgb10a2ui
+              77:             TypePointer UniformConstant 76
+ 78(g_tTex2df4a):     77(ptr) Variable UniformConstant
+              79:             TypeImage 14(int) 2D array nonsampled format:R8ui
+              80:             TypePointer UniformConstant 79
+ 81(g_tTex2di4a):     80(ptr) Variable UniformConstant
+              82:             TypeImage 45(int) 2D array nonsampled format:Rgba16f
+              83:             TypePointer UniformConstant 82
+ 84(g_tTex2du4a):     83(ptr) Variable UniformConstant
+              85:             TypeImage 14(int) 2D array nonsampled format:Rgba8
+              86:             TypePointer UniformConstant 85
+    87(g_tTex01):     86(ptr) Variable UniformConstant
+              88:             TypeImage 14(int) 2D array nonsampled format:Rg16f
+              89:             TypePointer UniformConstant 88
+    90(g_tTex02):     89(ptr) Variable UniformConstant
+              91:             TypeImage 14(int) 2D array nonsampled format:R16f
+              92:             TypePointer UniformConstant 91
+    93(g_tTex03):     92(ptr) Variable UniformConstant
+              94:             TypeImage 14(int) 2D array nonsampled format:Rgb10A2
+              95:             TypePointer UniformConstant 94
+    96(g_tTex04):     95(ptr) Variable UniformConstant
+              97:             TypeImage 14(int) 2D array nonsampled format:Rg16
+              98:             TypePointer UniformConstant 97
+    99(g_tTex05):     98(ptr) Variable UniformConstant
+             100:             TypeImage 14(int) 2D array nonsampled format:R32f
+             101:             TypePointer UniformConstant 100
+   102(g_tTex06):    101(ptr) Variable UniformConstant
+             103:             TypeImage 14(int) 2D array nonsampled format:Rgba16
+             104:             TypePointer UniformConstant 103
+   105(g_tTex07):    104(ptr) Variable UniformConstant
+             106:             TypeImage 14(int) 2D array nonsampled format:R16
+             107:             TypePointer UniformConstant 106
+   108(g_tTex08):    107(ptr) Variable UniformConstant
+             109:             TypeImage 14(int) 2D array nonsampled format:R8
+             110:             TypePointer UniformConstant 109
+   111(g_tTex09):    110(ptr) Variable UniformConstant
+             112:             TypeImage 14(int) 2D array nonsampled format:Rgba16Snorm
+             113:             TypePointer UniformConstant 112
+   114(g_tTex10):    113(ptr) Variable UniformConstant
+             115:             TypeImage 14(int) 2D array nonsampled format:Rg16Snorm
+             116:             TypePointer UniformConstant 115
+   117(g_tTex11):    116(ptr) Variable UniformConstant
+             118:             TypeImage 14(int) 2D array nonsampled format:R16Snorm
+             119:             TypePointer UniformConstant 118
+   120(g_tTex12):    119(ptr) Variable UniformConstant
+             121:             TypeImage 14(int) 2D array nonsampled format:R8Snorm
+             122:             TypePointer UniformConstant 121
+   123(g_tTex13):    122(ptr) Variable UniformConstant
+             124:             TypeImage 14(int) 2D array nonsampled format:Rgba32i
+             125:             TypePointer UniformConstant 124
+   126(g_tTex14):    125(ptr) Variable UniformConstant
+             127:             TypeImage 14(int) 2D array nonsampled format:R32i
+             128:             TypePointer UniformConstant 127
+   129(g_tTex15):    128(ptr) Variable UniformConstant
+             130:             TypeImage 14(int) 2D array nonsampled format:R32ui
+             131:             TypePointer UniformConstant 130
+   132(g_tTex16):    131(ptr) Variable UniformConstant
+             133:             TypeImage 14(int) 2D array nonsampled format:Rg16i
+             134:             TypePointer UniformConstant 133
+   135(g_tTex17):    134(ptr) Variable UniformConstant
+             136:             TypeImage 14(int) 2D array nonsampled format:R16i
+             137:             TypePointer UniformConstant 136
+   138(g_tTex18):    137(ptr) Variable UniformConstant
+             139:             TypeImage 14(int) 2D array nonsampled format:Rg32i
+             140:             TypePointer UniformConstant 139
+   141(g_tTex19):    140(ptr) Variable UniformConstant
+             142:             TypeImage 14(int) 2D array nonsampled format:Rg8i
+             143:             TypePointer UniformConstant 142
+   144(g_tTex20):    143(ptr) Variable UniformConstant
+             145:             TypeImage 14(int) 2D array nonsampled format:Rg8ui
+             146:             TypePointer UniformConstant 145
+   147(g_tTex21):    146(ptr) Variable UniformConstant
+             148:             TypeImage 14(int) 2D array nonsampled format:Rgba32ui
+             149:             TypePointer UniformConstant 148
+   150(g_tTex22):    149(ptr) Variable UniformConstant
+             151:             TypeImage 14(int) 2D array nonsampled format:Rgba16ui
+             152:             TypePointer UniformConstant 151
+   153(g_tTex23):    152(ptr) Variable UniformConstant
+             154:             TypeImage 14(int) 2D array nonsampled format:Rg32ui
+             155:             TypePointer UniformConstant 154
+   156(g_tTex24):    155(ptr) Variable UniformConstant
+             157:             TypeImage 14(int) 2D array nonsampled format:Rg16ui
+             158:             TypePointer UniformConstant 157
+   159(g_tTex25):    158(ptr) Variable UniformConstant
+         4(main):           2 Function None 3
+               5:             Label
+ 26(flattenTemp):     12(ptr) Variable Function
+              27:8(PS_OUTPUT) FunctionCall 10(@main()
+                              Store 26(flattenTemp) 27
+              30:     18(ptr) AccessChain 26(flattenTemp) 15
+              31:    7(fvec4) Load 30
+                              Store 29(@entryPointOutput.Color) 31
+              34:     21(ptr) AccessChain 26(flattenTemp) 20
+              35:    6(float) Load 34
+                              Store 33(@entryPointOutput.Depth) 35
+                              Return
+                              FunctionEnd
+      10(@main():8(PS_OUTPUT) Function None 9
+              11:             Label
+       13(psout):     12(ptr) Variable Function
+              19:     18(ptr) AccessChain 13(psout) 15
+                              Store 19 17
+              22:     21(ptr) AccessChain 13(psout) 20
+                              Store 22 16
+              23:8(PS_OUTPUT) Load 13(psout)
+                              ReturnValue 23
+                              FunctionEnd
diff --git a/Test/baseResults/nonuniform.frag.out b/Test/baseResults/nonuniform.frag.out
index 0df8cfc..9054c2c 100644
--- a/Test/baseResults/nonuniform.frag.out
+++ b/Test/baseResults/nonuniform.frag.out
@@ -40,6 +40,13 @@
 0:27                2 (const int)
 0:28      'nu_li' ( nonuniform temp int)
 0:29      'nu_li' ( nonuniform temp int)
+0:30      move second child to first child ( temp int)
+0:30        'nu_li' ( nonuniform temp int)
+0:30        indirect index ( nonuniform temp int)
+0:30          'table' ( temp 5-element array of int)
+0:30          copy object ( nonuniform temp int)
+0:30            Constant:
+0:30              3 (const int)
 0:?   Linker Objects
 0:?     'nonuniformEXT' ( global int)
 0:?     'nu_inv4' ( smooth nonuniform in 4-component vector of float)
@@ -83,6 +90,13 @@
 0:27                2 (const int)
 0:28      'nu_li' ( nonuniform temp int)
 0:29      'nu_li' ( nonuniform temp int)
+0:30      move second child to first child ( temp int)
+0:30        'nu_li' ( nonuniform temp int)
+0:30        indirect index ( nonuniform temp int)
+0:30          'table' ( temp 5-element array of int)
+0:30          copy object ( nonuniform temp int)
+0:30            Constant:
+0:30              3 (const int)
 0:?   Linker Objects
 0:?     'nonuniformEXT' ( global int)
 0:?     'nu_inv4' ( smooth nonuniform in 4-component vector of float)
diff --git a/Test/baseResults/spv.bufferhandleUvec2.frag.out b/Test/baseResults/spv.bufferhandleUvec2.frag.out
new file mode 100755
index 0000000..1e0cbcd
--- /dev/null
+++ b/Test/baseResults/spv.bufferhandleUvec2.frag.out
@@ -0,0 +1,133 @@
+spv.bufferhandleUvec2.frag
+// Module Version 10000
+// Generated by (magic number): 80007
+// Id's are bound by 71
+
+                              Capability Shader
+                              Capability PhysicalStorageBufferAddressesEXT
+                              Extension  "SPV_KHR_physical_storage_buffer"
+                              Extension  "SPV_KHR_storage_buffer_storage_class"
+               1:             ExtInstImport  "GLSL.std.450"
+                              MemoryModel PhysicalStorageBuffer64EXT GLSL450
+                              EntryPoint Fragment 4  "main" 16 19
+                              ExecutionMode 4 OriginUpperLeft
+                              Source GLSL 450
+                              SourceExtension  "GL_EXT_buffer_reference"
+                              SourceExtension  "GL_EXT_buffer_reference_uvec2"
+                              Name 4  "main"
+                              Name 8  "blockType"
+                              MemberName 8(blockType) 0  "a"
+                              MemberName 8(blockType) 1  "b"
+                              MemberName 8(blockType) 2  "c"
+                              MemberName 8(blockType) 3  "d"
+                              MemberName 8(blockType) 4  "e"
+                              Name 13  "b1"
+                              Name 16  "h"
+                              Name 19  "i"
+                              Name 34  "b2"
+                              Name 37  "b3"
+                              Name 46  "j"
+                              Name 54  "carry"
+                              Name 55  "ResType"
+                              Name 68  "t2"
+                              MemberName 68(t2) 0  "f"
+                              MemberName 68(t2) 1  "g"
+                              Name 70  "t"
+                              MemberDecorate 8(blockType) 0 Offset 0
+                              MemberDecorate 8(blockType) 1 Offset 4
+                              MemberDecorate 8(blockType) 2 Offset 8
+                              MemberDecorate 8(blockType) 3 Offset 12
+                              MemberDecorate 8(blockType) 4 Offset 16
+                              Decorate 8(blockType) Block
+                              Decorate 13(b1) DecorationAliasedPointerEXT
+                              Decorate 16(h) Flat
+                              Decorate 19(i) Flat
+                              Decorate 34(b2) DecorationAliasedPointerEXT
+                              Decorate 37(b3) DecorationAliasedPointerEXT
+                              MemberDecorate 68(t2) 0 Offset 0
+                              MemberDecorate 68(t2) 1 Offset 8
+                              Decorate 68(t2) Block
+                              Decorate 70(t) DescriptorSet 0
+                              Decorate 70(t) Binding 0
+               2:             TypeVoid
+               3:             TypeFunction 2
+                              TypeForwardPointer 6 PhysicalStorageBufferEXT
+               7:             TypeInt 32 1
+    8(blockType):             TypeStruct 7(int) 7(int) 7(int) 7(int) 7(int)
+               6:             TypePointer PhysicalStorageBufferEXT 8(blockType)
+               9:             TypeInt 32 0
+              10:      9(int) Constant 2
+              11:             TypeArray 6(ptr) 10
+              12:             TypePointer Function 11
+              14:             TypeVector 9(int) 2
+              15:             TypePointer Input 14(ivec2)
+           16(h):     15(ptr) Variable Input
+           19(i):     15(ptr) Variable Input
+              23:      7(int) Constant 0
+              24:             TypePointer Function 6(ptr)
+              27:      7(int) Constant 1
+              30:             TypePointer PhysicalStorageBufferEXT 7(int)
+              45:             TypePointer Function 14(ivec2)
+              49:      9(int) Constant 0
+              50:             TypePointer Function 9(int)
+              53:      9(int) Constant 256
+     55(ResType):             TypeStruct 9(int) 9(int)
+              61:      9(int) Constant 1
+          68(t2):             TypeStruct 6(ptr) 6(ptr)
+              69:             TypePointer StorageBuffer 68(t2)
+           70(t):     69(ptr) Variable StorageBuffer
+         4(main):           2 Function None 3
+               5:             Label
+          13(b1):     12(ptr) Variable Function
+          34(b2):     24(ptr) Variable Function
+          37(b3):     24(ptr) Variable Function
+           46(j):     45(ptr) Variable Function
+       54(carry):     50(ptr) Variable Function
+              17:   14(ivec2) Load 16(h)
+              18:      6(ptr) Bitcast 17
+              20:   14(ivec2) Load 19(i)
+              21:      6(ptr) Bitcast 20
+              22:          11 CompositeConstruct 18 21
+                              Store 13(b1) 22
+              25:     24(ptr) AccessChain 13(b1) 23
+              26:      6(ptr) Load 25
+              28:     24(ptr) AccessChain 13(b1) 27
+              29:      6(ptr) Load 28
+              31:     30(ptr) AccessChain 29 27
+              32:      7(int) Load 31 Aligned 4
+              33:     30(ptr) AccessChain 26 23
+                              Store 33 32 Aligned 16
+              35:   14(ivec2) Load 16(h)
+              36:      6(ptr) Bitcast 35
+                              Store 34(b2) 36
+              38:   14(ivec2) Load 19(i)
+              39:      6(ptr) Bitcast 38
+                              Store 37(b3) 39
+              40:      6(ptr) Load 34(b2)
+              41:      6(ptr) Load 37(b3)
+              42:     30(ptr) AccessChain 41 27
+              43:      7(int) Load 42 Aligned 4
+              44:     30(ptr) AccessChain 40 23
+                              Store 44 43 Aligned 16
+              47:      6(ptr) Load 34(b2)
+              48:   14(ivec2) Bitcast 47
+                              Store 46(j) 48
+              51:     50(ptr) AccessChain 46(j) 49
+              52:      9(int) Load 51
+              56: 55(ResType) IAddCarry 52 53
+              57:      9(int) CompositeExtract 56 1
+                              Store 54(carry) 57
+              58:      9(int) CompositeExtract 56 0
+              59:     50(ptr) AccessChain 46(j) 49
+                              Store 59 58
+              60:      9(int) Load 54(carry)
+              62:     50(ptr) AccessChain 46(j) 61
+              63:      9(int) Load 62
+              64:      9(int) IAdd 63 60
+              65:     50(ptr) AccessChain 46(j) 61
+                              Store 65 64
+              66:   14(ivec2) Load 46(j)
+              67:      6(ptr) Bitcast 66
+                              Store 34(b2) 67
+                              Return
+                              FunctionEnd
diff --git a/Test/baseResults/spv.specConstant.vert.out b/Test/baseResults/spv.specConstant.vert.out
index 931ba8d..49bcca6 100644
--- a/Test/baseResults/spv.specConstant.vert.out
+++ b/Test/baseResults/spv.specConstant.vert.out
@@ -11,7 +11,7 @@
                               Source GLSL 400
                               Name 4  "main"
                               Name 9  "arraySize"
-                              Name 14  "foo(vf4[s2769];"
+                              Name 14  "foo(vf4[s4393];"
                               Name 13  "p"
                               Name 17  "builtin_spec_constant("
                               Name 20  "color"
@@ -102,10 +102,10 @@
                               Store 20(color) 46
               48:          10 Load 22(ucol)
                               Store 47(param) 48
-              49:           2 FunctionCall 14(foo(vf4[s2769];) 47(param)
+              49:           2 FunctionCall 14(foo(vf4[s4393];) 47(param)
                               Return
                               FunctionEnd
-14(foo(vf4[s2769];):           2 Function None 12
+14(foo(vf4[s4393];):           2 Function None 12
            13(p):     11(ptr) FunctionParameter
               15:             Label
               54:     24(ptr) AccessChain 53(dupUcol) 23
diff --git a/Test/baseResults/spv.subgroupExtendedTypesArithmetic.comp.out b/Test/baseResults/spv.subgroupExtendedTypesArithmetic.comp.out
new file mode 100644
index 0000000..453d6fc
--- /dev/null
+++ b/Test/baseResults/spv.subgroupExtendedTypesArithmetic.comp.out
@@ -0,0 +1,4280 @@
+spv.subgroupExtendedTypesArithmetic.comp
+// Module Version 10300
+// Generated by (magic number): 80007
+// Id's are bound by 3665
+
+                              Capability Shader
+                              Capability Float16
+                              Capability Int64
+                              Capability Int16
+                              Capability Int8
+                              Capability GroupNonUniform
+                              Capability GroupNonUniformArithmetic
+                              Capability StorageUniformBufferBlock16
+                              Capability StorageBuffer8BitAccess
+                              Extension  "SPV_KHR_8bit_storage"
+               1:             ExtInstImport  "GLSL.std.450"
+                              MemoryModel Logical GLSL450
+                              EntryPoint GLCompute 4  "main" 10 12
+                              ExecutionMode 4 LocalSize 8 1 1
+                              Source GLSL 450
+                              SourceExtension  "GL_EXT_shader_explicit_arithmetic_types_float16"
+                              SourceExtension  "GL_EXT_shader_explicit_arithmetic_types_int16"
+                              SourceExtension  "GL_EXT_shader_explicit_arithmetic_types_int64"
+                              SourceExtension  "GL_EXT_shader_explicit_arithmetic_types_int8"
+                              SourceExtension  "GL_EXT_shader_subgroup_extended_types_float16"
+                              SourceExtension  "GL_EXT_shader_subgroup_extended_types_int16"
+                              SourceExtension  "GL_EXT_shader_subgroup_extended_types_int64"
+                              SourceExtension  "GL_EXT_shader_subgroup_extended_types_int8"
+                              SourceExtension  "GL_KHR_shader_subgroup_arithmetic"
+                              SourceExtension  "GL_KHR_shader_subgroup_basic"
+                              Name 4  "main"
+                              Name 8  "invocation"
+                              Name 10  "gl_SubgroupInvocationID"
+                              Name 12  "gl_SubgroupSize"
+                              Name 31  "Buffers"
+                              MemberName 31(Buffers) 0  "i8"
+                              MemberName 31(Buffers) 1  "u8"
+                              MemberName 31(Buffers) 2  "i16"
+                              MemberName 31(Buffers) 3  "u16"
+                              MemberName 31(Buffers) 4  "i64"
+                              MemberName 31(Buffers) 5  "u64"
+                              MemberName 31(Buffers) 6  "f16"
+                              Name 34  "data"
+                              Decorate 10(gl_SubgroupInvocationID) RelaxedPrecision
+                              Decorate 10(gl_SubgroupInvocationID) BuiltIn SubgroupLocalInvocationId
+                              Decorate 11 RelaxedPrecision
+                              Decorate 12(gl_SubgroupSize) RelaxedPrecision
+                              Decorate 12(gl_SubgroupSize) BuiltIn SubgroupSize
+                              Decorate 13 RelaxedPrecision
+                              Decorate 14 RelaxedPrecision
+                              Decorate 16 RelaxedPrecision
+                              MemberDecorate 31(Buffers) 0 Offset 0
+                              MemberDecorate 31(Buffers) 1 Offset 4
+                              MemberDecorate 31(Buffers) 2 Offset 8
+                              MemberDecorate 31(Buffers) 3 Offset 16
+                              MemberDecorate 31(Buffers) 4 Offset 32
+                              MemberDecorate 31(Buffers) 5 Offset 64
+                              MemberDecorate 31(Buffers) 6 Offset 96
+                              Decorate 31(Buffers) Block
+                              Decorate 34(data) DescriptorSet 0
+                              Decorate 34(data) Binding 0
+                              Decorate 3664 BuiltIn WorkgroupSize
+               2:             TypeVoid
+               3:             TypeFunction 2
+               6:             TypeInt 32 0
+               7:             TypePointer Function 6(int)
+               9:             TypePointer Input 6(int)
+10(gl_SubgroupInvocationID):      9(ptr) Variable Input
+12(gl_SubgroupSize):      9(ptr) Variable Input
+              15:      6(int) Constant 4
+              17:             TypeInt 8 1
+              18:             TypeVector 17(int8_t) 4
+              19:             TypeInt 8 0
+              20:             TypeVector 19(int8_t) 4
+              21:             TypeInt 16 1
+              22:             TypeVector 21(int16_t) 4
+              23:             TypeInt 16 0
+              24:             TypeVector 23(int16_t) 4
+              25:             TypeInt 64 1
+              26:             TypeVector 25(int64_t) 4
+              27:             TypeInt 64 0
+              28:             TypeVector 27(int64_t) 4
+              29:             TypeFloat 16
+              30:             TypeVector 29(float16_t) 4
+     31(Buffers):             TypeStruct 18(i8vec4) 20(i8vec4) 22(i16vec4) 24(i16vec4) 26(i64vec4) 28(i64vec4) 30(f16vec4)
+              32:             TypeArray 31(Buffers) 15
+              33:             TypePointer StorageBuffer 32
+        34(data):     33(ptr) Variable StorageBuffer
+              36:             TypeInt 32 1
+              37:     36(int) Constant 0
+              38:      6(int) Constant 0
+              39:             TypePointer StorageBuffer 17(int8_t)
+              42:      6(int) Constant 3
+              46:     36(int) Constant 1
+              47:             TypeVector 17(int8_t) 2
+              48:             TypePointer StorageBuffer 18(i8vec4)
+              57:     36(int) Constant 2
+              58:             TypeVector 17(int8_t) 3
+              67:     36(int) Constant 3
+             593:             TypePointer StorageBuffer 19(int8_t)
+             599:             TypeVector 19(int8_t) 2
+             600:             TypePointer StorageBuffer 20(i8vec4)
+             609:             TypeVector 19(int8_t) 3
+            1143:             TypePointer StorageBuffer 21(int16_t)
+            1149:             TypeVector 21(int16_t) 2
+            1150:             TypePointer StorageBuffer 22(i16vec4)
+            1159:             TypeVector 21(int16_t) 3
+            1693:             TypePointer StorageBuffer 23(int16_t)
+            1699:             TypeVector 23(int16_t) 2
+            1700:             TypePointer StorageBuffer 24(i16vec4)
+            1709:             TypeVector 23(int16_t) 3
+            2243:     36(int) Constant 4
+            2244:             TypePointer StorageBuffer 25(int64_t)
+            2250:             TypeVector 25(int64_t) 2
+            2251:             TypePointer StorageBuffer 26(i64vec4)
+            2260:             TypeVector 25(int64_t) 3
+            2794:     36(int) Constant 5
+            2795:             TypePointer StorageBuffer 27(int64_t)
+            2801:             TypeVector 27(int64_t) 2
+            2802:             TypePointer StorageBuffer 28(i64vec4)
+            2811:             TypeVector 27(int64_t) 3
+            3345:     36(int) Constant 6
+            3346:             TypePointer StorageBuffer 29(float16_t)
+            3352:             TypeVector 29(float16_t) 2
+            3353:             TypePointer StorageBuffer 30(f16vec4)
+            3362:             TypeVector 29(float16_t) 3
+            3661:             TypeVector 6(int) 3
+            3662:      6(int) Constant 8
+            3663:      6(int) Constant 1
+            3664: 3661(ivec3) ConstantComposite 3662 3663 3663
+         4(main):           2 Function None 3
+               5:             Label
+   8(invocation):      7(ptr) Variable Function
+              11:      6(int) Load 10(gl_SubgroupInvocationID)
+              13:      6(int) Load 12(gl_SubgroupSize)
+              14:      6(int) IAdd 11 13
+              16:      6(int) UMod 14 15
+                              Store 8(invocation) 16
+              35:      6(int) Load 8(invocation)
+              40:     39(ptr) AccessChain 34(data) 37 37 38
+              41:  17(int8_t) Load 40
+              43:  17(int8_t) GroupNonUniformIAdd 42 Reduce 41
+              44:     39(ptr) AccessChain 34(data) 35 37 38
+                              Store 44 43
+              45:      6(int) Load 8(invocation)
+              49:     48(ptr) AccessChain 34(data) 46 37
+              50:  18(i8vec4) Load 49
+              51:  47(i8vec2) VectorShuffle 50 50 0 1
+              52:  47(i8vec2) GroupNonUniformIAdd 42 Reduce 51
+              53:     48(ptr) AccessChain 34(data) 45 37
+              54:  18(i8vec4) Load 53
+              55:  18(i8vec4) VectorShuffle 54 52 4 5 2 3
+                              Store 53 55
+              56:      6(int) Load 8(invocation)
+              59:     48(ptr) AccessChain 34(data) 57 37
+              60:  18(i8vec4) Load 59
+              61:  58(i8vec3) VectorShuffle 60 60 0 1 2
+              62:  58(i8vec3) GroupNonUniformIAdd 42 Reduce 61
+              63:     48(ptr) AccessChain 34(data) 56 37
+              64:  18(i8vec4) Load 63
+              65:  18(i8vec4) VectorShuffle 64 62 4 5 6 3
+                              Store 63 65
+              66:      6(int) Load 8(invocation)
+              68:     48(ptr) AccessChain 34(data) 67 37
+              69:  18(i8vec4) Load 68
+              70:  18(i8vec4) GroupNonUniformIAdd 42 Reduce 69
+              71:     48(ptr) AccessChain 34(data) 66 37
+                              Store 71 70
+              72:      6(int) Load 8(invocation)
+              73:     39(ptr) AccessChain 34(data) 37 37 38
+              74:  17(int8_t) Load 73
+              75:  17(int8_t) GroupNonUniformIMul 42 Reduce 74
+              76:     39(ptr) AccessChain 34(data) 72 37 38
+                              Store 76 75
+              77:      6(int) Load 8(invocation)
+              78:     48(ptr) AccessChain 34(data) 46 37
+              79:  18(i8vec4) Load 78
+              80:  47(i8vec2) VectorShuffle 79 79 0 1
+              81:  47(i8vec2) GroupNonUniformIMul 42 Reduce 80
+              82:     48(ptr) AccessChain 34(data) 77 37
+              83:  18(i8vec4) Load 82
+              84:  18(i8vec4) VectorShuffle 83 81 4 5 2 3
+                              Store 82 84
+              85:      6(int) Load 8(invocation)
+              86:     48(ptr) AccessChain 34(data) 57 37
+              87:  18(i8vec4) Load 86
+              88:  58(i8vec3) VectorShuffle 87 87 0 1 2
+              89:  58(i8vec3) GroupNonUniformIMul 42 Reduce 88
+              90:     48(ptr) AccessChain 34(data) 85 37
+              91:  18(i8vec4) Load 90
+              92:  18(i8vec4) VectorShuffle 91 89 4 5 6 3
+                              Store 90 92
+              93:      6(int) Load 8(invocation)
+              94:     48(ptr) AccessChain 34(data) 67 37
+              95:  18(i8vec4) Load 94
+              96:  18(i8vec4) GroupNonUniformIMul 42 Reduce 95
+              97:     48(ptr) AccessChain 34(data) 93 37
+                              Store 97 96
+              98:      6(int) Load 8(invocation)
+              99:     39(ptr) AccessChain 34(data) 37 37 38
+             100:  17(int8_t) Load 99
+             101:  17(int8_t) GroupNonUniformSMin 42 Reduce 100
+             102:     39(ptr) AccessChain 34(data) 98 37 38
+                              Store 102 101
+             103:      6(int) Load 8(invocation)
+             104:     48(ptr) AccessChain 34(data) 46 37
+             105:  18(i8vec4) Load 104
+             106:  47(i8vec2) VectorShuffle 105 105 0 1
+             107:  47(i8vec2) GroupNonUniformSMin 42 Reduce 106
+             108:     48(ptr) AccessChain 34(data) 103 37
+             109:  18(i8vec4) Load 108
+             110:  18(i8vec4) VectorShuffle 109 107 4 5 2 3
+                              Store 108 110
+             111:      6(int) Load 8(invocation)
+             112:     48(ptr) AccessChain 34(data) 57 37
+             113:  18(i8vec4) Load 112
+             114:  58(i8vec3) VectorShuffle 113 113 0 1 2
+             115:  58(i8vec3) GroupNonUniformSMin 42 Reduce 114
+             116:     48(ptr) AccessChain 34(data) 111 37
+             117:  18(i8vec4) Load 116
+             118:  18(i8vec4) VectorShuffle 117 115 4 5 6 3
+                              Store 116 118
+             119:      6(int) Load 8(invocation)
+             120:     48(ptr) AccessChain 34(data) 67 37
+             121:  18(i8vec4) Load 120
+             122:  18(i8vec4) GroupNonUniformSMin 42 Reduce 121
+             123:     48(ptr) AccessChain 34(data) 119 37
+                              Store 123 122
+             124:      6(int) Load 8(invocation)
+             125:     39(ptr) AccessChain 34(data) 37 37 38
+             126:  17(int8_t) Load 125
+             127:  17(int8_t) GroupNonUniformSMax 42 Reduce 126
+             128:     39(ptr) AccessChain 34(data) 124 37 38
+                              Store 128 127
+             129:      6(int) Load 8(invocation)
+             130:     48(ptr) AccessChain 34(data) 46 37
+             131:  18(i8vec4) Load 130
+             132:  47(i8vec2) VectorShuffle 131 131 0 1
+             133:  47(i8vec2) GroupNonUniformSMax 42 Reduce 132
+             134:     48(ptr) AccessChain 34(data) 129 37
+             135:  18(i8vec4) Load 134
+             136:  18(i8vec4) VectorShuffle 135 133 4 5 2 3
+                              Store 134 136
+             137:      6(int) Load 8(invocation)
+             138:     48(ptr) AccessChain 34(data) 57 37
+             139:  18(i8vec4) Load 138
+             140:  58(i8vec3) VectorShuffle 139 139 0 1 2
+             141:  58(i8vec3) GroupNonUniformSMax 42 Reduce 140
+             142:     48(ptr) AccessChain 34(data) 137 37
+             143:  18(i8vec4) Load 142
+             144:  18(i8vec4) VectorShuffle 143 141 4 5 6 3
+                              Store 142 144
+             145:      6(int) Load 8(invocation)
+             146:     48(ptr) AccessChain 34(data) 67 37
+             147:  18(i8vec4) Load 146
+             148:  18(i8vec4) GroupNonUniformSMax 42 Reduce 147
+             149:     48(ptr) AccessChain 34(data) 145 37
+                              Store 149 148
+             150:      6(int) Load 8(invocation)
+             151:     39(ptr) AccessChain 34(data) 37 37 38
+             152:  17(int8_t) Load 151
+             153:  17(int8_t) GroupNonUniformBitwiseAnd 42 Reduce 152
+             154:     39(ptr) AccessChain 34(data) 150 37 38
+                              Store 154 153
+             155:      6(int) Load 8(invocation)
+             156:     48(ptr) AccessChain 34(data) 46 37
+             157:  18(i8vec4) Load 156
+             158:  47(i8vec2) VectorShuffle 157 157 0 1
+             159:  47(i8vec2) GroupNonUniformBitwiseAnd 42 Reduce 158
+             160:     48(ptr) AccessChain 34(data) 155 37
+             161:  18(i8vec4) Load 160
+             162:  18(i8vec4) VectorShuffle 161 159 4 5 2 3
+                              Store 160 162
+             163:      6(int) Load 8(invocation)
+             164:     48(ptr) AccessChain 34(data) 57 37
+             165:  18(i8vec4) Load 164
+             166:  58(i8vec3) VectorShuffle 165 165 0 1 2
+             167:  58(i8vec3) GroupNonUniformBitwiseAnd 42 Reduce 166
+             168:     48(ptr) AccessChain 34(data) 163 37
+             169:  18(i8vec4) Load 168
+             170:  18(i8vec4) VectorShuffle 169 167 4 5 6 3
+                              Store 168 170
+             171:      6(int) Load 8(invocation)
+             172:     48(ptr) AccessChain 34(data) 67 37
+             173:  18(i8vec4) Load 172
+             174:  18(i8vec4) GroupNonUniformBitwiseAnd 42 Reduce 173
+             175:     48(ptr) AccessChain 34(data) 171 37
+                              Store 175 174
+             176:      6(int) Load 8(invocation)
+             177:     39(ptr) AccessChain 34(data) 37 37 38
+             178:  17(int8_t) Load 177
+             179:  17(int8_t) GroupNonUniformBitwiseOr 42 Reduce 178
+             180:     39(ptr) AccessChain 34(data) 176 37 38
+                              Store 180 179
+             181:      6(int) Load 8(invocation)
+             182:     48(ptr) AccessChain 34(data) 46 37
+             183:  18(i8vec4) Load 182
+             184:  47(i8vec2) VectorShuffle 183 183 0 1
+             185:  47(i8vec2) GroupNonUniformBitwiseOr 42 Reduce 184
+             186:     48(ptr) AccessChain 34(data) 181 37
+             187:  18(i8vec4) Load 186
+             188:  18(i8vec4) VectorShuffle 187 185 4 5 2 3
+                              Store 186 188
+             189:      6(int) Load 8(invocation)
+             190:     48(ptr) AccessChain 34(data) 57 37
+             191:  18(i8vec4) Load 190
+             192:  58(i8vec3) VectorShuffle 191 191 0 1 2
+             193:  58(i8vec3) GroupNonUniformBitwiseOr 42 Reduce 192
+             194:     48(ptr) AccessChain 34(data) 189 37
+             195:  18(i8vec4) Load 194
+             196:  18(i8vec4) VectorShuffle 195 193 4 5 6 3
+                              Store 194 196
+             197:      6(int) Load 8(invocation)
+             198:     48(ptr) AccessChain 34(data) 67 37
+             199:  18(i8vec4) Load 198
+             200:  18(i8vec4) GroupNonUniformBitwiseOr 42 Reduce 199
+             201:     48(ptr) AccessChain 34(data) 197 37
+                              Store 201 200
+             202:      6(int) Load 8(invocation)
+             203:     39(ptr) AccessChain 34(data) 37 37 38
+             204:  17(int8_t) Load 203
+             205:  17(int8_t) GroupNonUniformBitwiseXor 42 Reduce 204
+             206:     39(ptr) AccessChain 34(data) 202 37 38
+                              Store 206 205
+             207:      6(int) Load 8(invocation)
+             208:     48(ptr) AccessChain 34(data) 46 37
+             209:  18(i8vec4) Load 208
+             210:  47(i8vec2) VectorShuffle 209 209 0 1
+             211:  47(i8vec2) GroupNonUniformBitwiseXor 42 Reduce 210
+             212:     48(ptr) AccessChain 34(data) 207 37
+             213:  18(i8vec4) Load 212
+             214:  18(i8vec4) VectorShuffle 213 211 4 5 2 3
+                              Store 212 214
+             215:      6(int) Load 8(invocation)
+             216:     48(ptr) AccessChain 34(data) 57 37
+             217:  18(i8vec4) Load 216
+             218:  58(i8vec3) VectorShuffle 217 217 0 1 2
+             219:  58(i8vec3) GroupNonUniformBitwiseXor 42 Reduce 218
+             220:     48(ptr) AccessChain 34(data) 215 37
+             221:  18(i8vec4) Load 220
+             222:  18(i8vec4) VectorShuffle 221 219 4 5 6 3
+                              Store 220 222
+             223:      6(int) Load 8(invocation)
+             224:     48(ptr) AccessChain 34(data) 67 37
+             225:  18(i8vec4) Load 224
+             226:  18(i8vec4) GroupNonUniformBitwiseXor 42 Reduce 225
+             227:     48(ptr) AccessChain 34(data) 223 37
+                              Store 227 226
+             228:      6(int) Load 8(invocation)
+             229:     39(ptr) AccessChain 34(data) 37 37 38
+             230:  17(int8_t) Load 229
+             231:  17(int8_t) GroupNonUniformIAdd 42 InclusiveScan 230
+             232:     39(ptr) AccessChain 34(data) 228 37 38
+                              Store 232 231
+             233:      6(int) Load 8(invocation)
+             234:     48(ptr) AccessChain 34(data) 46 37
+             235:  18(i8vec4) Load 234
+             236:  47(i8vec2) VectorShuffle 235 235 0 1
+             237:  47(i8vec2) GroupNonUniformIAdd 42 InclusiveScan 236
+             238:     48(ptr) AccessChain 34(data) 233 37
+             239:  18(i8vec4) Load 238
+             240:  18(i8vec4) VectorShuffle 239 237 4 5 2 3
+                              Store 238 240
+             241:      6(int) Load 8(invocation)
+             242:     48(ptr) AccessChain 34(data) 57 37
+             243:  18(i8vec4) Load 242
+             244:  58(i8vec3) VectorShuffle 243 243 0 1 2
+             245:  58(i8vec3) GroupNonUniformIAdd 42 InclusiveScan 244
+             246:     48(ptr) AccessChain 34(data) 241 37
+             247:  18(i8vec4) Load 246
+             248:  18(i8vec4) VectorShuffle 247 245 4 5 6 3
+                              Store 246 248
+             249:      6(int) Load 8(invocation)
+             250:     48(ptr) AccessChain 34(data) 67 37
+             251:  18(i8vec4) Load 250
+             252:  18(i8vec4) GroupNonUniformIAdd 42 InclusiveScan 251
+             253:     48(ptr) AccessChain 34(data) 249 37
+                              Store 253 252
+             254:      6(int) Load 8(invocation)
+             255:     39(ptr) AccessChain 34(data) 37 37 38
+             256:  17(int8_t) Load 255
+             257:  17(int8_t) GroupNonUniformIMul 42 InclusiveScan 256
+             258:     39(ptr) AccessChain 34(data) 254 37 38
+                              Store 258 257
+             259:      6(int) Load 8(invocation)
+             260:     48(ptr) AccessChain 34(data) 46 37
+             261:  18(i8vec4) Load 260
+             262:  47(i8vec2) VectorShuffle 261 261 0 1
+             263:  47(i8vec2) GroupNonUniformIMul 42 InclusiveScan 262
+             264:     48(ptr) AccessChain 34(data) 259 37
+             265:  18(i8vec4) Load 264
+             266:  18(i8vec4) VectorShuffle 265 263 4 5 2 3
+                              Store 264 266
+             267:      6(int) Load 8(invocation)
+             268:     48(ptr) AccessChain 34(data) 57 37
+             269:  18(i8vec4) Load 268
+             270:  58(i8vec3) VectorShuffle 269 269 0 1 2
+             271:  58(i8vec3) GroupNonUniformIMul 42 InclusiveScan 270
+             272:     48(ptr) AccessChain 34(data) 267 37
+             273:  18(i8vec4) Load 272
+             274:  18(i8vec4) VectorShuffle 273 271 4 5 6 3
+                              Store 272 274
+             275:      6(int) Load 8(invocation)
+             276:     48(ptr) AccessChain 34(data) 67 37
+             277:  18(i8vec4) Load 276
+             278:  18(i8vec4) GroupNonUniformIMul 42 InclusiveScan 277
+             279:     48(ptr) AccessChain 34(data) 275 37
+                              Store 279 278
+             280:      6(int) Load 8(invocation)
+             281:     39(ptr) AccessChain 34(data) 37 37 38
+             282:  17(int8_t) Load 281
+             283:  17(int8_t) GroupNonUniformSMin 42 InclusiveScan 282
+             284:     39(ptr) AccessChain 34(data) 280 37 38
+                              Store 284 283
+             285:      6(int) Load 8(invocation)
+             286:     48(ptr) AccessChain 34(data) 46 37
+             287:  18(i8vec4) Load 286
+             288:  47(i8vec2) VectorShuffle 287 287 0 1
+             289:  47(i8vec2) GroupNonUniformSMin 42 InclusiveScan 288
+             290:     48(ptr) AccessChain 34(data) 285 37
+             291:  18(i8vec4) Load 290
+             292:  18(i8vec4) VectorShuffle 291 289 4 5 2 3
+                              Store 290 292
+             293:      6(int) Load 8(invocation)
+             294:     48(ptr) AccessChain 34(data) 57 37
+             295:  18(i8vec4) Load 294
+             296:  58(i8vec3) VectorShuffle 295 295 0 1 2
+             297:  58(i8vec3) GroupNonUniformSMin 42 InclusiveScan 296
+             298:     48(ptr) AccessChain 34(data) 293 37
+             299:  18(i8vec4) Load 298
+             300:  18(i8vec4) VectorShuffle 299 297 4 5 6 3
+                              Store 298 300
+             301:      6(int) Load 8(invocation)
+             302:     48(ptr) AccessChain 34(data) 67 37
+             303:  18(i8vec4) Load 302
+             304:  18(i8vec4) GroupNonUniformSMin 42 InclusiveScan 303
+             305:     48(ptr) AccessChain 34(data) 301 37
+                              Store 305 304
+             306:      6(int) Load 8(invocation)
+             307:     39(ptr) AccessChain 34(data) 37 37 38
+             308:  17(int8_t) Load 307
+             309:  17(int8_t) GroupNonUniformSMax 42 InclusiveScan 308
+             310:     39(ptr) AccessChain 34(data) 306 37 38
+                              Store 310 309
+             311:      6(int) Load 8(invocation)
+             312:     48(ptr) AccessChain 34(data) 46 37
+             313:  18(i8vec4) Load 312
+             314:  47(i8vec2) VectorShuffle 313 313 0 1
+             315:  47(i8vec2) GroupNonUniformSMax 42 InclusiveScan 314
+             316:     48(ptr) AccessChain 34(data) 311 37
+             317:  18(i8vec4) Load 316
+             318:  18(i8vec4) VectorShuffle 317 315 4 5 2 3
+                              Store 316 318
+             319:      6(int) Load 8(invocation)
+             320:     48(ptr) AccessChain 34(data) 57 37
+             321:  18(i8vec4) Load 320
+             322:  58(i8vec3) VectorShuffle 321 321 0 1 2
+             323:  58(i8vec3) GroupNonUniformSMax 42 InclusiveScan 322
+             324:     48(ptr) AccessChain 34(data) 319 37
+             325:  18(i8vec4) Load 324
+             326:  18(i8vec4) VectorShuffle 325 323 4 5 6 3
+                              Store 324 326
+             327:      6(int) Load 8(invocation)
+             328:     48(ptr) AccessChain 34(data) 67 37
+             329:  18(i8vec4) Load 328
+             330:  18(i8vec4) GroupNonUniformSMax 42 InclusiveScan 329
+             331:     48(ptr) AccessChain 34(data) 327 37
+                              Store 331 330
+             332:      6(int) Load 8(invocation)
+             333:     39(ptr) AccessChain 34(data) 37 37 38
+             334:  17(int8_t) Load 333
+             335:  17(int8_t) GroupNonUniformBitwiseAnd 42 InclusiveScan 334
+             336:     39(ptr) AccessChain 34(data) 332 37 38
+                              Store 336 335
+             337:      6(int) Load 8(invocation)
+             338:     48(ptr) AccessChain 34(data) 46 37
+             339:  18(i8vec4) Load 338
+             340:  47(i8vec2) VectorShuffle 339 339 0 1
+             341:  47(i8vec2) GroupNonUniformBitwiseAnd 42 InclusiveScan 340
+             342:     48(ptr) AccessChain 34(data) 337 37
+             343:  18(i8vec4) Load 342
+             344:  18(i8vec4) VectorShuffle 343 341 4 5 2 3
+                              Store 342 344
+             345:      6(int) Load 8(invocation)
+             346:     48(ptr) AccessChain 34(data) 57 37
+             347:  18(i8vec4) Load 346
+             348:  58(i8vec3) VectorShuffle 347 347 0 1 2
+             349:  58(i8vec3) GroupNonUniformBitwiseAnd 42 InclusiveScan 348
+             350:     48(ptr) AccessChain 34(data) 345 37
+             351:  18(i8vec4) Load 350
+             352:  18(i8vec4) VectorShuffle 351 349 4 5 6 3
+                              Store 350 352
+             353:      6(int) Load 8(invocation)
+             354:     48(ptr) AccessChain 34(data) 67 37
+             355:  18(i8vec4) Load 354
+             356:  18(i8vec4) GroupNonUniformBitwiseAnd 42 InclusiveScan 355
+             357:     48(ptr) AccessChain 34(data) 353 37
+                              Store 357 356
+             358:      6(int) Load 8(invocation)
+             359:     39(ptr) AccessChain 34(data) 37 37 38
+             360:  17(int8_t) Load 359
+             361:  17(int8_t) GroupNonUniformBitwiseOr 42 InclusiveScan 360
+             362:     39(ptr) AccessChain 34(data) 358 37 38
+                              Store 362 361
+             363:      6(int) Load 8(invocation)
+             364:     48(ptr) AccessChain 34(data) 46 37
+             365:  18(i8vec4) Load 364
+             366:  47(i8vec2) VectorShuffle 365 365 0 1
+             367:  47(i8vec2) GroupNonUniformBitwiseOr 42 InclusiveScan 366
+             368:     48(ptr) AccessChain 34(data) 363 37
+             369:  18(i8vec4) Load 368
+             370:  18(i8vec4) VectorShuffle 369 367 4 5 2 3
+                              Store 368 370
+             371:      6(int) Load 8(invocation)
+             372:     48(ptr) AccessChain 34(data) 57 37
+             373:  18(i8vec4) Load 372
+             374:  58(i8vec3) VectorShuffle 373 373 0 1 2
+             375:  58(i8vec3) GroupNonUniformBitwiseOr 42 InclusiveScan 374
+             376:     48(ptr) AccessChain 34(data) 371 37
+             377:  18(i8vec4) Load 376
+             378:  18(i8vec4) VectorShuffle 377 375 4 5 6 3
+                              Store 376 378
+             379:      6(int) Load 8(invocation)
+             380:     48(ptr) AccessChain 34(data) 67 37
+             381:  18(i8vec4) Load 380
+             382:  18(i8vec4) GroupNonUniformBitwiseOr 42 InclusiveScan 381
+             383:     48(ptr) AccessChain 34(data) 379 37
+                              Store 383 382
+             384:      6(int) Load 8(invocation)
+             385:     39(ptr) AccessChain 34(data) 37 37 38
+             386:  17(int8_t) Load 385
+             387:  17(int8_t) GroupNonUniformBitwiseXor 42 InclusiveScan 386
+             388:     39(ptr) AccessChain 34(data) 384 37 38
+                              Store 388 387
+             389:      6(int) Load 8(invocation)
+             390:     48(ptr) AccessChain 34(data) 46 37
+             391:  18(i8vec4) Load 390
+             392:  47(i8vec2) VectorShuffle 391 391 0 1
+             393:  47(i8vec2) GroupNonUniformBitwiseXor 42 InclusiveScan 392
+             394:     48(ptr) AccessChain 34(data) 389 37
+             395:  18(i8vec4) Load 394
+             396:  18(i8vec4) VectorShuffle 395 393 4 5 2 3
+                              Store 394 396
+             397:      6(int) Load 8(invocation)
+             398:     48(ptr) AccessChain 34(data) 57 37
+             399:  18(i8vec4) Load 398
+             400:  58(i8vec3) VectorShuffle 399 399 0 1 2
+             401:  58(i8vec3) GroupNonUniformBitwiseXor 42 InclusiveScan 400
+             402:     48(ptr) AccessChain 34(data) 397 37
+             403:  18(i8vec4) Load 402
+             404:  18(i8vec4) VectorShuffle 403 401 4 5 6 3
+                              Store 402 404
+             405:      6(int) Load 8(invocation)
+             406:     48(ptr) AccessChain 34(data) 67 37
+             407:  18(i8vec4) Load 406
+             408:  18(i8vec4) GroupNonUniformBitwiseXor 42 InclusiveScan 407
+             409:     48(ptr) AccessChain 34(data) 405 37
+                              Store 409 408
+             410:      6(int) Load 8(invocation)
+             411:     39(ptr) AccessChain 34(data) 37 37 38
+             412:  17(int8_t) Load 411
+             413:  17(int8_t) GroupNonUniformIAdd 42 ExclusiveScan 412
+             414:     39(ptr) AccessChain 34(data) 410 37 38
+                              Store 414 413
+             415:      6(int) Load 8(invocation)
+             416:     48(ptr) AccessChain 34(data) 46 37
+             417:  18(i8vec4) Load 416
+             418:  47(i8vec2) VectorShuffle 417 417 0 1
+             419:  47(i8vec2) GroupNonUniformIAdd 42 ExclusiveScan 418
+             420:     48(ptr) AccessChain 34(data) 415 37
+             421:  18(i8vec4) Load 420
+             422:  18(i8vec4) VectorShuffle 421 419 4 5 2 3
+                              Store 420 422
+             423:      6(int) Load 8(invocation)
+             424:     48(ptr) AccessChain 34(data) 57 37
+             425:  18(i8vec4) Load 424
+             426:  58(i8vec3) VectorShuffle 425 425 0 1 2
+             427:  58(i8vec3) GroupNonUniformIAdd 42 ExclusiveScan 426
+             428:     48(ptr) AccessChain 34(data) 423 37
+             429:  18(i8vec4) Load 428
+             430:  18(i8vec4) VectorShuffle 429 427 4 5 6 3
+                              Store 428 430
+             431:      6(int) Load 8(invocation)
+             432:     48(ptr) AccessChain 34(data) 67 37
+             433:  18(i8vec4) Load 432
+             434:  18(i8vec4) GroupNonUniformIAdd 42 ExclusiveScan 433
+             435:     48(ptr) AccessChain 34(data) 431 37
+                              Store 435 434
+             436:      6(int) Load 8(invocation)
+             437:     39(ptr) AccessChain 34(data) 37 37 38
+             438:  17(int8_t) Load 437
+             439:  17(int8_t) GroupNonUniformIMul 42 ExclusiveScan 438
+             440:     39(ptr) AccessChain 34(data) 436 37 38
+                              Store 440 439
+             441:      6(int) Load 8(invocation)
+             442:     48(ptr) AccessChain 34(data) 46 37
+             443:  18(i8vec4) Load 442
+             444:  47(i8vec2) VectorShuffle 443 443 0 1
+             445:  47(i8vec2) GroupNonUniformIMul 42 ExclusiveScan 444
+             446:     48(ptr) AccessChain 34(data) 441 37
+             447:  18(i8vec4) Load 446
+             448:  18(i8vec4) VectorShuffle 447 445 4 5 2 3
+                              Store 446 448
+             449:      6(int) Load 8(invocation)
+             450:     48(ptr) AccessChain 34(data) 57 37
+             451:  18(i8vec4) Load 450
+             452:  58(i8vec3) VectorShuffle 451 451 0 1 2
+             453:  58(i8vec3) GroupNonUniformIMul 42 ExclusiveScan 452
+             454:     48(ptr) AccessChain 34(data) 449 37
+             455:  18(i8vec4) Load 454
+             456:  18(i8vec4) VectorShuffle 455 453 4 5 6 3
+                              Store 454 456
+             457:      6(int) Load 8(invocation)
+             458:     48(ptr) AccessChain 34(data) 67 37
+             459:  18(i8vec4) Load 458
+             460:  18(i8vec4) GroupNonUniformIMul 42 ExclusiveScan 459
+             461:     48(ptr) AccessChain 34(data) 457 37
+                              Store 461 460
+             462:      6(int) Load 8(invocation)
+             463:     39(ptr) AccessChain 34(data) 37 37 38
+             464:  17(int8_t) Load 463
+             465:  17(int8_t) GroupNonUniformSMin 42 ExclusiveScan 464
+             466:     39(ptr) AccessChain 34(data) 462 37 38
+                              Store 466 465
+             467:      6(int) Load 8(invocation)
+             468:     48(ptr) AccessChain 34(data) 46 37
+             469:  18(i8vec4) Load 468
+             470:  47(i8vec2) VectorShuffle 469 469 0 1
+             471:  47(i8vec2) GroupNonUniformSMin 42 ExclusiveScan 470
+             472:     48(ptr) AccessChain 34(data) 467 37
+             473:  18(i8vec4) Load 472
+             474:  18(i8vec4) VectorShuffle 473 471 4 5 2 3
+                              Store 472 474
+             475:      6(int) Load 8(invocation)
+             476:     48(ptr) AccessChain 34(data) 57 37
+             477:  18(i8vec4) Load 476
+             478:  58(i8vec3) VectorShuffle 477 477 0 1 2
+             479:  58(i8vec3) GroupNonUniformSMin 42 ExclusiveScan 478
+             480:     48(ptr) AccessChain 34(data) 475 37
+             481:  18(i8vec4) Load 480
+             482:  18(i8vec4) VectorShuffle 481 479 4 5 6 3
+                              Store 480 482
+             483:      6(int) Load 8(invocation)
+             484:     48(ptr) AccessChain 34(data) 67 37
+             485:  18(i8vec4) Load 484
+             486:  18(i8vec4) GroupNonUniformSMin 42 ExclusiveScan 485
+             487:     48(ptr) AccessChain 34(data) 483 37
+                              Store 487 486
+             488:      6(int) Load 8(invocation)
+             489:     39(ptr) AccessChain 34(data) 37 37 38
+             490:  17(int8_t) Load 489
+             491:  17(int8_t) GroupNonUniformSMax 42 ExclusiveScan 490
+             492:     39(ptr) AccessChain 34(data) 488 37 38
+                              Store 492 491
+             493:      6(int) Load 8(invocation)
+             494:     48(ptr) AccessChain 34(data) 46 37
+             495:  18(i8vec4) Load 494
+             496:  47(i8vec2) VectorShuffle 495 495 0 1
+             497:  47(i8vec2) GroupNonUniformSMax 42 ExclusiveScan 496
+             498:     48(ptr) AccessChain 34(data) 493 37
+             499:  18(i8vec4) Load 498
+             500:  18(i8vec4) VectorShuffle 499 497 4 5 2 3
+                              Store 498 500
+             501:      6(int) Load 8(invocation)
+             502:     48(ptr) AccessChain 34(data) 57 37
+             503:  18(i8vec4) Load 502
+             504:  58(i8vec3) VectorShuffle 503 503 0 1 2
+             505:  58(i8vec3) GroupNonUniformSMax 42 ExclusiveScan 504
+             506:     48(ptr) AccessChain 34(data) 501 37
+             507:  18(i8vec4) Load 506
+             508:  18(i8vec4) VectorShuffle 507 505 4 5 6 3
+                              Store 506 508
+             509:      6(int) Load 8(invocation)
+             510:     48(ptr) AccessChain 34(data) 67 37
+             511:  18(i8vec4) Load 510
+             512:  18(i8vec4) GroupNonUniformSMax 42 ExclusiveScan 511
+             513:     48(ptr) AccessChain 34(data) 509 37
+                              Store 513 512
+             514:      6(int) Load 8(invocation)
+             515:     39(ptr) AccessChain 34(data) 37 37 38
+             516:  17(int8_t) Load 515
+             517:  17(int8_t) GroupNonUniformBitwiseAnd 42 ExclusiveScan 516
+             518:     39(ptr) AccessChain 34(data) 514 37 38
+                              Store 518 517
+             519:      6(int) Load 8(invocation)
+             520:     48(ptr) AccessChain 34(data) 46 37
+             521:  18(i8vec4) Load 520
+             522:  47(i8vec2) VectorShuffle 521 521 0 1
+             523:  47(i8vec2) GroupNonUniformBitwiseAnd 42 ExclusiveScan 522
+             524:     48(ptr) AccessChain 34(data) 519 37
+             525:  18(i8vec4) Load 524
+             526:  18(i8vec4) VectorShuffle 525 523 4 5 2 3
+                              Store 524 526
+             527:      6(int) Load 8(invocation)
+             528:     48(ptr) AccessChain 34(data) 57 37
+             529:  18(i8vec4) Load 528
+             530:  58(i8vec3) VectorShuffle 529 529 0 1 2
+             531:  58(i8vec3) GroupNonUniformBitwiseAnd 42 ExclusiveScan 530
+             532:     48(ptr) AccessChain 34(data) 527 37
+             533:  18(i8vec4) Load 532
+             534:  18(i8vec4) VectorShuffle 533 531 4 5 6 3
+                              Store 532 534
+             535:      6(int) Load 8(invocation)
+             536:     48(ptr) AccessChain 34(data) 67 37
+             537:  18(i8vec4) Load 536
+             538:  18(i8vec4) GroupNonUniformBitwiseAnd 42 ExclusiveScan 537
+             539:     48(ptr) AccessChain 34(data) 535 37
+                              Store 539 538
+             540:      6(int) Load 8(invocation)
+             541:     39(ptr) AccessChain 34(data) 37 37 38
+             542:  17(int8_t) Load 541
+             543:  17(int8_t) GroupNonUniformBitwiseOr 42 ExclusiveScan 542
+             544:     39(ptr) AccessChain 34(data) 540 37 38
+                              Store 544 543
+             545:      6(int) Load 8(invocation)
+             546:     48(ptr) AccessChain 34(data) 46 37
+             547:  18(i8vec4) Load 546
+             548:  47(i8vec2) VectorShuffle 547 547 0 1
+             549:  47(i8vec2) GroupNonUniformBitwiseOr 42 ExclusiveScan 548
+             550:     48(ptr) AccessChain 34(data) 545 37
+             551:  18(i8vec4) Load 550
+             552:  18(i8vec4) VectorShuffle 551 549 4 5 2 3
+                              Store 550 552
+             553:      6(int) Load 8(invocation)
+             554:     48(ptr) AccessChain 34(data) 57 37
+             555:  18(i8vec4) Load 554
+             556:  58(i8vec3) VectorShuffle 555 555 0 1 2
+             557:  58(i8vec3) GroupNonUniformBitwiseOr 42 ExclusiveScan 556
+             558:     48(ptr) AccessChain 34(data) 553 37
+             559:  18(i8vec4) Load 558
+             560:  18(i8vec4) VectorShuffle 559 557 4 5 6 3
+                              Store 558 560
+             561:      6(int) Load 8(invocation)
+             562:     48(ptr) AccessChain 34(data) 67 37
+             563:  18(i8vec4) Load 562
+             564:  18(i8vec4) GroupNonUniformBitwiseOr 42 ExclusiveScan 563
+             565:     48(ptr) AccessChain 34(data) 561 37
+                              Store 565 564
+             566:      6(int) Load 8(invocation)
+             567:     39(ptr) AccessChain 34(data) 37 37 38
+             568:  17(int8_t) Load 567
+             569:  17(int8_t) GroupNonUniformBitwiseXor 42 ExclusiveScan 568
+             570:     39(ptr) AccessChain 34(data) 566 37 38
+                              Store 570 569
+             571:      6(int) Load 8(invocation)
+             572:     48(ptr) AccessChain 34(data) 46 37
+             573:  18(i8vec4) Load 572
+             574:  47(i8vec2) VectorShuffle 573 573 0 1
+             575:  47(i8vec2) GroupNonUniformBitwiseXor 42 ExclusiveScan 574
+             576:     48(ptr) AccessChain 34(data) 571 37
+             577:  18(i8vec4) Load 576
+             578:  18(i8vec4) VectorShuffle 577 575 4 5 2 3
+                              Store 576 578
+             579:      6(int) Load 8(invocation)
+             580:     48(ptr) AccessChain 34(data) 57 37
+             581:  18(i8vec4) Load 580
+             582:  58(i8vec3) VectorShuffle 581 581 0 1 2
+             583:  58(i8vec3) GroupNonUniformBitwiseXor 42 ExclusiveScan 582
+             584:     48(ptr) AccessChain 34(data) 579 37
+             585:  18(i8vec4) Load 584
+             586:  18(i8vec4) VectorShuffle 585 583 4 5 6 3
+                              Store 584 586
+             587:      6(int) Load 8(invocation)
+             588:     48(ptr) AccessChain 34(data) 67 37
+             589:  18(i8vec4) Load 588
+             590:  18(i8vec4) GroupNonUniformBitwiseXor 42 ExclusiveScan 589
+             591:     48(ptr) AccessChain 34(data) 587 37
+                              Store 591 590
+             592:      6(int) Load 8(invocation)
+             594:    593(ptr) AccessChain 34(data) 37 46 38
+             595:  19(int8_t) Load 594
+             596:  19(int8_t) GroupNonUniformIAdd 42 Reduce 595
+             597:    593(ptr) AccessChain 34(data) 592 46 38
+                              Store 597 596
+             598:      6(int) Load 8(invocation)
+             601:    600(ptr) AccessChain 34(data) 46 46
+             602:  20(i8vec4) Load 601
+             603: 599(i8vec2) VectorShuffle 602 602 0 1
+             604: 599(i8vec2) GroupNonUniformIAdd 42 Reduce 603
+             605:    600(ptr) AccessChain 34(data) 598 46
+             606:  20(i8vec4) Load 605
+             607:  20(i8vec4) VectorShuffle 606 604 4 5 2 3
+                              Store 605 607
+             608:      6(int) Load 8(invocation)
+             610:    600(ptr) AccessChain 34(data) 57 46
+             611:  20(i8vec4) Load 610
+             612: 609(i8vec3) VectorShuffle 611 611 0 1 2
+             613: 609(i8vec3) GroupNonUniformIAdd 42 Reduce 612
+             614:    600(ptr) AccessChain 34(data) 608 46
+             615:  20(i8vec4) Load 614
+             616:  20(i8vec4) VectorShuffle 615 613 4 5 6 3
+                              Store 614 616
+             617:      6(int) Load 8(invocation)
+             618:    600(ptr) AccessChain 34(data) 67 46
+             619:  20(i8vec4) Load 618
+             620:  20(i8vec4) GroupNonUniformIAdd 42 Reduce 619
+             621:    600(ptr) AccessChain 34(data) 617 46
+                              Store 621 620
+             622:      6(int) Load 8(invocation)
+             623:    593(ptr) AccessChain 34(data) 37 46 38
+             624:  19(int8_t) Load 623
+             625:  19(int8_t) GroupNonUniformIMul 42 Reduce 624
+             626:    593(ptr) AccessChain 34(data) 622 46 38
+                              Store 626 625
+             627:      6(int) Load 8(invocation)
+             628:    600(ptr) AccessChain 34(data) 46 46
+             629:  20(i8vec4) Load 628
+             630: 599(i8vec2) VectorShuffle 629 629 0 1
+             631: 599(i8vec2) GroupNonUniformIMul 42 Reduce 630
+             632:    600(ptr) AccessChain 34(data) 627 46
+             633:  20(i8vec4) Load 632
+             634:  20(i8vec4) VectorShuffle 633 631 4 5 2 3
+                              Store 632 634
+             635:      6(int) Load 8(invocation)
+             636:    600(ptr) AccessChain 34(data) 57 46
+             637:  20(i8vec4) Load 636
+             638: 609(i8vec3) VectorShuffle 637 637 0 1 2
+             639: 609(i8vec3) GroupNonUniformIMul 42 Reduce 638
+             640:    600(ptr) AccessChain 34(data) 635 46
+             641:  20(i8vec4) Load 640
+             642:  20(i8vec4) VectorShuffle 641 639 4 5 6 3
+                              Store 640 642
+             643:      6(int) Load 8(invocation)
+             644:    600(ptr) AccessChain 34(data) 67 46
+             645:  20(i8vec4) Load 644
+             646:  20(i8vec4) GroupNonUniformIMul 42 Reduce 645
+             647:    600(ptr) AccessChain 34(data) 643 46
+                              Store 647 646
+             648:      6(int) Load 8(invocation)
+             649:    593(ptr) AccessChain 34(data) 37 46 38
+             650:  19(int8_t) Load 649
+             651:  19(int8_t) GroupNonUniformUMin 42 Reduce 650
+             652:    593(ptr) AccessChain 34(data) 648 46 38
+                              Store 652 651
+             653:      6(int) Load 8(invocation)
+             654:    600(ptr) AccessChain 34(data) 46 46
+             655:  20(i8vec4) Load 654
+             656: 599(i8vec2) VectorShuffle 655 655 0 1
+             657: 599(i8vec2) GroupNonUniformUMin 42 Reduce 656
+             658:    600(ptr) AccessChain 34(data) 653 46
+             659:  20(i8vec4) Load 658
+             660:  20(i8vec4) VectorShuffle 659 657 4 5 2 3
+                              Store 658 660
+             661:      6(int) Load 8(invocation)
+             662:    600(ptr) AccessChain 34(data) 57 46
+             663:  20(i8vec4) Load 662
+             664: 609(i8vec3) VectorShuffle 663 663 0 1 2
+             665: 609(i8vec3) GroupNonUniformUMin 42 Reduce 664
+             666:    600(ptr) AccessChain 34(data) 661 46
+             667:  20(i8vec4) Load 666
+             668:  20(i8vec4) VectorShuffle 667 665 4 5 6 3
+                              Store 666 668
+             669:      6(int) Load 8(invocation)
+             670:    600(ptr) AccessChain 34(data) 67 46
+             671:  20(i8vec4) Load 670
+             672:  20(i8vec4) GroupNonUniformUMin 42 Reduce 671
+             673:    600(ptr) AccessChain 34(data) 669 46
+                              Store 673 672
+             674:      6(int) Load 8(invocation)
+             675:    593(ptr) AccessChain 34(data) 37 46 38
+             676:  19(int8_t) Load 675
+             677:  19(int8_t) GroupNonUniformUMax 42 Reduce 676
+             678:    593(ptr) AccessChain 34(data) 674 46 38
+                              Store 678 677
+             679:      6(int) Load 8(invocation)
+             680:    600(ptr) AccessChain 34(data) 46 46
+             681:  20(i8vec4) Load 680
+             682: 599(i8vec2) VectorShuffle 681 681 0 1
+             683: 599(i8vec2) GroupNonUniformUMax 42 Reduce 682
+             684:    600(ptr) AccessChain 34(data) 679 46
+             685:  20(i8vec4) Load 684
+             686:  20(i8vec4) VectorShuffle 685 683 4 5 2 3
+                              Store 684 686
+             687:      6(int) Load 8(invocation)
+             688:    600(ptr) AccessChain 34(data) 57 46
+             689:  20(i8vec4) Load 688
+             690: 609(i8vec3) VectorShuffle 689 689 0 1 2
+             691: 609(i8vec3) GroupNonUniformUMax 42 Reduce 690
+             692:    600(ptr) AccessChain 34(data) 687 46
+             693:  20(i8vec4) Load 692
+             694:  20(i8vec4) VectorShuffle 693 691 4 5 6 3
+                              Store 692 694
+             695:      6(int) Load 8(invocation)
+             696:    600(ptr) AccessChain 34(data) 67 46
+             697:  20(i8vec4) Load 696
+             698:  20(i8vec4) GroupNonUniformUMax 42 Reduce 697
+             699:    600(ptr) AccessChain 34(data) 695 46
+                              Store 699 698
+             700:      6(int) Load 8(invocation)
+             701:    593(ptr) AccessChain 34(data) 37 46 38
+             702:  19(int8_t) Load 701
+             703:  19(int8_t) GroupNonUniformBitwiseAnd 42 Reduce 702
+             704:    593(ptr) AccessChain 34(data) 700 46 38
+                              Store 704 703
+             705:      6(int) Load 8(invocation)
+             706:    600(ptr) AccessChain 34(data) 46 46
+             707:  20(i8vec4) Load 706
+             708: 599(i8vec2) VectorShuffle 707 707 0 1
+             709: 599(i8vec2) GroupNonUniformBitwiseAnd 42 Reduce 708
+             710:    600(ptr) AccessChain 34(data) 705 46
+             711:  20(i8vec4) Load 710
+             712:  20(i8vec4) VectorShuffle 711 709 4 5 2 3
+                              Store 710 712
+             713:      6(int) Load 8(invocation)
+             714:    600(ptr) AccessChain 34(data) 57 46
+             715:  20(i8vec4) Load 714
+             716: 609(i8vec3) VectorShuffle 715 715 0 1 2
+             717: 609(i8vec3) GroupNonUniformBitwiseAnd 42 Reduce 716
+             718:    600(ptr) AccessChain 34(data) 713 46
+             719:  20(i8vec4) Load 718
+             720:  20(i8vec4) VectorShuffle 719 717 4 5 6 3
+                              Store 718 720
+             721:      6(int) Load 8(invocation)
+             722:    600(ptr) AccessChain 34(data) 67 46
+             723:  20(i8vec4) Load 722
+             724:  20(i8vec4) GroupNonUniformBitwiseAnd 42 Reduce 723
+             725:    600(ptr) AccessChain 34(data) 721 46
+                              Store 725 724
+             726:      6(int) Load 8(invocation)
+             727:    593(ptr) AccessChain 34(data) 37 46 38
+             728:  19(int8_t) Load 727
+             729:  19(int8_t) GroupNonUniformBitwiseOr 42 Reduce 728
+             730:    593(ptr) AccessChain 34(data) 726 46 38
+                              Store 730 729
+             731:      6(int) Load 8(invocation)
+             732:    600(ptr) AccessChain 34(data) 46 46
+             733:  20(i8vec4) Load 732
+             734: 599(i8vec2) VectorShuffle 733 733 0 1
+             735: 599(i8vec2) GroupNonUniformBitwiseOr 42 Reduce 734
+             736:    600(ptr) AccessChain 34(data) 731 46
+             737:  20(i8vec4) Load 736
+             738:  20(i8vec4) VectorShuffle 737 735 4 5 2 3
+                              Store 736 738
+             739:      6(int) Load 8(invocation)
+             740:    600(ptr) AccessChain 34(data) 57 46
+             741:  20(i8vec4) Load 740
+             742: 609(i8vec3) VectorShuffle 741 741 0 1 2
+             743: 609(i8vec3) GroupNonUniformBitwiseOr 42 Reduce 742
+             744:    600(ptr) AccessChain 34(data) 739 46
+             745:  20(i8vec4) Load 744
+             746:  20(i8vec4) VectorShuffle 745 743 4 5 6 3
+                              Store 744 746
+             747:      6(int) Load 8(invocation)
+             748:    600(ptr) AccessChain 34(data) 67 46
+             749:  20(i8vec4) Load 748
+             750:  20(i8vec4) GroupNonUniformBitwiseOr 42 Reduce 749
+             751:    600(ptr) AccessChain 34(data) 747 46
+                              Store 751 750
+             752:      6(int) Load 8(invocation)
+             753:    593(ptr) AccessChain 34(data) 37 46 38
+             754:  19(int8_t) Load 753
+             755:  19(int8_t) GroupNonUniformBitwiseXor 42 Reduce 754
+             756:    593(ptr) AccessChain 34(data) 752 46 38
+                              Store 756 755
+             757:      6(int) Load 8(invocation)
+             758:    600(ptr) AccessChain 34(data) 46 46
+             759:  20(i8vec4) Load 758
+             760: 599(i8vec2) VectorShuffle 759 759 0 1
+             761: 599(i8vec2) GroupNonUniformBitwiseXor 42 Reduce 760
+             762:    600(ptr) AccessChain 34(data) 757 46
+             763:  20(i8vec4) Load 762
+             764:  20(i8vec4) VectorShuffle 763 761 4 5 2 3
+                              Store 762 764
+             765:      6(int) Load 8(invocation)
+             766:    600(ptr) AccessChain 34(data) 57 46
+             767:  20(i8vec4) Load 766
+             768: 609(i8vec3) VectorShuffle 767 767 0 1 2
+             769: 609(i8vec3) GroupNonUniformBitwiseXor 42 Reduce 768
+             770:    600(ptr) AccessChain 34(data) 765 46
+             771:  20(i8vec4) Load 770
+             772:  20(i8vec4) VectorShuffle 771 769 4 5 6 3
+                              Store 770 772
+             773:      6(int) Load 8(invocation)
+             774:    600(ptr) AccessChain 34(data) 67 46
+             775:  20(i8vec4) Load 774
+             776:  20(i8vec4) GroupNonUniformBitwiseXor 42 Reduce 775
+             777:    600(ptr) AccessChain 34(data) 773 46
+                              Store 777 776
+             778:      6(int) Load 8(invocation)
+             779:    593(ptr) AccessChain 34(data) 37 46 38
+             780:  19(int8_t) Load 779
+             781:  19(int8_t) GroupNonUniformIAdd 42 InclusiveScan 780
+             782:    593(ptr) AccessChain 34(data) 778 46 38
+                              Store 782 781
+             783:      6(int) Load 8(invocation)
+             784:    600(ptr) AccessChain 34(data) 46 46
+             785:  20(i8vec4) Load 784
+             786: 599(i8vec2) VectorShuffle 785 785 0 1
+             787: 599(i8vec2) GroupNonUniformIAdd 42 InclusiveScan 786
+             788:    600(ptr) AccessChain 34(data) 783 46
+             789:  20(i8vec4) Load 788
+             790:  20(i8vec4) VectorShuffle 789 787 4 5 2 3
+                              Store 788 790
+             791:      6(int) Load 8(invocation)
+             792:    600(ptr) AccessChain 34(data) 57 46
+             793:  20(i8vec4) Load 792
+             794: 609(i8vec3) VectorShuffle 793 793 0 1 2
+             795: 609(i8vec3) GroupNonUniformIAdd 42 InclusiveScan 794
+             796:    600(ptr) AccessChain 34(data) 791 46
+             797:  20(i8vec4) Load 796
+             798:  20(i8vec4) VectorShuffle 797 795 4 5 6 3
+                              Store 796 798
+             799:      6(int) Load 8(invocation)
+             800:    600(ptr) AccessChain 34(data) 67 46
+             801:  20(i8vec4) Load 800
+             802:  20(i8vec4) GroupNonUniformIAdd 42 InclusiveScan 801
+             803:    600(ptr) AccessChain 34(data) 799 46
+                              Store 803 802
+             804:      6(int) Load 8(invocation)
+             805:    593(ptr) AccessChain 34(data) 37 46 38
+             806:  19(int8_t) Load 805
+             807:  19(int8_t) GroupNonUniformIMul 42 InclusiveScan 806
+             808:    593(ptr) AccessChain 34(data) 804 46 38
+                              Store 808 807
+             809:      6(int) Load 8(invocation)
+             810:    600(ptr) AccessChain 34(data) 46 46
+             811:  20(i8vec4) Load 810
+             812: 599(i8vec2) VectorShuffle 811 811 0 1
+             813: 599(i8vec2) GroupNonUniformIMul 42 InclusiveScan 812
+             814:    600(ptr) AccessChain 34(data) 809 46
+             815:  20(i8vec4) Load 814
+             816:  20(i8vec4) VectorShuffle 815 813 4 5 2 3
+                              Store 814 816
+             817:      6(int) Load 8(invocation)
+             818:    600(ptr) AccessChain 34(data) 57 46
+             819:  20(i8vec4) Load 818
+             820: 609(i8vec3) VectorShuffle 819 819 0 1 2
+             821: 609(i8vec3) GroupNonUniformIMul 42 InclusiveScan 820
+             822:    600(ptr) AccessChain 34(data) 817 46
+             823:  20(i8vec4) Load 822
+             824:  20(i8vec4) VectorShuffle 823 821 4 5 6 3
+                              Store 822 824
+             825:      6(int) Load 8(invocation)
+             826:    600(ptr) AccessChain 34(data) 67 46
+             827:  20(i8vec4) Load 826
+             828:  20(i8vec4) GroupNonUniformIMul 42 InclusiveScan 827
+             829:    600(ptr) AccessChain 34(data) 825 46
+                              Store 829 828
+             830:      6(int) Load 8(invocation)
+             831:    593(ptr) AccessChain 34(data) 37 46 38
+             832:  19(int8_t) Load 831
+             833:  19(int8_t) GroupNonUniformUMin 42 InclusiveScan 832
+             834:    593(ptr) AccessChain 34(data) 830 46 38
+                              Store 834 833
+             835:      6(int) Load 8(invocation)
+             836:    600(ptr) AccessChain 34(data) 46 46
+             837:  20(i8vec4) Load 836
+             838: 599(i8vec2) VectorShuffle 837 837 0 1
+             839: 599(i8vec2) GroupNonUniformUMin 42 InclusiveScan 838
+             840:    600(ptr) AccessChain 34(data) 835 46
+             841:  20(i8vec4) Load 840
+             842:  20(i8vec4) VectorShuffle 841 839 4 5 2 3
+                              Store 840 842
+             843:      6(int) Load 8(invocation)
+             844:    600(ptr) AccessChain 34(data) 57 46
+             845:  20(i8vec4) Load 844
+             846: 609(i8vec3) VectorShuffle 845 845 0 1 2
+             847: 609(i8vec3) GroupNonUniformUMin 42 InclusiveScan 846
+             848:    600(ptr) AccessChain 34(data) 843 46
+             849:  20(i8vec4) Load 848
+             850:  20(i8vec4) VectorShuffle 849 847 4 5 6 3
+                              Store 848 850
+             851:      6(int) Load 8(invocation)
+             852:    600(ptr) AccessChain 34(data) 67 46
+             853:  20(i8vec4) Load 852
+             854:  20(i8vec4) GroupNonUniformUMin 42 InclusiveScan 853
+             855:    600(ptr) AccessChain 34(data) 851 46
+                              Store 855 854
+             856:      6(int) Load 8(invocation)
+             857:    593(ptr) AccessChain 34(data) 37 46 38
+             858:  19(int8_t) Load 857
+             859:  19(int8_t) GroupNonUniformUMax 42 InclusiveScan 858
+             860:    593(ptr) AccessChain 34(data) 856 46 38
+                              Store 860 859
+             861:      6(int) Load 8(invocation)
+             862:    600(ptr) AccessChain 34(data) 46 46
+             863:  20(i8vec4) Load 862
+             864: 599(i8vec2) VectorShuffle 863 863 0 1
+             865: 599(i8vec2) GroupNonUniformUMax 42 InclusiveScan 864
+             866:    600(ptr) AccessChain 34(data) 861 46
+             867:  20(i8vec4) Load 866
+             868:  20(i8vec4) VectorShuffle 867 865 4 5 2 3
+                              Store 866 868
+             869:      6(int) Load 8(invocation)
+             870:    600(ptr) AccessChain 34(data) 57 46
+             871:  20(i8vec4) Load 870
+             872: 609(i8vec3) VectorShuffle 871 871 0 1 2
+             873: 609(i8vec3) GroupNonUniformUMax 42 InclusiveScan 872
+             874:    600(ptr) AccessChain 34(data) 869 46
+             875:  20(i8vec4) Load 874
+             876:  20(i8vec4) VectorShuffle 875 873 4 5 6 3
+                              Store 874 876
+             877:      6(int) Load 8(invocation)
+             878:    600(ptr) AccessChain 34(data) 67 46
+             879:  20(i8vec4) Load 878
+             880:  20(i8vec4) GroupNonUniformUMax 42 InclusiveScan 879
+             881:    600(ptr) AccessChain 34(data) 877 46
+                              Store 881 880
+             882:      6(int) Load 8(invocation)
+             883:    593(ptr) AccessChain 34(data) 37 46 38
+             884:  19(int8_t) Load 883
+             885:  19(int8_t) GroupNonUniformBitwiseAnd 42 InclusiveScan 884
+             886:    593(ptr) AccessChain 34(data) 882 46 38
+                              Store 886 885
+             887:      6(int) Load 8(invocation)
+             888:    600(ptr) AccessChain 34(data) 46 46
+             889:  20(i8vec4) Load 888
+             890: 599(i8vec2) VectorShuffle 889 889 0 1
+             891: 599(i8vec2) GroupNonUniformBitwiseAnd 42 InclusiveScan 890
+             892:    600(ptr) AccessChain 34(data) 887 46
+             893:  20(i8vec4) Load 892
+             894:  20(i8vec4) VectorShuffle 893 891 4 5 2 3
+                              Store 892 894
+             895:      6(int) Load 8(invocation)
+             896:    600(ptr) AccessChain 34(data) 57 46
+             897:  20(i8vec4) Load 896
+             898: 609(i8vec3) VectorShuffle 897 897 0 1 2
+             899: 609(i8vec3) GroupNonUniformBitwiseAnd 42 InclusiveScan 898
+             900:    600(ptr) AccessChain 34(data) 895 46
+             901:  20(i8vec4) Load 900
+             902:  20(i8vec4) VectorShuffle 901 899 4 5 6 3
+                              Store 900 902
+             903:      6(int) Load 8(invocation)
+             904:    600(ptr) AccessChain 34(data) 67 46
+             905:  20(i8vec4) Load 904
+             906:  20(i8vec4) GroupNonUniformBitwiseAnd 42 InclusiveScan 905
+             907:    600(ptr) AccessChain 34(data) 903 46
+                              Store 907 906
+             908:      6(int) Load 8(invocation)
+             909:    593(ptr) AccessChain 34(data) 37 46 38
+             910:  19(int8_t) Load 909
+             911:  19(int8_t) GroupNonUniformBitwiseOr 42 InclusiveScan 910
+             912:    593(ptr) AccessChain 34(data) 908 46 38
+                              Store 912 911
+             913:      6(int) Load 8(invocation)
+             914:    600(ptr) AccessChain 34(data) 46 46
+             915:  20(i8vec4) Load 914
+             916: 599(i8vec2) VectorShuffle 915 915 0 1
+             917: 599(i8vec2) GroupNonUniformBitwiseOr 42 InclusiveScan 916
+             918:    600(ptr) AccessChain 34(data) 913 46
+             919:  20(i8vec4) Load 918
+             920:  20(i8vec4) VectorShuffle 919 917 4 5 2 3
+                              Store 918 920
+             921:      6(int) Load 8(invocation)
+             922:    600(ptr) AccessChain 34(data) 57 46
+             923:  20(i8vec4) Load 922
+             924: 609(i8vec3) VectorShuffle 923 923 0 1 2
+             925: 609(i8vec3) GroupNonUniformBitwiseOr 42 InclusiveScan 924
+             926:    600(ptr) AccessChain 34(data) 921 46
+             927:  20(i8vec4) Load 926
+             928:  20(i8vec4) VectorShuffle 927 925 4 5 6 3
+                              Store 926 928
+             929:      6(int) Load 8(invocation)
+             930:    600(ptr) AccessChain 34(data) 67 46
+             931:  20(i8vec4) Load 930
+             932:  20(i8vec4) GroupNonUniformBitwiseOr 42 InclusiveScan 931
+             933:    600(ptr) AccessChain 34(data) 929 46
+                              Store 933 932
+             934:      6(int) Load 8(invocation)
+             935:    593(ptr) AccessChain 34(data) 37 46 38
+             936:  19(int8_t) Load 935
+             937:  19(int8_t) GroupNonUniformBitwiseXor 42 InclusiveScan 936
+             938:    593(ptr) AccessChain 34(data) 934 46 38
+                              Store 938 937
+             939:      6(int) Load 8(invocation)
+             940:    600(ptr) AccessChain 34(data) 46 46
+             941:  20(i8vec4) Load 940
+             942: 599(i8vec2) VectorShuffle 941 941 0 1
+             943: 599(i8vec2) GroupNonUniformBitwiseXor 42 InclusiveScan 942
+             944:    600(ptr) AccessChain 34(data) 939 46
+             945:  20(i8vec4) Load 944
+             946:  20(i8vec4) VectorShuffle 945 943 4 5 2 3
+                              Store 944 946
+             947:      6(int) Load 8(invocation)
+             948:    600(ptr) AccessChain 34(data) 57 46
+             949:  20(i8vec4) Load 948
+             950: 609(i8vec3) VectorShuffle 949 949 0 1 2
+             951: 609(i8vec3) GroupNonUniformBitwiseXor 42 InclusiveScan 950
+             952:    600(ptr) AccessChain 34(data) 947 46
+             953:  20(i8vec4) Load 952
+             954:  20(i8vec4) VectorShuffle 953 951 4 5 6 3
+                              Store 952 954
+             955:      6(int) Load 8(invocation)
+             956:    600(ptr) AccessChain 34(data) 67 46
+             957:  20(i8vec4) Load 956
+             958:  20(i8vec4) GroupNonUniformBitwiseXor 42 InclusiveScan 957
+             959:    600(ptr) AccessChain 34(data) 955 46
+                              Store 959 958
+             960:      6(int) Load 8(invocation)
+             961:    593(ptr) AccessChain 34(data) 37 46 38
+             962:  19(int8_t) Load 961
+             963:  19(int8_t) GroupNonUniformIAdd 42 ExclusiveScan 962
+             964:    593(ptr) AccessChain 34(data) 960 46 38
+                              Store 964 963
+             965:      6(int) Load 8(invocation)
+             966:    600(ptr) AccessChain 34(data) 46 46
+             967:  20(i8vec4) Load 966
+             968: 599(i8vec2) VectorShuffle 967 967 0 1
+             969: 599(i8vec2) GroupNonUniformIAdd 42 ExclusiveScan 968
+             970:    600(ptr) AccessChain 34(data) 965 46
+             971:  20(i8vec4) Load 970
+             972:  20(i8vec4) VectorShuffle 971 969 4 5 2 3
+                              Store 970 972
+             973:      6(int) Load 8(invocation)
+             974:    600(ptr) AccessChain 34(data) 57 46
+             975:  20(i8vec4) Load 974
+             976: 609(i8vec3) VectorShuffle 975 975 0 1 2
+             977: 609(i8vec3) GroupNonUniformIAdd 42 ExclusiveScan 976
+             978:    600(ptr) AccessChain 34(data) 973 46
+             979:  20(i8vec4) Load 978
+             980:  20(i8vec4) VectorShuffle 979 977 4 5 6 3
+                              Store 978 980
+             981:      6(int) Load 8(invocation)
+             982:    600(ptr) AccessChain 34(data) 67 46
+             983:  20(i8vec4) Load 982
+             984:  20(i8vec4) GroupNonUniformIAdd 42 ExclusiveScan 983
+             985:    600(ptr) AccessChain 34(data) 981 46
+                              Store 985 984
+             986:      6(int) Load 8(invocation)
+             987:    593(ptr) AccessChain 34(data) 37 46 38
+             988:  19(int8_t) Load 987
+             989:  19(int8_t) GroupNonUniformIMul 42 ExclusiveScan 988
+             990:    593(ptr) AccessChain 34(data) 986 46 38
+                              Store 990 989
+             991:      6(int) Load 8(invocation)
+             992:    600(ptr) AccessChain 34(data) 46 46
+             993:  20(i8vec4) Load 992
+             994: 599(i8vec2) VectorShuffle 993 993 0 1
+             995: 599(i8vec2) GroupNonUniformIMul 42 ExclusiveScan 994
+             996:    600(ptr) AccessChain 34(data) 991 46
+             997:  20(i8vec4) Load 996
+             998:  20(i8vec4) VectorShuffle 997 995 4 5 2 3
+                              Store 996 998
+             999:      6(int) Load 8(invocation)
+            1000:    600(ptr) AccessChain 34(data) 57 46
+            1001:  20(i8vec4) Load 1000
+            1002: 609(i8vec3) VectorShuffle 1001 1001 0 1 2
+            1003: 609(i8vec3) GroupNonUniformIMul 42 ExclusiveScan 1002
+            1004:    600(ptr) AccessChain 34(data) 999 46
+            1005:  20(i8vec4) Load 1004
+            1006:  20(i8vec4) VectorShuffle 1005 1003 4 5 6 3
+                              Store 1004 1006
+            1007:      6(int) Load 8(invocation)
+            1008:    600(ptr) AccessChain 34(data) 67 46
+            1009:  20(i8vec4) Load 1008
+            1010:  20(i8vec4) GroupNonUniformIMul 42 ExclusiveScan 1009
+            1011:    600(ptr) AccessChain 34(data) 1007 46
+                              Store 1011 1010
+            1012:      6(int) Load 8(invocation)
+            1013:    593(ptr) AccessChain 34(data) 37 46 38
+            1014:  19(int8_t) Load 1013
+            1015:  19(int8_t) GroupNonUniformUMin 42 ExclusiveScan 1014
+            1016:    593(ptr) AccessChain 34(data) 1012 46 38
+                              Store 1016 1015
+            1017:      6(int) Load 8(invocation)
+            1018:    600(ptr) AccessChain 34(data) 46 46
+            1019:  20(i8vec4) Load 1018
+            1020: 599(i8vec2) VectorShuffle 1019 1019 0 1
+            1021: 599(i8vec2) GroupNonUniformUMin 42 ExclusiveScan 1020
+            1022:    600(ptr) AccessChain 34(data) 1017 46
+            1023:  20(i8vec4) Load 1022
+            1024:  20(i8vec4) VectorShuffle 1023 1021 4 5 2 3
+                              Store 1022 1024
+            1025:      6(int) Load 8(invocation)
+            1026:    600(ptr) AccessChain 34(data) 57 46
+            1027:  20(i8vec4) Load 1026
+            1028: 609(i8vec3) VectorShuffle 1027 1027 0 1 2
+            1029: 609(i8vec3) GroupNonUniformUMin 42 ExclusiveScan 1028
+            1030:    600(ptr) AccessChain 34(data) 1025 46
+            1031:  20(i8vec4) Load 1030
+            1032:  20(i8vec4) VectorShuffle 1031 1029 4 5 6 3
+                              Store 1030 1032
+            1033:      6(int) Load 8(invocation)
+            1034:    600(ptr) AccessChain 34(data) 67 46
+            1035:  20(i8vec4) Load 1034
+            1036:  20(i8vec4) GroupNonUniformUMin 42 ExclusiveScan 1035
+            1037:    600(ptr) AccessChain 34(data) 1033 46
+                              Store 1037 1036
+            1038:      6(int) Load 8(invocation)
+            1039:    593(ptr) AccessChain 34(data) 37 46 38
+            1040:  19(int8_t) Load 1039
+            1041:  19(int8_t) GroupNonUniformUMax 42 ExclusiveScan 1040
+            1042:    593(ptr) AccessChain 34(data) 1038 46 38
+                              Store 1042 1041
+            1043:      6(int) Load 8(invocation)
+            1044:    600(ptr) AccessChain 34(data) 46 46
+            1045:  20(i8vec4) Load 1044
+            1046: 599(i8vec2) VectorShuffle 1045 1045 0 1
+            1047: 599(i8vec2) GroupNonUniformUMax 42 ExclusiveScan 1046
+            1048:    600(ptr) AccessChain 34(data) 1043 46
+            1049:  20(i8vec4) Load 1048
+            1050:  20(i8vec4) VectorShuffle 1049 1047 4 5 2 3
+                              Store 1048 1050
+            1051:      6(int) Load 8(invocation)
+            1052:    600(ptr) AccessChain 34(data) 57 46
+            1053:  20(i8vec4) Load 1052
+            1054: 609(i8vec3) VectorShuffle 1053 1053 0 1 2
+            1055: 609(i8vec3) GroupNonUniformUMax 42 ExclusiveScan 1054
+            1056:    600(ptr) AccessChain 34(data) 1051 46
+            1057:  20(i8vec4) Load 1056
+            1058:  20(i8vec4) VectorShuffle 1057 1055 4 5 6 3
+                              Store 1056 1058
+            1059:      6(int) Load 8(invocation)
+            1060:    600(ptr) AccessChain 34(data) 67 46
+            1061:  20(i8vec4) Load 1060
+            1062:  20(i8vec4) GroupNonUniformUMax 42 ExclusiveScan 1061
+            1063:    600(ptr) AccessChain 34(data) 1059 46
+                              Store 1063 1062
+            1064:      6(int) Load 8(invocation)
+            1065:    593(ptr) AccessChain 34(data) 37 46 38
+            1066:  19(int8_t) Load 1065
+            1067:  19(int8_t) GroupNonUniformBitwiseAnd 42 ExclusiveScan 1066
+            1068:    593(ptr) AccessChain 34(data) 1064 46 38
+                              Store 1068 1067
+            1069:      6(int) Load 8(invocation)
+            1070:    600(ptr) AccessChain 34(data) 46 46
+            1071:  20(i8vec4) Load 1070
+            1072: 599(i8vec2) VectorShuffle 1071 1071 0 1
+            1073: 599(i8vec2) GroupNonUniformBitwiseAnd 42 ExclusiveScan 1072
+            1074:    600(ptr) AccessChain 34(data) 1069 46
+            1075:  20(i8vec4) Load 1074
+            1076:  20(i8vec4) VectorShuffle 1075 1073 4 5 2 3
+                              Store 1074 1076
+            1077:      6(int) Load 8(invocation)
+            1078:    600(ptr) AccessChain 34(data) 57 46
+            1079:  20(i8vec4) Load 1078
+            1080: 609(i8vec3) VectorShuffle 1079 1079 0 1 2
+            1081: 609(i8vec3) GroupNonUniformBitwiseAnd 42 ExclusiveScan 1080
+            1082:    600(ptr) AccessChain 34(data) 1077 46
+            1083:  20(i8vec4) Load 1082
+            1084:  20(i8vec4) VectorShuffle 1083 1081 4 5 6 3
+                              Store 1082 1084
+            1085:      6(int) Load 8(invocation)
+            1086:    600(ptr) AccessChain 34(data) 67 46
+            1087:  20(i8vec4) Load 1086
+            1088:  20(i8vec4) GroupNonUniformBitwiseAnd 42 ExclusiveScan 1087
+            1089:    600(ptr) AccessChain 34(data) 1085 46
+                              Store 1089 1088
+            1090:      6(int) Load 8(invocation)
+            1091:    593(ptr) AccessChain 34(data) 37 46 38
+            1092:  19(int8_t) Load 1091
+            1093:  19(int8_t) GroupNonUniformBitwiseOr 42 ExclusiveScan 1092
+            1094:    593(ptr) AccessChain 34(data) 1090 46 38
+                              Store 1094 1093
+            1095:      6(int) Load 8(invocation)
+            1096:    600(ptr) AccessChain 34(data) 46 46
+            1097:  20(i8vec4) Load 1096
+            1098: 599(i8vec2) VectorShuffle 1097 1097 0 1
+            1099: 599(i8vec2) GroupNonUniformBitwiseOr 42 ExclusiveScan 1098
+            1100:    600(ptr) AccessChain 34(data) 1095 46
+            1101:  20(i8vec4) Load 1100
+            1102:  20(i8vec4) VectorShuffle 1101 1099 4 5 2 3
+                              Store 1100 1102
+            1103:      6(int) Load 8(invocation)
+            1104:    600(ptr) AccessChain 34(data) 57 46
+            1105:  20(i8vec4) Load 1104
+            1106: 609(i8vec3) VectorShuffle 1105 1105 0 1 2
+            1107: 609(i8vec3) GroupNonUniformBitwiseOr 42 ExclusiveScan 1106
+            1108:    600(ptr) AccessChain 34(data) 1103 46
+            1109:  20(i8vec4) Load 1108
+            1110:  20(i8vec4) VectorShuffle 1109 1107 4 5 6 3
+                              Store 1108 1110
+            1111:      6(int) Load 8(invocation)
+            1112:    600(ptr) AccessChain 34(data) 67 46
+            1113:  20(i8vec4) Load 1112
+            1114:  20(i8vec4) GroupNonUniformBitwiseOr 42 ExclusiveScan 1113
+            1115:    600(ptr) AccessChain 34(data) 1111 46
+                              Store 1115 1114
+            1116:      6(int) Load 8(invocation)
+            1117:    593(ptr) AccessChain 34(data) 37 46 38
+            1118:  19(int8_t) Load 1117
+            1119:  19(int8_t) GroupNonUniformBitwiseXor 42 ExclusiveScan 1118
+            1120:    593(ptr) AccessChain 34(data) 1116 46 38
+                              Store 1120 1119
+            1121:      6(int) Load 8(invocation)
+            1122:    600(ptr) AccessChain 34(data) 46 46
+            1123:  20(i8vec4) Load 1122
+            1124: 599(i8vec2) VectorShuffle 1123 1123 0 1
+            1125: 599(i8vec2) GroupNonUniformBitwiseXor 42 ExclusiveScan 1124
+            1126:    600(ptr) AccessChain 34(data) 1121 46
+            1127:  20(i8vec4) Load 1126
+            1128:  20(i8vec4) VectorShuffle 1127 1125 4 5 2 3
+                              Store 1126 1128
+            1129:      6(int) Load 8(invocation)
+            1130:    600(ptr) AccessChain 34(data) 57 46
+            1131:  20(i8vec4) Load 1130
+            1132: 609(i8vec3) VectorShuffle 1131 1131 0 1 2
+            1133: 609(i8vec3) GroupNonUniformBitwiseXor 42 ExclusiveScan 1132
+            1134:    600(ptr) AccessChain 34(data) 1129 46
+            1135:  20(i8vec4) Load 1134
+            1136:  20(i8vec4) VectorShuffle 1135 1133 4 5 6 3
+                              Store 1134 1136
+            1137:      6(int) Load 8(invocation)
+            1138:    600(ptr) AccessChain 34(data) 67 46
+            1139:  20(i8vec4) Load 1138
+            1140:  20(i8vec4) GroupNonUniformBitwiseXor 42 ExclusiveScan 1139
+            1141:    600(ptr) AccessChain 34(data) 1137 46
+                              Store 1141 1140
+            1142:      6(int) Load 8(invocation)
+            1144:   1143(ptr) AccessChain 34(data) 37 57 38
+            1145: 21(int16_t) Load 1144
+            1146: 21(int16_t) GroupNonUniformIAdd 42 Reduce 1145
+            1147:   1143(ptr) AccessChain 34(data) 1142 57 38
+                              Store 1147 1146
+            1148:      6(int) Load 8(invocation)
+            1151:   1150(ptr) AccessChain 34(data) 46 57
+            1152: 22(i16vec4) Load 1151
+            1153:1149(i16vec2) VectorShuffle 1152 1152 0 1
+            1154:1149(i16vec2) GroupNonUniformIAdd 42 Reduce 1153
+            1155:   1150(ptr) AccessChain 34(data) 1148 57
+            1156: 22(i16vec4) Load 1155
+            1157: 22(i16vec4) VectorShuffle 1156 1154 4 5 2 3
+                              Store 1155 1157
+            1158:      6(int) Load 8(invocation)
+            1160:   1150(ptr) AccessChain 34(data) 57 57
+            1161: 22(i16vec4) Load 1160
+            1162:1159(i16vec3) VectorShuffle 1161 1161 0 1 2
+            1163:1159(i16vec3) GroupNonUniformIAdd 42 Reduce 1162
+            1164:   1150(ptr) AccessChain 34(data) 1158 57
+            1165: 22(i16vec4) Load 1164
+            1166: 22(i16vec4) VectorShuffle 1165 1163 4 5 6 3
+                              Store 1164 1166
+            1167:      6(int) Load 8(invocation)
+            1168:   1150(ptr) AccessChain 34(data) 67 57
+            1169: 22(i16vec4) Load 1168
+            1170: 22(i16vec4) GroupNonUniformIAdd 42 Reduce 1169
+            1171:   1150(ptr) AccessChain 34(data) 1167 57
+                              Store 1171 1170
+            1172:      6(int) Load 8(invocation)
+            1173:   1143(ptr) AccessChain 34(data) 37 57 38
+            1174: 21(int16_t) Load 1173
+            1175: 21(int16_t) GroupNonUniformIMul 42 Reduce 1174
+            1176:   1143(ptr) AccessChain 34(data) 1172 57 38
+                              Store 1176 1175
+            1177:      6(int) Load 8(invocation)
+            1178:   1150(ptr) AccessChain 34(data) 46 57
+            1179: 22(i16vec4) Load 1178
+            1180:1149(i16vec2) VectorShuffle 1179 1179 0 1
+            1181:1149(i16vec2) GroupNonUniformIMul 42 Reduce 1180
+            1182:   1150(ptr) AccessChain 34(data) 1177 57
+            1183: 22(i16vec4) Load 1182
+            1184: 22(i16vec4) VectorShuffle 1183 1181 4 5 2 3
+                              Store 1182 1184
+            1185:      6(int) Load 8(invocation)
+            1186:   1150(ptr) AccessChain 34(data) 57 57
+            1187: 22(i16vec4) Load 1186
+            1188:1159(i16vec3) VectorShuffle 1187 1187 0 1 2
+            1189:1159(i16vec3) GroupNonUniformIMul 42 Reduce 1188
+            1190:   1150(ptr) AccessChain 34(data) 1185 57
+            1191: 22(i16vec4) Load 1190
+            1192: 22(i16vec4) VectorShuffle 1191 1189 4 5 6 3
+                              Store 1190 1192
+            1193:      6(int) Load 8(invocation)
+            1194:   1150(ptr) AccessChain 34(data) 67 57
+            1195: 22(i16vec4) Load 1194
+            1196: 22(i16vec4) GroupNonUniformIMul 42 Reduce 1195
+            1197:   1150(ptr) AccessChain 34(data) 1193 57
+                              Store 1197 1196
+            1198:      6(int) Load 8(invocation)
+            1199:   1143(ptr) AccessChain 34(data) 37 57 38
+            1200: 21(int16_t) Load 1199
+            1201: 21(int16_t) GroupNonUniformSMin 42 Reduce 1200
+            1202:   1143(ptr) AccessChain 34(data) 1198 57 38
+                              Store 1202 1201
+            1203:      6(int) Load 8(invocation)
+            1204:   1150(ptr) AccessChain 34(data) 46 57
+            1205: 22(i16vec4) Load 1204
+            1206:1149(i16vec2) VectorShuffle 1205 1205 0 1
+            1207:1149(i16vec2) GroupNonUniformSMin 42 Reduce 1206
+            1208:   1150(ptr) AccessChain 34(data) 1203 57
+            1209: 22(i16vec4) Load 1208
+            1210: 22(i16vec4) VectorShuffle 1209 1207 4 5 2 3
+                              Store 1208 1210
+            1211:      6(int) Load 8(invocation)
+            1212:   1150(ptr) AccessChain 34(data) 57 57
+            1213: 22(i16vec4) Load 1212
+            1214:1159(i16vec3) VectorShuffle 1213 1213 0 1 2
+            1215:1159(i16vec3) GroupNonUniformSMin 42 Reduce 1214
+            1216:   1150(ptr) AccessChain 34(data) 1211 57
+            1217: 22(i16vec4) Load 1216
+            1218: 22(i16vec4) VectorShuffle 1217 1215 4 5 6 3
+                              Store 1216 1218
+            1219:      6(int) Load 8(invocation)
+            1220:   1150(ptr) AccessChain 34(data) 67 57
+            1221: 22(i16vec4) Load 1220
+            1222: 22(i16vec4) GroupNonUniformSMin 42 Reduce 1221
+            1223:   1150(ptr) AccessChain 34(data) 1219 57
+                              Store 1223 1222
+            1224:      6(int) Load 8(invocation)
+            1225:   1143(ptr) AccessChain 34(data) 37 57 38
+            1226: 21(int16_t) Load 1225
+            1227: 21(int16_t) GroupNonUniformSMax 42 Reduce 1226
+            1228:   1143(ptr) AccessChain 34(data) 1224 57 38
+                              Store 1228 1227
+            1229:      6(int) Load 8(invocation)
+            1230:   1150(ptr) AccessChain 34(data) 46 57
+            1231: 22(i16vec4) Load 1230
+            1232:1149(i16vec2) VectorShuffle 1231 1231 0 1
+            1233:1149(i16vec2) GroupNonUniformSMax 42 Reduce 1232
+            1234:   1150(ptr) AccessChain 34(data) 1229 57
+            1235: 22(i16vec4) Load 1234
+            1236: 22(i16vec4) VectorShuffle 1235 1233 4 5 2 3
+                              Store 1234 1236
+            1237:      6(int) Load 8(invocation)
+            1238:   1150(ptr) AccessChain 34(data) 57 57
+            1239: 22(i16vec4) Load 1238
+            1240:1159(i16vec3) VectorShuffle 1239 1239 0 1 2
+            1241:1159(i16vec3) GroupNonUniformSMax 42 Reduce 1240
+            1242:   1150(ptr) AccessChain 34(data) 1237 57
+            1243: 22(i16vec4) Load 1242
+            1244: 22(i16vec4) VectorShuffle 1243 1241 4 5 6 3
+                              Store 1242 1244
+            1245:      6(int) Load 8(invocation)
+            1246:   1150(ptr) AccessChain 34(data) 67 57
+            1247: 22(i16vec4) Load 1246
+            1248: 22(i16vec4) GroupNonUniformSMax 42 Reduce 1247
+            1249:   1150(ptr) AccessChain 34(data) 1245 57
+                              Store 1249 1248
+            1250:      6(int) Load 8(invocation)
+            1251:   1143(ptr) AccessChain 34(data) 37 57 38
+            1252: 21(int16_t) Load 1251
+            1253: 21(int16_t) GroupNonUniformBitwiseAnd 42 Reduce 1252
+            1254:   1143(ptr) AccessChain 34(data) 1250 57 38
+                              Store 1254 1253
+            1255:      6(int) Load 8(invocation)
+            1256:   1150(ptr) AccessChain 34(data) 46 57
+            1257: 22(i16vec4) Load 1256
+            1258:1149(i16vec2) VectorShuffle 1257 1257 0 1
+            1259:1149(i16vec2) GroupNonUniformBitwiseAnd 42 Reduce 1258
+            1260:   1150(ptr) AccessChain 34(data) 1255 57
+            1261: 22(i16vec4) Load 1260
+            1262: 22(i16vec4) VectorShuffle 1261 1259 4 5 2 3
+                              Store 1260 1262
+            1263:      6(int) Load 8(invocation)
+            1264:   1150(ptr) AccessChain 34(data) 57 57
+            1265: 22(i16vec4) Load 1264
+            1266:1159(i16vec3) VectorShuffle 1265 1265 0 1 2
+            1267:1159(i16vec3) GroupNonUniformBitwiseAnd 42 Reduce 1266
+            1268:   1150(ptr) AccessChain 34(data) 1263 57
+            1269: 22(i16vec4) Load 1268
+            1270: 22(i16vec4) VectorShuffle 1269 1267 4 5 6 3
+                              Store 1268 1270
+            1271:      6(int) Load 8(invocation)
+            1272:   1150(ptr) AccessChain 34(data) 67 57
+            1273: 22(i16vec4) Load 1272
+            1274: 22(i16vec4) GroupNonUniformBitwiseAnd 42 Reduce 1273
+            1275:   1150(ptr) AccessChain 34(data) 1271 57
+                              Store 1275 1274
+            1276:      6(int) Load 8(invocation)
+            1277:   1143(ptr) AccessChain 34(data) 37 57 38
+            1278: 21(int16_t) Load 1277
+            1279: 21(int16_t) GroupNonUniformBitwiseOr 42 Reduce 1278
+            1280:   1143(ptr) AccessChain 34(data) 1276 57 38
+                              Store 1280 1279
+            1281:      6(int) Load 8(invocation)
+            1282:   1150(ptr) AccessChain 34(data) 46 57
+            1283: 22(i16vec4) Load 1282
+            1284:1149(i16vec2) VectorShuffle 1283 1283 0 1
+            1285:1149(i16vec2) GroupNonUniformBitwiseOr 42 Reduce 1284
+            1286:   1150(ptr) AccessChain 34(data) 1281 57
+            1287: 22(i16vec4) Load 1286
+            1288: 22(i16vec4) VectorShuffle 1287 1285 4 5 2 3
+                              Store 1286 1288
+            1289:      6(int) Load 8(invocation)
+            1290:   1150(ptr) AccessChain 34(data) 57 57
+            1291: 22(i16vec4) Load 1290
+            1292:1159(i16vec3) VectorShuffle 1291 1291 0 1 2
+            1293:1159(i16vec3) GroupNonUniformBitwiseOr 42 Reduce 1292
+            1294:   1150(ptr) AccessChain 34(data) 1289 57
+            1295: 22(i16vec4) Load 1294
+            1296: 22(i16vec4) VectorShuffle 1295 1293 4 5 6 3
+                              Store 1294 1296
+            1297:      6(int) Load 8(invocation)
+            1298:   1150(ptr) AccessChain 34(data) 67 57
+            1299: 22(i16vec4) Load 1298
+            1300: 22(i16vec4) GroupNonUniformBitwiseOr 42 Reduce 1299
+            1301:   1150(ptr) AccessChain 34(data) 1297 57
+                              Store 1301 1300
+            1302:      6(int) Load 8(invocation)
+            1303:   1143(ptr) AccessChain 34(data) 37 57 38
+            1304: 21(int16_t) Load 1303
+            1305: 21(int16_t) GroupNonUniformBitwiseXor 42 Reduce 1304
+            1306:   1143(ptr) AccessChain 34(data) 1302 57 38
+                              Store 1306 1305
+            1307:      6(int) Load 8(invocation)
+            1308:   1150(ptr) AccessChain 34(data) 46 57
+            1309: 22(i16vec4) Load 1308
+            1310:1149(i16vec2) VectorShuffle 1309 1309 0 1
+            1311:1149(i16vec2) GroupNonUniformBitwiseXor 42 Reduce 1310
+            1312:   1150(ptr) AccessChain 34(data) 1307 57
+            1313: 22(i16vec4) Load 1312
+            1314: 22(i16vec4) VectorShuffle 1313 1311 4 5 2 3
+                              Store 1312 1314
+            1315:      6(int) Load 8(invocation)
+            1316:   1150(ptr) AccessChain 34(data) 57 57
+            1317: 22(i16vec4) Load 1316
+            1318:1159(i16vec3) VectorShuffle 1317 1317 0 1 2
+            1319:1159(i16vec3) GroupNonUniformBitwiseXor 42 Reduce 1318
+            1320:   1150(ptr) AccessChain 34(data) 1315 57
+            1321: 22(i16vec4) Load 1320
+            1322: 22(i16vec4) VectorShuffle 1321 1319 4 5 6 3
+                              Store 1320 1322
+            1323:      6(int) Load 8(invocation)
+            1324:   1150(ptr) AccessChain 34(data) 67 57
+            1325: 22(i16vec4) Load 1324
+            1326: 22(i16vec4) GroupNonUniformBitwiseXor 42 Reduce 1325
+            1327:   1150(ptr) AccessChain 34(data) 1323 57
+                              Store 1327 1326
+            1328:      6(int) Load 8(invocation)
+            1329:   1143(ptr) AccessChain 34(data) 37 57 38
+            1330: 21(int16_t) Load 1329
+            1331: 21(int16_t) GroupNonUniformIAdd 42 InclusiveScan 1330
+            1332:   1143(ptr) AccessChain 34(data) 1328 57 38
+                              Store 1332 1331
+            1333:      6(int) Load 8(invocation)
+            1334:   1150(ptr) AccessChain 34(data) 46 57
+            1335: 22(i16vec4) Load 1334
+            1336:1149(i16vec2) VectorShuffle 1335 1335 0 1
+            1337:1149(i16vec2) GroupNonUniformIAdd 42 InclusiveScan 1336
+            1338:   1150(ptr) AccessChain 34(data) 1333 57
+            1339: 22(i16vec4) Load 1338
+            1340: 22(i16vec4) VectorShuffle 1339 1337 4 5 2 3
+                              Store 1338 1340
+            1341:      6(int) Load 8(invocation)
+            1342:   1150(ptr) AccessChain 34(data) 57 57
+            1343: 22(i16vec4) Load 1342
+            1344:1159(i16vec3) VectorShuffle 1343 1343 0 1 2
+            1345:1159(i16vec3) GroupNonUniformIAdd 42 InclusiveScan 1344
+            1346:   1150(ptr) AccessChain 34(data) 1341 57
+            1347: 22(i16vec4) Load 1346
+            1348: 22(i16vec4) VectorShuffle 1347 1345 4 5 6 3
+                              Store 1346 1348
+            1349:      6(int) Load 8(invocation)
+            1350:   1150(ptr) AccessChain 34(data) 67 57
+            1351: 22(i16vec4) Load 1350
+            1352: 22(i16vec4) GroupNonUniformIAdd 42 InclusiveScan 1351
+            1353:   1150(ptr) AccessChain 34(data) 1349 57
+                              Store 1353 1352
+            1354:      6(int) Load 8(invocation)
+            1355:   1143(ptr) AccessChain 34(data) 37 57 38
+            1356: 21(int16_t) Load 1355
+            1357: 21(int16_t) GroupNonUniformIMul 42 InclusiveScan 1356
+            1358:   1143(ptr) AccessChain 34(data) 1354 57 38
+                              Store 1358 1357
+            1359:      6(int) Load 8(invocation)
+            1360:   1150(ptr) AccessChain 34(data) 46 57
+            1361: 22(i16vec4) Load 1360
+            1362:1149(i16vec2) VectorShuffle 1361 1361 0 1
+            1363:1149(i16vec2) GroupNonUniformIMul 42 InclusiveScan 1362
+            1364:   1150(ptr) AccessChain 34(data) 1359 57
+            1365: 22(i16vec4) Load 1364
+            1366: 22(i16vec4) VectorShuffle 1365 1363 4 5 2 3
+                              Store 1364 1366
+            1367:      6(int) Load 8(invocation)
+            1368:   1150(ptr) AccessChain 34(data) 57 57
+            1369: 22(i16vec4) Load 1368
+            1370:1159(i16vec3) VectorShuffle 1369 1369 0 1 2
+            1371:1159(i16vec3) GroupNonUniformIMul 42 InclusiveScan 1370
+            1372:   1150(ptr) AccessChain 34(data) 1367 57
+            1373: 22(i16vec4) Load 1372
+            1374: 22(i16vec4) VectorShuffle 1373 1371 4 5 6 3
+                              Store 1372 1374
+            1375:      6(int) Load 8(invocation)
+            1376:   1150(ptr) AccessChain 34(data) 67 57
+            1377: 22(i16vec4) Load 1376
+            1378: 22(i16vec4) GroupNonUniformIMul 42 InclusiveScan 1377
+            1379:   1150(ptr) AccessChain 34(data) 1375 57
+                              Store 1379 1378
+            1380:      6(int) Load 8(invocation)
+            1381:   1143(ptr) AccessChain 34(data) 37 57 38
+            1382: 21(int16_t) Load 1381
+            1383: 21(int16_t) GroupNonUniformSMin 42 InclusiveScan 1382
+            1384:   1143(ptr) AccessChain 34(data) 1380 57 38
+                              Store 1384 1383
+            1385:      6(int) Load 8(invocation)
+            1386:   1150(ptr) AccessChain 34(data) 46 57
+            1387: 22(i16vec4) Load 1386
+            1388:1149(i16vec2) VectorShuffle 1387 1387 0 1
+            1389:1149(i16vec2) GroupNonUniformSMin 42 InclusiveScan 1388
+            1390:   1150(ptr) AccessChain 34(data) 1385 57
+            1391: 22(i16vec4) Load 1390
+            1392: 22(i16vec4) VectorShuffle 1391 1389 4 5 2 3
+                              Store 1390 1392
+            1393:      6(int) Load 8(invocation)
+            1394:   1150(ptr) AccessChain 34(data) 57 57
+            1395: 22(i16vec4) Load 1394
+            1396:1159(i16vec3) VectorShuffle 1395 1395 0 1 2
+            1397:1159(i16vec3) GroupNonUniformSMin 42 InclusiveScan 1396
+            1398:   1150(ptr) AccessChain 34(data) 1393 57
+            1399: 22(i16vec4) Load 1398
+            1400: 22(i16vec4) VectorShuffle 1399 1397 4 5 6 3
+                              Store 1398 1400
+            1401:      6(int) Load 8(invocation)
+            1402:   1150(ptr) AccessChain 34(data) 67 57
+            1403: 22(i16vec4) Load 1402
+            1404: 22(i16vec4) GroupNonUniformSMin 42 InclusiveScan 1403
+            1405:   1150(ptr) AccessChain 34(data) 1401 57
+                              Store 1405 1404
+            1406:      6(int) Load 8(invocation)
+            1407:   1143(ptr) AccessChain 34(data) 37 57 38
+            1408: 21(int16_t) Load 1407
+            1409: 21(int16_t) GroupNonUniformSMax 42 InclusiveScan 1408
+            1410:   1143(ptr) AccessChain 34(data) 1406 57 38
+                              Store 1410 1409
+            1411:      6(int) Load 8(invocation)
+            1412:   1150(ptr) AccessChain 34(data) 46 57
+            1413: 22(i16vec4) Load 1412
+            1414:1149(i16vec2) VectorShuffle 1413 1413 0 1
+            1415:1149(i16vec2) GroupNonUniformSMax 42 InclusiveScan 1414
+            1416:   1150(ptr) AccessChain 34(data) 1411 57
+            1417: 22(i16vec4) Load 1416
+            1418: 22(i16vec4) VectorShuffle 1417 1415 4 5 2 3
+                              Store 1416 1418
+            1419:      6(int) Load 8(invocation)
+            1420:   1150(ptr) AccessChain 34(data) 57 57
+            1421: 22(i16vec4) Load 1420
+            1422:1159(i16vec3) VectorShuffle 1421 1421 0 1 2
+            1423:1159(i16vec3) GroupNonUniformSMax 42 InclusiveScan 1422
+            1424:   1150(ptr) AccessChain 34(data) 1419 57
+            1425: 22(i16vec4) Load 1424
+            1426: 22(i16vec4) VectorShuffle 1425 1423 4 5 6 3
+                              Store 1424 1426
+            1427:      6(int) Load 8(invocation)
+            1428:   1150(ptr) AccessChain 34(data) 67 57
+            1429: 22(i16vec4) Load 1428
+            1430: 22(i16vec4) GroupNonUniformSMax 42 InclusiveScan 1429
+            1431:   1150(ptr) AccessChain 34(data) 1427 57
+                              Store 1431 1430
+            1432:      6(int) Load 8(invocation)
+            1433:   1143(ptr) AccessChain 34(data) 37 57 38
+            1434: 21(int16_t) Load 1433
+            1435: 21(int16_t) GroupNonUniformBitwiseAnd 42 InclusiveScan 1434
+            1436:   1143(ptr) AccessChain 34(data) 1432 57 38
+                              Store 1436 1435
+            1437:      6(int) Load 8(invocation)
+            1438:   1150(ptr) AccessChain 34(data) 46 57
+            1439: 22(i16vec4) Load 1438
+            1440:1149(i16vec2) VectorShuffle 1439 1439 0 1
+            1441:1149(i16vec2) GroupNonUniformBitwiseAnd 42 InclusiveScan 1440
+            1442:   1150(ptr) AccessChain 34(data) 1437 57
+            1443: 22(i16vec4) Load 1442
+            1444: 22(i16vec4) VectorShuffle 1443 1441 4 5 2 3
+                              Store 1442 1444
+            1445:      6(int) Load 8(invocation)
+            1446:   1150(ptr) AccessChain 34(data) 57 57
+            1447: 22(i16vec4) Load 1446
+            1448:1159(i16vec3) VectorShuffle 1447 1447 0 1 2
+            1449:1159(i16vec3) GroupNonUniformBitwiseAnd 42 InclusiveScan 1448
+            1450:   1150(ptr) AccessChain 34(data) 1445 57
+            1451: 22(i16vec4) Load 1450
+            1452: 22(i16vec4) VectorShuffle 1451 1449 4 5 6 3
+                              Store 1450 1452
+            1453:      6(int) Load 8(invocation)
+            1454:   1150(ptr) AccessChain 34(data) 67 57
+            1455: 22(i16vec4) Load 1454
+            1456: 22(i16vec4) GroupNonUniformBitwiseAnd 42 InclusiveScan 1455
+            1457:   1150(ptr) AccessChain 34(data) 1453 57
+                              Store 1457 1456
+            1458:      6(int) Load 8(invocation)
+            1459:   1143(ptr) AccessChain 34(data) 37 57 38
+            1460: 21(int16_t) Load 1459
+            1461: 21(int16_t) GroupNonUniformBitwiseOr 42 InclusiveScan 1460
+            1462:   1143(ptr) AccessChain 34(data) 1458 57 38
+                              Store 1462 1461
+            1463:      6(int) Load 8(invocation)
+            1464:   1150(ptr) AccessChain 34(data) 46 57
+            1465: 22(i16vec4) Load 1464
+            1466:1149(i16vec2) VectorShuffle 1465 1465 0 1
+            1467:1149(i16vec2) GroupNonUniformBitwiseOr 42 InclusiveScan 1466
+            1468:   1150(ptr) AccessChain 34(data) 1463 57
+            1469: 22(i16vec4) Load 1468
+            1470: 22(i16vec4) VectorShuffle 1469 1467 4 5 2 3
+                              Store 1468 1470
+            1471:      6(int) Load 8(invocation)
+            1472:   1150(ptr) AccessChain 34(data) 57 57
+            1473: 22(i16vec4) Load 1472
+            1474:1159(i16vec3) VectorShuffle 1473 1473 0 1 2
+            1475:1159(i16vec3) GroupNonUniformBitwiseOr 42 InclusiveScan 1474
+            1476:   1150(ptr) AccessChain 34(data) 1471 57
+            1477: 22(i16vec4) Load 1476
+            1478: 22(i16vec4) VectorShuffle 1477 1475 4 5 6 3
+                              Store 1476 1478
+            1479:      6(int) Load 8(invocation)
+            1480:   1150(ptr) AccessChain 34(data) 67 57
+            1481: 22(i16vec4) Load 1480
+            1482: 22(i16vec4) GroupNonUniformBitwiseOr 42 InclusiveScan 1481
+            1483:   1150(ptr) AccessChain 34(data) 1479 57
+                              Store 1483 1482
+            1484:      6(int) Load 8(invocation)
+            1485:   1143(ptr) AccessChain 34(data) 37 57 38
+            1486: 21(int16_t) Load 1485
+            1487: 21(int16_t) GroupNonUniformBitwiseXor 42 InclusiveScan 1486
+            1488:   1143(ptr) AccessChain 34(data) 1484 57 38
+                              Store 1488 1487
+            1489:      6(int) Load 8(invocation)
+            1490:   1150(ptr) AccessChain 34(data) 46 57
+            1491: 22(i16vec4) Load 1490
+            1492:1149(i16vec2) VectorShuffle 1491 1491 0 1
+            1493:1149(i16vec2) GroupNonUniformBitwiseXor 42 InclusiveScan 1492
+            1494:   1150(ptr) AccessChain 34(data) 1489 57
+            1495: 22(i16vec4) Load 1494
+            1496: 22(i16vec4) VectorShuffle 1495 1493 4 5 2 3
+                              Store 1494 1496
+            1497:      6(int) Load 8(invocation)
+            1498:   1150(ptr) AccessChain 34(data) 57 57
+            1499: 22(i16vec4) Load 1498
+            1500:1159(i16vec3) VectorShuffle 1499 1499 0 1 2
+            1501:1159(i16vec3) GroupNonUniformBitwiseXor 42 InclusiveScan 1500
+            1502:   1150(ptr) AccessChain 34(data) 1497 57
+            1503: 22(i16vec4) Load 1502
+            1504: 22(i16vec4) VectorShuffle 1503 1501 4 5 6 3
+                              Store 1502 1504
+            1505:      6(int) Load 8(invocation)
+            1506:   1150(ptr) AccessChain 34(data) 67 57
+            1507: 22(i16vec4) Load 1506
+            1508: 22(i16vec4) GroupNonUniformBitwiseXor 42 InclusiveScan 1507
+            1509:   1150(ptr) AccessChain 34(data) 1505 57
+                              Store 1509 1508
+            1510:      6(int) Load 8(invocation)
+            1511:   1143(ptr) AccessChain 34(data) 37 57 38
+            1512: 21(int16_t) Load 1511
+            1513: 21(int16_t) GroupNonUniformIAdd 42 ExclusiveScan 1512
+            1514:   1143(ptr) AccessChain 34(data) 1510 57 38
+                              Store 1514 1513
+            1515:      6(int) Load 8(invocation)
+            1516:   1150(ptr) AccessChain 34(data) 46 57
+            1517: 22(i16vec4) Load 1516
+            1518:1149(i16vec2) VectorShuffle 1517 1517 0 1
+            1519:1149(i16vec2) GroupNonUniformIAdd 42 ExclusiveScan 1518
+            1520:   1150(ptr) AccessChain 34(data) 1515 57
+            1521: 22(i16vec4) Load 1520
+            1522: 22(i16vec4) VectorShuffle 1521 1519 4 5 2 3
+                              Store 1520 1522
+            1523:      6(int) Load 8(invocation)
+            1524:   1150(ptr) AccessChain 34(data) 57 57
+            1525: 22(i16vec4) Load 1524
+            1526:1159(i16vec3) VectorShuffle 1525 1525 0 1 2
+            1527:1159(i16vec3) GroupNonUniformIAdd 42 ExclusiveScan 1526
+            1528:   1150(ptr) AccessChain 34(data) 1523 57
+            1529: 22(i16vec4) Load 1528
+            1530: 22(i16vec4) VectorShuffle 1529 1527 4 5 6 3
+                              Store 1528 1530
+            1531:      6(int) Load 8(invocation)
+            1532:   1150(ptr) AccessChain 34(data) 67 57
+            1533: 22(i16vec4) Load 1532
+            1534: 22(i16vec4) GroupNonUniformIAdd 42 ExclusiveScan 1533
+            1535:   1150(ptr) AccessChain 34(data) 1531 57
+                              Store 1535 1534
+            1536:      6(int) Load 8(invocation)
+            1537:   1143(ptr) AccessChain 34(data) 37 57 38
+            1538: 21(int16_t) Load 1537
+            1539: 21(int16_t) GroupNonUniformIMul 42 ExclusiveScan 1538
+            1540:   1143(ptr) AccessChain 34(data) 1536 57 38
+                              Store 1540 1539
+            1541:      6(int) Load 8(invocation)
+            1542:   1150(ptr) AccessChain 34(data) 46 57
+            1543: 22(i16vec4) Load 1542
+            1544:1149(i16vec2) VectorShuffle 1543 1543 0 1
+            1545:1149(i16vec2) GroupNonUniformIMul 42 ExclusiveScan 1544
+            1546:   1150(ptr) AccessChain 34(data) 1541 57
+            1547: 22(i16vec4) Load 1546
+            1548: 22(i16vec4) VectorShuffle 1547 1545 4 5 2 3
+                              Store 1546 1548
+            1549:      6(int) Load 8(invocation)
+            1550:   1150(ptr) AccessChain 34(data) 57 57
+            1551: 22(i16vec4) Load 1550
+            1552:1159(i16vec3) VectorShuffle 1551 1551 0 1 2
+            1553:1159(i16vec3) GroupNonUniformIMul 42 ExclusiveScan 1552
+            1554:   1150(ptr) AccessChain 34(data) 1549 57
+            1555: 22(i16vec4) Load 1554
+            1556: 22(i16vec4) VectorShuffle 1555 1553 4 5 6 3
+                              Store 1554 1556
+            1557:      6(int) Load 8(invocation)
+            1558:   1150(ptr) AccessChain 34(data) 67 57
+            1559: 22(i16vec4) Load 1558
+            1560: 22(i16vec4) GroupNonUniformIMul 42 ExclusiveScan 1559
+            1561:   1150(ptr) AccessChain 34(data) 1557 57
+                              Store 1561 1560
+            1562:      6(int) Load 8(invocation)
+            1563:   1143(ptr) AccessChain 34(data) 37 57 38
+            1564: 21(int16_t) Load 1563
+            1565: 21(int16_t) GroupNonUniformSMin 42 ExclusiveScan 1564
+            1566:   1143(ptr) AccessChain 34(data) 1562 57 38
+                              Store 1566 1565
+            1567:      6(int) Load 8(invocation)
+            1568:   1150(ptr) AccessChain 34(data) 46 57
+            1569: 22(i16vec4) Load 1568
+            1570:1149(i16vec2) VectorShuffle 1569 1569 0 1
+            1571:1149(i16vec2) GroupNonUniformSMin 42 ExclusiveScan 1570
+            1572:   1150(ptr) AccessChain 34(data) 1567 57
+            1573: 22(i16vec4) Load 1572
+            1574: 22(i16vec4) VectorShuffle 1573 1571 4 5 2 3
+                              Store 1572 1574
+            1575:      6(int) Load 8(invocation)
+            1576:   1150(ptr) AccessChain 34(data) 57 57
+            1577: 22(i16vec4) Load 1576
+            1578:1159(i16vec3) VectorShuffle 1577 1577 0 1 2
+            1579:1159(i16vec3) GroupNonUniformSMin 42 ExclusiveScan 1578
+            1580:   1150(ptr) AccessChain 34(data) 1575 57
+            1581: 22(i16vec4) Load 1580
+            1582: 22(i16vec4) VectorShuffle 1581 1579 4 5 6 3
+                              Store 1580 1582
+            1583:      6(int) Load 8(invocation)
+            1584:   1150(ptr) AccessChain 34(data) 67 57
+            1585: 22(i16vec4) Load 1584
+            1586: 22(i16vec4) GroupNonUniformSMin 42 ExclusiveScan 1585
+            1587:   1150(ptr) AccessChain 34(data) 1583 57
+                              Store 1587 1586
+            1588:      6(int) Load 8(invocation)
+            1589:   1143(ptr) AccessChain 34(data) 37 57 38
+            1590: 21(int16_t) Load 1589
+            1591: 21(int16_t) GroupNonUniformSMax 42 ExclusiveScan 1590
+            1592:   1143(ptr) AccessChain 34(data) 1588 57 38
+                              Store 1592 1591
+            1593:      6(int) Load 8(invocation)
+            1594:   1150(ptr) AccessChain 34(data) 46 57
+            1595: 22(i16vec4) Load 1594
+            1596:1149(i16vec2) VectorShuffle 1595 1595 0 1
+            1597:1149(i16vec2) GroupNonUniformSMax 42 ExclusiveScan 1596
+            1598:   1150(ptr) AccessChain 34(data) 1593 57
+            1599: 22(i16vec4) Load 1598
+            1600: 22(i16vec4) VectorShuffle 1599 1597 4 5 2 3
+                              Store 1598 1600
+            1601:      6(int) Load 8(invocation)
+            1602:   1150(ptr) AccessChain 34(data) 57 57
+            1603: 22(i16vec4) Load 1602
+            1604:1159(i16vec3) VectorShuffle 1603 1603 0 1 2
+            1605:1159(i16vec3) GroupNonUniformSMax 42 ExclusiveScan 1604
+            1606:   1150(ptr) AccessChain 34(data) 1601 57
+            1607: 22(i16vec4) Load 1606
+            1608: 22(i16vec4) VectorShuffle 1607 1605 4 5 6 3
+                              Store 1606 1608
+            1609:      6(int) Load 8(invocation)
+            1610:   1150(ptr) AccessChain 34(data) 67 57
+            1611: 22(i16vec4) Load 1610
+            1612: 22(i16vec4) GroupNonUniformSMax 42 ExclusiveScan 1611
+            1613:   1150(ptr) AccessChain 34(data) 1609 57
+                              Store 1613 1612
+            1614:      6(int) Load 8(invocation)
+            1615:   1143(ptr) AccessChain 34(data) 37 57 38
+            1616: 21(int16_t) Load 1615
+            1617: 21(int16_t) GroupNonUniformBitwiseAnd 42 ExclusiveScan 1616
+            1618:   1143(ptr) AccessChain 34(data) 1614 57 38
+                              Store 1618 1617
+            1619:      6(int) Load 8(invocation)
+            1620:   1150(ptr) AccessChain 34(data) 46 57
+            1621: 22(i16vec4) Load 1620
+            1622:1149(i16vec2) VectorShuffle 1621 1621 0 1
+            1623:1149(i16vec2) GroupNonUniformBitwiseAnd 42 ExclusiveScan 1622
+            1624:   1150(ptr) AccessChain 34(data) 1619 57
+            1625: 22(i16vec4) Load 1624
+            1626: 22(i16vec4) VectorShuffle 1625 1623 4 5 2 3
+                              Store 1624 1626
+            1627:      6(int) Load 8(invocation)
+            1628:   1150(ptr) AccessChain 34(data) 57 57
+            1629: 22(i16vec4) Load 1628
+            1630:1159(i16vec3) VectorShuffle 1629 1629 0 1 2
+            1631:1159(i16vec3) GroupNonUniformBitwiseAnd 42 ExclusiveScan 1630
+            1632:   1150(ptr) AccessChain 34(data) 1627 57
+            1633: 22(i16vec4) Load 1632
+            1634: 22(i16vec4) VectorShuffle 1633 1631 4 5 6 3
+                              Store 1632 1634
+            1635:      6(int) Load 8(invocation)
+            1636:   1150(ptr) AccessChain 34(data) 67 57
+            1637: 22(i16vec4) Load 1636
+            1638: 22(i16vec4) GroupNonUniformBitwiseAnd 42 ExclusiveScan 1637
+            1639:   1150(ptr) AccessChain 34(data) 1635 57
+                              Store 1639 1638
+            1640:      6(int) Load 8(invocation)
+            1641:   1143(ptr) AccessChain 34(data) 37 57 38
+            1642: 21(int16_t) Load 1641
+            1643: 21(int16_t) GroupNonUniformBitwiseOr 42 ExclusiveScan 1642
+            1644:   1143(ptr) AccessChain 34(data) 1640 57 38
+                              Store 1644 1643
+            1645:      6(int) Load 8(invocation)
+            1646:   1150(ptr) AccessChain 34(data) 46 57
+            1647: 22(i16vec4) Load 1646
+            1648:1149(i16vec2) VectorShuffle 1647 1647 0 1
+            1649:1149(i16vec2) GroupNonUniformBitwiseOr 42 ExclusiveScan 1648
+            1650:   1150(ptr) AccessChain 34(data) 1645 57
+            1651: 22(i16vec4) Load 1650
+            1652: 22(i16vec4) VectorShuffle 1651 1649 4 5 2 3
+                              Store 1650 1652
+            1653:      6(int) Load 8(invocation)
+            1654:   1150(ptr) AccessChain 34(data) 57 57
+            1655: 22(i16vec4) Load 1654
+            1656:1159(i16vec3) VectorShuffle 1655 1655 0 1 2
+            1657:1159(i16vec3) GroupNonUniformBitwiseOr 42 ExclusiveScan 1656
+            1658:   1150(ptr) AccessChain 34(data) 1653 57
+            1659: 22(i16vec4) Load 1658
+            1660: 22(i16vec4) VectorShuffle 1659 1657 4 5 6 3
+                              Store 1658 1660
+            1661:      6(int) Load 8(invocation)
+            1662:   1150(ptr) AccessChain 34(data) 67 57
+            1663: 22(i16vec4) Load 1662
+            1664: 22(i16vec4) GroupNonUniformBitwiseOr 42 ExclusiveScan 1663
+            1665:   1150(ptr) AccessChain 34(data) 1661 57
+                              Store 1665 1664
+            1666:      6(int) Load 8(invocation)
+            1667:   1143(ptr) AccessChain 34(data) 37 57 38
+            1668: 21(int16_t) Load 1667
+            1669: 21(int16_t) GroupNonUniformBitwiseXor 42 ExclusiveScan 1668
+            1670:   1143(ptr) AccessChain 34(data) 1666 57 38
+                              Store 1670 1669
+            1671:      6(int) Load 8(invocation)
+            1672:   1150(ptr) AccessChain 34(data) 46 57
+            1673: 22(i16vec4) Load 1672
+            1674:1149(i16vec2) VectorShuffle 1673 1673 0 1
+            1675:1149(i16vec2) GroupNonUniformBitwiseXor 42 ExclusiveScan 1674
+            1676:   1150(ptr) AccessChain 34(data) 1671 57
+            1677: 22(i16vec4) Load 1676
+            1678: 22(i16vec4) VectorShuffle 1677 1675 4 5 2 3
+                              Store 1676 1678
+            1679:      6(int) Load 8(invocation)
+            1680:   1150(ptr) AccessChain 34(data) 57 57
+            1681: 22(i16vec4) Load 1680
+            1682:1159(i16vec3) VectorShuffle 1681 1681 0 1 2
+            1683:1159(i16vec3) GroupNonUniformBitwiseXor 42 ExclusiveScan 1682
+            1684:   1150(ptr) AccessChain 34(data) 1679 57
+            1685: 22(i16vec4) Load 1684
+            1686: 22(i16vec4) VectorShuffle 1685 1683 4 5 6 3
+                              Store 1684 1686
+            1687:      6(int) Load 8(invocation)
+            1688:   1150(ptr) AccessChain 34(data) 67 57
+            1689: 22(i16vec4) Load 1688
+            1690: 22(i16vec4) GroupNonUniformBitwiseXor 42 ExclusiveScan 1689
+            1691:   1150(ptr) AccessChain 34(data) 1687 57
+                              Store 1691 1690
+            1692:      6(int) Load 8(invocation)
+            1694:   1693(ptr) AccessChain 34(data) 37 67 38
+            1695: 23(int16_t) Load 1694
+            1696: 23(int16_t) GroupNonUniformIAdd 42 Reduce 1695
+            1697:   1693(ptr) AccessChain 34(data) 1692 67 38
+                              Store 1697 1696
+            1698:      6(int) Load 8(invocation)
+            1701:   1700(ptr) AccessChain 34(data) 46 67
+            1702: 24(i16vec4) Load 1701
+            1703:1699(i16vec2) VectorShuffle 1702 1702 0 1
+            1704:1699(i16vec2) GroupNonUniformIAdd 42 Reduce 1703
+            1705:   1700(ptr) AccessChain 34(data) 1698 67
+            1706: 24(i16vec4) Load 1705
+            1707: 24(i16vec4) VectorShuffle 1706 1704 4 5 2 3
+                              Store 1705 1707
+            1708:      6(int) Load 8(invocation)
+            1710:   1700(ptr) AccessChain 34(data) 57 67
+            1711: 24(i16vec4) Load 1710
+            1712:1709(i16vec3) VectorShuffle 1711 1711 0 1 2
+            1713:1709(i16vec3) GroupNonUniformIAdd 42 Reduce 1712
+            1714:   1700(ptr) AccessChain 34(data) 1708 67
+            1715: 24(i16vec4) Load 1714
+            1716: 24(i16vec4) VectorShuffle 1715 1713 4 5 6 3
+                              Store 1714 1716
+            1717:      6(int) Load 8(invocation)
+            1718:   1700(ptr) AccessChain 34(data) 67 67
+            1719: 24(i16vec4) Load 1718
+            1720: 24(i16vec4) GroupNonUniformIAdd 42 Reduce 1719
+            1721:   1700(ptr) AccessChain 34(data) 1717 67
+                              Store 1721 1720
+            1722:      6(int) Load 8(invocation)
+            1723:   1693(ptr) AccessChain 34(data) 37 67 38
+            1724: 23(int16_t) Load 1723
+            1725: 23(int16_t) GroupNonUniformIMul 42 Reduce 1724
+            1726:   1693(ptr) AccessChain 34(data) 1722 67 38
+                              Store 1726 1725
+            1727:      6(int) Load 8(invocation)
+            1728:   1700(ptr) AccessChain 34(data) 46 67
+            1729: 24(i16vec4) Load 1728
+            1730:1699(i16vec2) VectorShuffle 1729 1729 0 1
+            1731:1699(i16vec2) GroupNonUniformIMul 42 Reduce 1730
+            1732:   1700(ptr) AccessChain 34(data) 1727 67
+            1733: 24(i16vec4) Load 1732
+            1734: 24(i16vec4) VectorShuffle 1733 1731 4 5 2 3
+                              Store 1732 1734
+            1735:      6(int) Load 8(invocation)
+            1736:   1700(ptr) AccessChain 34(data) 57 67
+            1737: 24(i16vec4) Load 1736
+            1738:1709(i16vec3) VectorShuffle 1737 1737 0 1 2
+            1739:1709(i16vec3) GroupNonUniformIMul 42 Reduce 1738
+            1740:   1700(ptr) AccessChain 34(data) 1735 67
+            1741: 24(i16vec4) Load 1740
+            1742: 24(i16vec4) VectorShuffle 1741 1739 4 5 6 3
+                              Store 1740 1742
+            1743:      6(int) Load 8(invocation)
+            1744:   1700(ptr) AccessChain 34(data) 67 67
+            1745: 24(i16vec4) Load 1744
+            1746: 24(i16vec4) GroupNonUniformIMul 42 Reduce 1745
+            1747:   1700(ptr) AccessChain 34(data) 1743 67
+                              Store 1747 1746
+            1748:      6(int) Load 8(invocation)
+            1749:   1693(ptr) AccessChain 34(data) 37 67 38
+            1750: 23(int16_t) Load 1749
+            1751: 23(int16_t) GroupNonUniformUMin 42 Reduce 1750
+            1752:   1693(ptr) AccessChain 34(data) 1748 67 38
+                              Store 1752 1751
+            1753:      6(int) Load 8(invocation)
+            1754:   1700(ptr) AccessChain 34(data) 46 67
+            1755: 24(i16vec4) Load 1754
+            1756:1699(i16vec2) VectorShuffle 1755 1755 0 1
+            1757:1699(i16vec2) GroupNonUniformUMin 42 Reduce 1756
+            1758:   1700(ptr) AccessChain 34(data) 1753 67
+            1759: 24(i16vec4) Load 1758
+            1760: 24(i16vec4) VectorShuffle 1759 1757 4 5 2 3
+                              Store 1758 1760
+            1761:      6(int) Load 8(invocation)
+            1762:   1700(ptr) AccessChain 34(data) 57 67
+            1763: 24(i16vec4) Load 1762
+            1764:1709(i16vec3) VectorShuffle 1763 1763 0 1 2
+            1765:1709(i16vec3) GroupNonUniformUMin 42 Reduce 1764
+            1766:   1700(ptr) AccessChain 34(data) 1761 67
+            1767: 24(i16vec4) Load 1766
+            1768: 24(i16vec4) VectorShuffle 1767 1765 4 5 6 3
+                              Store 1766 1768
+            1769:      6(int) Load 8(invocation)
+            1770:   1700(ptr) AccessChain 34(data) 67 67
+            1771: 24(i16vec4) Load 1770
+            1772: 24(i16vec4) GroupNonUniformUMin 42 Reduce 1771
+            1773:   1700(ptr) AccessChain 34(data) 1769 67
+                              Store 1773 1772
+            1774:      6(int) Load 8(invocation)
+            1775:   1693(ptr) AccessChain 34(data) 37 67 38
+            1776: 23(int16_t) Load 1775
+            1777: 23(int16_t) GroupNonUniformUMax 42 Reduce 1776
+            1778:   1693(ptr) AccessChain 34(data) 1774 67 38
+                              Store 1778 1777
+            1779:      6(int) Load 8(invocation)
+            1780:   1700(ptr) AccessChain 34(data) 46 67
+            1781: 24(i16vec4) Load 1780
+            1782:1699(i16vec2) VectorShuffle 1781 1781 0 1
+            1783:1699(i16vec2) GroupNonUniformUMax 42 Reduce 1782
+            1784:   1700(ptr) AccessChain 34(data) 1779 67
+            1785: 24(i16vec4) Load 1784
+            1786: 24(i16vec4) VectorShuffle 1785 1783 4 5 2 3
+                              Store 1784 1786
+            1787:      6(int) Load 8(invocation)
+            1788:   1700(ptr) AccessChain 34(data) 57 67
+            1789: 24(i16vec4) Load 1788
+            1790:1709(i16vec3) VectorShuffle 1789 1789 0 1 2
+            1791:1709(i16vec3) GroupNonUniformUMax 42 Reduce 1790
+            1792:   1700(ptr) AccessChain 34(data) 1787 67
+            1793: 24(i16vec4) Load 1792
+            1794: 24(i16vec4) VectorShuffle 1793 1791 4 5 6 3
+                              Store 1792 1794
+            1795:      6(int) Load 8(invocation)
+            1796:   1700(ptr) AccessChain 34(data) 67 67
+            1797: 24(i16vec4) Load 1796
+            1798: 24(i16vec4) GroupNonUniformUMax 42 Reduce 1797
+            1799:   1700(ptr) AccessChain 34(data) 1795 67
+                              Store 1799 1798
+            1800:      6(int) Load 8(invocation)
+            1801:   1693(ptr) AccessChain 34(data) 37 67 38
+            1802: 23(int16_t) Load 1801
+            1803: 23(int16_t) GroupNonUniformBitwiseAnd 42 Reduce 1802
+            1804:   1693(ptr) AccessChain 34(data) 1800 67 38
+                              Store 1804 1803
+            1805:      6(int) Load 8(invocation)
+            1806:   1700(ptr) AccessChain 34(data) 46 67
+            1807: 24(i16vec4) Load 1806
+            1808:1699(i16vec2) VectorShuffle 1807 1807 0 1
+            1809:1699(i16vec2) GroupNonUniformBitwiseAnd 42 Reduce 1808
+            1810:   1700(ptr) AccessChain 34(data) 1805 67
+            1811: 24(i16vec4) Load 1810
+            1812: 24(i16vec4) VectorShuffle 1811 1809 4 5 2 3
+                              Store 1810 1812
+            1813:      6(int) Load 8(invocation)
+            1814:   1700(ptr) AccessChain 34(data) 57 67
+            1815: 24(i16vec4) Load 1814
+            1816:1709(i16vec3) VectorShuffle 1815 1815 0 1 2
+            1817:1709(i16vec3) GroupNonUniformBitwiseAnd 42 Reduce 1816
+            1818:   1700(ptr) AccessChain 34(data) 1813 67
+            1819: 24(i16vec4) Load 1818
+            1820: 24(i16vec4) VectorShuffle 1819 1817 4 5 6 3
+                              Store 1818 1820
+            1821:      6(int) Load 8(invocation)
+            1822:   1700(ptr) AccessChain 34(data) 67 67
+            1823: 24(i16vec4) Load 1822
+            1824: 24(i16vec4) GroupNonUniformBitwiseAnd 42 Reduce 1823
+            1825:   1700(ptr) AccessChain 34(data) 1821 67
+                              Store 1825 1824
+            1826:      6(int) Load 8(invocation)
+            1827:   1693(ptr) AccessChain 34(data) 37 67 38
+            1828: 23(int16_t) Load 1827
+            1829: 23(int16_t) GroupNonUniformBitwiseOr 42 Reduce 1828
+            1830:   1693(ptr) AccessChain 34(data) 1826 67 38
+                              Store 1830 1829
+            1831:      6(int) Load 8(invocation)
+            1832:   1700(ptr) AccessChain 34(data) 46 67
+            1833: 24(i16vec4) Load 1832
+            1834:1699(i16vec2) VectorShuffle 1833 1833 0 1
+            1835:1699(i16vec2) GroupNonUniformBitwiseOr 42 Reduce 1834
+            1836:   1700(ptr) AccessChain 34(data) 1831 67
+            1837: 24(i16vec4) Load 1836
+            1838: 24(i16vec4) VectorShuffle 1837 1835 4 5 2 3
+                              Store 1836 1838
+            1839:      6(int) Load 8(invocation)
+            1840:   1700(ptr) AccessChain 34(data) 57 67
+            1841: 24(i16vec4) Load 1840
+            1842:1709(i16vec3) VectorShuffle 1841 1841 0 1 2
+            1843:1709(i16vec3) GroupNonUniformBitwiseOr 42 Reduce 1842
+            1844:   1700(ptr) AccessChain 34(data) 1839 67
+            1845: 24(i16vec4) Load 1844
+            1846: 24(i16vec4) VectorShuffle 1845 1843 4 5 6 3
+                              Store 1844 1846
+            1847:      6(int) Load 8(invocation)
+            1848:   1700(ptr) AccessChain 34(data) 67 67
+            1849: 24(i16vec4) Load 1848
+            1850: 24(i16vec4) GroupNonUniformBitwiseOr 42 Reduce 1849
+            1851:   1700(ptr) AccessChain 34(data) 1847 67
+                              Store 1851 1850
+            1852:      6(int) Load 8(invocation)
+            1853:   1693(ptr) AccessChain 34(data) 37 67 38
+            1854: 23(int16_t) Load 1853
+            1855: 23(int16_t) GroupNonUniformBitwiseXor 42 Reduce 1854
+            1856:   1693(ptr) AccessChain 34(data) 1852 67 38
+                              Store 1856 1855
+            1857:      6(int) Load 8(invocation)
+            1858:   1700(ptr) AccessChain 34(data) 46 67
+            1859: 24(i16vec4) Load 1858
+            1860:1699(i16vec2) VectorShuffle 1859 1859 0 1
+            1861:1699(i16vec2) GroupNonUniformBitwiseXor 42 Reduce 1860
+            1862:   1700(ptr) AccessChain 34(data) 1857 67
+            1863: 24(i16vec4) Load 1862
+            1864: 24(i16vec4) VectorShuffle 1863 1861 4 5 2 3
+                              Store 1862 1864
+            1865:      6(int) Load 8(invocation)
+            1866:   1700(ptr) AccessChain 34(data) 57 67
+            1867: 24(i16vec4) Load 1866
+            1868:1709(i16vec3) VectorShuffle 1867 1867 0 1 2
+            1869:1709(i16vec3) GroupNonUniformBitwiseXor 42 Reduce 1868
+            1870:   1700(ptr) AccessChain 34(data) 1865 67
+            1871: 24(i16vec4) Load 1870
+            1872: 24(i16vec4) VectorShuffle 1871 1869 4 5 6 3
+                              Store 1870 1872
+            1873:      6(int) Load 8(invocation)
+            1874:   1700(ptr) AccessChain 34(data) 67 67
+            1875: 24(i16vec4) Load 1874
+            1876: 24(i16vec4) GroupNonUniformBitwiseXor 42 Reduce 1875
+            1877:   1700(ptr) AccessChain 34(data) 1873 67
+                              Store 1877 1876
+            1878:      6(int) Load 8(invocation)
+            1879:   1693(ptr) AccessChain 34(data) 37 67 38
+            1880: 23(int16_t) Load 1879
+            1881: 23(int16_t) GroupNonUniformIAdd 42 InclusiveScan 1880
+            1882:   1693(ptr) AccessChain 34(data) 1878 67 38
+                              Store 1882 1881
+            1883:      6(int) Load 8(invocation)
+            1884:   1700(ptr) AccessChain 34(data) 46 67
+            1885: 24(i16vec4) Load 1884
+            1886:1699(i16vec2) VectorShuffle 1885 1885 0 1
+            1887:1699(i16vec2) GroupNonUniformIAdd 42 InclusiveScan 1886
+            1888:   1700(ptr) AccessChain 34(data) 1883 67
+            1889: 24(i16vec4) Load 1888
+            1890: 24(i16vec4) VectorShuffle 1889 1887 4 5 2 3
+                              Store 1888 1890
+            1891:      6(int) Load 8(invocation)
+            1892:   1700(ptr) AccessChain 34(data) 57 67
+            1893: 24(i16vec4) Load 1892
+            1894:1709(i16vec3) VectorShuffle 1893 1893 0 1 2
+            1895:1709(i16vec3) GroupNonUniformIAdd 42 InclusiveScan 1894
+            1896:   1700(ptr) AccessChain 34(data) 1891 67
+            1897: 24(i16vec4) Load 1896
+            1898: 24(i16vec4) VectorShuffle 1897 1895 4 5 6 3
+                              Store 1896 1898
+            1899:      6(int) Load 8(invocation)
+            1900:   1700(ptr) AccessChain 34(data) 67 67
+            1901: 24(i16vec4) Load 1900
+            1902: 24(i16vec4) GroupNonUniformIAdd 42 InclusiveScan 1901
+            1903:   1700(ptr) AccessChain 34(data) 1899 67
+                              Store 1903 1902
+            1904:      6(int) Load 8(invocation)
+            1905:   1693(ptr) AccessChain 34(data) 37 67 38
+            1906: 23(int16_t) Load 1905
+            1907: 23(int16_t) GroupNonUniformIMul 42 InclusiveScan 1906
+            1908:   1693(ptr) AccessChain 34(data) 1904 67 38
+                              Store 1908 1907
+            1909:      6(int) Load 8(invocation)
+            1910:   1700(ptr) AccessChain 34(data) 46 67
+            1911: 24(i16vec4) Load 1910
+            1912:1699(i16vec2) VectorShuffle 1911 1911 0 1
+            1913:1699(i16vec2) GroupNonUniformIMul 42 InclusiveScan 1912
+            1914:   1700(ptr) AccessChain 34(data) 1909 67
+            1915: 24(i16vec4) Load 1914
+            1916: 24(i16vec4) VectorShuffle 1915 1913 4 5 2 3
+                              Store 1914 1916
+            1917:      6(int) Load 8(invocation)
+            1918:   1700(ptr) AccessChain 34(data) 57 67
+            1919: 24(i16vec4) Load 1918
+            1920:1709(i16vec3) VectorShuffle 1919 1919 0 1 2
+            1921:1709(i16vec3) GroupNonUniformIMul 42 InclusiveScan 1920
+            1922:   1700(ptr) AccessChain 34(data) 1917 67
+            1923: 24(i16vec4) Load 1922
+            1924: 24(i16vec4) VectorShuffle 1923 1921 4 5 6 3
+                              Store 1922 1924
+            1925:      6(int) Load 8(invocation)
+            1926:   1700(ptr) AccessChain 34(data) 67 67
+            1927: 24(i16vec4) Load 1926
+            1928: 24(i16vec4) GroupNonUniformIMul 42 InclusiveScan 1927
+            1929:   1700(ptr) AccessChain 34(data) 1925 67
+                              Store 1929 1928
+            1930:      6(int) Load 8(invocation)
+            1931:   1693(ptr) AccessChain 34(data) 37 67 38
+            1932: 23(int16_t) Load 1931
+            1933: 23(int16_t) GroupNonUniformUMin 42 InclusiveScan 1932
+            1934:   1693(ptr) AccessChain 34(data) 1930 67 38
+                              Store 1934 1933
+            1935:      6(int) Load 8(invocation)
+            1936:   1700(ptr) AccessChain 34(data) 46 67
+            1937: 24(i16vec4) Load 1936
+            1938:1699(i16vec2) VectorShuffle 1937 1937 0 1
+            1939:1699(i16vec2) GroupNonUniformUMin 42 InclusiveScan 1938
+            1940:   1700(ptr) AccessChain 34(data) 1935 67
+            1941: 24(i16vec4) Load 1940
+            1942: 24(i16vec4) VectorShuffle 1941 1939 4 5 2 3
+                              Store 1940 1942
+            1943:      6(int) Load 8(invocation)
+            1944:   1700(ptr) AccessChain 34(data) 57 67
+            1945: 24(i16vec4) Load 1944
+            1946:1709(i16vec3) VectorShuffle 1945 1945 0 1 2
+            1947:1709(i16vec3) GroupNonUniformUMin 42 InclusiveScan 1946
+            1948:   1700(ptr) AccessChain 34(data) 1943 67
+            1949: 24(i16vec4) Load 1948
+            1950: 24(i16vec4) VectorShuffle 1949 1947 4 5 6 3
+                              Store 1948 1950
+            1951:      6(int) Load 8(invocation)
+            1952:   1700(ptr) AccessChain 34(data) 67 67
+            1953: 24(i16vec4) Load 1952
+            1954: 24(i16vec4) GroupNonUniformUMin 42 InclusiveScan 1953
+            1955:   1700(ptr) AccessChain 34(data) 1951 67
+                              Store 1955 1954
+            1956:      6(int) Load 8(invocation)
+            1957:   1693(ptr) AccessChain 34(data) 37 67 38
+            1958: 23(int16_t) Load 1957
+            1959: 23(int16_t) GroupNonUniformUMax 42 InclusiveScan 1958
+            1960:   1693(ptr) AccessChain 34(data) 1956 67 38
+                              Store 1960 1959
+            1961:      6(int) Load 8(invocation)
+            1962:   1700(ptr) AccessChain 34(data) 46 67
+            1963: 24(i16vec4) Load 1962
+            1964:1699(i16vec2) VectorShuffle 1963 1963 0 1
+            1965:1699(i16vec2) GroupNonUniformUMax 42 InclusiveScan 1964
+            1966:   1700(ptr) AccessChain 34(data) 1961 67
+            1967: 24(i16vec4) Load 1966
+            1968: 24(i16vec4) VectorShuffle 1967 1965 4 5 2 3
+                              Store 1966 1968
+            1969:      6(int) Load 8(invocation)
+            1970:   1700(ptr) AccessChain 34(data) 57 67
+            1971: 24(i16vec4) Load 1970
+            1972:1709(i16vec3) VectorShuffle 1971 1971 0 1 2
+            1973:1709(i16vec3) GroupNonUniformUMax 42 InclusiveScan 1972
+            1974:   1700(ptr) AccessChain 34(data) 1969 67
+            1975: 24(i16vec4) Load 1974
+            1976: 24(i16vec4) VectorShuffle 1975 1973 4 5 6 3
+                              Store 1974 1976
+            1977:      6(int) Load 8(invocation)
+            1978:   1700(ptr) AccessChain 34(data) 67 67
+            1979: 24(i16vec4) Load 1978
+            1980: 24(i16vec4) GroupNonUniformUMax 42 InclusiveScan 1979
+            1981:   1700(ptr) AccessChain 34(data) 1977 67
+                              Store 1981 1980
+            1982:      6(int) Load 8(invocation)
+            1983:   1693(ptr) AccessChain 34(data) 37 67 38
+            1984: 23(int16_t) Load 1983
+            1985: 23(int16_t) GroupNonUniformBitwiseAnd 42 InclusiveScan 1984
+            1986:   1693(ptr) AccessChain 34(data) 1982 67 38
+                              Store 1986 1985
+            1987:      6(int) Load 8(invocation)
+            1988:   1700(ptr) AccessChain 34(data) 46 67
+            1989: 24(i16vec4) Load 1988
+            1990:1699(i16vec2) VectorShuffle 1989 1989 0 1
+            1991:1699(i16vec2) GroupNonUniformBitwiseAnd 42 InclusiveScan 1990
+            1992:   1700(ptr) AccessChain 34(data) 1987 67
+            1993: 24(i16vec4) Load 1992
+            1994: 24(i16vec4) VectorShuffle 1993 1991 4 5 2 3
+                              Store 1992 1994
+            1995:      6(int) Load 8(invocation)
+            1996:   1700(ptr) AccessChain 34(data) 57 67
+            1997: 24(i16vec4) Load 1996
+            1998:1709(i16vec3) VectorShuffle 1997 1997 0 1 2
+            1999:1709(i16vec3) GroupNonUniformBitwiseAnd 42 InclusiveScan 1998
+            2000:   1700(ptr) AccessChain 34(data) 1995 67
+            2001: 24(i16vec4) Load 2000
+            2002: 24(i16vec4) VectorShuffle 2001 1999 4 5 6 3
+                              Store 2000 2002
+            2003:      6(int) Load 8(invocation)
+            2004:   1700(ptr) AccessChain 34(data) 67 67
+            2005: 24(i16vec4) Load 2004
+            2006: 24(i16vec4) GroupNonUniformBitwiseAnd 42 InclusiveScan 2005
+            2007:   1700(ptr) AccessChain 34(data) 2003 67
+                              Store 2007 2006
+            2008:      6(int) Load 8(invocation)
+            2009:   1693(ptr) AccessChain 34(data) 37 67 38
+            2010: 23(int16_t) Load 2009
+            2011: 23(int16_t) GroupNonUniformBitwiseOr 42 InclusiveScan 2010
+            2012:   1693(ptr) AccessChain 34(data) 2008 67 38
+                              Store 2012 2011
+            2013:      6(int) Load 8(invocation)
+            2014:   1700(ptr) AccessChain 34(data) 46 67
+            2015: 24(i16vec4) Load 2014
+            2016:1699(i16vec2) VectorShuffle 2015 2015 0 1
+            2017:1699(i16vec2) GroupNonUniformBitwiseOr 42 InclusiveScan 2016
+            2018:   1700(ptr) AccessChain 34(data) 2013 67
+            2019: 24(i16vec4) Load 2018
+            2020: 24(i16vec4) VectorShuffle 2019 2017 4 5 2 3
+                              Store 2018 2020
+            2021:      6(int) Load 8(invocation)
+            2022:   1700(ptr) AccessChain 34(data) 57 67
+            2023: 24(i16vec4) Load 2022
+            2024:1709(i16vec3) VectorShuffle 2023 2023 0 1 2
+            2025:1709(i16vec3) GroupNonUniformBitwiseOr 42 InclusiveScan 2024
+            2026:   1700(ptr) AccessChain 34(data) 2021 67
+            2027: 24(i16vec4) Load 2026
+            2028: 24(i16vec4) VectorShuffle 2027 2025 4 5 6 3
+                              Store 2026 2028
+            2029:      6(int) Load 8(invocation)
+            2030:   1700(ptr) AccessChain 34(data) 67 67
+            2031: 24(i16vec4) Load 2030
+            2032: 24(i16vec4) GroupNonUniformBitwiseOr 42 InclusiveScan 2031
+            2033:   1700(ptr) AccessChain 34(data) 2029 67
+                              Store 2033 2032
+            2034:      6(int) Load 8(invocation)
+            2035:   1693(ptr) AccessChain 34(data) 37 67 38
+            2036: 23(int16_t) Load 2035
+            2037: 23(int16_t) GroupNonUniformBitwiseXor 42 InclusiveScan 2036
+            2038:   1693(ptr) AccessChain 34(data) 2034 67 38
+                              Store 2038 2037
+            2039:      6(int) Load 8(invocation)
+            2040:   1700(ptr) AccessChain 34(data) 46 67
+            2041: 24(i16vec4) Load 2040
+            2042:1699(i16vec2) VectorShuffle 2041 2041 0 1
+            2043:1699(i16vec2) GroupNonUniformBitwiseXor 42 InclusiveScan 2042
+            2044:   1700(ptr) AccessChain 34(data) 2039 67
+            2045: 24(i16vec4) Load 2044
+            2046: 24(i16vec4) VectorShuffle 2045 2043 4 5 2 3
+                              Store 2044 2046
+            2047:      6(int) Load 8(invocation)
+            2048:   1700(ptr) AccessChain 34(data) 57 67
+            2049: 24(i16vec4) Load 2048
+            2050:1709(i16vec3) VectorShuffle 2049 2049 0 1 2
+            2051:1709(i16vec3) GroupNonUniformBitwiseXor 42 InclusiveScan 2050
+            2052:   1700(ptr) AccessChain 34(data) 2047 67
+            2053: 24(i16vec4) Load 2052
+            2054: 24(i16vec4) VectorShuffle 2053 2051 4 5 6 3
+                              Store 2052 2054
+            2055:      6(int) Load 8(invocation)
+            2056:   1700(ptr) AccessChain 34(data) 67 67
+            2057: 24(i16vec4) Load 2056
+            2058: 24(i16vec4) GroupNonUniformBitwiseXor 42 InclusiveScan 2057
+            2059:   1700(ptr) AccessChain 34(data) 2055 67
+                              Store 2059 2058
+            2060:      6(int) Load 8(invocation)
+            2061:   1693(ptr) AccessChain 34(data) 37 67 38
+            2062: 23(int16_t) Load 2061
+            2063: 23(int16_t) GroupNonUniformIAdd 42 ExclusiveScan 2062
+            2064:   1693(ptr) AccessChain 34(data) 2060 67 38
+                              Store 2064 2063
+            2065:      6(int) Load 8(invocation)
+            2066:   1700(ptr) AccessChain 34(data) 46 67
+            2067: 24(i16vec4) Load 2066
+            2068:1699(i16vec2) VectorShuffle 2067 2067 0 1
+            2069:1699(i16vec2) GroupNonUniformIAdd 42 ExclusiveScan 2068
+            2070:   1700(ptr) AccessChain 34(data) 2065 67
+            2071: 24(i16vec4) Load 2070
+            2072: 24(i16vec4) VectorShuffle 2071 2069 4 5 2 3
+                              Store 2070 2072
+            2073:      6(int) Load 8(invocation)
+            2074:   1700(ptr) AccessChain 34(data) 57 67
+            2075: 24(i16vec4) Load 2074
+            2076:1709(i16vec3) VectorShuffle 2075 2075 0 1 2
+            2077:1709(i16vec3) GroupNonUniformIAdd 42 ExclusiveScan 2076
+            2078:   1700(ptr) AccessChain 34(data) 2073 67
+            2079: 24(i16vec4) Load 2078
+            2080: 24(i16vec4) VectorShuffle 2079 2077 4 5 6 3
+                              Store 2078 2080
+            2081:      6(int) Load 8(invocation)
+            2082:   1700(ptr) AccessChain 34(data) 67 67
+            2083: 24(i16vec4) Load 2082
+            2084: 24(i16vec4) GroupNonUniformIAdd 42 ExclusiveScan 2083
+            2085:   1700(ptr) AccessChain 34(data) 2081 67
+                              Store 2085 2084
+            2086:      6(int) Load 8(invocation)
+            2087:   1693(ptr) AccessChain 34(data) 37 67 38
+            2088: 23(int16_t) Load 2087
+            2089: 23(int16_t) GroupNonUniformIMul 42 ExclusiveScan 2088
+            2090:   1693(ptr) AccessChain 34(data) 2086 67 38
+                              Store 2090 2089
+            2091:      6(int) Load 8(invocation)
+            2092:   1700(ptr) AccessChain 34(data) 46 67
+            2093: 24(i16vec4) Load 2092
+            2094:1699(i16vec2) VectorShuffle 2093 2093 0 1
+            2095:1699(i16vec2) GroupNonUniformIMul 42 ExclusiveScan 2094
+            2096:   1700(ptr) AccessChain 34(data) 2091 67
+            2097: 24(i16vec4) Load 2096
+            2098: 24(i16vec4) VectorShuffle 2097 2095 4 5 2 3
+                              Store 2096 2098
+            2099:      6(int) Load 8(invocation)
+            2100:   1700(ptr) AccessChain 34(data) 57 67
+            2101: 24(i16vec4) Load 2100
+            2102:1709(i16vec3) VectorShuffle 2101 2101 0 1 2
+            2103:1709(i16vec3) GroupNonUniformIMul 42 ExclusiveScan 2102
+            2104:   1700(ptr) AccessChain 34(data) 2099 67
+            2105: 24(i16vec4) Load 2104
+            2106: 24(i16vec4) VectorShuffle 2105 2103 4 5 6 3
+                              Store 2104 2106
+            2107:      6(int) Load 8(invocation)
+            2108:   1700(ptr) AccessChain 34(data) 67 67
+            2109: 24(i16vec4) Load 2108
+            2110: 24(i16vec4) GroupNonUniformIMul 42 ExclusiveScan 2109
+            2111:   1700(ptr) AccessChain 34(data) 2107 67
+                              Store 2111 2110
+            2112:      6(int) Load 8(invocation)
+            2113:   1693(ptr) AccessChain 34(data) 37 67 38
+            2114: 23(int16_t) Load 2113
+            2115: 23(int16_t) GroupNonUniformUMin 42 ExclusiveScan 2114
+            2116:   1693(ptr) AccessChain 34(data) 2112 67 38
+                              Store 2116 2115
+            2117:      6(int) Load 8(invocation)
+            2118:   1700(ptr) AccessChain 34(data) 46 67
+            2119: 24(i16vec4) Load 2118
+            2120:1699(i16vec2) VectorShuffle 2119 2119 0 1
+            2121:1699(i16vec2) GroupNonUniformUMin 42 ExclusiveScan 2120
+            2122:   1700(ptr) AccessChain 34(data) 2117 67
+            2123: 24(i16vec4) Load 2122
+            2124: 24(i16vec4) VectorShuffle 2123 2121 4 5 2 3
+                              Store 2122 2124
+            2125:      6(int) Load 8(invocation)
+            2126:   1700(ptr) AccessChain 34(data) 57 67
+            2127: 24(i16vec4) Load 2126
+            2128:1709(i16vec3) VectorShuffle 2127 2127 0 1 2
+            2129:1709(i16vec3) GroupNonUniformUMin 42 ExclusiveScan 2128
+            2130:   1700(ptr) AccessChain 34(data) 2125 67
+            2131: 24(i16vec4) Load 2130
+            2132: 24(i16vec4) VectorShuffle 2131 2129 4 5 6 3
+                              Store 2130 2132
+            2133:      6(int) Load 8(invocation)
+            2134:   1700(ptr) AccessChain 34(data) 67 67
+            2135: 24(i16vec4) Load 2134
+            2136: 24(i16vec4) GroupNonUniformUMin 42 ExclusiveScan 2135
+            2137:   1700(ptr) AccessChain 34(data) 2133 67
+                              Store 2137 2136
+            2138:      6(int) Load 8(invocation)
+            2139:   1693(ptr) AccessChain 34(data) 37 67 38
+            2140: 23(int16_t) Load 2139
+            2141: 23(int16_t) GroupNonUniformUMax 42 ExclusiveScan 2140
+            2142:   1693(ptr) AccessChain 34(data) 2138 67 38
+                              Store 2142 2141
+            2143:      6(int) Load 8(invocation)
+            2144:   1700(ptr) AccessChain 34(data) 46 67
+            2145: 24(i16vec4) Load 2144
+            2146:1699(i16vec2) VectorShuffle 2145 2145 0 1
+            2147:1699(i16vec2) GroupNonUniformUMax 42 ExclusiveScan 2146
+            2148:   1700(ptr) AccessChain 34(data) 2143 67
+            2149: 24(i16vec4) Load 2148
+            2150: 24(i16vec4) VectorShuffle 2149 2147 4 5 2 3
+                              Store 2148 2150
+            2151:      6(int) Load 8(invocation)
+            2152:   1700(ptr) AccessChain 34(data) 57 67
+            2153: 24(i16vec4) Load 2152
+            2154:1709(i16vec3) VectorShuffle 2153 2153 0 1 2
+            2155:1709(i16vec3) GroupNonUniformUMax 42 ExclusiveScan 2154
+            2156:   1700(ptr) AccessChain 34(data) 2151 67
+            2157: 24(i16vec4) Load 2156
+            2158: 24(i16vec4) VectorShuffle 2157 2155 4 5 6 3
+                              Store 2156 2158
+            2159:      6(int) Load 8(invocation)
+            2160:   1700(ptr) AccessChain 34(data) 67 67
+            2161: 24(i16vec4) Load 2160
+            2162: 24(i16vec4) GroupNonUniformUMax 42 ExclusiveScan 2161
+            2163:   1700(ptr) AccessChain 34(data) 2159 67
+                              Store 2163 2162
+            2164:      6(int) Load 8(invocation)
+            2165:   1693(ptr) AccessChain 34(data) 37 67 38
+            2166: 23(int16_t) Load 2165
+            2167: 23(int16_t) GroupNonUniformBitwiseAnd 42 ExclusiveScan 2166
+            2168:   1693(ptr) AccessChain 34(data) 2164 67 38
+                              Store 2168 2167
+            2169:      6(int) Load 8(invocation)
+            2170:   1700(ptr) AccessChain 34(data) 46 67
+            2171: 24(i16vec4) Load 2170
+            2172:1699(i16vec2) VectorShuffle 2171 2171 0 1
+            2173:1699(i16vec2) GroupNonUniformBitwiseAnd 42 ExclusiveScan 2172
+            2174:   1700(ptr) AccessChain 34(data) 2169 67
+            2175: 24(i16vec4) Load 2174
+            2176: 24(i16vec4) VectorShuffle 2175 2173 4 5 2 3
+                              Store 2174 2176
+            2177:      6(int) Load 8(invocation)
+            2178:   1700(ptr) AccessChain 34(data) 57 67
+            2179: 24(i16vec4) Load 2178
+            2180:1709(i16vec3) VectorShuffle 2179 2179 0 1 2
+            2181:1709(i16vec3) GroupNonUniformBitwiseAnd 42 ExclusiveScan 2180
+            2182:   1700(ptr) AccessChain 34(data) 2177 67
+            2183: 24(i16vec4) Load 2182
+            2184: 24(i16vec4) VectorShuffle 2183 2181 4 5 6 3
+                              Store 2182 2184
+            2185:      6(int) Load 8(invocation)
+            2186:   1700(ptr) AccessChain 34(data) 67 67
+            2187: 24(i16vec4) Load 2186
+            2188: 24(i16vec4) GroupNonUniformBitwiseAnd 42 ExclusiveScan 2187
+            2189:   1700(ptr) AccessChain 34(data) 2185 67
+                              Store 2189 2188
+            2190:      6(int) Load 8(invocation)
+            2191:   1693(ptr) AccessChain 34(data) 37 67 38
+            2192: 23(int16_t) Load 2191
+            2193: 23(int16_t) GroupNonUniformBitwiseOr 42 ExclusiveScan 2192
+            2194:   1693(ptr) AccessChain 34(data) 2190 67 38
+                              Store 2194 2193
+            2195:      6(int) Load 8(invocation)
+            2196:   1700(ptr) AccessChain 34(data) 46 67
+            2197: 24(i16vec4) Load 2196
+            2198:1699(i16vec2) VectorShuffle 2197 2197 0 1
+            2199:1699(i16vec2) GroupNonUniformBitwiseOr 42 ExclusiveScan 2198
+            2200:   1700(ptr) AccessChain 34(data) 2195 67
+            2201: 24(i16vec4) Load 2200
+            2202: 24(i16vec4) VectorShuffle 2201 2199 4 5 2 3
+                              Store 2200 2202
+            2203:      6(int) Load 8(invocation)
+            2204:   1700(ptr) AccessChain 34(data) 57 67
+            2205: 24(i16vec4) Load 2204
+            2206:1709(i16vec3) VectorShuffle 2205 2205 0 1 2
+            2207:1709(i16vec3) GroupNonUniformBitwiseOr 42 ExclusiveScan 2206
+            2208:   1700(ptr) AccessChain 34(data) 2203 67
+            2209: 24(i16vec4) Load 2208
+            2210: 24(i16vec4) VectorShuffle 2209 2207 4 5 6 3
+                              Store 2208 2210
+            2211:      6(int) Load 8(invocation)
+            2212:   1700(ptr) AccessChain 34(data) 67 67
+            2213: 24(i16vec4) Load 2212
+            2214: 24(i16vec4) GroupNonUniformBitwiseOr 42 ExclusiveScan 2213
+            2215:   1700(ptr) AccessChain 34(data) 2211 67
+                              Store 2215 2214
+            2216:      6(int) Load 8(invocation)
+            2217:   1693(ptr) AccessChain 34(data) 37 67 38
+            2218: 23(int16_t) Load 2217
+            2219: 23(int16_t) GroupNonUniformBitwiseXor 42 ExclusiveScan 2218
+            2220:   1693(ptr) AccessChain 34(data) 2216 67 38
+                              Store 2220 2219
+            2221:      6(int) Load 8(invocation)
+            2222:   1700(ptr) AccessChain 34(data) 46 67
+            2223: 24(i16vec4) Load 2222
+            2224:1699(i16vec2) VectorShuffle 2223 2223 0 1
+            2225:1699(i16vec2) GroupNonUniformBitwiseXor 42 ExclusiveScan 2224
+            2226:   1700(ptr) AccessChain 34(data) 2221 67
+            2227: 24(i16vec4) Load 2226
+            2228: 24(i16vec4) VectorShuffle 2227 2225 4 5 2 3
+                              Store 2226 2228
+            2229:      6(int) Load 8(invocation)
+            2230:   1700(ptr) AccessChain 34(data) 57 67
+            2231: 24(i16vec4) Load 2230
+            2232:1709(i16vec3) VectorShuffle 2231 2231 0 1 2
+            2233:1709(i16vec3) GroupNonUniformBitwiseXor 42 ExclusiveScan 2232
+            2234:   1700(ptr) AccessChain 34(data) 2229 67
+            2235: 24(i16vec4) Load 2234
+            2236: 24(i16vec4) VectorShuffle 2235 2233 4 5 6 3
+                              Store 2234 2236
+            2237:      6(int) Load 8(invocation)
+            2238:   1700(ptr) AccessChain 34(data) 67 67
+            2239: 24(i16vec4) Load 2238
+            2240: 24(i16vec4) GroupNonUniformBitwiseXor 42 ExclusiveScan 2239
+            2241:   1700(ptr) AccessChain 34(data) 2237 67
+                              Store 2241 2240
+            2242:      6(int) Load 8(invocation)
+            2245:   2244(ptr) AccessChain 34(data) 37 2243 38
+            2246: 25(int64_t) Load 2245
+            2247: 25(int64_t) GroupNonUniformIAdd 42 Reduce 2246
+            2248:   2244(ptr) AccessChain 34(data) 2242 2243 38
+                              Store 2248 2247
+            2249:      6(int) Load 8(invocation)
+            2252:   2251(ptr) AccessChain 34(data) 46 2243
+            2253: 26(i64vec4) Load 2252
+            2254:2250(i64vec2) VectorShuffle 2253 2253 0 1
+            2255:2250(i64vec2) GroupNonUniformIAdd 42 Reduce 2254
+            2256:   2251(ptr) AccessChain 34(data) 2249 2243
+            2257: 26(i64vec4) Load 2256
+            2258: 26(i64vec4) VectorShuffle 2257 2255 4 5 2 3
+                              Store 2256 2258
+            2259:      6(int) Load 8(invocation)
+            2261:   2251(ptr) AccessChain 34(data) 57 2243
+            2262: 26(i64vec4) Load 2261
+            2263:2260(i64vec3) VectorShuffle 2262 2262 0 1 2
+            2264:2260(i64vec3) GroupNonUniformIAdd 42 Reduce 2263
+            2265:   2251(ptr) AccessChain 34(data) 2259 2243
+            2266: 26(i64vec4) Load 2265
+            2267: 26(i64vec4) VectorShuffle 2266 2264 4 5 6 3
+                              Store 2265 2267
+            2268:      6(int) Load 8(invocation)
+            2269:   2251(ptr) AccessChain 34(data) 67 2243
+            2270: 26(i64vec4) Load 2269
+            2271: 26(i64vec4) GroupNonUniformIAdd 42 Reduce 2270
+            2272:   2251(ptr) AccessChain 34(data) 2268 2243
+                              Store 2272 2271
+            2273:      6(int) Load 8(invocation)
+            2274:   2244(ptr) AccessChain 34(data) 37 2243 38
+            2275: 25(int64_t) Load 2274
+            2276: 25(int64_t) GroupNonUniformIMul 42 Reduce 2275
+            2277:   2244(ptr) AccessChain 34(data) 2273 2243 38
+                              Store 2277 2276
+            2278:      6(int) Load 8(invocation)
+            2279:   2251(ptr) AccessChain 34(data) 46 2243
+            2280: 26(i64vec4) Load 2279
+            2281:2250(i64vec2) VectorShuffle 2280 2280 0 1
+            2282:2250(i64vec2) GroupNonUniformIMul 42 Reduce 2281
+            2283:   2251(ptr) AccessChain 34(data) 2278 2243
+            2284: 26(i64vec4) Load 2283
+            2285: 26(i64vec4) VectorShuffle 2284 2282 4 5 2 3
+                              Store 2283 2285
+            2286:      6(int) Load 8(invocation)
+            2287:   2251(ptr) AccessChain 34(data) 57 2243
+            2288: 26(i64vec4) Load 2287
+            2289:2260(i64vec3) VectorShuffle 2288 2288 0 1 2
+            2290:2260(i64vec3) GroupNonUniformIMul 42 Reduce 2289
+            2291:   2251(ptr) AccessChain 34(data) 2286 2243
+            2292: 26(i64vec4) Load 2291
+            2293: 26(i64vec4) VectorShuffle 2292 2290 4 5 6 3
+                              Store 2291 2293
+            2294:      6(int) Load 8(invocation)
+            2295:   2251(ptr) AccessChain 34(data) 67 2243
+            2296: 26(i64vec4) Load 2295
+            2297: 26(i64vec4) GroupNonUniformIMul 42 Reduce 2296
+            2298:   2251(ptr) AccessChain 34(data) 2294 2243
+                              Store 2298 2297
+            2299:      6(int) Load 8(invocation)
+            2300:   2244(ptr) AccessChain 34(data) 37 2243 38
+            2301: 25(int64_t) Load 2300
+            2302: 25(int64_t) GroupNonUniformSMin 42 Reduce 2301
+            2303:   2244(ptr) AccessChain 34(data) 2299 2243 38
+                              Store 2303 2302
+            2304:      6(int) Load 8(invocation)
+            2305:   2251(ptr) AccessChain 34(data) 46 2243
+            2306: 26(i64vec4) Load 2305
+            2307:2250(i64vec2) VectorShuffle 2306 2306 0 1
+            2308:2250(i64vec2) GroupNonUniformSMin 42 Reduce 2307
+            2309:   2251(ptr) AccessChain 34(data) 2304 2243
+            2310: 26(i64vec4) Load 2309
+            2311: 26(i64vec4) VectorShuffle 2310 2308 4 5 2 3
+                              Store 2309 2311
+            2312:      6(int) Load 8(invocation)
+            2313:   2251(ptr) AccessChain 34(data) 57 2243
+            2314: 26(i64vec4) Load 2313
+            2315:2260(i64vec3) VectorShuffle 2314 2314 0 1 2
+            2316:2260(i64vec3) GroupNonUniformSMin 42 Reduce 2315
+            2317:   2251(ptr) AccessChain 34(data) 2312 2243
+            2318: 26(i64vec4) Load 2317
+            2319: 26(i64vec4) VectorShuffle 2318 2316 4 5 6 3
+                              Store 2317 2319
+            2320:      6(int) Load 8(invocation)
+            2321:   2251(ptr) AccessChain 34(data) 67 2243
+            2322: 26(i64vec4) Load 2321
+            2323: 26(i64vec4) GroupNonUniformSMin 42 Reduce 2322
+            2324:   2251(ptr) AccessChain 34(data) 2320 2243
+                              Store 2324 2323
+            2325:      6(int) Load 8(invocation)
+            2326:   2244(ptr) AccessChain 34(data) 37 2243 38
+            2327: 25(int64_t) Load 2326
+            2328: 25(int64_t) GroupNonUniformSMax 42 Reduce 2327
+            2329:   2244(ptr) AccessChain 34(data) 2325 2243 38
+                              Store 2329 2328
+            2330:      6(int) Load 8(invocation)
+            2331:   2251(ptr) AccessChain 34(data) 46 2243
+            2332: 26(i64vec4) Load 2331
+            2333:2250(i64vec2) VectorShuffle 2332 2332 0 1
+            2334:2250(i64vec2) GroupNonUniformSMax 42 Reduce 2333
+            2335:   2251(ptr) AccessChain 34(data) 2330 2243
+            2336: 26(i64vec4) Load 2335
+            2337: 26(i64vec4) VectorShuffle 2336 2334 4 5 2 3
+                              Store 2335 2337
+            2338:      6(int) Load 8(invocation)
+            2339:   2251(ptr) AccessChain 34(data) 57 2243
+            2340: 26(i64vec4) Load 2339
+            2341:2260(i64vec3) VectorShuffle 2340 2340 0 1 2
+            2342:2260(i64vec3) GroupNonUniformSMax 42 Reduce 2341
+            2343:   2251(ptr) AccessChain 34(data) 2338 2243
+            2344: 26(i64vec4) Load 2343
+            2345: 26(i64vec4) VectorShuffle 2344 2342 4 5 6 3
+                              Store 2343 2345
+            2346:      6(int) Load 8(invocation)
+            2347:   2251(ptr) AccessChain 34(data) 67 2243
+            2348: 26(i64vec4) Load 2347
+            2349: 26(i64vec4) GroupNonUniformSMax 42 Reduce 2348
+            2350:   2251(ptr) AccessChain 34(data) 2346 2243
+                              Store 2350 2349
+            2351:      6(int) Load 8(invocation)
+            2352:   2244(ptr) AccessChain 34(data) 37 2243 38
+            2353: 25(int64_t) Load 2352
+            2354: 25(int64_t) GroupNonUniformBitwiseAnd 42 Reduce 2353
+            2355:   2244(ptr) AccessChain 34(data) 2351 2243 38
+                              Store 2355 2354
+            2356:      6(int) Load 8(invocation)
+            2357:   2251(ptr) AccessChain 34(data) 46 2243
+            2358: 26(i64vec4) Load 2357
+            2359:2250(i64vec2) VectorShuffle 2358 2358 0 1
+            2360:2250(i64vec2) GroupNonUniformBitwiseAnd 42 Reduce 2359
+            2361:   2251(ptr) AccessChain 34(data) 2356 2243
+            2362: 26(i64vec4) Load 2361
+            2363: 26(i64vec4) VectorShuffle 2362 2360 4 5 2 3
+                              Store 2361 2363
+            2364:      6(int) Load 8(invocation)
+            2365:   2251(ptr) AccessChain 34(data) 57 2243
+            2366: 26(i64vec4) Load 2365
+            2367:2260(i64vec3) VectorShuffle 2366 2366 0 1 2
+            2368:2260(i64vec3) GroupNonUniformBitwiseAnd 42 Reduce 2367
+            2369:   2251(ptr) AccessChain 34(data) 2364 2243
+            2370: 26(i64vec4) Load 2369
+            2371: 26(i64vec4) VectorShuffle 2370 2368 4 5 6 3
+                              Store 2369 2371
+            2372:      6(int) Load 8(invocation)
+            2373:   2251(ptr) AccessChain 34(data) 67 2243
+            2374: 26(i64vec4) Load 2373
+            2375: 26(i64vec4) GroupNonUniformBitwiseAnd 42 Reduce 2374
+            2376:   2251(ptr) AccessChain 34(data) 2372 2243
+                              Store 2376 2375
+            2377:      6(int) Load 8(invocation)
+            2378:   2244(ptr) AccessChain 34(data) 37 2243 38
+            2379: 25(int64_t) Load 2378
+            2380: 25(int64_t) GroupNonUniformBitwiseOr 42 Reduce 2379
+            2381:   2244(ptr) AccessChain 34(data) 2377 2243 38
+                              Store 2381 2380
+            2382:      6(int) Load 8(invocation)
+            2383:   2251(ptr) AccessChain 34(data) 46 2243
+            2384: 26(i64vec4) Load 2383
+            2385:2250(i64vec2) VectorShuffle 2384 2384 0 1
+            2386:2250(i64vec2) GroupNonUniformBitwiseOr 42 Reduce 2385
+            2387:   2251(ptr) AccessChain 34(data) 2382 2243
+            2388: 26(i64vec4) Load 2387
+            2389: 26(i64vec4) VectorShuffle 2388 2386 4 5 2 3
+                              Store 2387 2389
+            2390:      6(int) Load 8(invocation)
+            2391:   2251(ptr) AccessChain 34(data) 57 2243
+            2392: 26(i64vec4) Load 2391
+            2393:2260(i64vec3) VectorShuffle 2392 2392 0 1 2
+            2394:2260(i64vec3) GroupNonUniformBitwiseOr 42 Reduce 2393
+            2395:   2251(ptr) AccessChain 34(data) 2390 2243
+            2396: 26(i64vec4) Load 2395
+            2397: 26(i64vec4) VectorShuffle 2396 2394 4 5 6 3
+                              Store 2395 2397
+            2398:      6(int) Load 8(invocation)
+            2399:   2251(ptr) AccessChain 34(data) 67 2243
+            2400: 26(i64vec4) Load 2399
+            2401: 26(i64vec4) GroupNonUniformBitwiseOr 42 Reduce 2400
+            2402:   2251(ptr) AccessChain 34(data) 2398 2243
+                              Store 2402 2401
+            2403:      6(int) Load 8(invocation)
+            2404:   2244(ptr) AccessChain 34(data) 37 2243 38
+            2405: 25(int64_t) Load 2404
+            2406: 25(int64_t) GroupNonUniformBitwiseXor 42 Reduce 2405
+            2407:   2244(ptr) AccessChain 34(data) 2403 2243 38
+                              Store 2407 2406
+            2408:      6(int) Load 8(invocation)
+            2409:   2251(ptr) AccessChain 34(data) 46 2243
+            2410: 26(i64vec4) Load 2409
+            2411:2250(i64vec2) VectorShuffle 2410 2410 0 1
+            2412:2250(i64vec2) GroupNonUniformBitwiseXor 42 Reduce 2411
+            2413:   2251(ptr) AccessChain 34(data) 2408 2243
+            2414: 26(i64vec4) Load 2413
+            2415: 26(i64vec4) VectorShuffle 2414 2412 4 5 2 3
+                              Store 2413 2415
+            2416:      6(int) Load 8(invocation)
+            2417:   2251(ptr) AccessChain 34(data) 57 2243
+            2418: 26(i64vec4) Load 2417
+            2419:2260(i64vec3) VectorShuffle 2418 2418 0 1 2
+            2420:2260(i64vec3) GroupNonUniformBitwiseXor 42 Reduce 2419
+            2421:   2251(ptr) AccessChain 34(data) 2416 2243
+            2422: 26(i64vec4) Load 2421
+            2423: 26(i64vec4) VectorShuffle 2422 2420 4 5 6 3
+                              Store 2421 2423
+            2424:      6(int) Load 8(invocation)
+            2425:   2251(ptr) AccessChain 34(data) 67 2243
+            2426: 26(i64vec4) Load 2425
+            2427: 26(i64vec4) GroupNonUniformBitwiseXor 42 Reduce 2426
+            2428:   2251(ptr) AccessChain 34(data) 2424 2243
+                              Store 2428 2427
+            2429:      6(int) Load 8(invocation)
+            2430:   2244(ptr) AccessChain 34(data) 37 2243 38
+            2431: 25(int64_t) Load 2430
+            2432: 25(int64_t) GroupNonUniformIAdd 42 InclusiveScan 2431
+            2433:   2244(ptr) AccessChain 34(data) 2429 2243 38
+                              Store 2433 2432
+            2434:      6(int) Load 8(invocation)
+            2435:   2251(ptr) AccessChain 34(data) 46 2243
+            2436: 26(i64vec4) Load 2435
+            2437:2250(i64vec2) VectorShuffle 2436 2436 0 1
+            2438:2250(i64vec2) GroupNonUniformIAdd 42 InclusiveScan 2437
+            2439:   2251(ptr) AccessChain 34(data) 2434 2243
+            2440: 26(i64vec4) Load 2439
+            2441: 26(i64vec4) VectorShuffle 2440 2438 4 5 2 3
+                              Store 2439 2441
+            2442:      6(int) Load 8(invocation)
+            2443:   2251(ptr) AccessChain 34(data) 57 2243
+            2444: 26(i64vec4) Load 2443
+            2445:2260(i64vec3) VectorShuffle 2444 2444 0 1 2
+            2446:2260(i64vec3) GroupNonUniformIAdd 42 InclusiveScan 2445
+            2447:   2251(ptr) AccessChain 34(data) 2442 2243
+            2448: 26(i64vec4) Load 2447
+            2449: 26(i64vec4) VectorShuffle 2448 2446 4 5 6 3
+                              Store 2447 2449
+            2450:      6(int) Load 8(invocation)
+            2451:   2251(ptr) AccessChain 34(data) 67 2243
+            2452: 26(i64vec4) Load 2451
+            2453: 26(i64vec4) GroupNonUniformIAdd 42 InclusiveScan 2452
+            2454:   2251(ptr) AccessChain 34(data) 2450 2243
+                              Store 2454 2453
+            2455:      6(int) Load 8(invocation)
+            2456:   2244(ptr) AccessChain 34(data) 37 2243 38
+            2457: 25(int64_t) Load 2456
+            2458: 25(int64_t) GroupNonUniformIMul 42 InclusiveScan 2457
+            2459:   2244(ptr) AccessChain 34(data) 2455 2243 38
+                              Store 2459 2458
+            2460:      6(int) Load 8(invocation)
+            2461:   2251(ptr) AccessChain 34(data) 46 2243
+            2462: 26(i64vec4) Load 2461
+            2463:2250(i64vec2) VectorShuffle 2462 2462 0 1
+            2464:2250(i64vec2) GroupNonUniformIMul 42 InclusiveScan 2463
+            2465:   2251(ptr) AccessChain 34(data) 2460 2243
+            2466: 26(i64vec4) Load 2465
+            2467: 26(i64vec4) VectorShuffle 2466 2464 4 5 2 3
+                              Store 2465 2467
+            2468:      6(int) Load 8(invocation)
+            2469:   2251(ptr) AccessChain 34(data) 57 2243
+            2470: 26(i64vec4) Load 2469
+            2471:2260(i64vec3) VectorShuffle 2470 2470 0 1 2
+            2472:2260(i64vec3) GroupNonUniformIMul 42 InclusiveScan 2471
+            2473:   2251(ptr) AccessChain 34(data) 2468 2243
+            2474: 26(i64vec4) Load 2473
+            2475: 26(i64vec4) VectorShuffle 2474 2472 4 5 6 3
+                              Store 2473 2475
+            2476:      6(int) Load 8(invocation)
+            2477:   2251(ptr) AccessChain 34(data) 67 2243
+            2478: 26(i64vec4) Load 2477
+            2479: 26(i64vec4) GroupNonUniformIMul 42 InclusiveScan 2478
+            2480:   2251(ptr) AccessChain 34(data) 2476 2243
+                              Store 2480 2479
+            2481:      6(int) Load 8(invocation)
+            2482:   2244(ptr) AccessChain 34(data) 37 2243 38
+            2483: 25(int64_t) Load 2482
+            2484: 25(int64_t) GroupNonUniformSMin 42 InclusiveScan 2483
+            2485:   2244(ptr) AccessChain 34(data) 2481 2243 38
+                              Store 2485 2484
+            2486:      6(int) Load 8(invocation)
+            2487:   2251(ptr) AccessChain 34(data) 46 2243
+            2488: 26(i64vec4) Load 2487
+            2489:2250(i64vec2) VectorShuffle 2488 2488 0 1
+            2490:2250(i64vec2) GroupNonUniformSMin 42 InclusiveScan 2489
+            2491:   2251(ptr) AccessChain 34(data) 2486 2243
+            2492: 26(i64vec4) Load 2491
+            2493: 26(i64vec4) VectorShuffle 2492 2490 4 5 2 3
+                              Store 2491 2493
+            2494:      6(int) Load 8(invocation)
+            2495:   2251(ptr) AccessChain 34(data) 57 2243
+            2496: 26(i64vec4) Load 2495
+            2497:2260(i64vec3) VectorShuffle 2496 2496 0 1 2
+            2498:2260(i64vec3) GroupNonUniformSMin 42 InclusiveScan 2497
+            2499:   2251(ptr) AccessChain 34(data) 2494 2243
+            2500: 26(i64vec4) Load 2499
+            2501: 26(i64vec4) VectorShuffle 2500 2498 4 5 6 3
+                              Store 2499 2501
+            2502:      6(int) Load 8(invocation)
+            2503:   2251(ptr) AccessChain 34(data) 67 2243
+            2504: 26(i64vec4) Load 2503
+            2505: 26(i64vec4) GroupNonUniformSMin 42 InclusiveScan 2504
+            2506:   2251(ptr) AccessChain 34(data) 2502 2243
+                              Store 2506 2505
+            2507:      6(int) Load 8(invocation)
+            2508:   2244(ptr) AccessChain 34(data) 37 2243 38
+            2509: 25(int64_t) Load 2508
+            2510: 25(int64_t) GroupNonUniformSMax 42 InclusiveScan 2509
+            2511:   2244(ptr) AccessChain 34(data) 2507 2243 38
+                              Store 2511 2510
+            2512:      6(int) Load 8(invocation)
+            2513:   2251(ptr) AccessChain 34(data) 46 2243
+            2514: 26(i64vec4) Load 2513
+            2515:2250(i64vec2) VectorShuffle 2514 2514 0 1
+            2516:2250(i64vec2) GroupNonUniformSMax 42 InclusiveScan 2515
+            2517:   2251(ptr) AccessChain 34(data) 2512 2243
+            2518: 26(i64vec4) Load 2517
+            2519: 26(i64vec4) VectorShuffle 2518 2516 4 5 2 3
+                              Store 2517 2519
+            2520:      6(int) Load 8(invocation)
+            2521:   2251(ptr) AccessChain 34(data) 57 2243
+            2522: 26(i64vec4) Load 2521
+            2523:2260(i64vec3) VectorShuffle 2522 2522 0 1 2
+            2524:2260(i64vec3) GroupNonUniformSMax 42 InclusiveScan 2523
+            2525:   2251(ptr) AccessChain 34(data) 2520 2243
+            2526: 26(i64vec4) Load 2525
+            2527: 26(i64vec4) VectorShuffle 2526 2524 4 5 6 3
+                              Store 2525 2527
+            2528:      6(int) Load 8(invocation)
+            2529:   2251(ptr) AccessChain 34(data) 67 2243
+            2530: 26(i64vec4) Load 2529
+            2531: 26(i64vec4) GroupNonUniformSMax 42 InclusiveScan 2530
+            2532:   2251(ptr) AccessChain 34(data) 2528 2243
+                              Store 2532 2531
+            2533:      6(int) Load 8(invocation)
+            2534:   2244(ptr) AccessChain 34(data) 37 2243 38
+            2535: 25(int64_t) Load 2534
+            2536: 25(int64_t) GroupNonUniformBitwiseAnd 42 InclusiveScan 2535
+            2537:   2244(ptr) AccessChain 34(data) 2533 2243 38
+                              Store 2537 2536
+            2538:      6(int) Load 8(invocation)
+            2539:   2251(ptr) AccessChain 34(data) 46 2243
+            2540: 26(i64vec4) Load 2539
+            2541:2250(i64vec2) VectorShuffle 2540 2540 0 1
+            2542:2250(i64vec2) GroupNonUniformBitwiseAnd 42 InclusiveScan 2541
+            2543:   2251(ptr) AccessChain 34(data) 2538 2243
+            2544: 26(i64vec4) Load 2543
+            2545: 26(i64vec4) VectorShuffle 2544 2542 4 5 2 3
+                              Store 2543 2545
+            2546:      6(int) Load 8(invocation)
+            2547:   2251(ptr) AccessChain 34(data) 57 2243
+            2548: 26(i64vec4) Load 2547
+            2549:2260(i64vec3) VectorShuffle 2548 2548 0 1 2
+            2550:2260(i64vec3) GroupNonUniformBitwiseAnd 42 InclusiveScan 2549
+            2551:   2251(ptr) AccessChain 34(data) 2546 2243
+            2552: 26(i64vec4) Load 2551
+            2553: 26(i64vec4) VectorShuffle 2552 2550 4 5 6 3
+                              Store 2551 2553
+            2554:      6(int) Load 8(invocation)
+            2555:   2251(ptr) AccessChain 34(data) 67 2243
+            2556: 26(i64vec4) Load 2555
+            2557: 26(i64vec4) GroupNonUniformBitwiseAnd 42 InclusiveScan 2556
+            2558:   2251(ptr) AccessChain 34(data) 2554 2243
+                              Store 2558 2557
+            2559:      6(int) Load 8(invocation)
+            2560:   2244(ptr) AccessChain 34(data) 37 2243 38
+            2561: 25(int64_t) Load 2560
+            2562: 25(int64_t) GroupNonUniformBitwiseOr 42 InclusiveScan 2561
+            2563:   2244(ptr) AccessChain 34(data) 2559 2243 38
+                              Store 2563 2562
+            2564:      6(int) Load 8(invocation)
+            2565:   2251(ptr) AccessChain 34(data) 46 2243
+            2566: 26(i64vec4) Load 2565
+            2567:2250(i64vec2) VectorShuffle 2566 2566 0 1
+            2568:2250(i64vec2) GroupNonUniformBitwiseOr 42 InclusiveScan 2567
+            2569:   2251(ptr) AccessChain 34(data) 2564 2243
+            2570: 26(i64vec4) Load 2569
+            2571: 26(i64vec4) VectorShuffle 2570 2568 4 5 2 3
+                              Store 2569 2571
+            2572:      6(int) Load 8(invocation)
+            2573:   2251(ptr) AccessChain 34(data) 57 2243
+            2574: 26(i64vec4) Load 2573
+            2575:2260(i64vec3) VectorShuffle 2574 2574 0 1 2
+            2576:2260(i64vec3) GroupNonUniformBitwiseOr 42 InclusiveScan 2575
+            2577:   2251(ptr) AccessChain 34(data) 2572 2243
+            2578: 26(i64vec4) Load 2577
+            2579: 26(i64vec4) VectorShuffle 2578 2576 4 5 6 3
+                              Store 2577 2579
+            2580:      6(int) Load 8(invocation)
+            2581:   2251(ptr) AccessChain 34(data) 67 2243
+            2582: 26(i64vec4) Load 2581
+            2583: 26(i64vec4) GroupNonUniformBitwiseOr 42 InclusiveScan 2582
+            2584:   2251(ptr) AccessChain 34(data) 2580 2243
+                              Store 2584 2583
+            2585:      6(int) Load 8(invocation)
+            2586:   2244(ptr) AccessChain 34(data) 37 2243 38
+            2587: 25(int64_t) Load 2586
+            2588: 25(int64_t) GroupNonUniformBitwiseXor 42 InclusiveScan 2587
+            2589:   2244(ptr) AccessChain 34(data) 2585 2243 38
+                              Store 2589 2588
+            2590:      6(int) Load 8(invocation)
+            2591:   2251(ptr) AccessChain 34(data) 46 2243
+            2592: 26(i64vec4) Load 2591
+            2593:2250(i64vec2) VectorShuffle 2592 2592 0 1
+            2594:2250(i64vec2) GroupNonUniformBitwiseXor 42 InclusiveScan 2593
+            2595:   2251(ptr) AccessChain 34(data) 2590 2243
+            2596: 26(i64vec4) Load 2595
+            2597: 26(i64vec4) VectorShuffle 2596 2594 4 5 2 3
+                              Store 2595 2597
+            2598:      6(int) Load 8(invocation)
+            2599:   2251(ptr) AccessChain 34(data) 57 2243
+            2600: 26(i64vec4) Load 2599
+            2601:2260(i64vec3) VectorShuffle 2600 2600 0 1 2
+            2602:2260(i64vec3) GroupNonUniformBitwiseXor 42 InclusiveScan 2601
+            2603:   2251(ptr) AccessChain 34(data) 2598 2243
+            2604: 26(i64vec4) Load 2603
+            2605: 26(i64vec4) VectorShuffle 2604 2602 4 5 6 3
+                              Store 2603 2605
+            2606:      6(int) Load 8(invocation)
+            2607:   2251(ptr) AccessChain 34(data) 67 2243
+            2608: 26(i64vec4) Load 2607
+            2609: 26(i64vec4) GroupNonUniformBitwiseXor 42 InclusiveScan 2608
+            2610:   2251(ptr) AccessChain 34(data) 2606 2243
+                              Store 2610 2609
+            2611:      6(int) Load 8(invocation)
+            2612:   2244(ptr) AccessChain 34(data) 37 2243 38
+            2613: 25(int64_t) Load 2612
+            2614: 25(int64_t) GroupNonUniformIAdd 42 ExclusiveScan 2613
+            2615:   2244(ptr) AccessChain 34(data) 2611 2243 38
+                              Store 2615 2614
+            2616:      6(int) Load 8(invocation)
+            2617:   2251(ptr) AccessChain 34(data) 46 2243
+            2618: 26(i64vec4) Load 2617
+            2619:2250(i64vec2) VectorShuffle 2618 2618 0 1
+            2620:2250(i64vec2) GroupNonUniformIAdd 42 ExclusiveScan 2619
+            2621:   2251(ptr) AccessChain 34(data) 2616 2243
+            2622: 26(i64vec4) Load 2621
+            2623: 26(i64vec4) VectorShuffle 2622 2620 4 5 2 3
+                              Store 2621 2623
+            2624:      6(int) Load 8(invocation)
+            2625:   2251(ptr) AccessChain 34(data) 57 2243
+            2626: 26(i64vec4) Load 2625
+            2627:2260(i64vec3) VectorShuffle 2626 2626 0 1 2
+            2628:2260(i64vec3) GroupNonUniformIAdd 42 ExclusiveScan 2627
+            2629:   2251(ptr) AccessChain 34(data) 2624 2243
+            2630: 26(i64vec4) Load 2629
+            2631: 26(i64vec4) VectorShuffle 2630 2628 4 5 6 3
+                              Store 2629 2631
+            2632:      6(int) Load 8(invocation)
+            2633:   2251(ptr) AccessChain 34(data) 67 2243
+            2634: 26(i64vec4) Load 2633
+            2635: 26(i64vec4) GroupNonUniformIAdd 42 ExclusiveScan 2634
+            2636:   2251(ptr) AccessChain 34(data) 2632 2243
+                              Store 2636 2635
+            2637:      6(int) Load 8(invocation)
+            2638:   2244(ptr) AccessChain 34(data) 37 2243 38
+            2639: 25(int64_t) Load 2638
+            2640: 25(int64_t) GroupNonUniformIMul 42 ExclusiveScan 2639
+            2641:   2244(ptr) AccessChain 34(data) 2637 2243 38
+                              Store 2641 2640
+            2642:      6(int) Load 8(invocation)
+            2643:   2251(ptr) AccessChain 34(data) 46 2243
+            2644: 26(i64vec4) Load 2643
+            2645:2250(i64vec2) VectorShuffle 2644 2644 0 1
+            2646:2250(i64vec2) GroupNonUniformIMul 42 ExclusiveScan 2645
+            2647:   2251(ptr) AccessChain 34(data) 2642 2243
+            2648: 26(i64vec4) Load 2647
+            2649: 26(i64vec4) VectorShuffle 2648 2646 4 5 2 3
+                              Store 2647 2649
+            2650:      6(int) Load 8(invocation)
+            2651:   2251(ptr) AccessChain 34(data) 57 2243
+            2652: 26(i64vec4) Load 2651
+            2653:2260(i64vec3) VectorShuffle 2652 2652 0 1 2
+            2654:2260(i64vec3) GroupNonUniformIMul 42 ExclusiveScan 2653
+            2655:   2251(ptr) AccessChain 34(data) 2650 2243
+            2656: 26(i64vec4) Load 2655
+            2657: 26(i64vec4) VectorShuffle 2656 2654 4 5 6 3
+                              Store 2655 2657
+            2658:      6(int) Load 8(invocation)
+            2659:   2251(ptr) AccessChain 34(data) 67 2243
+            2660: 26(i64vec4) Load 2659
+            2661: 26(i64vec4) GroupNonUniformIMul 42 ExclusiveScan 2660
+            2662:   2251(ptr) AccessChain 34(data) 2658 2243
+                              Store 2662 2661
+            2663:      6(int) Load 8(invocation)
+            2664:   2244(ptr) AccessChain 34(data) 37 2243 38
+            2665: 25(int64_t) Load 2664
+            2666: 25(int64_t) GroupNonUniformSMin 42 ExclusiveScan 2665
+            2667:   2244(ptr) AccessChain 34(data) 2663 2243 38
+                              Store 2667 2666
+            2668:      6(int) Load 8(invocation)
+            2669:   2251(ptr) AccessChain 34(data) 46 2243
+            2670: 26(i64vec4) Load 2669
+            2671:2250(i64vec2) VectorShuffle 2670 2670 0 1
+            2672:2250(i64vec2) GroupNonUniformSMin 42 ExclusiveScan 2671
+            2673:   2251(ptr) AccessChain 34(data) 2668 2243
+            2674: 26(i64vec4) Load 2673
+            2675: 26(i64vec4) VectorShuffle 2674 2672 4 5 2 3
+                              Store 2673 2675
+            2676:      6(int) Load 8(invocation)
+            2677:   2251(ptr) AccessChain 34(data) 57 2243
+            2678: 26(i64vec4) Load 2677
+            2679:2260(i64vec3) VectorShuffle 2678 2678 0 1 2
+            2680:2260(i64vec3) GroupNonUniformSMin 42 ExclusiveScan 2679
+            2681:   2251(ptr) AccessChain 34(data) 2676 2243
+            2682: 26(i64vec4) Load 2681
+            2683: 26(i64vec4) VectorShuffle 2682 2680 4 5 6 3
+                              Store 2681 2683
+            2684:      6(int) Load 8(invocation)
+            2685:   2251(ptr) AccessChain 34(data) 67 2243
+            2686: 26(i64vec4) Load 2685
+            2687: 26(i64vec4) GroupNonUniformSMin 42 ExclusiveScan 2686
+            2688:   2251(ptr) AccessChain 34(data) 2684 2243
+                              Store 2688 2687
+            2689:      6(int) Load 8(invocation)
+            2690:   2244(ptr) AccessChain 34(data) 37 2243 38
+            2691: 25(int64_t) Load 2690
+            2692: 25(int64_t) GroupNonUniformSMax 42 ExclusiveScan 2691
+            2693:   2244(ptr) AccessChain 34(data) 2689 2243 38
+                              Store 2693 2692
+            2694:      6(int) Load 8(invocation)
+            2695:   2251(ptr) AccessChain 34(data) 46 2243
+            2696: 26(i64vec4) Load 2695
+            2697:2250(i64vec2) VectorShuffle 2696 2696 0 1
+            2698:2250(i64vec2) GroupNonUniformSMax 42 ExclusiveScan 2697
+            2699:   2251(ptr) AccessChain 34(data) 2694 2243
+            2700: 26(i64vec4) Load 2699
+            2701: 26(i64vec4) VectorShuffle 2700 2698 4 5 2 3
+                              Store 2699 2701
+            2702:      6(int) Load 8(invocation)
+            2703:   2251(ptr) AccessChain 34(data) 57 2243
+            2704: 26(i64vec4) Load 2703
+            2705:2260(i64vec3) VectorShuffle 2704 2704 0 1 2
+            2706:2260(i64vec3) GroupNonUniformSMax 42 ExclusiveScan 2705
+            2707:   2251(ptr) AccessChain 34(data) 2702 2243
+            2708: 26(i64vec4) Load 2707
+            2709: 26(i64vec4) VectorShuffle 2708 2706 4 5 6 3
+                              Store 2707 2709
+            2710:      6(int) Load 8(invocation)
+            2711:   2251(ptr) AccessChain 34(data) 67 2243
+            2712: 26(i64vec4) Load 2711
+            2713: 26(i64vec4) GroupNonUniformSMax 42 ExclusiveScan 2712
+            2714:   2251(ptr) AccessChain 34(data) 2710 2243
+                              Store 2714 2713
+            2715:      6(int) Load 8(invocation)
+            2716:   2244(ptr) AccessChain 34(data) 37 2243 38
+            2717: 25(int64_t) Load 2716
+            2718: 25(int64_t) GroupNonUniformBitwiseAnd 42 ExclusiveScan 2717
+            2719:   2244(ptr) AccessChain 34(data) 2715 2243 38
+                              Store 2719 2718
+            2720:      6(int) Load 8(invocation)
+            2721:   2251(ptr) AccessChain 34(data) 46 2243
+            2722: 26(i64vec4) Load 2721
+            2723:2250(i64vec2) VectorShuffle 2722 2722 0 1
+            2724:2250(i64vec2) GroupNonUniformBitwiseAnd 42 ExclusiveScan 2723
+            2725:   2251(ptr) AccessChain 34(data) 2720 2243
+            2726: 26(i64vec4) Load 2725
+            2727: 26(i64vec4) VectorShuffle 2726 2724 4 5 2 3
+                              Store 2725 2727
+            2728:      6(int) Load 8(invocation)
+            2729:   2251(ptr) AccessChain 34(data) 57 2243
+            2730: 26(i64vec4) Load 2729
+            2731:2260(i64vec3) VectorShuffle 2730 2730 0 1 2
+            2732:2260(i64vec3) GroupNonUniformBitwiseAnd 42 ExclusiveScan 2731
+            2733:   2251(ptr) AccessChain 34(data) 2728 2243
+            2734: 26(i64vec4) Load 2733
+            2735: 26(i64vec4) VectorShuffle 2734 2732 4 5 6 3
+                              Store 2733 2735
+            2736:      6(int) Load 8(invocation)
+            2737:   2251(ptr) AccessChain 34(data) 67 2243
+            2738: 26(i64vec4) Load 2737
+            2739: 26(i64vec4) GroupNonUniformBitwiseAnd 42 ExclusiveScan 2738
+            2740:   2251(ptr) AccessChain 34(data) 2736 2243
+                              Store 2740 2739
+            2741:      6(int) Load 8(invocation)
+            2742:   2244(ptr) AccessChain 34(data) 37 2243 38
+            2743: 25(int64_t) Load 2742
+            2744: 25(int64_t) GroupNonUniformBitwiseOr 42 ExclusiveScan 2743
+            2745:   2244(ptr) AccessChain 34(data) 2741 2243 38
+                              Store 2745 2744
+            2746:      6(int) Load 8(invocation)
+            2747:   2251(ptr) AccessChain 34(data) 46 2243
+            2748: 26(i64vec4) Load 2747
+            2749:2250(i64vec2) VectorShuffle 2748 2748 0 1
+            2750:2250(i64vec2) GroupNonUniformBitwiseOr 42 ExclusiveScan 2749
+            2751:   2251(ptr) AccessChain 34(data) 2746 2243
+            2752: 26(i64vec4) Load 2751
+            2753: 26(i64vec4) VectorShuffle 2752 2750 4 5 2 3
+                              Store 2751 2753
+            2754:      6(int) Load 8(invocation)
+            2755:   2251(ptr) AccessChain 34(data) 57 2243
+            2756: 26(i64vec4) Load 2755
+            2757:2260(i64vec3) VectorShuffle 2756 2756 0 1 2
+            2758:2260(i64vec3) GroupNonUniformBitwiseOr 42 ExclusiveScan 2757
+            2759:   2251(ptr) AccessChain 34(data) 2754 2243
+            2760: 26(i64vec4) Load 2759
+            2761: 26(i64vec4) VectorShuffle 2760 2758 4 5 6 3
+                              Store 2759 2761
+            2762:      6(int) Load 8(invocation)
+            2763:   2251(ptr) AccessChain 34(data) 67 2243
+            2764: 26(i64vec4) Load 2763
+            2765: 26(i64vec4) GroupNonUniformBitwiseOr 42 ExclusiveScan 2764
+            2766:   2251(ptr) AccessChain 34(data) 2762 2243
+                              Store 2766 2765
+            2767:      6(int) Load 8(invocation)
+            2768:   2244(ptr) AccessChain 34(data) 37 2243 38
+            2769: 25(int64_t) Load 2768
+            2770: 25(int64_t) GroupNonUniformBitwiseXor 42 ExclusiveScan 2769
+            2771:   2244(ptr) AccessChain 34(data) 2767 2243 38
+                              Store 2771 2770
+            2772:      6(int) Load 8(invocation)
+            2773:   2251(ptr) AccessChain 34(data) 46 2243
+            2774: 26(i64vec4) Load 2773
+            2775:2250(i64vec2) VectorShuffle 2774 2774 0 1
+            2776:2250(i64vec2) GroupNonUniformBitwiseXor 42 ExclusiveScan 2775
+            2777:   2251(ptr) AccessChain 34(data) 2772 2243
+            2778: 26(i64vec4) Load 2777
+            2779: 26(i64vec4) VectorShuffle 2778 2776 4 5 2 3
+                              Store 2777 2779
+            2780:      6(int) Load 8(invocation)
+            2781:   2251(ptr) AccessChain 34(data) 57 2243
+            2782: 26(i64vec4) Load 2781
+            2783:2260(i64vec3) VectorShuffle 2782 2782 0 1 2
+            2784:2260(i64vec3) GroupNonUniformBitwiseXor 42 ExclusiveScan 2783
+            2785:   2251(ptr) AccessChain 34(data) 2780 2243
+            2786: 26(i64vec4) Load 2785
+            2787: 26(i64vec4) VectorShuffle 2786 2784 4 5 6 3
+                              Store 2785 2787
+            2788:      6(int) Load 8(invocation)
+            2789:   2251(ptr) AccessChain 34(data) 67 2243
+            2790: 26(i64vec4) Load 2789
+            2791: 26(i64vec4) GroupNonUniformBitwiseXor 42 ExclusiveScan 2790
+            2792:   2251(ptr) AccessChain 34(data) 2788 2243
+                              Store 2792 2791
+            2793:      6(int) Load 8(invocation)
+            2796:   2795(ptr) AccessChain 34(data) 37 2794 38
+            2797: 27(int64_t) Load 2796
+            2798: 27(int64_t) GroupNonUniformIAdd 42 Reduce 2797
+            2799:   2795(ptr) AccessChain 34(data) 2793 2794 38
+                              Store 2799 2798
+            2800:      6(int) Load 8(invocation)
+            2803:   2802(ptr) AccessChain 34(data) 46 2794
+            2804: 28(i64vec4) Load 2803
+            2805:2801(i64vec2) VectorShuffle 2804 2804 0 1
+            2806:2801(i64vec2) GroupNonUniformIAdd 42 Reduce 2805
+            2807:   2802(ptr) AccessChain 34(data) 2800 2794
+            2808: 28(i64vec4) Load 2807
+            2809: 28(i64vec4) VectorShuffle 2808 2806 4 5 2 3
+                              Store 2807 2809
+            2810:      6(int) Load 8(invocation)
+            2812:   2802(ptr) AccessChain 34(data) 57 2794
+            2813: 28(i64vec4) Load 2812
+            2814:2811(i64vec3) VectorShuffle 2813 2813 0 1 2
+            2815:2811(i64vec3) GroupNonUniformIAdd 42 Reduce 2814
+            2816:   2802(ptr) AccessChain 34(data) 2810 2794
+            2817: 28(i64vec4) Load 2816
+            2818: 28(i64vec4) VectorShuffle 2817 2815 4 5 6 3
+                              Store 2816 2818
+            2819:      6(int) Load 8(invocation)
+            2820:   2802(ptr) AccessChain 34(data) 67 2794
+            2821: 28(i64vec4) Load 2820
+            2822: 28(i64vec4) GroupNonUniformIAdd 42 Reduce 2821
+            2823:   2802(ptr) AccessChain 34(data) 2819 2794
+                              Store 2823 2822
+            2824:      6(int) Load 8(invocation)
+            2825:   2795(ptr) AccessChain 34(data) 37 2794 38
+            2826: 27(int64_t) Load 2825
+            2827: 27(int64_t) GroupNonUniformIMul 42 Reduce 2826
+            2828:   2795(ptr) AccessChain 34(data) 2824 2794 38
+                              Store 2828 2827
+            2829:      6(int) Load 8(invocation)
+            2830:   2802(ptr) AccessChain 34(data) 46 2794
+            2831: 28(i64vec4) Load 2830
+            2832:2801(i64vec2) VectorShuffle 2831 2831 0 1
+            2833:2801(i64vec2) GroupNonUniformIMul 42 Reduce 2832
+            2834:   2802(ptr) AccessChain 34(data) 2829 2794
+            2835: 28(i64vec4) Load 2834
+            2836: 28(i64vec4) VectorShuffle 2835 2833 4 5 2 3
+                              Store 2834 2836
+            2837:      6(int) Load 8(invocation)
+            2838:   2802(ptr) AccessChain 34(data) 57 2794
+            2839: 28(i64vec4) Load 2838
+            2840:2811(i64vec3) VectorShuffle 2839 2839 0 1 2
+            2841:2811(i64vec3) GroupNonUniformIMul 42 Reduce 2840
+            2842:   2802(ptr) AccessChain 34(data) 2837 2794
+            2843: 28(i64vec4) Load 2842
+            2844: 28(i64vec4) VectorShuffle 2843 2841 4 5 6 3
+                              Store 2842 2844
+            2845:      6(int) Load 8(invocation)
+            2846:   2802(ptr) AccessChain 34(data) 67 2794
+            2847: 28(i64vec4) Load 2846
+            2848: 28(i64vec4) GroupNonUniformIMul 42 Reduce 2847
+            2849:   2802(ptr) AccessChain 34(data) 2845 2794
+                              Store 2849 2848
+            2850:      6(int) Load 8(invocation)
+            2851:   2795(ptr) AccessChain 34(data) 37 2794 38
+            2852: 27(int64_t) Load 2851
+            2853: 27(int64_t) GroupNonUniformUMin 42 Reduce 2852
+            2854:   2795(ptr) AccessChain 34(data) 2850 2794 38
+                              Store 2854 2853
+            2855:      6(int) Load 8(invocation)
+            2856:   2802(ptr) AccessChain 34(data) 46 2794
+            2857: 28(i64vec4) Load 2856
+            2858:2801(i64vec2) VectorShuffle 2857 2857 0 1
+            2859:2801(i64vec2) GroupNonUniformUMin 42 Reduce 2858
+            2860:   2802(ptr) AccessChain 34(data) 2855 2794
+            2861: 28(i64vec4) Load 2860
+            2862: 28(i64vec4) VectorShuffle 2861 2859 4 5 2 3
+                              Store 2860 2862
+            2863:      6(int) Load 8(invocation)
+            2864:   2802(ptr) AccessChain 34(data) 57 2794
+            2865: 28(i64vec4) Load 2864
+            2866:2811(i64vec3) VectorShuffle 2865 2865 0 1 2
+            2867:2811(i64vec3) GroupNonUniformUMin 42 Reduce 2866
+            2868:   2802(ptr) AccessChain 34(data) 2863 2794
+            2869: 28(i64vec4) Load 2868
+            2870: 28(i64vec4) VectorShuffle 2869 2867 4 5 6 3
+                              Store 2868 2870
+            2871:      6(int) Load 8(invocation)
+            2872:   2802(ptr) AccessChain 34(data) 67 2794
+            2873: 28(i64vec4) Load 2872
+            2874: 28(i64vec4) GroupNonUniformUMin 42 Reduce 2873
+            2875:   2802(ptr) AccessChain 34(data) 2871 2794
+                              Store 2875 2874
+            2876:      6(int) Load 8(invocation)
+            2877:   2795(ptr) AccessChain 34(data) 37 2794 38
+            2878: 27(int64_t) Load 2877
+            2879: 27(int64_t) GroupNonUniformUMax 42 Reduce 2878
+            2880:   2795(ptr) AccessChain 34(data) 2876 2794 38
+                              Store 2880 2879
+            2881:      6(int) Load 8(invocation)
+            2882:   2802(ptr) AccessChain 34(data) 46 2794
+            2883: 28(i64vec4) Load 2882
+            2884:2801(i64vec2) VectorShuffle 2883 2883 0 1
+            2885:2801(i64vec2) GroupNonUniformUMax 42 Reduce 2884
+            2886:   2802(ptr) AccessChain 34(data) 2881 2794
+            2887: 28(i64vec4) Load 2886
+            2888: 28(i64vec4) VectorShuffle 2887 2885 4 5 2 3
+                              Store 2886 2888
+            2889:      6(int) Load 8(invocation)
+            2890:   2802(ptr) AccessChain 34(data) 57 2794
+            2891: 28(i64vec4) Load 2890
+            2892:2811(i64vec3) VectorShuffle 2891 2891 0 1 2
+            2893:2811(i64vec3) GroupNonUniformUMax 42 Reduce 2892
+            2894:   2802(ptr) AccessChain 34(data) 2889 2794
+            2895: 28(i64vec4) Load 2894
+            2896: 28(i64vec4) VectorShuffle 2895 2893 4 5 6 3
+                              Store 2894 2896
+            2897:      6(int) Load 8(invocation)
+            2898:   2802(ptr) AccessChain 34(data) 67 2794
+            2899: 28(i64vec4) Load 2898
+            2900: 28(i64vec4) GroupNonUniformUMax 42 Reduce 2899
+            2901:   2802(ptr) AccessChain 34(data) 2897 2794
+                              Store 2901 2900
+            2902:      6(int) Load 8(invocation)
+            2903:   2795(ptr) AccessChain 34(data) 37 2794 38
+            2904: 27(int64_t) Load 2903
+            2905: 27(int64_t) GroupNonUniformBitwiseAnd 42 Reduce 2904
+            2906:   2795(ptr) AccessChain 34(data) 2902 2794 38
+                              Store 2906 2905
+            2907:      6(int) Load 8(invocation)
+            2908:   2802(ptr) AccessChain 34(data) 46 2794
+            2909: 28(i64vec4) Load 2908
+            2910:2801(i64vec2) VectorShuffle 2909 2909 0 1
+            2911:2801(i64vec2) GroupNonUniformBitwiseAnd 42 Reduce 2910
+            2912:   2802(ptr) AccessChain 34(data) 2907 2794
+            2913: 28(i64vec4) Load 2912
+            2914: 28(i64vec4) VectorShuffle 2913 2911 4 5 2 3
+                              Store 2912 2914
+            2915:      6(int) Load 8(invocation)
+            2916:   2802(ptr) AccessChain 34(data) 57 2794
+            2917: 28(i64vec4) Load 2916
+            2918:2811(i64vec3) VectorShuffle 2917 2917 0 1 2
+            2919:2811(i64vec3) GroupNonUniformBitwiseAnd 42 Reduce 2918
+            2920:   2802(ptr) AccessChain 34(data) 2915 2794
+            2921: 28(i64vec4) Load 2920
+            2922: 28(i64vec4) VectorShuffle 2921 2919 4 5 6 3
+                              Store 2920 2922
+            2923:      6(int) Load 8(invocation)
+            2924:   2802(ptr) AccessChain 34(data) 67 2794
+            2925: 28(i64vec4) Load 2924
+            2926: 28(i64vec4) GroupNonUniformBitwiseAnd 42 Reduce 2925
+            2927:   2802(ptr) AccessChain 34(data) 2923 2794
+                              Store 2927 2926
+            2928:      6(int) Load 8(invocation)
+            2929:   2795(ptr) AccessChain 34(data) 37 2794 38
+            2930: 27(int64_t) Load 2929
+            2931: 27(int64_t) GroupNonUniformBitwiseOr 42 Reduce 2930
+            2932:   2795(ptr) AccessChain 34(data) 2928 2794 38
+                              Store 2932 2931
+            2933:      6(int) Load 8(invocation)
+            2934:   2802(ptr) AccessChain 34(data) 46 2794
+            2935: 28(i64vec4) Load 2934
+            2936:2801(i64vec2) VectorShuffle 2935 2935 0 1
+            2937:2801(i64vec2) GroupNonUniformBitwiseOr 42 Reduce 2936
+            2938:   2802(ptr) AccessChain 34(data) 2933 2794
+            2939: 28(i64vec4) Load 2938
+            2940: 28(i64vec4) VectorShuffle 2939 2937 4 5 2 3
+                              Store 2938 2940
+            2941:      6(int) Load 8(invocation)
+            2942:   2802(ptr) AccessChain 34(data) 57 2794
+            2943: 28(i64vec4) Load 2942
+            2944:2811(i64vec3) VectorShuffle 2943 2943 0 1 2
+            2945:2811(i64vec3) GroupNonUniformBitwiseOr 42 Reduce 2944
+            2946:   2802(ptr) AccessChain 34(data) 2941 2794
+            2947: 28(i64vec4) Load 2946
+            2948: 28(i64vec4) VectorShuffle 2947 2945 4 5 6 3
+                              Store 2946 2948
+            2949:      6(int) Load 8(invocation)
+            2950:   2802(ptr) AccessChain 34(data) 67 2794
+            2951: 28(i64vec4) Load 2950
+            2952: 28(i64vec4) GroupNonUniformBitwiseOr 42 Reduce 2951
+            2953:   2802(ptr) AccessChain 34(data) 2949 2794
+                              Store 2953 2952
+            2954:      6(int) Load 8(invocation)
+            2955:   2795(ptr) AccessChain 34(data) 37 2794 38
+            2956: 27(int64_t) Load 2955
+            2957: 27(int64_t) GroupNonUniformBitwiseXor 42 Reduce 2956
+            2958:   2795(ptr) AccessChain 34(data) 2954 2794 38
+                              Store 2958 2957
+            2959:      6(int) Load 8(invocation)
+            2960:   2802(ptr) AccessChain 34(data) 46 2794
+            2961: 28(i64vec4) Load 2960
+            2962:2801(i64vec2) VectorShuffle 2961 2961 0 1
+            2963:2801(i64vec2) GroupNonUniformBitwiseXor 42 Reduce 2962
+            2964:   2802(ptr) AccessChain 34(data) 2959 2794
+            2965: 28(i64vec4) Load 2964
+            2966: 28(i64vec4) VectorShuffle 2965 2963 4 5 2 3
+                              Store 2964 2966
+            2967:      6(int) Load 8(invocation)
+            2968:   2802(ptr) AccessChain 34(data) 57 2794
+            2969: 28(i64vec4) Load 2968
+            2970:2811(i64vec3) VectorShuffle 2969 2969 0 1 2
+            2971:2811(i64vec3) GroupNonUniformBitwiseXor 42 Reduce 2970
+            2972:   2802(ptr) AccessChain 34(data) 2967 2794
+            2973: 28(i64vec4) Load 2972
+            2974: 28(i64vec4) VectorShuffle 2973 2971 4 5 6 3
+                              Store 2972 2974
+            2975:      6(int) Load 8(invocation)
+            2976:   2802(ptr) AccessChain 34(data) 67 2794
+            2977: 28(i64vec4) Load 2976
+            2978: 28(i64vec4) GroupNonUniformBitwiseXor 42 Reduce 2977
+            2979:   2802(ptr) AccessChain 34(data) 2975 2794
+                              Store 2979 2978
+            2980:      6(int) Load 8(invocation)
+            2981:   2795(ptr) AccessChain 34(data) 37 2794 38
+            2982: 27(int64_t) Load 2981
+            2983: 27(int64_t) GroupNonUniformIAdd 42 InclusiveScan 2982
+            2984:   2795(ptr) AccessChain 34(data) 2980 2794 38
+                              Store 2984 2983
+            2985:      6(int) Load 8(invocation)
+            2986:   2802(ptr) AccessChain 34(data) 46 2794
+            2987: 28(i64vec4) Load 2986
+            2988:2801(i64vec2) VectorShuffle 2987 2987 0 1
+            2989:2801(i64vec2) GroupNonUniformIAdd 42 InclusiveScan 2988
+            2990:   2802(ptr) AccessChain 34(data) 2985 2794
+            2991: 28(i64vec4) Load 2990
+            2992: 28(i64vec4) VectorShuffle 2991 2989 4 5 2 3
+                              Store 2990 2992
+            2993:      6(int) Load 8(invocation)
+            2994:   2802(ptr) AccessChain 34(data) 57 2794
+            2995: 28(i64vec4) Load 2994
+            2996:2811(i64vec3) VectorShuffle 2995 2995 0 1 2
+            2997:2811(i64vec3) GroupNonUniformIAdd 42 InclusiveScan 2996
+            2998:   2802(ptr) AccessChain 34(data) 2993 2794
+            2999: 28(i64vec4) Load 2998
+            3000: 28(i64vec4) VectorShuffle 2999 2997 4 5 6 3
+                              Store 2998 3000
+            3001:      6(int) Load 8(invocation)
+            3002:   2802(ptr) AccessChain 34(data) 67 2794
+            3003: 28(i64vec4) Load 3002
+            3004: 28(i64vec4) GroupNonUniformIAdd 42 InclusiveScan 3003
+            3005:   2802(ptr) AccessChain 34(data) 3001 2794
+                              Store 3005 3004
+            3006:      6(int) Load 8(invocation)
+            3007:   2795(ptr) AccessChain 34(data) 37 2794 38
+            3008: 27(int64_t) Load 3007
+            3009: 27(int64_t) GroupNonUniformIMul 42 InclusiveScan 3008
+            3010:   2795(ptr) AccessChain 34(data) 3006 2794 38
+                              Store 3010 3009
+            3011:      6(int) Load 8(invocation)
+            3012:   2802(ptr) AccessChain 34(data) 46 2794
+            3013: 28(i64vec4) Load 3012
+            3014:2801(i64vec2) VectorShuffle 3013 3013 0 1
+            3015:2801(i64vec2) GroupNonUniformIMul 42 InclusiveScan 3014
+            3016:   2802(ptr) AccessChain 34(data) 3011 2794
+            3017: 28(i64vec4) Load 3016
+            3018: 28(i64vec4) VectorShuffle 3017 3015 4 5 2 3
+                              Store 3016 3018
+            3019:      6(int) Load 8(invocation)
+            3020:   2802(ptr) AccessChain 34(data) 57 2794
+            3021: 28(i64vec4) Load 3020
+            3022:2811(i64vec3) VectorShuffle 3021 3021 0 1 2
+            3023:2811(i64vec3) GroupNonUniformIMul 42 InclusiveScan 3022
+            3024:   2802(ptr) AccessChain 34(data) 3019 2794
+            3025: 28(i64vec4) Load 3024
+            3026: 28(i64vec4) VectorShuffle 3025 3023 4 5 6 3
+                              Store 3024 3026
+            3027:      6(int) Load 8(invocation)
+            3028:   2802(ptr) AccessChain 34(data) 67 2794
+            3029: 28(i64vec4) Load 3028
+            3030: 28(i64vec4) GroupNonUniformIMul 42 InclusiveScan 3029
+            3031:   2802(ptr) AccessChain 34(data) 3027 2794
+                              Store 3031 3030
+            3032:      6(int) Load 8(invocation)
+            3033:   2795(ptr) AccessChain 34(data) 37 2794 38
+            3034: 27(int64_t) Load 3033
+            3035: 27(int64_t) GroupNonUniformUMin 42 InclusiveScan 3034
+            3036:   2795(ptr) AccessChain 34(data) 3032 2794 38
+                              Store 3036 3035
+            3037:      6(int) Load 8(invocation)
+            3038:   2802(ptr) AccessChain 34(data) 46 2794
+            3039: 28(i64vec4) Load 3038
+            3040:2801(i64vec2) VectorShuffle 3039 3039 0 1
+            3041:2801(i64vec2) GroupNonUniformUMin 42 InclusiveScan 3040
+            3042:   2802(ptr) AccessChain 34(data) 3037 2794
+            3043: 28(i64vec4) Load 3042
+            3044: 28(i64vec4) VectorShuffle 3043 3041 4 5 2 3
+                              Store 3042 3044
+            3045:      6(int) Load 8(invocation)
+            3046:   2802(ptr) AccessChain 34(data) 57 2794
+            3047: 28(i64vec4) Load 3046
+            3048:2811(i64vec3) VectorShuffle 3047 3047 0 1 2
+            3049:2811(i64vec3) GroupNonUniformUMin 42 InclusiveScan 3048
+            3050:   2802(ptr) AccessChain 34(data) 3045 2794
+            3051: 28(i64vec4) Load 3050
+            3052: 28(i64vec4) VectorShuffle 3051 3049 4 5 6 3
+                              Store 3050 3052
+            3053:      6(int) Load 8(invocation)
+            3054:   2802(ptr) AccessChain 34(data) 67 2794
+            3055: 28(i64vec4) Load 3054
+            3056: 28(i64vec4) GroupNonUniformUMin 42 InclusiveScan 3055
+            3057:   2802(ptr) AccessChain 34(data) 3053 2794
+                              Store 3057 3056
+            3058:      6(int) Load 8(invocation)
+            3059:   2795(ptr) AccessChain 34(data) 37 2794 38
+            3060: 27(int64_t) Load 3059
+            3061: 27(int64_t) GroupNonUniformUMax 42 InclusiveScan 3060
+            3062:   2795(ptr) AccessChain 34(data) 3058 2794 38
+                              Store 3062 3061
+            3063:      6(int) Load 8(invocation)
+            3064:   2802(ptr) AccessChain 34(data) 46 2794
+            3065: 28(i64vec4) Load 3064
+            3066:2801(i64vec2) VectorShuffle 3065 3065 0 1
+            3067:2801(i64vec2) GroupNonUniformUMax 42 InclusiveScan 3066
+            3068:   2802(ptr) AccessChain 34(data) 3063 2794
+            3069: 28(i64vec4) Load 3068
+            3070: 28(i64vec4) VectorShuffle 3069 3067 4 5 2 3
+                              Store 3068 3070
+            3071:      6(int) Load 8(invocation)
+            3072:   2802(ptr) AccessChain 34(data) 57 2794
+            3073: 28(i64vec4) Load 3072
+            3074:2811(i64vec3) VectorShuffle 3073 3073 0 1 2
+            3075:2811(i64vec3) GroupNonUniformUMax 42 InclusiveScan 3074
+            3076:   2802(ptr) AccessChain 34(data) 3071 2794
+            3077: 28(i64vec4) Load 3076
+            3078: 28(i64vec4) VectorShuffle 3077 3075 4 5 6 3
+                              Store 3076 3078
+            3079:      6(int) Load 8(invocation)
+            3080:   2802(ptr) AccessChain 34(data) 67 2794
+            3081: 28(i64vec4) Load 3080
+            3082: 28(i64vec4) GroupNonUniformUMax 42 InclusiveScan 3081
+            3083:   2802(ptr) AccessChain 34(data) 3079 2794
+                              Store 3083 3082
+            3084:      6(int) Load 8(invocation)
+            3085:   2795(ptr) AccessChain 34(data) 37 2794 38
+            3086: 27(int64_t) Load 3085
+            3087: 27(int64_t) GroupNonUniformBitwiseAnd 42 InclusiveScan 3086
+            3088:   2795(ptr) AccessChain 34(data) 3084 2794 38
+                              Store 3088 3087
+            3089:      6(int) Load 8(invocation)
+            3090:   2802(ptr) AccessChain 34(data) 46 2794
+            3091: 28(i64vec4) Load 3090
+            3092:2801(i64vec2) VectorShuffle 3091 3091 0 1
+            3093:2801(i64vec2) GroupNonUniformBitwiseAnd 42 InclusiveScan 3092
+            3094:   2802(ptr) AccessChain 34(data) 3089 2794
+            3095: 28(i64vec4) Load 3094
+            3096: 28(i64vec4) VectorShuffle 3095 3093 4 5 2 3
+                              Store 3094 3096
+            3097:      6(int) Load 8(invocation)
+            3098:   2802(ptr) AccessChain 34(data) 57 2794
+            3099: 28(i64vec4) Load 3098
+            3100:2811(i64vec3) VectorShuffle 3099 3099 0 1 2
+            3101:2811(i64vec3) GroupNonUniformBitwiseAnd 42 InclusiveScan 3100
+            3102:   2802(ptr) AccessChain 34(data) 3097 2794
+            3103: 28(i64vec4) Load 3102
+            3104: 28(i64vec4) VectorShuffle 3103 3101 4 5 6 3
+                              Store 3102 3104
+            3105:      6(int) Load 8(invocation)
+            3106:   2802(ptr) AccessChain 34(data) 67 2794
+            3107: 28(i64vec4) Load 3106
+            3108: 28(i64vec4) GroupNonUniformBitwiseAnd 42 InclusiveScan 3107
+            3109:   2802(ptr) AccessChain 34(data) 3105 2794
+                              Store 3109 3108
+            3110:      6(int) Load 8(invocation)
+            3111:   2795(ptr) AccessChain 34(data) 37 2794 38
+            3112: 27(int64_t) Load 3111
+            3113: 27(int64_t) GroupNonUniformBitwiseOr 42 InclusiveScan 3112
+            3114:   2795(ptr) AccessChain 34(data) 3110 2794 38
+                              Store 3114 3113
+            3115:      6(int) Load 8(invocation)
+            3116:   2802(ptr) AccessChain 34(data) 46 2794
+            3117: 28(i64vec4) Load 3116
+            3118:2801(i64vec2) VectorShuffle 3117 3117 0 1
+            3119:2801(i64vec2) GroupNonUniformBitwiseOr 42 InclusiveScan 3118
+            3120:   2802(ptr) AccessChain 34(data) 3115 2794
+            3121: 28(i64vec4) Load 3120
+            3122: 28(i64vec4) VectorShuffle 3121 3119 4 5 2 3
+                              Store 3120 3122
+            3123:      6(int) Load 8(invocation)
+            3124:   2802(ptr) AccessChain 34(data) 57 2794
+            3125: 28(i64vec4) Load 3124
+            3126:2811(i64vec3) VectorShuffle 3125 3125 0 1 2
+            3127:2811(i64vec3) GroupNonUniformBitwiseOr 42 InclusiveScan 3126
+            3128:   2802(ptr) AccessChain 34(data) 3123 2794
+            3129: 28(i64vec4) Load 3128
+            3130: 28(i64vec4) VectorShuffle 3129 3127 4 5 6 3
+                              Store 3128 3130
+            3131:      6(int) Load 8(invocation)
+            3132:   2802(ptr) AccessChain 34(data) 67 2794
+            3133: 28(i64vec4) Load 3132
+            3134: 28(i64vec4) GroupNonUniformBitwiseOr 42 InclusiveScan 3133
+            3135:   2802(ptr) AccessChain 34(data) 3131 2794
+                              Store 3135 3134
+            3136:      6(int) Load 8(invocation)
+            3137:   2795(ptr) AccessChain 34(data) 37 2794 38
+            3138: 27(int64_t) Load 3137
+            3139: 27(int64_t) GroupNonUniformBitwiseXor 42 InclusiveScan 3138
+            3140:   2795(ptr) AccessChain 34(data) 3136 2794 38
+                              Store 3140 3139
+            3141:      6(int) Load 8(invocation)
+            3142:   2802(ptr) AccessChain 34(data) 46 2794
+            3143: 28(i64vec4) Load 3142
+            3144:2801(i64vec2) VectorShuffle 3143 3143 0 1
+            3145:2801(i64vec2) GroupNonUniformBitwiseXor 42 InclusiveScan 3144
+            3146:   2802(ptr) AccessChain 34(data) 3141 2794
+            3147: 28(i64vec4) Load 3146
+            3148: 28(i64vec4) VectorShuffle 3147 3145 4 5 2 3
+                              Store 3146 3148
+            3149:      6(int) Load 8(invocation)
+            3150:   2802(ptr) AccessChain 34(data) 57 2794
+            3151: 28(i64vec4) Load 3150
+            3152:2811(i64vec3) VectorShuffle 3151 3151 0 1 2
+            3153:2811(i64vec3) GroupNonUniformBitwiseXor 42 InclusiveScan 3152
+            3154:   2802(ptr) AccessChain 34(data) 3149 2794
+            3155: 28(i64vec4) Load 3154
+            3156: 28(i64vec4) VectorShuffle 3155 3153 4 5 6 3
+                              Store 3154 3156
+            3157:      6(int) Load 8(invocation)
+            3158:   2802(ptr) AccessChain 34(data) 67 2794
+            3159: 28(i64vec4) Load 3158
+            3160: 28(i64vec4) GroupNonUniformBitwiseXor 42 InclusiveScan 3159
+            3161:   2802(ptr) AccessChain 34(data) 3157 2794
+                              Store 3161 3160
+            3162:      6(int) Load 8(invocation)
+            3163:   2795(ptr) AccessChain 34(data) 37 2794 38
+            3164: 27(int64_t) Load 3163
+            3165: 27(int64_t) GroupNonUniformIAdd 42 ExclusiveScan 3164
+            3166:   2795(ptr) AccessChain 34(data) 3162 2794 38
+                              Store 3166 3165
+            3167:      6(int) Load 8(invocation)
+            3168:   2802(ptr) AccessChain 34(data) 46 2794
+            3169: 28(i64vec4) Load 3168
+            3170:2801(i64vec2) VectorShuffle 3169 3169 0 1
+            3171:2801(i64vec2) GroupNonUniformIAdd 42 ExclusiveScan 3170
+            3172:   2802(ptr) AccessChain 34(data) 3167 2794
+            3173: 28(i64vec4) Load 3172
+            3174: 28(i64vec4) VectorShuffle 3173 3171 4 5 2 3
+                              Store 3172 3174
+            3175:      6(int) Load 8(invocation)
+            3176:   2802(ptr) AccessChain 34(data) 57 2794
+            3177: 28(i64vec4) Load 3176
+            3178:2811(i64vec3) VectorShuffle 3177 3177 0 1 2
+            3179:2811(i64vec3) GroupNonUniformIAdd 42 ExclusiveScan 3178
+            3180:   2802(ptr) AccessChain 34(data) 3175 2794
+            3181: 28(i64vec4) Load 3180
+            3182: 28(i64vec4) VectorShuffle 3181 3179 4 5 6 3
+                              Store 3180 3182
+            3183:      6(int) Load 8(invocation)
+            3184:   2802(ptr) AccessChain 34(data) 67 2794
+            3185: 28(i64vec4) Load 3184
+            3186: 28(i64vec4) GroupNonUniformIAdd 42 ExclusiveScan 3185
+            3187:   2802(ptr) AccessChain 34(data) 3183 2794
+                              Store 3187 3186
+            3188:      6(int) Load 8(invocation)
+            3189:   2795(ptr) AccessChain 34(data) 37 2794 38
+            3190: 27(int64_t) Load 3189
+            3191: 27(int64_t) GroupNonUniformIMul 42 ExclusiveScan 3190
+            3192:   2795(ptr) AccessChain 34(data) 3188 2794 38
+                              Store 3192 3191
+            3193:      6(int) Load 8(invocation)
+            3194:   2802(ptr) AccessChain 34(data) 46 2794
+            3195: 28(i64vec4) Load 3194
+            3196:2801(i64vec2) VectorShuffle 3195 3195 0 1
+            3197:2801(i64vec2) GroupNonUniformIMul 42 ExclusiveScan 3196
+            3198:   2802(ptr) AccessChain 34(data) 3193 2794
+            3199: 28(i64vec4) Load 3198
+            3200: 28(i64vec4) VectorShuffle 3199 3197 4 5 2 3
+                              Store 3198 3200
+            3201:      6(int) Load 8(invocation)
+            3202:   2802(ptr) AccessChain 34(data) 57 2794
+            3203: 28(i64vec4) Load 3202
+            3204:2811(i64vec3) VectorShuffle 3203 3203 0 1 2
+            3205:2811(i64vec3) GroupNonUniformIMul 42 ExclusiveScan 3204
+            3206:   2802(ptr) AccessChain 34(data) 3201 2794
+            3207: 28(i64vec4) Load 3206
+            3208: 28(i64vec4) VectorShuffle 3207 3205 4 5 6 3
+                              Store 3206 3208
+            3209:      6(int) Load 8(invocation)
+            3210:   2802(ptr) AccessChain 34(data) 67 2794
+            3211: 28(i64vec4) Load 3210
+            3212: 28(i64vec4) GroupNonUniformIMul 42 ExclusiveScan 3211
+            3213:   2802(ptr) AccessChain 34(data) 3209 2794
+                              Store 3213 3212
+            3214:      6(int) Load 8(invocation)
+            3215:   2795(ptr) AccessChain 34(data) 37 2794 38
+            3216: 27(int64_t) Load 3215
+            3217: 27(int64_t) GroupNonUniformUMin 42 ExclusiveScan 3216
+            3218:   2795(ptr) AccessChain 34(data) 3214 2794 38
+                              Store 3218 3217
+            3219:      6(int) Load 8(invocation)
+            3220:   2802(ptr) AccessChain 34(data) 46 2794
+            3221: 28(i64vec4) Load 3220
+            3222:2801(i64vec2) VectorShuffle 3221 3221 0 1
+            3223:2801(i64vec2) GroupNonUniformUMin 42 ExclusiveScan 3222
+            3224:   2802(ptr) AccessChain 34(data) 3219 2794
+            3225: 28(i64vec4) Load 3224
+            3226: 28(i64vec4) VectorShuffle 3225 3223 4 5 2 3
+                              Store 3224 3226
+            3227:      6(int) Load 8(invocation)
+            3228:   2802(ptr) AccessChain 34(data) 57 2794
+            3229: 28(i64vec4) Load 3228
+            3230:2811(i64vec3) VectorShuffle 3229 3229 0 1 2
+            3231:2811(i64vec3) GroupNonUniformUMin 42 ExclusiveScan 3230
+            3232:   2802(ptr) AccessChain 34(data) 3227 2794
+            3233: 28(i64vec4) Load 3232
+            3234: 28(i64vec4) VectorShuffle 3233 3231 4 5 6 3
+                              Store 3232 3234
+            3235:      6(int) Load 8(invocation)
+            3236:   2802(ptr) AccessChain 34(data) 67 2794
+            3237: 28(i64vec4) Load 3236
+            3238: 28(i64vec4) GroupNonUniformUMin 42 ExclusiveScan 3237
+            3239:   2802(ptr) AccessChain 34(data) 3235 2794
+                              Store 3239 3238
+            3240:      6(int) Load 8(invocation)
+            3241:   2795(ptr) AccessChain 34(data) 37 2794 38
+            3242: 27(int64_t) Load 3241
+            3243: 27(int64_t) GroupNonUniformUMax 42 ExclusiveScan 3242
+            3244:   2795(ptr) AccessChain 34(data) 3240 2794 38
+                              Store 3244 3243
+            3245:      6(int) Load 8(invocation)
+            3246:   2802(ptr) AccessChain 34(data) 46 2794
+            3247: 28(i64vec4) Load 3246
+            3248:2801(i64vec2) VectorShuffle 3247 3247 0 1
+            3249:2801(i64vec2) GroupNonUniformUMax 42 ExclusiveScan 3248
+            3250:   2802(ptr) AccessChain 34(data) 3245 2794
+            3251: 28(i64vec4) Load 3250
+            3252: 28(i64vec4) VectorShuffle 3251 3249 4 5 2 3
+                              Store 3250 3252
+            3253:      6(int) Load 8(invocation)
+            3254:   2802(ptr) AccessChain 34(data) 57 2794
+            3255: 28(i64vec4) Load 3254
+            3256:2811(i64vec3) VectorShuffle 3255 3255 0 1 2
+            3257:2811(i64vec3) GroupNonUniformUMax 42 ExclusiveScan 3256
+            3258:   2802(ptr) AccessChain 34(data) 3253 2794
+            3259: 28(i64vec4) Load 3258
+            3260: 28(i64vec4) VectorShuffle 3259 3257 4 5 6 3
+                              Store 3258 3260
+            3261:      6(int) Load 8(invocation)
+            3262:   2802(ptr) AccessChain 34(data) 67 2794
+            3263: 28(i64vec4) Load 3262
+            3264: 28(i64vec4) GroupNonUniformUMax 42 ExclusiveScan 3263
+            3265:   2802(ptr) AccessChain 34(data) 3261 2794
+                              Store 3265 3264
+            3266:      6(int) Load 8(invocation)
+            3267:   2795(ptr) AccessChain 34(data) 37 2794 38
+            3268: 27(int64_t) Load 3267
+            3269: 27(int64_t) GroupNonUniformBitwiseAnd 42 ExclusiveScan 3268
+            3270:   2795(ptr) AccessChain 34(data) 3266 2794 38
+                              Store 3270 3269
+            3271:      6(int) Load 8(invocation)
+            3272:   2802(ptr) AccessChain 34(data) 46 2794
+            3273: 28(i64vec4) Load 3272
+            3274:2801(i64vec2) VectorShuffle 3273 3273 0 1
+            3275:2801(i64vec2) GroupNonUniformBitwiseAnd 42 ExclusiveScan 3274
+            3276:   2802(ptr) AccessChain 34(data) 3271 2794
+            3277: 28(i64vec4) Load 3276
+            3278: 28(i64vec4) VectorShuffle 3277 3275 4 5 2 3
+                              Store 3276 3278
+            3279:      6(int) Load 8(invocation)
+            3280:   2802(ptr) AccessChain 34(data) 57 2794
+            3281: 28(i64vec4) Load 3280
+            3282:2811(i64vec3) VectorShuffle 3281 3281 0 1 2
+            3283:2811(i64vec3) GroupNonUniformBitwiseAnd 42 ExclusiveScan 3282
+            3284:   2802(ptr) AccessChain 34(data) 3279 2794
+            3285: 28(i64vec4) Load 3284
+            3286: 28(i64vec4) VectorShuffle 3285 3283 4 5 6 3
+                              Store 3284 3286
+            3287:      6(int) Load 8(invocation)
+            3288:   2802(ptr) AccessChain 34(data) 67 2794
+            3289: 28(i64vec4) Load 3288
+            3290: 28(i64vec4) GroupNonUniformBitwiseAnd 42 ExclusiveScan 3289
+            3291:   2802(ptr) AccessChain 34(data) 3287 2794
+                              Store 3291 3290
+            3292:      6(int) Load 8(invocation)
+            3293:   2795(ptr) AccessChain 34(data) 37 2794 38
+            3294: 27(int64_t) Load 3293
+            3295: 27(int64_t) GroupNonUniformBitwiseOr 42 ExclusiveScan 3294
+            3296:   2795(ptr) AccessChain 34(data) 3292 2794 38
+                              Store 3296 3295
+            3297:      6(int) Load 8(invocation)
+            3298:   2802(ptr) AccessChain 34(data) 46 2794
+            3299: 28(i64vec4) Load 3298
+            3300:2801(i64vec2) VectorShuffle 3299 3299 0 1
+            3301:2801(i64vec2) GroupNonUniformBitwiseOr 42 ExclusiveScan 3300
+            3302:   2802(ptr) AccessChain 34(data) 3297 2794
+            3303: 28(i64vec4) Load 3302
+            3304: 28(i64vec4) VectorShuffle 3303 3301 4 5 2 3
+                              Store 3302 3304
+            3305:      6(int) Load 8(invocation)
+            3306:   2802(ptr) AccessChain 34(data) 57 2794
+            3307: 28(i64vec4) Load 3306
+            3308:2811(i64vec3) VectorShuffle 3307 3307 0 1 2
+            3309:2811(i64vec3) GroupNonUniformBitwiseOr 42 ExclusiveScan 3308
+            3310:   2802(ptr) AccessChain 34(data) 3305 2794
+            3311: 28(i64vec4) Load 3310
+            3312: 28(i64vec4) VectorShuffle 3311 3309 4 5 6 3
+                              Store 3310 3312
+            3313:      6(int) Load 8(invocation)
+            3314:   2802(ptr) AccessChain 34(data) 67 2794
+            3315: 28(i64vec4) Load 3314
+            3316: 28(i64vec4) GroupNonUniformBitwiseOr 42 ExclusiveScan 3315
+            3317:   2802(ptr) AccessChain 34(data) 3313 2794
+                              Store 3317 3316
+            3318:      6(int) Load 8(invocation)
+            3319:   2795(ptr) AccessChain 34(data) 37 2794 38
+            3320: 27(int64_t) Load 3319
+            3321: 27(int64_t) GroupNonUniformBitwiseXor 42 ExclusiveScan 3320
+            3322:   2795(ptr) AccessChain 34(data) 3318 2794 38
+                              Store 3322 3321
+            3323:      6(int) Load 8(invocation)
+            3324:   2802(ptr) AccessChain 34(data) 46 2794
+            3325: 28(i64vec4) Load 3324
+            3326:2801(i64vec2) VectorShuffle 3325 3325 0 1
+            3327:2801(i64vec2) GroupNonUniformBitwiseXor 42 ExclusiveScan 3326
+            3328:   2802(ptr) AccessChain 34(data) 3323 2794
+            3329: 28(i64vec4) Load 3328
+            3330: 28(i64vec4) VectorShuffle 3329 3327 4 5 2 3
+                              Store 3328 3330
+            3331:      6(int) Load 8(invocation)
+            3332:   2802(ptr) AccessChain 34(data) 57 2794
+            3333: 28(i64vec4) Load 3332
+            3334:2811(i64vec3) VectorShuffle 3333 3333 0 1 2
+            3335:2811(i64vec3) GroupNonUniformBitwiseXor 42 ExclusiveScan 3334
+            3336:   2802(ptr) AccessChain 34(data) 3331 2794
+            3337: 28(i64vec4) Load 3336
+            3338: 28(i64vec4) VectorShuffle 3337 3335 4 5 6 3
+                              Store 3336 3338
+            3339:      6(int) Load 8(invocation)
+            3340:   2802(ptr) AccessChain 34(data) 67 2794
+            3341: 28(i64vec4) Load 3340
+            3342: 28(i64vec4) GroupNonUniformBitwiseXor 42 ExclusiveScan 3341
+            3343:   2802(ptr) AccessChain 34(data) 3339 2794
+                              Store 3343 3342
+            3344:      6(int) Load 8(invocation)
+            3347:   3346(ptr) AccessChain 34(data) 37 3345 38
+            3348:29(float16_t) Load 3347
+            3349:29(float16_t) GroupNonUniformFAdd 42 Reduce 3348
+            3350:   3346(ptr) AccessChain 34(data) 3344 3345 38
+                              Store 3350 3349
+            3351:      6(int) Load 8(invocation)
+            3354:   3353(ptr) AccessChain 34(data) 46 3345
+            3355: 30(f16vec4) Load 3354
+            3356:3352(f16vec2) VectorShuffle 3355 3355 0 1
+            3357:3352(f16vec2) GroupNonUniformFAdd 42 Reduce 3356
+            3358:   3353(ptr) AccessChain 34(data) 3351 3345
+            3359: 30(f16vec4) Load 3358
+            3360: 30(f16vec4) VectorShuffle 3359 3357 4 5 2 3
+                              Store 3358 3360
+            3361:      6(int) Load 8(invocation)
+            3363:   3353(ptr) AccessChain 34(data) 57 3345
+            3364: 30(f16vec4) Load 3363
+            3365:3362(f16vec3) VectorShuffle 3364 3364 0 1 2
+            3366:3362(f16vec3) GroupNonUniformFAdd 42 Reduce 3365
+            3367:   3353(ptr) AccessChain 34(data) 3361 3345
+            3368: 30(f16vec4) Load 3367
+            3369: 30(f16vec4) VectorShuffle 3368 3366 4 5 6 3
+                              Store 3367 3369
+            3370:      6(int) Load 8(invocation)
+            3371:   3353(ptr) AccessChain 34(data) 67 3345
+            3372: 30(f16vec4) Load 3371
+            3373: 30(f16vec4) GroupNonUniformFAdd 42 Reduce 3372
+            3374:   3353(ptr) AccessChain 34(data) 3370 3345
+                              Store 3374 3373
+            3375:      6(int) Load 8(invocation)
+            3376:   3346(ptr) AccessChain 34(data) 37 3345 38
+            3377:29(float16_t) Load 3376
+            3378:29(float16_t) GroupNonUniformFMul 42 Reduce 3377
+            3379:   3346(ptr) AccessChain 34(data) 3375 3345 38
+                              Store 3379 3378
+            3380:      6(int) Load 8(invocation)
+            3381:   3353(ptr) AccessChain 34(data) 46 3345
+            3382: 30(f16vec4) Load 3381
+            3383:3352(f16vec2) VectorShuffle 3382 3382 0 1
+            3384:3352(f16vec2) GroupNonUniformFMul 42 Reduce 3383
+            3385:   3353(ptr) AccessChain 34(data) 3380 3345
+            3386: 30(f16vec4) Load 3385
+            3387: 30(f16vec4) VectorShuffle 3386 3384 4 5 2 3
+                              Store 3385 3387
+            3388:      6(int) Load 8(invocation)
+            3389:   3353(ptr) AccessChain 34(data) 57 3345
+            3390: 30(f16vec4) Load 3389
+            3391:3362(f16vec3) VectorShuffle 3390 3390 0 1 2
+            3392:3362(f16vec3) GroupNonUniformFMul 42 Reduce 3391
+            3393:   3353(ptr) AccessChain 34(data) 3388 3345
+            3394: 30(f16vec4) Load 3393
+            3395: 30(f16vec4) VectorShuffle 3394 3392 4 5 6 3
+                              Store 3393 3395
+            3396:      6(int) Load 8(invocation)
+            3397:   3353(ptr) AccessChain 34(data) 67 3345
+            3398: 30(f16vec4) Load 3397
+            3399: 30(f16vec4) GroupNonUniformFMul 42 Reduce 3398
+            3400:   3353(ptr) AccessChain 34(data) 3396 3345
+                              Store 3400 3399
+            3401:      6(int) Load 8(invocation)
+            3402:   3346(ptr) AccessChain 34(data) 37 3345 38
+            3403:29(float16_t) Load 3402
+            3404:29(float16_t) GroupNonUniformFMin 42 Reduce 3403
+            3405:   3346(ptr) AccessChain 34(data) 3401 3345 38
+                              Store 3405 3404
+            3406:      6(int) Load 8(invocation)
+            3407:   3353(ptr) AccessChain 34(data) 46 3345
+            3408: 30(f16vec4) Load 3407
+            3409:3352(f16vec2) VectorShuffle 3408 3408 0 1
+            3410:3352(f16vec2) GroupNonUniformFMin 42 Reduce 3409
+            3411:   3353(ptr) AccessChain 34(data) 3406 3345
+            3412: 30(f16vec4) Load 3411
+            3413: 30(f16vec4) VectorShuffle 3412 3410 4 5 2 3
+                              Store 3411 3413
+            3414:      6(int) Load 8(invocation)
+            3415:   3353(ptr) AccessChain 34(data) 57 3345
+            3416: 30(f16vec4) Load 3415
+            3417:3362(f16vec3) VectorShuffle 3416 3416 0 1 2
+            3418:3362(f16vec3) GroupNonUniformFMin 42 Reduce 3417
+            3419:   3353(ptr) AccessChain 34(data) 3414 3345
+            3420: 30(f16vec4) Load 3419
+            3421: 30(f16vec4) VectorShuffle 3420 3418 4 5 6 3
+                              Store 3419 3421
+            3422:      6(int) Load 8(invocation)
+            3423:   3353(ptr) AccessChain 34(data) 67 3345
+            3424: 30(f16vec4) Load 3423
+            3425: 30(f16vec4) GroupNonUniformFMin 42 Reduce 3424
+            3426:   3353(ptr) AccessChain 34(data) 3422 3345
+                              Store 3426 3425
+            3427:      6(int) Load 8(invocation)
+            3428:   3346(ptr) AccessChain 34(data) 37 3345 38
+            3429:29(float16_t) Load 3428
+            3430:29(float16_t) GroupNonUniformFMax 42 Reduce 3429
+            3431:   3346(ptr) AccessChain 34(data) 3427 3345 38
+                              Store 3431 3430
+            3432:      6(int) Load 8(invocation)
+            3433:   3353(ptr) AccessChain 34(data) 46 3345
+            3434: 30(f16vec4) Load 3433
+            3435:3352(f16vec2) VectorShuffle 3434 3434 0 1
+            3436:3352(f16vec2) GroupNonUniformFMax 42 Reduce 3435
+            3437:   3353(ptr) AccessChain 34(data) 3432 3345
+            3438: 30(f16vec4) Load 3437
+            3439: 30(f16vec4) VectorShuffle 3438 3436 4 5 2 3
+                              Store 3437 3439
+            3440:      6(int) Load 8(invocation)
+            3441:   3353(ptr) AccessChain 34(data) 57 3345
+            3442: 30(f16vec4) Load 3441
+            3443:3362(f16vec3) VectorShuffle 3442 3442 0 1 2
+            3444:3362(f16vec3) GroupNonUniformFMax 42 Reduce 3443
+            3445:   3353(ptr) AccessChain 34(data) 3440 3345
+            3446: 30(f16vec4) Load 3445
+            3447: 30(f16vec4) VectorShuffle 3446 3444 4 5 6 3
+                              Store 3445 3447
+            3448:      6(int) Load 8(invocation)
+            3449:   3353(ptr) AccessChain 34(data) 67 3345
+            3450: 30(f16vec4) Load 3449
+            3451: 30(f16vec4) GroupNonUniformFMax 42 Reduce 3450
+            3452:   3353(ptr) AccessChain 34(data) 3448 3345
+                              Store 3452 3451
+            3453:      6(int) Load 8(invocation)
+            3454:   3346(ptr) AccessChain 34(data) 37 3345 38
+            3455:29(float16_t) Load 3454
+            3456:29(float16_t) GroupNonUniformFAdd 42 InclusiveScan 3455
+            3457:   3346(ptr) AccessChain 34(data) 3453 3345 38
+                              Store 3457 3456
+            3458:      6(int) Load 8(invocation)
+            3459:   3353(ptr) AccessChain 34(data) 46 3345
+            3460: 30(f16vec4) Load 3459
+            3461:3352(f16vec2) VectorShuffle 3460 3460 0 1
+            3462:3352(f16vec2) GroupNonUniformFAdd 42 InclusiveScan 3461
+            3463:   3353(ptr) AccessChain 34(data) 3458 3345
+            3464: 30(f16vec4) Load 3463
+            3465: 30(f16vec4) VectorShuffle 3464 3462 4 5 2 3
+                              Store 3463 3465
+            3466:      6(int) Load 8(invocation)
+            3467:   3353(ptr) AccessChain 34(data) 57 3345
+            3468: 30(f16vec4) Load 3467
+            3469:3362(f16vec3) VectorShuffle 3468 3468 0 1 2
+            3470:3362(f16vec3) GroupNonUniformFAdd 42 InclusiveScan 3469
+            3471:   3353(ptr) AccessChain 34(data) 3466 3345
+            3472: 30(f16vec4) Load 3471
+            3473: 30(f16vec4) VectorShuffle 3472 3470 4 5 6 3
+                              Store 3471 3473
+            3474:      6(int) Load 8(invocation)
+            3475:   3353(ptr) AccessChain 34(data) 67 3345
+            3476: 30(f16vec4) Load 3475
+            3477: 30(f16vec4) GroupNonUniformFAdd 42 InclusiveScan 3476
+            3478:   3353(ptr) AccessChain 34(data) 3474 3345
+                              Store 3478 3477
+            3479:      6(int) Load 8(invocation)
+            3480:   3346(ptr) AccessChain 34(data) 37 3345 38
+            3481:29(float16_t) Load 3480
+            3482:29(float16_t) GroupNonUniformFMul 42 InclusiveScan 3481
+            3483:   3346(ptr) AccessChain 34(data) 3479 3345 38
+                              Store 3483 3482
+            3484:      6(int) Load 8(invocation)
+            3485:   3353(ptr) AccessChain 34(data) 46 3345
+            3486: 30(f16vec4) Load 3485
+            3487:3352(f16vec2) VectorShuffle 3486 3486 0 1
+            3488:3352(f16vec2) GroupNonUniformFMul 42 InclusiveScan 3487
+            3489:   3353(ptr) AccessChain 34(data) 3484 3345
+            3490: 30(f16vec4) Load 3489
+            3491: 30(f16vec4) VectorShuffle 3490 3488 4 5 2 3
+                              Store 3489 3491
+            3492:      6(int) Load 8(invocation)
+            3493:   3353(ptr) AccessChain 34(data) 57 3345
+            3494: 30(f16vec4) Load 3493
+            3495:3362(f16vec3) VectorShuffle 3494 3494 0 1 2
+            3496:3362(f16vec3) GroupNonUniformFMul 42 InclusiveScan 3495
+            3497:   3353(ptr) AccessChain 34(data) 3492 3345
+            3498: 30(f16vec4) Load 3497
+            3499: 30(f16vec4) VectorShuffle 3498 3496 4 5 6 3
+                              Store 3497 3499
+            3500:      6(int) Load 8(invocation)
+            3501:   3353(ptr) AccessChain 34(data) 67 3345
+            3502: 30(f16vec4) Load 3501
+            3503: 30(f16vec4) GroupNonUniformFMul 42 InclusiveScan 3502
+            3504:   3353(ptr) AccessChain 34(data) 3500 3345
+                              Store 3504 3503
+            3505:      6(int) Load 8(invocation)
+            3506:   3346(ptr) AccessChain 34(data) 37 3345 38
+            3507:29(float16_t) Load 3506
+            3508:29(float16_t) GroupNonUniformFMin 42 InclusiveScan 3507
+            3509:   3346(ptr) AccessChain 34(data) 3505 3345 38
+                              Store 3509 3508
+            3510:      6(int) Load 8(invocation)
+            3511:   3353(ptr) AccessChain 34(data) 46 3345
+            3512: 30(f16vec4) Load 3511
+            3513:3352(f16vec2) VectorShuffle 3512 3512 0 1
+            3514:3352(f16vec2) GroupNonUniformFMin 42 InclusiveScan 3513
+            3515:   3353(ptr) AccessChain 34(data) 3510 3345
+            3516: 30(f16vec4) Load 3515
+            3517: 30(f16vec4) VectorShuffle 3516 3514 4 5 2 3
+                              Store 3515 3517
+            3518:      6(int) Load 8(invocation)
+            3519:   3353(ptr) AccessChain 34(data) 57 3345
+            3520: 30(f16vec4) Load 3519
+            3521:3362(f16vec3) VectorShuffle 3520 3520 0 1 2
+            3522:3362(f16vec3) GroupNonUniformFMin 42 InclusiveScan 3521
+            3523:   3353(ptr) AccessChain 34(data) 3518 3345
+            3524: 30(f16vec4) Load 3523
+            3525: 30(f16vec4) VectorShuffle 3524 3522 4 5 6 3
+                              Store 3523 3525
+            3526:      6(int) Load 8(invocation)
+            3527:   3353(ptr) AccessChain 34(data) 67 3345
+            3528: 30(f16vec4) Load 3527
+            3529: 30(f16vec4) GroupNonUniformFMin 42 InclusiveScan 3528
+            3530:   3353(ptr) AccessChain 34(data) 3526 3345
+                              Store 3530 3529
+            3531:      6(int) Load 8(invocation)
+            3532:   3346(ptr) AccessChain 34(data) 37 3345 38
+            3533:29(float16_t) Load 3532
+            3534:29(float16_t) GroupNonUniformFMax 42 InclusiveScan 3533
+            3535:   3346(ptr) AccessChain 34(data) 3531 3345 38
+                              Store 3535 3534
+            3536:      6(int) Load 8(invocation)
+            3537:   3353(ptr) AccessChain 34(data) 46 3345
+            3538: 30(f16vec4) Load 3537
+            3539:3352(f16vec2) VectorShuffle 3538 3538 0 1
+            3540:3352(f16vec2) GroupNonUniformFMax 42 InclusiveScan 3539
+            3541:   3353(ptr) AccessChain 34(data) 3536 3345
+            3542: 30(f16vec4) Load 3541
+            3543: 30(f16vec4) VectorShuffle 3542 3540 4 5 2 3
+                              Store 3541 3543
+            3544:      6(int) Load 8(invocation)
+            3545:   3353(ptr) AccessChain 34(data) 57 3345
+            3546: 30(f16vec4) Load 3545
+            3547:3362(f16vec3) VectorShuffle 3546 3546 0 1 2
+            3548:3362(f16vec3) GroupNonUniformFMax 42 InclusiveScan 3547
+            3549:   3353(ptr) AccessChain 34(data) 3544 3345
+            3550: 30(f16vec4) Load 3549
+            3551: 30(f16vec4) VectorShuffle 3550 3548 4 5 6 3
+                              Store 3549 3551
+            3552:      6(int) Load 8(invocation)
+            3553:   3353(ptr) AccessChain 34(data) 67 3345
+            3554: 30(f16vec4) Load 3553
+            3555: 30(f16vec4) GroupNonUniformFMax 42 InclusiveScan 3554
+            3556:   3353(ptr) AccessChain 34(data) 3552 3345
+                              Store 3556 3555
+            3557:      6(int) Load 8(invocation)
+            3558:   3346(ptr) AccessChain 34(data) 37 3345 38
+            3559:29(float16_t) Load 3558
+            3560:29(float16_t) GroupNonUniformFAdd 42 ExclusiveScan 3559
+            3561:   3346(ptr) AccessChain 34(data) 3557 3345 38
+                              Store 3561 3560
+            3562:      6(int) Load 8(invocation)
+            3563:   3353(ptr) AccessChain 34(data) 46 3345
+            3564: 30(f16vec4) Load 3563
+            3565:3352(f16vec2) VectorShuffle 3564 3564 0 1
+            3566:3352(f16vec2) GroupNonUniformFAdd 42 ExclusiveScan 3565
+            3567:   3353(ptr) AccessChain 34(data) 3562 3345
+            3568: 30(f16vec4) Load 3567
+            3569: 30(f16vec4) VectorShuffle 3568 3566 4 5 2 3
+                              Store 3567 3569
+            3570:      6(int) Load 8(invocation)
+            3571:   3353(ptr) AccessChain 34(data) 57 3345
+            3572: 30(f16vec4) Load 3571
+            3573:3362(f16vec3) VectorShuffle 3572 3572 0 1 2
+            3574:3362(f16vec3) GroupNonUniformFAdd 42 ExclusiveScan 3573
+            3575:   3353(ptr) AccessChain 34(data) 3570 3345
+            3576: 30(f16vec4) Load 3575
+            3577: 30(f16vec4) VectorShuffle 3576 3574 4 5 6 3
+                              Store 3575 3577
+            3578:      6(int) Load 8(invocation)
+            3579:   3353(ptr) AccessChain 34(data) 67 3345
+            3580: 30(f16vec4) Load 3579
+            3581: 30(f16vec4) GroupNonUniformFAdd 42 ExclusiveScan 3580
+            3582:   3353(ptr) AccessChain 34(data) 3578 3345
+                              Store 3582 3581
+            3583:      6(int) Load 8(invocation)
+            3584:   3346(ptr) AccessChain 34(data) 37 3345 38
+            3585:29(float16_t) Load 3584
+            3586:29(float16_t) GroupNonUniformFMul 42 ExclusiveScan 3585
+            3587:   3346(ptr) AccessChain 34(data) 3583 3345 38
+                              Store 3587 3586
+            3588:      6(int) Load 8(invocation)
+            3589:   3353(ptr) AccessChain 34(data) 46 3345
+            3590: 30(f16vec4) Load 3589
+            3591:3352(f16vec2) VectorShuffle 3590 3590 0 1
+            3592:3352(f16vec2) GroupNonUniformFMul 42 ExclusiveScan 3591
+            3593:   3353(ptr) AccessChain 34(data) 3588 3345
+            3594: 30(f16vec4) Load 3593
+            3595: 30(f16vec4) VectorShuffle 3594 3592 4 5 2 3
+                              Store 3593 3595
+            3596:      6(int) Load 8(invocation)
+            3597:   3353(ptr) AccessChain 34(data) 57 3345
+            3598: 30(f16vec4) Load 3597
+            3599:3362(f16vec3) VectorShuffle 3598 3598 0 1 2
+            3600:3362(f16vec3) GroupNonUniformFMul 42 ExclusiveScan 3599
+            3601:   3353(ptr) AccessChain 34(data) 3596 3345
+            3602: 30(f16vec4) Load 3601
+            3603: 30(f16vec4) VectorShuffle 3602 3600 4 5 6 3
+                              Store 3601 3603
+            3604:      6(int) Load 8(invocation)
+            3605:   3353(ptr) AccessChain 34(data) 67 3345
+            3606: 30(f16vec4) Load 3605
+            3607: 30(f16vec4) GroupNonUniformFMul 42 ExclusiveScan 3606
+            3608:   3353(ptr) AccessChain 34(data) 3604 3345
+                              Store 3608 3607
+            3609:      6(int) Load 8(invocation)
+            3610:   3346(ptr) AccessChain 34(data) 37 3345 38
+            3611:29(float16_t) Load 3610
+            3612:29(float16_t) GroupNonUniformFMin 42 ExclusiveScan 3611
+            3613:   3346(ptr) AccessChain 34(data) 3609 3345 38
+                              Store 3613 3612
+            3614:      6(int) Load 8(invocation)
+            3615:   3353(ptr) AccessChain 34(data) 46 3345
+            3616: 30(f16vec4) Load 3615
+            3617:3352(f16vec2) VectorShuffle 3616 3616 0 1
+            3618:3352(f16vec2) GroupNonUniformFMin 42 ExclusiveScan 3617
+            3619:   3353(ptr) AccessChain 34(data) 3614 3345
+            3620: 30(f16vec4) Load 3619
+            3621: 30(f16vec4) VectorShuffle 3620 3618 4 5 2 3
+                              Store 3619 3621
+            3622:      6(int) Load 8(invocation)
+            3623:   3353(ptr) AccessChain 34(data) 57 3345
+            3624: 30(f16vec4) Load 3623
+            3625:3362(f16vec3) VectorShuffle 3624 3624 0 1 2
+            3626:3362(f16vec3) GroupNonUniformFMin 42 ExclusiveScan 3625
+            3627:   3353(ptr) AccessChain 34(data) 3622 3345
+            3628: 30(f16vec4) Load 3627
+            3629: 30(f16vec4) VectorShuffle 3628 3626 4 5 6 3
+                              Store 3627 3629
+            3630:      6(int) Load 8(invocation)
+            3631:   3353(ptr) AccessChain 34(data) 67 3345
+            3632: 30(f16vec4) Load 3631
+            3633: 30(f16vec4) GroupNonUniformFMin 42 ExclusiveScan 3632
+            3634:   3353(ptr) AccessChain 34(data) 3630 3345
+                              Store 3634 3633
+            3635:      6(int) Load 8(invocation)
+            3636:   3346(ptr) AccessChain 34(data) 37 3345 38
+            3637:29(float16_t) Load 3636
+            3638:29(float16_t) GroupNonUniformFMax 42 ExclusiveScan 3637
+            3639:   3346(ptr) AccessChain 34(data) 3635 3345 38
+                              Store 3639 3638
+            3640:      6(int) Load 8(invocation)
+            3641:   3353(ptr) AccessChain 34(data) 46 3345
+            3642: 30(f16vec4) Load 3641
+            3643:3352(f16vec2) VectorShuffle 3642 3642 0 1
+            3644:3352(f16vec2) GroupNonUniformFMax 42 ExclusiveScan 3643
+            3645:   3353(ptr) AccessChain 34(data) 3640 3345
+            3646: 30(f16vec4) Load 3645
+            3647: 30(f16vec4) VectorShuffle 3646 3644 4 5 2 3
+                              Store 3645 3647
+            3648:      6(int) Load 8(invocation)
+            3649:   3353(ptr) AccessChain 34(data) 57 3345
+            3650: 30(f16vec4) Load 3649
+            3651:3362(f16vec3) VectorShuffle 3650 3650 0 1 2
+            3652:3362(f16vec3) GroupNonUniformFMax 42 ExclusiveScan 3651
+            3653:   3353(ptr) AccessChain 34(data) 3648 3345
+            3654: 30(f16vec4) Load 3653
+            3655: 30(f16vec4) VectorShuffle 3654 3652 4 5 6 3
+                              Store 3653 3655
+            3656:      6(int) Load 8(invocation)
+            3657:   3353(ptr) AccessChain 34(data) 67 3345
+            3658: 30(f16vec4) Load 3657
+            3659: 30(f16vec4) GroupNonUniformFMax 42 ExclusiveScan 3658
+            3660:   3353(ptr) AccessChain 34(data) 3656 3345
+                              Store 3660 3659
+                              Return
+                              FunctionEnd
diff --git a/Test/baseResults/spv.subgroupExtendedTypesArithmeticNeg.comp.out b/Test/baseResults/spv.subgroupExtendedTypesArithmeticNeg.comp.out
new file mode 100644
index 0000000..4750559
--- /dev/null
+++ b/Test/baseResults/spv.subgroupExtendedTypesArithmeticNeg.comp.out
@@ -0,0 +1,557 @@
+spv.subgroupExtendedTypesArithmeticNeg.comp
+ERROR: 0:26: ' temp int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:27: ' temp 2-component vector of int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:28: ' temp 3-component vector of int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:29: 'layout( column_major std430) buffer 4-component vector of int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:31: ' temp int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:32: ' temp 2-component vector of int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:33: ' temp 3-component vector of int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:34: 'layout( column_major std430) buffer 4-component vector of int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:36: ' temp int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:37: ' temp 2-component vector of int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:38: ' temp 3-component vector of int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:39: 'layout( column_major std430) buffer 4-component vector of int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:41: ' temp int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:42: ' temp 2-component vector of int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:43: ' temp 3-component vector of int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:44: 'layout( column_major std430) buffer 4-component vector of int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:46: ' temp int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:47: ' temp 2-component vector of int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:48: ' temp 3-component vector of int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:49: 'layout( column_major std430) buffer 4-component vector of int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:51: ' temp int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:52: ' temp 2-component vector of int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:53: ' temp 3-component vector of int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:54: 'layout( column_major std430) buffer 4-component vector of int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:56: ' temp int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:57: ' temp 2-component vector of int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:58: ' temp 3-component vector of int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:59: 'layout( column_major std430) buffer 4-component vector of int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:61: ' temp int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:62: ' temp 2-component vector of int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:63: ' temp 3-component vector of int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:64: 'layout( column_major std430) buffer 4-component vector of int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:66: ' temp int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:67: ' temp 2-component vector of int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:68: ' temp 3-component vector of int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:69: 'layout( column_major std430) buffer 4-component vector of int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:71: ' temp int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:72: ' temp 2-component vector of int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:73: ' temp 3-component vector of int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:74: 'layout( column_major std430) buffer 4-component vector of int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:76: ' temp int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:77: ' temp 2-component vector of int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:78: ' temp 3-component vector of int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:79: 'layout( column_major std430) buffer 4-component vector of int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:81: ' temp int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:82: ' temp 2-component vector of int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:83: ' temp 3-component vector of int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:84: 'layout( column_major std430) buffer 4-component vector of int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:86: ' temp int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:87: ' temp 2-component vector of int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:88: ' temp 3-component vector of int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:89: 'layout( column_major std430) buffer 4-component vector of int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:91: ' temp int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:92: ' temp 2-component vector of int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:93: ' temp 3-component vector of int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:94: 'layout( column_major std430) buffer 4-component vector of int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:96: ' temp int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:97: ' temp 2-component vector of int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:98: ' temp 3-component vector of int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:99: 'layout( column_major std430) buffer 4-component vector of int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:101: ' temp int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:102: ' temp 2-component vector of int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:103: ' temp 3-component vector of int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:104: 'layout( column_major std430) buffer 4-component vector of int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:106: ' temp int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:107: ' temp 2-component vector of int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:108: ' temp 3-component vector of int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:109: 'layout( column_major std430) buffer 4-component vector of int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:111: ' temp int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:112: ' temp 2-component vector of int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:113: ' temp 3-component vector of int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:114: 'layout( column_major std430) buffer 4-component vector of int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:116: ' temp int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:117: ' temp 2-component vector of int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:118: ' temp 3-component vector of int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:119: 'layout( column_major std430) buffer 4-component vector of int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:121: ' temp int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:122: ' temp 2-component vector of int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:123: ' temp 3-component vector of int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:124: 'layout( column_major std430) buffer 4-component vector of int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:126: ' temp int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:127: ' temp 2-component vector of int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:128: ' temp 3-component vector of int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:129: 'layout( column_major std430) buffer 4-component vector of int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:131: ' temp uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:132: ' temp 2-component vector of uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:133: ' temp 3-component vector of uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:134: 'layout( column_major std430) buffer 4-component vector of uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:136: ' temp uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:137: ' temp 2-component vector of uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:138: ' temp 3-component vector of uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:139: 'layout( column_major std430) buffer 4-component vector of uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:141: ' temp uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:142: ' temp 2-component vector of uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:143: ' temp 3-component vector of uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:144: 'layout( column_major std430) buffer 4-component vector of uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:146: ' temp uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:147: ' temp 2-component vector of uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:148: ' temp 3-component vector of uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:149: 'layout( column_major std430) buffer 4-component vector of uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:151: ' temp uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:152: ' temp 2-component vector of uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:153: ' temp 3-component vector of uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:154: 'layout( column_major std430) buffer 4-component vector of uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:156: ' temp uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:157: ' temp 2-component vector of uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:158: ' temp 3-component vector of uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:159: 'layout( column_major std430) buffer 4-component vector of uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:161: ' temp uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:162: ' temp 2-component vector of uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:163: ' temp 3-component vector of uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:164: 'layout( column_major std430) buffer 4-component vector of uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:166: ' temp uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:167: ' temp 2-component vector of uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:168: ' temp 3-component vector of uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:169: 'layout( column_major std430) buffer 4-component vector of uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:171: ' temp uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:172: ' temp 2-component vector of uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:173: ' temp 3-component vector of uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:174: 'layout( column_major std430) buffer 4-component vector of uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:176: ' temp uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:177: ' temp 2-component vector of uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:178: ' temp 3-component vector of uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:179: 'layout( column_major std430) buffer 4-component vector of uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:181: ' temp uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:182: ' temp 2-component vector of uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:183: ' temp 3-component vector of uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:184: 'layout( column_major std430) buffer 4-component vector of uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:186: ' temp uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:187: ' temp 2-component vector of uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:188: ' temp 3-component vector of uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:189: 'layout( column_major std430) buffer 4-component vector of uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:191: ' temp uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:192: ' temp 2-component vector of uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:193: ' temp 3-component vector of uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:194: 'layout( column_major std430) buffer 4-component vector of uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:196: ' temp uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:197: ' temp 2-component vector of uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:198: ' temp 3-component vector of uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:199: 'layout( column_major std430) buffer 4-component vector of uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:201: ' temp uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:202: ' temp 2-component vector of uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:203: ' temp 3-component vector of uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:204: 'layout( column_major std430) buffer 4-component vector of uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:206: ' temp uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:207: ' temp 2-component vector of uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:208: ' temp 3-component vector of uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:209: 'layout( column_major std430) buffer 4-component vector of uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:211: ' temp uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:212: ' temp 2-component vector of uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:213: ' temp 3-component vector of uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:214: 'layout( column_major std430) buffer 4-component vector of uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:216: ' temp uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:217: ' temp 2-component vector of uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:218: ' temp 3-component vector of uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:219: 'layout( column_major std430) buffer 4-component vector of uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:221: ' temp uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:222: ' temp 2-component vector of uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:223: ' temp 3-component vector of uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:224: 'layout( column_major std430) buffer 4-component vector of uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:226: ' temp uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:227: ' temp 2-component vector of uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:228: ' temp 3-component vector of uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:229: 'layout( column_major std430) buffer 4-component vector of uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:231: ' temp uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:232: ' temp 2-component vector of uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:233: ' temp 3-component vector of uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:234: 'layout( column_major std430) buffer 4-component vector of uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:236: ' temp int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:237: ' temp 2-component vector of int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:238: ' temp 3-component vector of int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:239: 'layout( column_major std430) buffer 4-component vector of int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:241: ' temp int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:242: ' temp 2-component vector of int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:243: ' temp 3-component vector of int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:244: 'layout( column_major std430) buffer 4-component vector of int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:246: ' temp int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:247: ' temp 2-component vector of int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:248: ' temp 3-component vector of int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:249: 'layout( column_major std430) buffer 4-component vector of int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:251: ' temp int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:252: ' temp 2-component vector of int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:253: ' temp 3-component vector of int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:254: 'layout( column_major std430) buffer 4-component vector of int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:256: ' temp int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:257: ' temp 2-component vector of int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:258: ' temp 3-component vector of int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:259: 'layout( column_major std430) buffer 4-component vector of int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:261: ' temp int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:262: ' temp 2-component vector of int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:263: ' temp 3-component vector of int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:264: 'layout( column_major std430) buffer 4-component vector of int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:266: ' temp int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:267: ' temp 2-component vector of int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:268: ' temp 3-component vector of int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:269: 'layout( column_major std430) buffer 4-component vector of int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:271: ' temp int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:272: ' temp 2-component vector of int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:273: ' temp 3-component vector of int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:274: 'layout( column_major std430) buffer 4-component vector of int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:276: ' temp int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:277: ' temp 2-component vector of int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:278: ' temp 3-component vector of int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:279: 'layout( column_major std430) buffer 4-component vector of int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:281: ' temp int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:282: ' temp 2-component vector of int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:283: ' temp 3-component vector of int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:284: 'layout( column_major std430) buffer 4-component vector of int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:286: ' temp int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:287: ' temp 2-component vector of int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:288: ' temp 3-component vector of int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:289: 'layout( column_major std430) buffer 4-component vector of int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:291: ' temp int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:292: ' temp 2-component vector of int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:293: ' temp 3-component vector of int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:294: 'layout( column_major std430) buffer 4-component vector of int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:296: ' temp int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:297: ' temp 2-component vector of int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:298: ' temp 3-component vector of int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:299: 'layout( column_major std430) buffer 4-component vector of int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:301: ' temp int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:302: ' temp 2-component vector of int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:303: ' temp 3-component vector of int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:304: 'layout( column_major std430) buffer 4-component vector of int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:306: ' temp int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:307: ' temp 2-component vector of int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:308: ' temp 3-component vector of int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:309: 'layout( column_major std430) buffer 4-component vector of int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:311: ' temp int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:312: ' temp 2-component vector of int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:313: ' temp 3-component vector of int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:314: 'layout( column_major std430) buffer 4-component vector of int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:316: ' temp int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:317: ' temp 2-component vector of int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:318: ' temp 3-component vector of int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:319: 'layout( column_major std430) buffer 4-component vector of int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:321: ' temp int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:322: ' temp 2-component vector of int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:323: ' temp 3-component vector of int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:324: 'layout( column_major std430) buffer 4-component vector of int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:326: ' temp int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:327: ' temp 2-component vector of int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:328: ' temp 3-component vector of int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:329: 'layout( column_major std430) buffer 4-component vector of int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:331: ' temp int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:332: ' temp 2-component vector of int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:333: ' temp 3-component vector of int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:334: 'layout( column_major std430) buffer 4-component vector of int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:336: ' temp int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:337: ' temp 2-component vector of int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:338: ' temp 3-component vector of int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:339: 'layout( column_major std430) buffer 4-component vector of int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:341: ' temp uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:342: ' temp 2-component vector of uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:343: ' temp 3-component vector of uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:344: 'layout( column_major std430) buffer 4-component vector of uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:346: ' temp uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:347: ' temp 2-component vector of uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:348: ' temp 3-component vector of uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:349: 'layout( column_major std430) buffer 4-component vector of uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:351: ' temp uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:352: ' temp 2-component vector of uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:353: ' temp 3-component vector of uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:354: 'layout( column_major std430) buffer 4-component vector of uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:356: ' temp uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:357: ' temp 2-component vector of uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:358: ' temp 3-component vector of uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:359: 'layout( column_major std430) buffer 4-component vector of uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:361: ' temp uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:362: ' temp 2-component vector of uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:363: ' temp 3-component vector of uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:364: 'layout( column_major std430) buffer 4-component vector of uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:366: ' temp uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:367: ' temp 2-component vector of uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:368: ' temp 3-component vector of uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:369: 'layout( column_major std430) buffer 4-component vector of uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:371: ' temp uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:372: ' temp 2-component vector of uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:373: ' temp 3-component vector of uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:374: 'layout( column_major std430) buffer 4-component vector of uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:376: ' temp uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:377: ' temp 2-component vector of uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:378: ' temp 3-component vector of uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:379: 'layout( column_major std430) buffer 4-component vector of uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:381: ' temp uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:382: ' temp 2-component vector of uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:383: ' temp 3-component vector of uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:384: 'layout( column_major std430) buffer 4-component vector of uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:386: ' temp uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:387: ' temp 2-component vector of uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:388: ' temp 3-component vector of uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:389: 'layout( column_major std430) buffer 4-component vector of uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:391: ' temp uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:392: ' temp 2-component vector of uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:393: ' temp 3-component vector of uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:394: 'layout( column_major std430) buffer 4-component vector of uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:396: ' temp uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:397: ' temp 2-component vector of uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:398: ' temp 3-component vector of uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:399: 'layout( column_major std430) buffer 4-component vector of uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:401: ' temp uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:402: ' temp 2-component vector of uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:403: ' temp 3-component vector of uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:404: 'layout( column_major std430) buffer 4-component vector of uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:406: ' temp uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:407: ' temp 2-component vector of uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:408: ' temp 3-component vector of uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:409: 'layout( column_major std430) buffer 4-component vector of uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:411: ' temp uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:412: ' temp 2-component vector of uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:413: ' temp 3-component vector of uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:414: 'layout( column_major std430) buffer 4-component vector of uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:416: ' temp uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:417: ' temp 2-component vector of uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:418: ' temp 3-component vector of uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:419: 'layout( column_major std430) buffer 4-component vector of uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:421: ' temp uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:422: ' temp 2-component vector of uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:423: ' temp 3-component vector of uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:424: 'layout( column_major std430) buffer 4-component vector of uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:426: ' temp uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:427: ' temp 2-component vector of uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:428: ' temp 3-component vector of uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:429: 'layout( column_major std430) buffer 4-component vector of uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:431: ' temp uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:432: ' temp 2-component vector of uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:433: ' temp 3-component vector of uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:434: 'layout( column_major std430) buffer 4-component vector of uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:436: ' temp uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:437: ' temp 2-component vector of uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:438: ' temp 3-component vector of uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:439: 'layout( column_major std430) buffer 4-component vector of uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:441: ' temp uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:442: ' temp 2-component vector of uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:443: ' temp 3-component vector of uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:444: 'layout( column_major std430) buffer 4-component vector of uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:446: ' temp int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:447: ' temp 2-component vector of int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:448: ' temp 3-component vector of int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:449: 'layout( column_major std430) buffer 4-component vector of int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:451: ' temp int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:452: ' temp 2-component vector of int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:453: ' temp 3-component vector of int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:454: 'layout( column_major std430) buffer 4-component vector of int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:456: ' temp int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:457: ' temp 2-component vector of int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:458: ' temp 3-component vector of int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:459: 'layout( column_major std430) buffer 4-component vector of int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:461: ' temp int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:462: ' temp 2-component vector of int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:463: ' temp 3-component vector of int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:464: 'layout( column_major std430) buffer 4-component vector of int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:466: ' temp int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:467: ' temp 2-component vector of int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:468: ' temp 3-component vector of int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:469: 'layout( column_major std430) buffer 4-component vector of int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:471: ' temp int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:472: ' temp 2-component vector of int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:473: ' temp 3-component vector of int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:474: 'layout( column_major std430) buffer 4-component vector of int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:476: ' temp int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:477: ' temp 2-component vector of int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:478: ' temp 3-component vector of int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:479: 'layout( column_major std430) buffer 4-component vector of int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:481: ' temp int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:482: ' temp 2-component vector of int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:483: ' temp 3-component vector of int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:484: 'layout( column_major std430) buffer 4-component vector of int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:486: ' temp int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:487: ' temp 2-component vector of int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:488: ' temp 3-component vector of int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:489: 'layout( column_major std430) buffer 4-component vector of int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:491: ' temp int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:492: ' temp 2-component vector of int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:493: ' temp 3-component vector of int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:494: 'layout( column_major std430) buffer 4-component vector of int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:496: ' temp int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:497: ' temp 2-component vector of int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:498: ' temp 3-component vector of int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:499: 'layout( column_major std430) buffer 4-component vector of int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:501: ' temp int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:502: ' temp 2-component vector of int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:503: ' temp 3-component vector of int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:504: 'layout( column_major std430) buffer 4-component vector of int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:506: ' temp int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:507: ' temp 2-component vector of int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:508: ' temp 3-component vector of int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:509: 'layout( column_major std430) buffer 4-component vector of int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:511: ' temp int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:512: ' temp 2-component vector of int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:513: ' temp 3-component vector of int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:514: 'layout( column_major std430) buffer 4-component vector of int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:516: ' temp int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:517: ' temp 2-component vector of int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:518: ' temp 3-component vector of int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:519: 'layout( column_major std430) buffer 4-component vector of int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:521: ' temp int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:522: ' temp 2-component vector of int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:523: ' temp 3-component vector of int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:524: 'layout( column_major std430) buffer 4-component vector of int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:526: ' temp int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:527: ' temp 2-component vector of int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:528: ' temp 3-component vector of int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:529: 'layout( column_major std430) buffer 4-component vector of int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:531: ' temp int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:532: ' temp 2-component vector of int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:533: ' temp 3-component vector of int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:534: 'layout( column_major std430) buffer 4-component vector of int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:536: ' temp int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:537: ' temp 2-component vector of int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:538: ' temp 3-component vector of int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:539: 'layout( column_major std430) buffer 4-component vector of int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:541: ' temp int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:542: ' temp 2-component vector of int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:543: ' temp 3-component vector of int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:544: 'layout( column_major std430) buffer 4-component vector of int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:546: ' temp int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:547: ' temp 2-component vector of int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:548: ' temp 3-component vector of int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:549: 'layout( column_major std430) buffer 4-component vector of int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:551: ' temp uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:552: ' temp 2-component vector of uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:553: ' temp 3-component vector of uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:554: 'layout( column_major std430) buffer 4-component vector of uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:556: ' temp uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:557: ' temp 2-component vector of uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:558: ' temp 3-component vector of uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:559: 'layout( column_major std430) buffer 4-component vector of uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:561: ' temp uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:562: ' temp 2-component vector of uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:563: ' temp 3-component vector of uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:564: 'layout( column_major std430) buffer 4-component vector of uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:566: ' temp uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:567: ' temp 2-component vector of uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:568: ' temp 3-component vector of uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:569: 'layout( column_major std430) buffer 4-component vector of uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:571: ' temp uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:572: ' temp 2-component vector of uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:573: ' temp 3-component vector of uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:574: 'layout( column_major std430) buffer 4-component vector of uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:576: ' temp uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:577: ' temp 2-component vector of uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:578: ' temp 3-component vector of uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:579: 'layout( column_major std430) buffer 4-component vector of uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:581: ' temp uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:582: ' temp 2-component vector of uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:583: ' temp 3-component vector of uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:584: 'layout( column_major std430) buffer 4-component vector of uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:586: ' temp uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:587: ' temp 2-component vector of uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:588: ' temp 3-component vector of uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:589: 'layout( column_major std430) buffer 4-component vector of uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:591: ' temp uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:592: ' temp 2-component vector of uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:593: ' temp 3-component vector of uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:594: 'layout( column_major std430) buffer 4-component vector of uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:596: ' temp uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:597: ' temp 2-component vector of uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:598: ' temp 3-component vector of uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:599: 'layout( column_major std430) buffer 4-component vector of uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:601: ' temp uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:602: ' temp 2-component vector of uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:603: ' temp 3-component vector of uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:604: 'layout( column_major std430) buffer 4-component vector of uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:606: ' temp uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:607: ' temp 2-component vector of uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:608: ' temp 3-component vector of uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:609: 'layout( column_major std430) buffer 4-component vector of uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:611: ' temp uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:612: ' temp 2-component vector of uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:613: ' temp 3-component vector of uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:614: 'layout( column_major std430) buffer 4-component vector of uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:616: ' temp uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:617: ' temp 2-component vector of uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:618: ' temp 3-component vector of uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:619: 'layout( column_major std430) buffer 4-component vector of uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:621: ' temp uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:622: ' temp 2-component vector of uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:623: ' temp 3-component vector of uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:624: 'layout( column_major std430) buffer 4-component vector of uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:626: ' temp uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:627: ' temp 2-component vector of uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:628: ' temp 3-component vector of uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:629: 'layout( column_major std430) buffer 4-component vector of uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:631: ' temp uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:632: ' temp 2-component vector of uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:633: ' temp 3-component vector of uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:634: 'layout( column_major std430) buffer 4-component vector of uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:636: ' temp uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:637: ' temp 2-component vector of uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:638: ' temp 3-component vector of uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:639: 'layout( column_major std430) buffer 4-component vector of uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:641: ' temp uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:642: ' temp 2-component vector of uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:643: ' temp 3-component vector of uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:644: 'layout( column_major std430) buffer 4-component vector of uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:646: ' temp uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:647: ' temp 2-component vector of uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:648: ' temp 3-component vector of uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:649: 'layout( column_major std430) buffer 4-component vector of uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:651: ' temp uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:652: ' temp 2-component vector of uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:653: ' temp 3-component vector of uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:654: 'layout( column_major std430) buffer 4-component vector of uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:656: ' temp float16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_float16
+ERROR: 0:657: ' temp 2-component vector of float16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_float16
+ERROR: 0:658: ' temp 3-component vector of float16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_float16
+ERROR: 0:659: 'layout( column_major std430) buffer 4-component vector of float16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_float16
+ERROR: 0:661: ' temp float16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_float16
+ERROR: 0:662: ' temp 2-component vector of float16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_float16
+ERROR: 0:663: ' temp 3-component vector of float16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_float16
+ERROR: 0:664: 'layout( column_major std430) buffer 4-component vector of float16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_float16
+ERROR: 0:666: ' temp float16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_float16
+ERROR: 0:667: ' temp 2-component vector of float16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_float16
+ERROR: 0:668: ' temp 3-component vector of float16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_float16
+ERROR: 0:669: 'layout( column_major std430) buffer 4-component vector of float16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_float16
+ERROR: 0:671: ' temp float16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_float16
+ERROR: 0:672: ' temp 2-component vector of float16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_float16
+ERROR: 0:673: ' temp 3-component vector of float16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_float16
+ERROR: 0:674: 'layout( column_major std430) buffer 4-component vector of float16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_float16
+ERROR: 0:676: ' temp float16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_float16
+ERROR: 0:677: ' temp 2-component vector of float16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_float16
+ERROR: 0:678: ' temp 3-component vector of float16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_float16
+ERROR: 0:679: 'layout( column_major std430) buffer 4-component vector of float16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_float16
+ERROR: 0:681: ' temp float16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_float16
+ERROR: 0:682: ' temp 2-component vector of float16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_float16
+ERROR: 0:683: ' temp 3-component vector of float16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_float16
+ERROR: 0:684: 'layout( column_major std430) buffer 4-component vector of float16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_float16
+ERROR: 0:686: ' temp float16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_float16
+ERROR: 0:687: ' temp 2-component vector of float16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_float16
+ERROR: 0:688: ' temp 3-component vector of float16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_float16
+ERROR: 0:689: 'layout( column_major std430) buffer 4-component vector of float16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_float16
+ERROR: 0:691: ' temp float16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_float16
+ERROR: 0:692: ' temp 2-component vector of float16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_float16
+ERROR: 0:693: ' temp 3-component vector of float16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_float16
+ERROR: 0:694: 'layout( column_major std430) buffer 4-component vector of float16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_float16
+ERROR: 0:696: ' temp float16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_float16
+ERROR: 0:697: ' temp 2-component vector of float16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_float16
+ERROR: 0:698: ' temp 3-component vector of float16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_float16
+ERROR: 0:699: 'layout( column_major std430) buffer 4-component vector of float16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_float16
+ERROR: 0:701: ' temp float16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_float16
+ERROR: 0:702: ' temp 2-component vector of float16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_float16
+ERROR: 0:703: ' temp 3-component vector of float16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_float16
+ERROR: 0:704: 'layout( column_major std430) buffer 4-component vector of float16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_float16
+ERROR: 0:706: ' temp float16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_float16
+ERROR: 0:707: ' temp 2-component vector of float16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_float16
+ERROR: 0:708: ' temp 3-component vector of float16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_float16
+ERROR: 0:709: 'layout( column_major std430) buffer 4-component vector of float16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_float16
+ERROR: 0:711: ' temp float16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_float16
+ERROR: 0:712: ' temp 2-component vector of float16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_float16
+ERROR: 0:713: ' temp 3-component vector of float16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_float16
+ERROR: 0:714: 'layout( column_major std430) buffer 4-component vector of float16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_float16
+ERROR: 552 compilation errors.  No code generated.
+
+
+SPIR-V is not generated for failed compile or link
diff --git a/Test/baseResults/spv.subgroupExtendedTypesBallot.comp.out b/Test/baseResults/spv.subgroupExtendedTypesBallot.comp.out
new file mode 100644
index 0000000..4e1b2dc
--- /dev/null
+++ b/Test/baseResults/spv.subgroupExtendedTypesBallot.comp.out
@@ -0,0 +1,560 @@
+spv.subgroupExtendedTypesBallot.comp
+// Module Version 10300
+// Generated by (magic number): 80007
+// Id's are bound by 441
+
+                              Capability Shader
+                              Capability Float16
+                              Capability Int64
+                              Capability Int16
+                              Capability Int8
+                              Capability GroupNonUniform
+                              Capability GroupNonUniformBallot
+                              Capability StorageUniformBufferBlock16
+                              Capability StorageBuffer8BitAccess
+                              Extension  "SPV_KHR_8bit_storage"
+               1:             ExtInstImport  "GLSL.std.450"
+                              MemoryModel Logical GLSL450
+                              EntryPoint GLCompute 4  "main" 10 12
+                              ExecutionMode 4 LocalSize 8 1 1
+                              Source GLSL 450
+                              SourceExtension  "GL_EXT_shader_explicit_arithmetic_types_float16"
+                              SourceExtension  "GL_EXT_shader_explicit_arithmetic_types_int16"
+                              SourceExtension  "GL_EXT_shader_explicit_arithmetic_types_int64"
+                              SourceExtension  "GL_EXT_shader_explicit_arithmetic_types_int8"
+                              SourceExtension  "GL_EXT_shader_subgroup_extended_types_float16"
+                              SourceExtension  "GL_EXT_shader_subgroup_extended_types_int16"
+                              SourceExtension  "GL_EXT_shader_subgroup_extended_types_int64"
+                              SourceExtension  "GL_EXT_shader_subgroup_extended_types_int8"
+                              SourceExtension  "GL_KHR_shader_subgroup_ballot"
+                              SourceExtension  "GL_KHR_shader_subgroup_basic"
+                              Name 4  "main"
+                              Name 8  "invocation"
+                              Name 10  "gl_SubgroupInvocationID"
+                              Name 12  "gl_SubgroupSize"
+                              Name 31  "Buffers"
+                              MemberName 31(Buffers) 0  "i8"
+                              MemberName 31(Buffers) 1  "u8"
+                              MemberName 31(Buffers) 2  "i16"
+                              MemberName 31(Buffers) 3  "u16"
+                              MemberName 31(Buffers) 4  "i64"
+                              MemberName 31(Buffers) 5  "u64"
+                              MemberName 31(Buffers) 6  "f16"
+                              Name 34  "data"
+                              Decorate 10(gl_SubgroupInvocationID) RelaxedPrecision
+                              Decorate 10(gl_SubgroupInvocationID) BuiltIn SubgroupLocalInvocationId
+                              Decorate 11 RelaxedPrecision
+                              Decorate 12(gl_SubgroupSize) RelaxedPrecision
+                              Decorate 12(gl_SubgroupSize) BuiltIn SubgroupSize
+                              Decorate 13 RelaxedPrecision
+                              Decorate 14 RelaxedPrecision
+                              Decorate 16 RelaxedPrecision
+                              MemberDecorate 31(Buffers) 0 Offset 0
+                              MemberDecorate 31(Buffers) 1 Offset 4
+                              MemberDecorate 31(Buffers) 2 Offset 8
+                              MemberDecorate 31(Buffers) 3 Offset 16
+                              MemberDecorate 31(Buffers) 4 Offset 32
+                              MemberDecorate 31(Buffers) 5 Offset 64
+                              MemberDecorate 31(Buffers) 6 Offset 96
+                              Decorate 31(Buffers) Block
+                              Decorate 34(data) DescriptorSet 0
+                              Decorate 34(data) Binding 0
+                              Decorate 440 BuiltIn WorkgroupSize
+               2:             TypeVoid
+               3:             TypeFunction 2
+               6:             TypeInt 32 0
+               7:             TypePointer Function 6(int)
+               9:             TypePointer Input 6(int)
+10(gl_SubgroupInvocationID):      9(ptr) Variable Input
+12(gl_SubgroupSize):      9(ptr) Variable Input
+              15:      6(int) Constant 4
+              17:             TypeInt 8 1
+              18:             TypeVector 17(int8_t) 4
+              19:             TypeInt 8 0
+              20:             TypeVector 19(int8_t) 4
+              21:             TypeInt 16 1
+              22:             TypeVector 21(int16_t) 4
+              23:             TypeInt 16 0
+              24:             TypeVector 23(int16_t) 4
+              25:             TypeInt 64 1
+              26:             TypeVector 25(int64_t) 4
+              27:             TypeInt 64 0
+              28:             TypeVector 27(int64_t) 4
+              29:             TypeFloat 16
+              30:             TypeVector 29(float16_t) 4
+     31(Buffers):             TypeStruct 18(i8vec4) 20(i8vec4) 22(i16vec4) 24(i16vec4) 26(i64vec4) 28(i64vec4) 30(f16vec4)
+              32:             TypeArray 31(Buffers) 15
+              33:             TypePointer StorageBuffer 32
+        34(data):     33(ptr) Variable StorageBuffer
+              36:             TypeInt 32 1
+              37:     36(int) Constant 0
+              38:      6(int) Constant 0
+              39:             TypePointer StorageBuffer 17(int8_t)
+              42:      6(int) Constant 3
+              46:     36(int) Constant 1
+              47:             TypeVector 17(int8_t) 2
+              48:             TypePointer StorageBuffer 18(i8vec4)
+              57:     36(int) Constant 2
+              58:             TypeVector 17(int8_t) 3
+              67:     36(int) Constant 3
+              99:             TypePointer StorageBuffer 19(int8_t)
+             105:             TypeVector 19(int8_t) 2
+             106:             TypePointer StorageBuffer 20(i8vec4)
+             115:             TypeVector 19(int8_t) 3
+             155:             TypePointer StorageBuffer 21(int16_t)
+             161:             TypeVector 21(int16_t) 2
+             162:             TypePointer StorageBuffer 22(i16vec4)
+             171:             TypeVector 21(int16_t) 3
+             211:             TypePointer StorageBuffer 23(int16_t)
+             217:             TypeVector 23(int16_t) 2
+             218:             TypePointer StorageBuffer 24(i16vec4)
+             227:             TypeVector 23(int16_t) 3
+             267:     36(int) Constant 4
+             268:             TypePointer StorageBuffer 25(int64_t)
+             274:             TypeVector 25(int64_t) 2
+             275:             TypePointer StorageBuffer 26(i64vec4)
+             284:             TypeVector 25(int64_t) 3
+             324:     36(int) Constant 5
+             325:             TypePointer StorageBuffer 27(int64_t)
+             331:             TypeVector 27(int64_t) 2
+             332:             TypePointer StorageBuffer 28(i64vec4)
+             341:             TypeVector 27(int64_t) 3
+             381:     36(int) Constant 6
+             382:             TypePointer StorageBuffer 29(float16_t)
+             388:             TypeVector 29(float16_t) 2
+             389:             TypePointer StorageBuffer 30(f16vec4)
+             398:             TypeVector 29(float16_t) 3
+             437:             TypeVector 6(int) 3
+             438:      6(int) Constant 8
+             439:      6(int) Constant 1
+             440:  437(ivec3) ConstantComposite 438 439 439
+         4(main):           2 Function None 3
+               5:             Label
+   8(invocation):      7(ptr) Variable Function
+              11:      6(int) Load 10(gl_SubgroupInvocationID)
+              13:      6(int) Load 12(gl_SubgroupSize)
+              14:      6(int) IAdd 11 13
+              16:      6(int) UMod 14 15
+                              Store 8(invocation) 16
+              35:      6(int) Load 8(invocation)
+              40:     39(ptr) AccessChain 34(data) 37 37 38
+              41:  17(int8_t) Load 40
+              43:  17(int8_t) GroupNonUniformBroadcast 42 41 42
+              44:     39(ptr) AccessChain 34(data) 35 37 38
+                              Store 44 43
+              45:      6(int) Load 8(invocation)
+              49:     48(ptr) AccessChain 34(data) 46 37
+              50:  18(i8vec4) Load 49
+              51:  47(i8vec2) VectorShuffle 50 50 0 1
+              52:  47(i8vec2) GroupNonUniformBroadcast 42 51 42
+              53:     48(ptr) AccessChain 34(data) 45 37
+              54:  18(i8vec4) Load 53
+              55:  18(i8vec4) VectorShuffle 54 52 4 5 2 3
+                              Store 53 55
+              56:      6(int) Load 8(invocation)
+              59:     48(ptr) AccessChain 34(data) 57 37
+              60:  18(i8vec4) Load 59
+              61:  58(i8vec3) VectorShuffle 60 60 0 1 2
+              62:  58(i8vec3) GroupNonUniformBroadcast 42 61 42
+              63:     48(ptr) AccessChain 34(data) 56 37
+              64:  18(i8vec4) Load 63
+              65:  18(i8vec4) VectorShuffle 64 62 4 5 6 3
+                              Store 63 65
+              66:      6(int) Load 8(invocation)
+              68:     48(ptr) AccessChain 34(data) 67 37
+              69:  18(i8vec4) Load 68
+              70:  18(i8vec4) GroupNonUniformBroadcast 42 69 42
+              71:     48(ptr) AccessChain 34(data) 66 37
+                              Store 71 70
+              72:      6(int) Load 8(invocation)
+              73:     39(ptr) AccessChain 34(data) 37 37 38
+              74:  17(int8_t) Load 73
+              75:  17(int8_t) GroupNonUniformBroadcastFirst 42 74
+              76:     39(ptr) AccessChain 34(data) 72 37 38
+                              Store 76 75
+              77:      6(int) Load 8(invocation)
+              78:     48(ptr) AccessChain 34(data) 46 37
+              79:  18(i8vec4) Load 78
+              80:  47(i8vec2) VectorShuffle 79 79 0 1
+              81:  47(i8vec2) GroupNonUniformBroadcastFirst 42 80
+              82:     48(ptr) AccessChain 34(data) 77 37
+              83:  18(i8vec4) Load 82
+              84:  18(i8vec4) VectorShuffle 83 81 4 5 2 3
+                              Store 82 84
+              85:      6(int) Load 8(invocation)
+              86:     48(ptr) AccessChain 34(data) 57 37
+              87:  18(i8vec4) Load 86
+              88:  58(i8vec3) VectorShuffle 87 87 0 1 2
+              89:  58(i8vec3) GroupNonUniformBroadcastFirst 42 88
+              90:     48(ptr) AccessChain 34(data) 85 37
+              91:  18(i8vec4) Load 90
+              92:  18(i8vec4) VectorShuffle 91 89 4 5 6 3
+                              Store 90 92
+              93:      6(int) Load 8(invocation)
+              94:     48(ptr) AccessChain 34(data) 67 37
+              95:  18(i8vec4) Load 94
+              96:  18(i8vec4) GroupNonUniformBroadcastFirst 42 95
+              97:     48(ptr) AccessChain 34(data) 93 37
+                              Store 97 96
+              98:      6(int) Load 8(invocation)
+             100:     99(ptr) AccessChain 34(data) 37 46 38
+             101:  19(int8_t) Load 100
+             102:  19(int8_t) GroupNonUniformBroadcast 42 101 42
+             103:     99(ptr) AccessChain 34(data) 98 46 38
+                              Store 103 102
+             104:      6(int) Load 8(invocation)
+             107:    106(ptr) AccessChain 34(data) 46 46
+             108:  20(i8vec4) Load 107
+             109: 105(i8vec2) VectorShuffle 108 108 0 1
+             110: 105(i8vec2) GroupNonUniformBroadcast 42 109 42
+             111:    106(ptr) AccessChain 34(data) 104 46
+             112:  20(i8vec4) Load 111
+             113:  20(i8vec4) VectorShuffle 112 110 4 5 2 3
+                              Store 111 113
+             114:      6(int) Load 8(invocation)
+             116:    106(ptr) AccessChain 34(data) 57 46
+             117:  20(i8vec4) Load 116
+             118: 115(i8vec3) VectorShuffle 117 117 0 1 2
+             119: 115(i8vec3) GroupNonUniformBroadcast 42 118 42
+             120:    106(ptr) AccessChain 34(data) 114 46
+             121:  20(i8vec4) Load 120
+             122:  20(i8vec4) VectorShuffle 121 119 4 5 6 3
+                              Store 120 122
+             123:      6(int) Load 8(invocation)
+             124:    106(ptr) AccessChain 34(data) 67 46
+             125:  20(i8vec4) Load 124
+             126:  20(i8vec4) GroupNonUniformBroadcast 42 125 42
+             127:    106(ptr) AccessChain 34(data) 123 46
+                              Store 127 126
+             128:      6(int) Load 8(invocation)
+             129:     99(ptr) AccessChain 34(data) 37 46 38
+             130:  19(int8_t) Load 129
+             131:  19(int8_t) GroupNonUniformBroadcastFirst 42 130
+             132:     99(ptr) AccessChain 34(data) 128 46 38
+                              Store 132 131
+             133:      6(int) Load 8(invocation)
+             134:    106(ptr) AccessChain 34(data) 46 46
+             135:  20(i8vec4) Load 134
+             136: 105(i8vec2) VectorShuffle 135 135 0 1
+             137: 105(i8vec2) GroupNonUniformBroadcastFirst 42 136
+             138:    106(ptr) AccessChain 34(data) 133 46
+             139:  20(i8vec4) Load 138
+             140:  20(i8vec4) VectorShuffle 139 137 4 5 2 3
+                              Store 138 140
+             141:      6(int) Load 8(invocation)
+             142:    106(ptr) AccessChain 34(data) 57 46
+             143:  20(i8vec4) Load 142
+             144: 115(i8vec3) VectorShuffle 143 143 0 1 2
+             145: 115(i8vec3) GroupNonUniformBroadcastFirst 42 144
+             146:    106(ptr) AccessChain 34(data) 141 46
+             147:  20(i8vec4) Load 146
+             148:  20(i8vec4) VectorShuffle 147 145 4 5 6 3
+                              Store 146 148
+             149:      6(int) Load 8(invocation)
+             150:    106(ptr) AccessChain 34(data) 67 46
+             151:  20(i8vec4) Load 150
+             152:  20(i8vec4) GroupNonUniformBroadcastFirst 42 151
+             153:    106(ptr) AccessChain 34(data) 149 46
+                              Store 153 152
+             154:      6(int) Load 8(invocation)
+             156:    155(ptr) AccessChain 34(data) 37 57 38
+             157: 21(int16_t) Load 156
+             158: 21(int16_t) GroupNonUniformBroadcast 42 157 42
+             159:    155(ptr) AccessChain 34(data) 154 57 38
+                              Store 159 158
+             160:      6(int) Load 8(invocation)
+             163:    162(ptr) AccessChain 34(data) 46 57
+             164: 22(i16vec4) Load 163
+             165:161(i16vec2) VectorShuffle 164 164 0 1
+             166:161(i16vec2) GroupNonUniformBroadcast 42 165 42
+             167:    162(ptr) AccessChain 34(data) 160 57
+             168: 22(i16vec4) Load 167
+             169: 22(i16vec4) VectorShuffle 168 166 4 5 2 3
+                              Store 167 169
+             170:      6(int) Load 8(invocation)
+             172:    162(ptr) AccessChain 34(data) 57 57
+             173: 22(i16vec4) Load 172
+             174:171(i16vec3) VectorShuffle 173 173 0 1 2
+             175:171(i16vec3) GroupNonUniformBroadcast 42 174 42
+             176:    162(ptr) AccessChain 34(data) 170 57
+             177: 22(i16vec4) Load 176
+             178: 22(i16vec4) VectorShuffle 177 175 4 5 6 3
+                              Store 176 178
+             179:      6(int) Load 8(invocation)
+             180:    162(ptr) AccessChain 34(data) 67 57
+             181: 22(i16vec4) Load 180
+             182: 22(i16vec4) GroupNonUniformBroadcast 42 181 42
+             183:    162(ptr) AccessChain 34(data) 179 57
+                              Store 183 182
+             184:      6(int) Load 8(invocation)
+             185:    155(ptr) AccessChain 34(data) 37 57 38
+             186: 21(int16_t) Load 185
+             187: 21(int16_t) GroupNonUniformBroadcastFirst 42 186
+             188:    155(ptr) AccessChain 34(data) 184 57 38
+                              Store 188 187
+             189:      6(int) Load 8(invocation)
+             190:    162(ptr) AccessChain 34(data) 46 57
+             191: 22(i16vec4) Load 190
+             192:161(i16vec2) VectorShuffle 191 191 0 1
+             193:161(i16vec2) GroupNonUniformBroadcastFirst 42 192
+             194:    162(ptr) AccessChain 34(data) 189 57
+             195: 22(i16vec4) Load 194
+             196: 22(i16vec4) VectorShuffle 195 193 4 5 2 3
+                              Store 194 196
+             197:      6(int) Load 8(invocation)
+             198:    162(ptr) AccessChain 34(data) 57 57
+             199: 22(i16vec4) Load 198
+             200:171(i16vec3) VectorShuffle 199 199 0 1 2
+             201:171(i16vec3) GroupNonUniformBroadcastFirst 42 200
+             202:    162(ptr) AccessChain 34(data) 197 57
+             203: 22(i16vec4) Load 202
+             204: 22(i16vec4) VectorShuffle 203 201 4 5 6 3
+                              Store 202 204
+             205:      6(int) Load 8(invocation)
+             206:    162(ptr) AccessChain 34(data) 67 57
+             207: 22(i16vec4) Load 206
+             208: 22(i16vec4) GroupNonUniformBroadcastFirst 42 207
+             209:    162(ptr) AccessChain 34(data) 205 57
+                              Store 209 208
+             210:      6(int) Load 8(invocation)
+             212:    211(ptr) AccessChain 34(data) 37 67 38
+             213: 23(int16_t) Load 212
+             214: 23(int16_t) GroupNonUniformBroadcast 42 213 42
+             215:    211(ptr) AccessChain 34(data) 210 67 38
+                              Store 215 214
+             216:      6(int) Load 8(invocation)
+             219:    218(ptr) AccessChain 34(data) 46 67
+             220: 24(i16vec4) Load 219
+             221:217(i16vec2) VectorShuffle 220 220 0 1
+             222:217(i16vec2) GroupNonUniformBroadcast 42 221 42
+             223:    218(ptr) AccessChain 34(data) 216 67
+             224: 24(i16vec4) Load 223
+             225: 24(i16vec4) VectorShuffle 224 222 4 5 2 3
+                              Store 223 225
+             226:      6(int) Load 8(invocation)
+             228:    218(ptr) AccessChain 34(data) 57 67
+             229: 24(i16vec4) Load 228
+             230:227(i16vec3) VectorShuffle 229 229 0 1 2
+             231:227(i16vec3) GroupNonUniformBroadcast 42 230 42
+             232:    218(ptr) AccessChain 34(data) 226 67
+             233: 24(i16vec4) Load 232
+             234: 24(i16vec4) VectorShuffle 233 231 4 5 6 3
+                              Store 232 234
+             235:      6(int) Load 8(invocation)
+             236:    218(ptr) AccessChain 34(data) 67 67
+             237: 24(i16vec4) Load 236
+             238: 24(i16vec4) GroupNonUniformBroadcast 42 237 42
+             239:    218(ptr) AccessChain 34(data) 235 67
+                              Store 239 238
+             240:      6(int) Load 8(invocation)
+             241:    211(ptr) AccessChain 34(data) 37 67 38
+             242: 23(int16_t) Load 241
+             243: 23(int16_t) GroupNonUniformBroadcastFirst 42 242
+             244:    211(ptr) AccessChain 34(data) 240 67 38
+                              Store 244 243
+             245:      6(int) Load 8(invocation)
+             246:    218(ptr) AccessChain 34(data) 46 67
+             247: 24(i16vec4) Load 246
+             248:217(i16vec2) VectorShuffle 247 247 0 1
+             249:217(i16vec2) GroupNonUniformBroadcastFirst 42 248
+             250:    218(ptr) AccessChain 34(data) 245 67
+             251: 24(i16vec4) Load 250
+             252: 24(i16vec4) VectorShuffle 251 249 4 5 2 3
+                              Store 250 252
+             253:      6(int) Load 8(invocation)
+             254:    218(ptr) AccessChain 34(data) 57 67
+             255: 24(i16vec4) Load 254
+             256:227(i16vec3) VectorShuffle 255 255 0 1 2
+             257:227(i16vec3) GroupNonUniformBroadcastFirst 42 256
+             258:    218(ptr) AccessChain 34(data) 253 67
+             259: 24(i16vec4) Load 258
+             260: 24(i16vec4) VectorShuffle 259 257 4 5 6 3
+                              Store 258 260
+             261:      6(int) Load 8(invocation)
+             262:    218(ptr) AccessChain 34(data) 67 67
+             263: 24(i16vec4) Load 262
+             264: 24(i16vec4) GroupNonUniformBroadcastFirst 42 263
+             265:    218(ptr) AccessChain 34(data) 261 67
+                              Store 265 264
+             266:      6(int) Load 8(invocation)
+             269:    268(ptr) AccessChain 34(data) 37 267 38
+             270: 25(int64_t) Load 269
+             271: 25(int64_t) GroupNonUniformBroadcast 42 270 42
+             272:    268(ptr) AccessChain 34(data) 266 267 38
+                              Store 272 271
+             273:      6(int) Load 8(invocation)
+             276:    275(ptr) AccessChain 34(data) 46 267
+             277: 26(i64vec4) Load 276
+             278:274(i64vec2) VectorShuffle 277 277 0 1
+             279:274(i64vec2) GroupNonUniformBroadcast 42 278 42
+             280:    275(ptr) AccessChain 34(data) 273 267
+             281: 26(i64vec4) Load 280
+             282: 26(i64vec4) VectorShuffle 281 279 4 5 2 3
+                              Store 280 282
+             283:      6(int) Load 8(invocation)
+             285:    275(ptr) AccessChain 34(data) 57 267
+             286: 26(i64vec4) Load 285
+             287:284(i64vec3) VectorShuffle 286 286 0 1 2
+             288:284(i64vec3) GroupNonUniformBroadcast 42 287 42
+             289:    275(ptr) AccessChain 34(data) 283 267
+             290: 26(i64vec4) Load 289
+             291: 26(i64vec4) VectorShuffle 290 288 4 5 6 3
+                              Store 289 291
+             292:      6(int) Load 8(invocation)
+             293:    275(ptr) AccessChain 34(data) 67 267
+             294: 26(i64vec4) Load 293
+             295: 26(i64vec4) GroupNonUniformBroadcast 42 294 42
+             296:    275(ptr) AccessChain 34(data) 292 267
+                              Store 296 295
+             297:      6(int) Load 8(invocation)
+             298:    268(ptr) AccessChain 34(data) 37 267 38
+             299: 25(int64_t) Load 298
+             300: 25(int64_t) GroupNonUniformBroadcastFirst 42 299
+             301:    268(ptr) AccessChain 34(data) 297 267 38
+                              Store 301 300
+             302:      6(int) Load 8(invocation)
+             303:    275(ptr) AccessChain 34(data) 46 267
+             304: 26(i64vec4) Load 303
+             305:274(i64vec2) VectorShuffle 304 304 0 1
+             306:274(i64vec2) GroupNonUniformBroadcastFirst 42 305
+             307:    275(ptr) AccessChain 34(data) 302 267
+             308: 26(i64vec4) Load 307
+             309: 26(i64vec4) VectorShuffle 308 306 4 5 2 3
+                              Store 307 309
+             310:      6(int) Load 8(invocation)
+             311:    275(ptr) AccessChain 34(data) 57 267
+             312: 26(i64vec4) Load 311
+             313:284(i64vec3) VectorShuffle 312 312 0 1 2
+             314:284(i64vec3) GroupNonUniformBroadcastFirst 42 313
+             315:    275(ptr) AccessChain 34(data) 310 267
+             316: 26(i64vec4) Load 315
+             317: 26(i64vec4) VectorShuffle 316 314 4 5 6 3
+                              Store 315 317
+             318:      6(int) Load 8(invocation)
+             319:    275(ptr) AccessChain 34(data) 67 267
+             320: 26(i64vec4) Load 319
+             321: 26(i64vec4) GroupNonUniformBroadcastFirst 42 320
+             322:    275(ptr) AccessChain 34(data) 318 267
+                              Store 322 321
+             323:      6(int) Load 8(invocation)
+             326:    325(ptr) AccessChain 34(data) 37 324 38
+             327: 27(int64_t) Load 326
+             328: 27(int64_t) GroupNonUniformBroadcast 42 327 42
+             329:    325(ptr) AccessChain 34(data) 323 324 38
+                              Store 329 328
+             330:      6(int) Load 8(invocation)
+             333:    332(ptr) AccessChain 34(data) 46 324
+             334: 28(i64vec4) Load 333
+             335:331(i64vec2) VectorShuffle 334 334 0 1
+             336:331(i64vec2) GroupNonUniformBroadcast 42 335 42
+             337:    332(ptr) AccessChain 34(data) 330 324
+             338: 28(i64vec4) Load 337
+             339: 28(i64vec4) VectorShuffle 338 336 4 5 2 3
+                              Store 337 339
+             340:      6(int) Load 8(invocation)
+             342:    332(ptr) AccessChain 34(data) 57 324
+             343: 28(i64vec4) Load 342
+             344:341(i64vec3) VectorShuffle 343 343 0 1 2
+             345:341(i64vec3) GroupNonUniformBroadcast 42 344 42
+             346:    332(ptr) AccessChain 34(data) 340 324
+             347: 28(i64vec4) Load 346
+             348: 28(i64vec4) VectorShuffle 347 345 4 5 6 3
+                              Store 346 348
+             349:      6(int) Load 8(invocation)
+             350:    332(ptr) AccessChain 34(data) 67 324
+             351: 28(i64vec4) Load 350
+             352: 28(i64vec4) GroupNonUniformBroadcast 42 351 42
+             353:    332(ptr) AccessChain 34(data) 349 324
+                              Store 353 352
+             354:      6(int) Load 8(invocation)
+             355:    325(ptr) AccessChain 34(data) 37 324 38
+             356: 27(int64_t) Load 355
+             357: 27(int64_t) GroupNonUniformBroadcastFirst 42 356
+             358:    325(ptr) AccessChain 34(data) 354 324 38
+                              Store 358 357
+             359:      6(int) Load 8(invocation)
+             360:    332(ptr) AccessChain 34(data) 46 324
+             361: 28(i64vec4) Load 360
+             362:331(i64vec2) VectorShuffle 361 361 0 1
+             363:331(i64vec2) GroupNonUniformBroadcastFirst 42 362
+             364:    332(ptr) AccessChain 34(data) 359 324
+             365: 28(i64vec4) Load 364
+             366: 28(i64vec4) VectorShuffle 365 363 4 5 2 3
+                              Store 364 366
+             367:      6(int) Load 8(invocation)
+             368:    332(ptr) AccessChain 34(data) 57 324
+             369: 28(i64vec4) Load 368
+             370:341(i64vec3) VectorShuffle 369 369 0 1 2
+             371:341(i64vec3) GroupNonUniformBroadcastFirst 42 370
+             372:    332(ptr) AccessChain 34(data) 367 324
+             373: 28(i64vec4) Load 372
+             374: 28(i64vec4) VectorShuffle 373 371 4 5 6 3
+                              Store 372 374
+             375:      6(int) Load 8(invocation)
+             376:    332(ptr) AccessChain 34(data) 67 324
+             377: 28(i64vec4) Load 376
+             378: 28(i64vec4) GroupNonUniformBroadcastFirst 42 377
+             379:    332(ptr) AccessChain 34(data) 375 324
+                              Store 379 378
+             380:      6(int) Load 8(invocation)
+             383:    382(ptr) AccessChain 34(data) 37 381 38
+             384:29(float16_t) Load 383
+             385:29(float16_t) GroupNonUniformBroadcast 42 384 42
+             386:    382(ptr) AccessChain 34(data) 380 381 38
+                              Store 386 385
+             387:      6(int) Load 8(invocation)
+             390:    389(ptr) AccessChain 34(data) 46 381
+             391: 30(f16vec4) Load 390
+             392:388(f16vec2) VectorShuffle 391 391 0 1
+             393:388(f16vec2) GroupNonUniformBroadcast 42 392 42
+             394:    389(ptr) AccessChain 34(data) 387 381
+             395: 30(f16vec4) Load 394
+             396: 30(f16vec4) VectorShuffle 395 393 4 5 2 3
+                              Store 394 396
+             397:      6(int) Load 8(invocation)
+             399:    389(ptr) AccessChain 34(data) 57 381
+             400: 30(f16vec4) Load 399
+             401:398(f16vec3) VectorShuffle 400 400 0 1 2
+             402:398(f16vec3) GroupNonUniformBroadcast 42 401 42
+             403:    389(ptr) AccessChain 34(data) 397 381
+             404: 30(f16vec4) Load 403
+             405: 30(f16vec4) VectorShuffle 404 402 4 5 6 3
+                              Store 403 405
+             406:      6(int) Load 8(invocation)
+             407:    389(ptr) AccessChain 34(data) 67 381
+             408: 30(f16vec4) Load 407
+             409: 30(f16vec4) GroupNonUniformBroadcast 42 408 42
+             410:    389(ptr) AccessChain 34(data) 406 381
+                              Store 410 409
+             411:      6(int) Load 8(invocation)
+             412:    382(ptr) AccessChain 34(data) 37 381 38
+             413:29(float16_t) Load 412
+             414:29(float16_t) GroupNonUniformBroadcastFirst 42 413
+             415:    382(ptr) AccessChain 34(data) 411 381 38
+                              Store 415 414
+             416:      6(int) Load 8(invocation)
+             417:    389(ptr) AccessChain 34(data) 46 381
+             418: 30(f16vec4) Load 417
+             419:388(f16vec2) VectorShuffle 418 418 0 1
+             420:388(f16vec2) GroupNonUniformBroadcastFirst 42 419
+             421:    389(ptr) AccessChain 34(data) 416 381
+             422: 30(f16vec4) Load 421
+             423: 30(f16vec4) VectorShuffle 422 420 4 5 2 3
+                              Store 421 423
+             424:      6(int) Load 8(invocation)
+             425:    389(ptr) AccessChain 34(data) 57 381
+             426: 30(f16vec4) Load 425
+             427:398(f16vec3) VectorShuffle 426 426 0 1 2
+             428:398(f16vec3) GroupNonUniformBroadcastFirst 42 427
+             429:    389(ptr) AccessChain 34(data) 424 381
+             430: 30(f16vec4) Load 429
+             431: 30(f16vec4) VectorShuffle 430 428 4 5 6 3
+                              Store 429 431
+             432:      6(int) Load 8(invocation)
+             433:    389(ptr) AccessChain 34(data) 67 381
+             434: 30(f16vec4) Load 433
+             435: 30(f16vec4) GroupNonUniformBroadcastFirst 42 434
+             436:    389(ptr) AccessChain 34(data) 432 381
+                              Store 436 435
+                              Return
+                              FunctionEnd
diff --git a/Test/baseResults/spv.subgroupExtendedTypesBallotNeg.comp.out b/Test/baseResults/spv.subgroupExtendedTypesBallotNeg.comp.out
new file mode 100644
index 0000000..534560d
--- /dev/null
+++ b/Test/baseResults/spv.subgroupExtendedTypesBallotNeg.comp.out
@@ -0,0 +1,61 @@
+spv.subgroupExtendedTypesBallotNeg.comp
+ERROR: 0:26: ' temp int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:27: ' temp 2-component vector of int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:28: ' temp 3-component vector of int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:29: 'layout( column_major std430) buffer 4-component vector of int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:30: ' temp int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:31: ' temp 2-component vector of int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:32: ' temp 3-component vector of int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:33: 'layout( column_major std430) buffer 4-component vector of int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:35: ' temp uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:36: ' temp 2-component vector of uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:37: ' temp 3-component vector of uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:38: 'layout( column_major std430) buffer 4-component vector of uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:39: ' temp uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:40: ' temp 2-component vector of uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:41: ' temp 3-component vector of uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:42: 'layout( column_major std430) buffer 4-component vector of uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:44: ' temp int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:45: ' temp 2-component vector of int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:46: ' temp 3-component vector of int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:47: 'layout( column_major std430) buffer 4-component vector of int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:48: ' temp int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:49: ' temp 2-component vector of int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:50: ' temp 3-component vector of int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:51: 'layout( column_major std430) buffer 4-component vector of int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:53: ' temp uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:54: ' temp 2-component vector of uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:55: ' temp 3-component vector of uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:56: 'layout( column_major std430) buffer 4-component vector of uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:57: ' temp uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:58: ' temp 2-component vector of uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:59: ' temp 3-component vector of uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:60: 'layout( column_major std430) buffer 4-component vector of uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:62: ' temp int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:63: ' temp 2-component vector of int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:64: ' temp 3-component vector of int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:65: 'layout( column_major std430) buffer 4-component vector of int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:66: ' temp int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:67: ' temp 2-component vector of int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:68: ' temp 3-component vector of int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:69: 'layout( column_major std430) buffer 4-component vector of int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:71: ' temp uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:72: ' temp 2-component vector of uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:73: ' temp 3-component vector of uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:74: 'layout( column_major std430) buffer 4-component vector of uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:75: ' temp uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:76: ' temp 2-component vector of uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:77: ' temp 3-component vector of uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:78: 'layout( column_major std430) buffer 4-component vector of uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:80: ' temp float16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_float16
+ERROR: 0:81: ' temp 2-component vector of float16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_float16
+ERROR: 0:82: ' temp 3-component vector of float16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_float16
+ERROR: 0:83: 'layout( column_major std430) buffer 4-component vector of float16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_float16
+ERROR: 0:84: ' temp float16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_float16
+ERROR: 0:85: ' temp 2-component vector of float16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_float16
+ERROR: 0:86: ' temp 3-component vector of float16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_float16
+ERROR: 0:87: 'layout( column_major std430) buffer 4-component vector of float16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_float16
+ERROR: 56 compilation errors.  No code generated.
+
+
+SPIR-V is not generated for failed compile or link
diff --git a/Test/baseResults/spv.subgroupExtendedTypesBasic.comp.out b/Test/baseResults/spv.subgroupExtendedTypesBasic.comp.out
new file mode 100644
index 0000000..6319697
--- /dev/null
+++ b/Test/baseResults/spv.subgroupExtendedTypesBasic.comp.out
@@ -0,0 +1,9 @@
+spv.subgroupExtendedTypesBasic.comp
+ERROR: #version: compute shaders require es profile with version 310 or above, or non-es profile with version 420 or above
+ERROR: #version: statement must appear first in es-profile shader; before comments or newlines
+ERROR: 1 compilation errors.  No code generated.
+
+
+ERROR: Linking compute stage: Missing entry point: Each stage requires one entry point
+
+SPIR-V is not generated for failed compile or link
diff --git a/Test/baseResults/spv.subgroupExtendedTypesClustered.comp.out b/Test/baseResults/spv.subgroupExtendedTypesClustered.comp.out
new file mode 100644
index 0000000..b0b94b5
--- /dev/null
+++ b/Test/baseResults/spv.subgroupExtendedTypesClustered.comp.out
@@ -0,0 +1,1520 @@
+spv.subgroupExtendedTypesClustered.comp
+// Module Version 10300
+// Generated by (magic number): 80007
+// Id's are bound by 1273
+
+                              Capability Shader
+                              Capability Float16
+                              Capability Int64
+                              Capability Int16
+                              Capability Int8
+                              Capability GroupNonUniform
+                              Capability GroupNonUniformClustered
+                              Capability StorageUniformBufferBlock16
+                              Capability StorageBuffer8BitAccess
+                              Extension  "SPV_KHR_8bit_storage"
+               1:             ExtInstImport  "GLSL.std.450"
+                              MemoryModel Logical GLSL450
+                              EntryPoint GLCompute 4  "main" 10 12
+                              ExecutionMode 4 LocalSize 8 1 1
+                              Source GLSL 450
+                              SourceExtension  "GL_EXT_shader_explicit_arithmetic_types_float16"
+                              SourceExtension  "GL_EXT_shader_explicit_arithmetic_types_int16"
+                              SourceExtension  "GL_EXT_shader_explicit_arithmetic_types_int64"
+                              SourceExtension  "GL_EXT_shader_explicit_arithmetic_types_int8"
+                              SourceExtension  "GL_EXT_shader_subgroup_extended_types_float16"
+                              SourceExtension  "GL_EXT_shader_subgroup_extended_types_int16"
+                              SourceExtension  "GL_EXT_shader_subgroup_extended_types_int64"
+                              SourceExtension  "GL_EXT_shader_subgroup_extended_types_int8"
+                              SourceExtension  "GL_KHR_shader_subgroup_basic"
+                              SourceExtension  "GL_KHR_shader_subgroup_clustered"
+                              Name 4  "main"
+                              Name 8  "invocation"
+                              Name 10  "gl_SubgroupInvocationID"
+                              Name 12  "gl_SubgroupSize"
+                              Name 31  "Buffers"
+                              MemberName 31(Buffers) 0  "i8"
+                              MemberName 31(Buffers) 1  "u8"
+                              MemberName 31(Buffers) 2  "i16"
+                              MemberName 31(Buffers) 3  "u16"
+                              MemberName 31(Buffers) 4  "i64"
+                              MemberName 31(Buffers) 5  "u64"
+                              MemberName 31(Buffers) 6  "f16"
+                              Name 34  "data"
+                              Decorate 10(gl_SubgroupInvocationID) RelaxedPrecision
+                              Decorate 10(gl_SubgroupInvocationID) BuiltIn SubgroupLocalInvocationId
+                              Decorate 11 RelaxedPrecision
+                              Decorate 12(gl_SubgroupSize) RelaxedPrecision
+                              Decorate 12(gl_SubgroupSize) BuiltIn SubgroupSize
+                              Decorate 13 RelaxedPrecision
+                              Decorate 14 RelaxedPrecision
+                              Decorate 16 RelaxedPrecision
+                              MemberDecorate 31(Buffers) 0 Offset 0
+                              MemberDecorate 31(Buffers) 1 Offset 4
+                              MemberDecorate 31(Buffers) 2 Offset 8
+                              MemberDecorate 31(Buffers) 3 Offset 16
+                              MemberDecorate 31(Buffers) 4 Offset 32
+                              MemberDecorate 31(Buffers) 5 Offset 64
+                              MemberDecorate 31(Buffers) 6 Offset 96
+                              Decorate 31(Buffers) Block
+                              Decorate 34(data) DescriptorSet 0
+                              Decorate 34(data) Binding 0
+                              Decorate 1272 BuiltIn WorkgroupSize
+               2:             TypeVoid
+               3:             TypeFunction 2
+               6:             TypeInt 32 0
+               7:             TypePointer Function 6(int)
+               9:             TypePointer Input 6(int)
+10(gl_SubgroupInvocationID):      9(ptr) Variable Input
+12(gl_SubgroupSize):      9(ptr) Variable Input
+              15:      6(int) Constant 4
+              17:             TypeInt 8 1
+              18:             TypeVector 17(int8_t) 4
+              19:             TypeInt 8 0
+              20:             TypeVector 19(int8_t) 4
+              21:             TypeInt 16 1
+              22:             TypeVector 21(int16_t) 4
+              23:             TypeInt 16 0
+              24:             TypeVector 23(int16_t) 4
+              25:             TypeInt 64 1
+              26:             TypeVector 25(int64_t) 4
+              27:             TypeInt 64 0
+              28:             TypeVector 27(int64_t) 4
+              29:             TypeFloat 16
+              30:             TypeVector 29(float16_t) 4
+     31(Buffers):             TypeStruct 18(i8vec4) 20(i8vec4) 22(i16vec4) 24(i16vec4) 26(i64vec4) 28(i64vec4) 30(f16vec4)
+              32:             TypeArray 31(Buffers) 15
+              33:             TypePointer StorageBuffer 32
+        34(data):     33(ptr) Variable StorageBuffer
+              36:             TypeInt 32 1
+              37:     36(int) Constant 0
+              38:      6(int) Constant 0
+              39:             TypePointer StorageBuffer 17(int8_t)
+              42:      6(int) Constant 1
+              43:      6(int) Constant 3
+              47:     36(int) Constant 1
+              48:             TypeVector 17(int8_t) 2
+              49:             TypePointer StorageBuffer 18(i8vec4)
+              58:     36(int) Constant 2
+              59:             TypeVector 17(int8_t) 3
+              68:     36(int) Constant 3
+             230:             TypePointer StorageBuffer 19(int8_t)
+             236:             TypeVector 19(int8_t) 2
+             237:             TypePointer StorageBuffer 20(i8vec4)
+             246:             TypeVector 19(int8_t) 3
+             416:             TypePointer StorageBuffer 21(int16_t)
+             422:             TypeVector 21(int16_t) 2
+             423:             TypePointer StorageBuffer 22(i16vec4)
+             432:             TypeVector 21(int16_t) 3
+             602:             TypePointer StorageBuffer 23(int16_t)
+             608:             TypeVector 23(int16_t) 2
+             609:             TypePointer StorageBuffer 24(i16vec4)
+             618:             TypeVector 23(int16_t) 3
+             788:     36(int) Constant 4
+             789:             TypePointer StorageBuffer 25(int64_t)
+             795:             TypeVector 25(int64_t) 2
+             796:             TypePointer StorageBuffer 26(i64vec4)
+             805:             TypeVector 25(int64_t) 3
+             975:     36(int) Constant 5
+             976:             TypePointer StorageBuffer 27(int64_t)
+             982:             TypeVector 27(int64_t) 2
+             983:             TypePointer StorageBuffer 28(i64vec4)
+             992:             TypeVector 27(int64_t) 3
+            1162:     36(int) Constant 6
+            1163:             TypePointer StorageBuffer 29(float16_t)
+            1169:             TypeVector 29(float16_t) 2
+            1170:             TypePointer StorageBuffer 30(f16vec4)
+            1179:             TypeVector 29(float16_t) 3
+            1270:             TypeVector 6(int) 3
+            1271:      6(int) Constant 8
+            1272: 1270(ivec3) ConstantComposite 1271 42 42
+         4(main):           2 Function None 3
+               5:             Label
+   8(invocation):      7(ptr) Variable Function
+              11:      6(int) Load 10(gl_SubgroupInvocationID)
+              13:      6(int) Load 12(gl_SubgroupSize)
+              14:      6(int) IAdd 11 13
+              16:      6(int) UMod 14 15
+                              Store 8(invocation) 16
+              35:      6(int) Load 8(invocation)
+              40:     39(ptr) AccessChain 34(data) 37 37 38
+              41:  17(int8_t) Load 40
+              44:  17(int8_t) GroupNonUniformIAdd 43 ClusteredReduce 41 42
+              45:     39(ptr) AccessChain 34(data) 35 37 38
+                              Store 45 44
+              46:      6(int) Load 8(invocation)
+              50:     49(ptr) AccessChain 34(data) 47 37
+              51:  18(i8vec4) Load 50
+              52:  48(i8vec2) VectorShuffle 51 51 0 1
+              53:  48(i8vec2) GroupNonUniformIAdd 43 ClusteredReduce 52 42
+              54:     49(ptr) AccessChain 34(data) 46 37
+              55:  18(i8vec4) Load 54
+              56:  18(i8vec4) VectorShuffle 55 53 4 5 2 3
+                              Store 54 56
+              57:      6(int) Load 8(invocation)
+              60:     49(ptr) AccessChain 34(data) 58 37
+              61:  18(i8vec4) Load 60
+              62:  59(i8vec3) VectorShuffle 61 61 0 1 2
+              63:  59(i8vec3) GroupNonUniformIAdd 43 ClusteredReduce 62 42
+              64:     49(ptr) AccessChain 34(data) 57 37
+              65:  18(i8vec4) Load 64
+              66:  18(i8vec4) VectorShuffle 65 63 4 5 6 3
+                              Store 64 66
+              67:      6(int) Load 8(invocation)
+              69:     49(ptr) AccessChain 34(data) 68 37
+              70:  18(i8vec4) Load 69
+              71:  18(i8vec4) GroupNonUniformIAdd 43 ClusteredReduce 70 42
+              72:     49(ptr) AccessChain 34(data) 67 37
+                              Store 72 71
+              73:      6(int) Load 8(invocation)
+              74:     39(ptr) AccessChain 34(data) 37 37 38
+              75:  17(int8_t) Load 74
+              76:  17(int8_t) GroupNonUniformIMul 43 ClusteredReduce 75 42
+              77:     39(ptr) AccessChain 34(data) 73 37 38
+                              Store 77 76
+              78:      6(int) Load 8(invocation)
+              79:     49(ptr) AccessChain 34(data) 47 37
+              80:  18(i8vec4) Load 79
+              81:  48(i8vec2) VectorShuffle 80 80 0 1
+              82:  48(i8vec2) GroupNonUniformIMul 43 ClusteredReduce 81 42
+              83:     49(ptr) AccessChain 34(data) 78 37
+              84:  18(i8vec4) Load 83
+              85:  18(i8vec4) VectorShuffle 84 82 4 5 2 3
+                              Store 83 85
+              86:      6(int) Load 8(invocation)
+              87:     49(ptr) AccessChain 34(data) 58 37
+              88:  18(i8vec4) Load 87
+              89:  59(i8vec3) VectorShuffle 88 88 0 1 2
+              90:  59(i8vec3) GroupNonUniformIMul 43 ClusteredReduce 89 42
+              91:     49(ptr) AccessChain 34(data) 86 37
+              92:  18(i8vec4) Load 91
+              93:  18(i8vec4) VectorShuffle 92 90 4 5 6 3
+                              Store 91 93
+              94:      6(int) Load 8(invocation)
+              95:     49(ptr) AccessChain 34(data) 68 37
+              96:  18(i8vec4) Load 95
+              97:  18(i8vec4) GroupNonUniformIMul 43 ClusteredReduce 96 42
+              98:     49(ptr) AccessChain 34(data) 94 37
+                              Store 98 97
+              99:      6(int) Load 8(invocation)
+             100:     39(ptr) AccessChain 34(data) 37 37 38
+             101:  17(int8_t) Load 100
+             102:  17(int8_t) GroupNonUniformSMin 43 ClusteredReduce 101 42
+             103:     39(ptr) AccessChain 34(data) 99 37 38
+                              Store 103 102
+             104:      6(int) Load 8(invocation)
+             105:     49(ptr) AccessChain 34(data) 47 37
+             106:  18(i8vec4) Load 105
+             107:  48(i8vec2) VectorShuffle 106 106 0 1
+             108:  48(i8vec2) GroupNonUniformSMin 43 ClusteredReduce 107 42
+             109:     49(ptr) AccessChain 34(data) 104 37
+             110:  18(i8vec4) Load 109
+             111:  18(i8vec4) VectorShuffle 110 108 4 5 2 3
+                              Store 109 111
+             112:      6(int) Load 8(invocation)
+             113:     49(ptr) AccessChain 34(data) 58 37
+             114:  18(i8vec4) Load 113
+             115:  59(i8vec3) VectorShuffle 114 114 0 1 2
+             116:  59(i8vec3) GroupNonUniformSMin 43 ClusteredReduce 115 42
+             117:     49(ptr) AccessChain 34(data) 112 37
+             118:  18(i8vec4) Load 117
+             119:  18(i8vec4) VectorShuffle 118 116 4 5 6 3
+                              Store 117 119
+             120:      6(int) Load 8(invocation)
+             121:     49(ptr) AccessChain 34(data) 68 37
+             122:  18(i8vec4) Load 121
+             123:  18(i8vec4) GroupNonUniformSMin 43 ClusteredReduce 122 42
+             124:     49(ptr) AccessChain 34(data) 120 37
+                              Store 124 123
+             125:      6(int) Load 8(invocation)
+             126:     39(ptr) AccessChain 34(data) 37 37 38
+             127:  17(int8_t) Load 126
+             128:  17(int8_t) GroupNonUniformSMax 43 ClusteredReduce 127 42
+             129:     39(ptr) AccessChain 34(data) 125 37 38
+                              Store 129 128
+             130:      6(int) Load 8(invocation)
+             131:     49(ptr) AccessChain 34(data) 47 37
+             132:  18(i8vec4) Load 131
+             133:  48(i8vec2) VectorShuffle 132 132 0 1
+             134:  48(i8vec2) GroupNonUniformSMax 43 ClusteredReduce 133 42
+             135:     49(ptr) AccessChain 34(data) 130 37
+             136:  18(i8vec4) Load 135
+             137:  18(i8vec4) VectorShuffle 136 134 4 5 2 3
+                              Store 135 137
+             138:      6(int) Load 8(invocation)
+             139:     49(ptr) AccessChain 34(data) 58 37
+             140:  18(i8vec4) Load 139
+             141:  59(i8vec3) VectorShuffle 140 140 0 1 2
+             142:  59(i8vec3) GroupNonUniformSMax 43 ClusteredReduce 141 42
+             143:     49(ptr) AccessChain 34(data) 138 37
+             144:  18(i8vec4) Load 143
+             145:  18(i8vec4) VectorShuffle 144 142 4 5 6 3
+                              Store 143 145
+             146:      6(int) Load 8(invocation)
+             147:     49(ptr) AccessChain 34(data) 68 37
+             148:  18(i8vec4) Load 147
+             149:  18(i8vec4) GroupNonUniformSMax 43 ClusteredReduce 148 42
+             150:     49(ptr) AccessChain 34(data) 146 37
+                              Store 150 149
+             151:      6(int) Load 8(invocation)
+             152:     39(ptr) AccessChain 34(data) 37 37 38
+             153:  17(int8_t) Load 152
+             154:  17(int8_t) GroupNonUniformBitwiseAnd 43 ClusteredReduce 153 42
+             155:     39(ptr) AccessChain 34(data) 151 37 38
+                              Store 155 154
+             156:      6(int) Load 8(invocation)
+             157:     49(ptr) AccessChain 34(data) 47 37
+             158:  18(i8vec4) Load 157
+             159:  48(i8vec2) VectorShuffle 158 158 0 1
+             160:  48(i8vec2) GroupNonUniformBitwiseAnd 43 ClusteredReduce 159 42
+             161:     49(ptr) AccessChain 34(data) 156 37
+             162:  18(i8vec4) Load 161
+             163:  18(i8vec4) VectorShuffle 162 160 4 5 2 3
+                              Store 161 163
+             164:      6(int) Load 8(invocation)
+             165:     49(ptr) AccessChain 34(data) 58 37
+             166:  18(i8vec4) Load 165
+             167:  59(i8vec3) VectorShuffle 166 166 0 1 2
+             168:  59(i8vec3) GroupNonUniformBitwiseAnd 43 ClusteredReduce 167 42
+             169:     49(ptr) AccessChain 34(data) 164 37
+             170:  18(i8vec4) Load 169
+             171:  18(i8vec4) VectorShuffle 170 168 4 5 6 3
+                              Store 169 171
+             172:      6(int) Load 8(invocation)
+             173:     49(ptr) AccessChain 34(data) 68 37
+             174:  18(i8vec4) Load 173
+             175:  18(i8vec4) GroupNonUniformBitwiseAnd 43 ClusteredReduce 174 42
+             176:     49(ptr) AccessChain 34(data) 172 37
+                              Store 176 175
+             177:      6(int) Load 8(invocation)
+             178:     39(ptr) AccessChain 34(data) 37 37 38
+             179:  17(int8_t) Load 178
+             180:  17(int8_t) GroupNonUniformBitwiseOr 43 ClusteredReduce 179 42
+             181:     39(ptr) AccessChain 34(data) 177 37 38
+                              Store 181 180
+             182:      6(int) Load 8(invocation)
+             183:     49(ptr) AccessChain 34(data) 47 37
+             184:  18(i8vec4) Load 183
+             185:  48(i8vec2) VectorShuffle 184 184 0 1
+             186:  48(i8vec2) GroupNonUniformBitwiseOr 43 ClusteredReduce 185 42
+             187:     49(ptr) AccessChain 34(data) 182 37
+             188:  18(i8vec4) Load 187
+             189:  18(i8vec4) VectorShuffle 188 186 4 5 2 3
+                              Store 187 189
+             190:      6(int) Load 8(invocation)
+             191:     49(ptr) AccessChain 34(data) 58 37
+             192:  18(i8vec4) Load 191
+             193:  59(i8vec3) VectorShuffle 192 192 0 1 2
+             194:  59(i8vec3) GroupNonUniformBitwiseOr 43 ClusteredReduce 193 42
+             195:     49(ptr) AccessChain 34(data) 190 37
+             196:  18(i8vec4) Load 195
+             197:  18(i8vec4) VectorShuffle 196 194 4 5 6 3
+                              Store 195 197
+             198:      6(int) Load 8(invocation)
+             199:     49(ptr) AccessChain 34(data) 68 37
+             200:  18(i8vec4) Load 199
+             201:  18(i8vec4) GroupNonUniformBitwiseOr 43 ClusteredReduce 200 42
+             202:     49(ptr) AccessChain 34(data) 198 37
+                              Store 202 201
+             203:      6(int) Load 8(invocation)
+             204:     39(ptr) AccessChain 34(data) 37 37 38
+             205:  17(int8_t) Load 204
+             206:  17(int8_t) GroupNonUniformBitwiseXor 43 ClusteredReduce 205 42
+             207:     39(ptr) AccessChain 34(data) 203 37 38
+                              Store 207 206
+             208:      6(int) Load 8(invocation)
+             209:     49(ptr) AccessChain 34(data) 47 37
+             210:  18(i8vec4) Load 209
+             211:  48(i8vec2) VectorShuffle 210 210 0 1
+             212:  48(i8vec2) GroupNonUniformBitwiseXor 43 ClusteredReduce 211 42
+             213:     49(ptr) AccessChain 34(data) 208 37
+             214:  18(i8vec4) Load 213
+             215:  18(i8vec4) VectorShuffle 214 212 4 5 2 3
+                              Store 213 215
+             216:      6(int) Load 8(invocation)
+             217:     49(ptr) AccessChain 34(data) 58 37
+             218:  18(i8vec4) Load 217
+             219:  59(i8vec3) VectorShuffle 218 218 0 1 2
+             220:  59(i8vec3) GroupNonUniformBitwiseXor 43 ClusteredReduce 219 42
+             221:     49(ptr) AccessChain 34(data) 216 37
+             222:  18(i8vec4) Load 221
+             223:  18(i8vec4) VectorShuffle 222 220 4 5 6 3
+                              Store 221 223
+             224:      6(int) Load 8(invocation)
+             225:     49(ptr) AccessChain 34(data) 68 37
+             226:  18(i8vec4) Load 225
+             227:  18(i8vec4) GroupNonUniformBitwiseXor 43 ClusteredReduce 226 42
+             228:     49(ptr) AccessChain 34(data) 224 37
+                              Store 228 227
+             229:      6(int) Load 8(invocation)
+             231:    230(ptr) AccessChain 34(data) 37 47 38
+             232:  19(int8_t) Load 231
+             233:  19(int8_t) GroupNonUniformIAdd 43 ClusteredReduce 232 42
+             234:    230(ptr) AccessChain 34(data) 229 47 38
+                              Store 234 233
+             235:      6(int) Load 8(invocation)
+             238:    237(ptr) AccessChain 34(data) 47 47
+             239:  20(i8vec4) Load 238
+             240: 236(i8vec2) VectorShuffle 239 239 0 1
+             241: 236(i8vec2) GroupNonUniformIAdd 43 ClusteredReduce 240 42
+             242:    237(ptr) AccessChain 34(data) 235 47
+             243:  20(i8vec4) Load 242
+             244:  20(i8vec4) VectorShuffle 243 241 4 5 2 3
+                              Store 242 244
+             245:      6(int) Load 8(invocation)
+             247:    237(ptr) AccessChain 34(data) 58 47
+             248:  20(i8vec4) Load 247
+             249: 246(i8vec3) VectorShuffle 248 248 0 1 2
+             250: 246(i8vec3) GroupNonUniformIAdd 43 ClusteredReduce 249 42
+             251:    237(ptr) AccessChain 34(data) 245 47
+             252:  20(i8vec4) Load 251
+             253:  20(i8vec4) VectorShuffle 252 250 4 5 6 3
+                              Store 251 253
+             254:      6(int) Load 8(invocation)
+             255:    237(ptr) AccessChain 34(data) 68 47
+             256:  20(i8vec4) Load 255
+             257:  20(i8vec4) GroupNonUniformIAdd 43 ClusteredReduce 256 42
+             258:    237(ptr) AccessChain 34(data) 254 47
+                              Store 258 257
+             259:      6(int) Load 8(invocation)
+             260:    230(ptr) AccessChain 34(data) 37 47 38
+             261:  19(int8_t) Load 260
+             262:  19(int8_t) GroupNonUniformIMul 43 ClusteredReduce 261 42
+             263:    230(ptr) AccessChain 34(data) 259 47 38
+                              Store 263 262
+             264:      6(int) Load 8(invocation)
+             265:    237(ptr) AccessChain 34(data) 47 47
+             266:  20(i8vec4) Load 265
+             267: 236(i8vec2) VectorShuffle 266 266 0 1
+             268: 236(i8vec2) GroupNonUniformIMul 43 ClusteredReduce 267 42
+             269:    237(ptr) AccessChain 34(data) 264 47
+             270:  20(i8vec4) Load 269
+             271:  20(i8vec4) VectorShuffle 270 268 4 5 2 3
+                              Store 269 271
+             272:      6(int) Load 8(invocation)
+             273:    237(ptr) AccessChain 34(data) 58 47
+             274:  20(i8vec4) Load 273
+             275: 246(i8vec3) VectorShuffle 274 274 0 1 2
+             276: 246(i8vec3) GroupNonUniformIMul 43 ClusteredReduce 275 42
+             277:    237(ptr) AccessChain 34(data) 272 47
+             278:  20(i8vec4) Load 277
+             279:  20(i8vec4) VectorShuffle 278 276 4 5 6 3
+                              Store 277 279
+             280:      6(int) Load 8(invocation)
+             281:    237(ptr) AccessChain 34(data) 68 47
+             282:  20(i8vec4) Load 281
+             283:  20(i8vec4) GroupNonUniformIMul 43 ClusteredReduce 282 42
+             284:    237(ptr) AccessChain 34(data) 280 47
+                              Store 284 283
+             285:      6(int) Load 8(invocation)
+             286:    230(ptr) AccessChain 34(data) 37 47 38
+             287:  19(int8_t) Load 286
+             288:  19(int8_t) GroupNonUniformUMin 43 ClusteredReduce 287 42
+             289:    230(ptr) AccessChain 34(data) 285 47 38
+                              Store 289 288
+             290:      6(int) Load 8(invocation)
+             291:    237(ptr) AccessChain 34(data) 47 47
+             292:  20(i8vec4) Load 291
+             293: 236(i8vec2) VectorShuffle 292 292 0 1
+             294: 236(i8vec2) GroupNonUniformUMin 43 ClusteredReduce 293 42
+             295:    237(ptr) AccessChain 34(data) 290 47
+             296:  20(i8vec4) Load 295
+             297:  20(i8vec4) VectorShuffle 296 294 4 5 2 3
+                              Store 295 297
+             298:      6(int) Load 8(invocation)
+             299:    237(ptr) AccessChain 34(data) 58 47
+             300:  20(i8vec4) Load 299
+             301: 246(i8vec3) VectorShuffle 300 300 0 1 2
+             302: 246(i8vec3) GroupNonUniformUMin 43 ClusteredReduce 301 42
+             303:    237(ptr) AccessChain 34(data) 298 47
+             304:  20(i8vec4) Load 303
+             305:  20(i8vec4) VectorShuffle 304 302 4 5 6 3
+                              Store 303 305
+             306:      6(int) Load 8(invocation)
+             307:    237(ptr) AccessChain 34(data) 68 47
+             308:  20(i8vec4) Load 307
+             309:  20(i8vec4) GroupNonUniformUMin 43 ClusteredReduce 308 42
+             310:    237(ptr) AccessChain 34(data) 306 47
+                              Store 310 309
+             311:      6(int) Load 8(invocation)
+             312:    230(ptr) AccessChain 34(data) 37 47 38
+             313:  19(int8_t) Load 312
+             314:  19(int8_t) GroupNonUniformUMax 43 ClusteredReduce 313 42
+             315:    230(ptr) AccessChain 34(data) 311 47 38
+                              Store 315 314
+             316:      6(int) Load 8(invocation)
+             317:    237(ptr) AccessChain 34(data) 47 47
+             318:  20(i8vec4) Load 317
+             319: 236(i8vec2) VectorShuffle 318 318 0 1
+             320: 236(i8vec2) GroupNonUniformUMax 43 ClusteredReduce 319 42
+             321:    237(ptr) AccessChain 34(data) 316 47
+             322:  20(i8vec4) Load 321
+             323:  20(i8vec4) VectorShuffle 322 320 4 5 2 3
+                              Store 321 323
+             324:      6(int) Load 8(invocation)
+             325:    237(ptr) AccessChain 34(data) 58 47
+             326:  20(i8vec4) Load 325
+             327: 246(i8vec3) VectorShuffle 326 326 0 1 2
+             328: 246(i8vec3) GroupNonUniformUMax 43 ClusteredReduce 327 42
+             329:    237(ptr) AccessChain 34(data) 324 47
+             330:  20(i8vec4) Load 329
+             331:  20(i8vec4) VectorShuffle 330 328 4 5 6 3
+                              Store 329 331
+             332:      6(int) Load 8(invocation)
+             333:    237(ptr) AccessChain 34(data) 68 47
+             334:  20(i8vec4) Load 333
+             335:  20(i8vec4) GroupNonUniformUMax 43 ClusteredReduce 334 42
+             336:    237(ptr) AccessChain 34(data) 332 47
+                              Store 336 335
+             337:      6(int) Load 8(invocation)
+             338:    230(ptr) AccessChain 34(data) 37 47 38
+             339:  19(int8_t) Load 338
+             340:  19(int8_t) GroupNonUniformBitwiseAnd 43 ClusteredReduce 339 42
+             341:    230(ptr) AccessChain 34(data) 337 47 38
+                              Store 341 340
+             342:      6(int) Load 8(invocation)
+             343:    237(ptr) AccessChain 34(data) 47 47
+             344:  20(i8vec4) Load 343
+             345: 236(i8vec2) VectorShuffle 344 344 0 1
+             346: 236(i8vec2) GroupNonUniformBitwiseAnd 43 ClusteredReduce 345 42
+             347:    237(ptr) AccessChain 34(data) 342 47
+             348:  20(i8vec4) Load 347
+             349:  20(i8vec4) VectorShuffle 348 346 4 5 2 3
+                              Store 347 349
+             350:      6(int) Load 8(invocation)
+             351:    237(ptr) AccessChain 34(data) 58 47
+             352:  20(i8vec4) Load 351
+             353: 246(i8vec3) VectorShuffle 352 352 0 1 2
+             354: 246(i8vec3) GroupNonUniformBitwiseAnd 43 ClusteredReduce 353 42
+             355:    237(ptr) AccessChain 34(data) 350 47
+             356:  20(i8vec4) Load 355
+             357:  20(i8vec4) VectorShuffle 356 354 4 5 6 3
+                              Store 355 357
+             358:      6(int) Load 8(invocation)
+             359:    237(ptr) AccessChain 34(data) 68 47
+             360:  20(i8vec4) Load 359
+             361:  20(i8vec4) GroupNonUniformBitwiseAnd 43 ClusteredReduce 360 42
+             362:    237(ptr) AccessChain 34(data) 358 47
+                              Store 362 361
+             363:      6(int) Load 8(invocation)
+             364:    230(ptr) AccessChain 34(data) 37 47 38
+             365:  19(int8_t) Load 364
+             366:  19(int8_t) GroupNonUniformBitwiseOr 43 ClusteredReduce 365 42
+             367:    230(ptr) AccessChain 34(data) 363 47 38
+                              Store 367 366
+             368:      6(int) Load 8(invocation)
+             369:    237(ptr) AccessChain 34(data) 47 47
+             370:  20(i8vec4) Load 369
+             371: 236(i8vec2) VectorShuffle 370 370 0 1
+             372: 236(i8vec2) GroupNonUniformBitwiseOr 43 ClusteredReduce 371 42
+             373:    237(ptr) AccessChain 34(data) 368 47
+             374:  20(i8vec4) Load 373
+             375:  20(i8vec4) VectorShuffle 374 372 4 5 2 3
+                              Store 373 375
+             376:      6(int) Load 8(invocation)
+             377:    237(ptr) AccessChain 34(data) 58 47
+             378:  20(i8vec4) Load 377
+             379: 246(i8vec3) VectorShuffle 378 378 0 1 2
+             380: 246(i8vec3) GroupNonUniformBitwiseOr 43 ClusteredReduce 379 42
+             381:    237(ptr) AccessChain 34(data) 376 47
+             382:  20(i8vec4) Load 381
+             383:  20(i8vec4) VectorShuffle 382 380 4 5 6 3
+                              Store 381 383
+             384:      6(int) Load 8(invocation)
+             385:    237(ptr) AccessChain 34(data) 68 47
+             386:  20(i8vec4) Load 385
+             387:  20(i8vec4) GroupNonUniformBitwiseOr 43 ClusteredReduce 386 42
+             388:    237(ptr) AccessChain 34(data) 384 47
+                              Store 388 387
+             389:      6(int) Load 8(invocation)
+             390:    230(ptr) AccessChain 34(data) 37 47 38
+             391:  19(int8_t) Load 390
+             392:  19(int8_t) GroupNonUniformBitwiseXor 43 ClusteredReduce 391 42
+             393:    230(ptr) AccessChain 34(data) 389 47 38
+                              Store 393 392
+             394:      6(int) Load 8(invocation)
+             395:    237(ptr) AccessChain 34(data) 47 47
+             396:  20(i8vec4) Load 395
+             397: 236(i8vec2) VectorShuffle 396 396 0 1
+             398: 236(i8vec2) GroupNonUniformBitwiseXor 43 ClusteredReduce 397 42
+             399:    237(ptr) AccessChain 34(data) 394 47
+             400:  20(i8vec4) Load 399
+             401:  20(i8vec4) VectorShuffle 400 398 4 5 2 3
+                              Store 399 401
+             402:      6(int) Load 8(invocation)
+             403:    237(ptr) AccessChain 34(data) 58 47
+             404:  20(i8vec4) Load 403
+             405: 246(i8vec3) VectorShuffle 404 404 0 1 2
+             406: 246(i8vec3) GroupNonUniformBitwiseXor 43 ClusteredReduce 405 42
+             407:    237(ptr) AccessChain 34(data) 402 47
+             408:  20(i8vec4) Load 407
+             409:  20(i8vec4) VectorShuffle 408 406 4 5 6 3
+                              Store 407 409
+             410:      6(int) Load 8(invocation)
+             411:    237(ptr) AccessChain 34(data) 68 47
+             412:  20(i8vec4) Load 411
+             413:  20(i8vec4) GroupNonUniformBitwiseXor 43 ClusteredReduce 412 42
+             414:    237(ptr) AccessChain 34(data) 410 47
+                              Store 414 413
+             415:      6(int) Load 8(invocation)
+             417:    416(ptr) AccessChain 34(data) 37 58 38
+             418: 21(int16_t) Load 417
+             419: 21(int16_t) GroupNonUniformIAdd 43 ClusteredReduce 418 42
+             420:    416(ptr) AccessChain 34(data) 415 58 38
+                              Store 420 419
+             421:      6(int) Load 8(invocation)
+             424:    423(ptr) AccessChain 34(data) 47 58
+             425: 22(i16vec4) Load 424
+             426:422(i16vec2) VectorShuffle 425 425 0 1
+             427:422(i16vec2) GroupNonUniformIAdd 43 ClusteredReduce 426 42
+             428:    423(ptr) AccessChain 34(data) 421 58
+             429: 22(i16vec4) Load 428
+             430: 22(i16vec4) VectorShuffle 429 427 4 5 2 3
+                              Store 428 430
+             431:      6(int) Load 8(invocation)
+             433:    423(ptr) AccessChain 34(data) 58 58
+             434: 22(i16vec4) Load 433
+             435:432(i16vec3) VectorShuffle 434 434 0 1 2
+             436:432(i16vec3) GroupNonUniformIAdd 43 ClusteredReduce 435 42
+             437:    423(ptr) AccessChain 34(data) 431 58
+             438: 22(i16vec4) Load 437
+             439: 22(i16vec4) VectorShuffle 438 436 4 5 6 3
+                              Store 437 439
+             440:      6(int) Load 8(invocation)
+             441:    423(ptr) AccessChain 34(data) 68 58
+             442: 22(i16vec4) Load 441
+             443: 22(i16vec4) GroupNonUniformIAdd 43 ClusteredReduce 442 42
+             444:    423(ptr) AccessChain 34(data) 440 58
+                              Store 444 443
+             445:      6(int) Load 8(invocation)
+             446:    416(ptr) AccessChain 34(data) 37 58 38
+             447: 21(int16_t) Load 446
+             448: 21(int16_t) GroupNonUniformIMul 43 ClusteredReduce 447 42
+             449:    416(ptr) AccessChain 34(data) 445 58 38
+                              Store 449 448
+             450:      6(int) Load 8(invocation)
+             451:    423(ptr) AccessChain 34(data) 47 58
+             452: 22(i16vec4) Load 451
+             453:422(i16vec2) VectorShuffle 452 452 0 1
+             454:422(i16vec2) GroupNonUniformIMul 43 ClusteredReduce 453 42
+             455:    423(ptr) AccessChain 34(data) 450 58
+             456: 22(i16vec4) Load 455
+             457: 22(i16vec4) VectorShuffle 456 454 4 5 2 3
+                              Store 455 457
+             458:      6(int) Load 8(invocation)
+             459:    423(ptr) AccessChain 34(data) 58 58
+             460: 22(i16vec4) Load 459
+             461:432(i16vec3) VectorShuffle 460 460 0 1 2
+             462:432(i16vec3) GroupNonUniformIMul 43 ClusteredReduce 461 42
+             463:    423(ptr) AccessChain 34(data) 458 58
+             464: 22(i16vec4) Load 463
+             465: 22(i16vec4) VectorShuffle 464 462 4 5 6 3
+                              Store 463 465
+             466:      6(int) Load 8(invocation)
+             467:    423(ptr) AccessChain 34(data) 68 58
+             468: 22(i16vec4) Load 467
+             469: 22(i16vec4) GroupNonUniformIMul 43 ClusteredReduce 468 42
+             470:    423(ptr) AccessChain 34(data) 466 58
+                              Store 470 469
+             471:      6(int) Load 8(invocation)
+             472:    416(ptr) AccessChain 34(data) 37 58 38
+             473: 21(int16_t) Load 472
+             474: 21(int16_t) GroupNonUniformSMin 43 ClusteredReduce 473 42
+             475:    416(ptr) AccessChain 34(data) 471 58 38
+                              Store 475 474
+             476:      6(int) Load 8(invocation)
+             477:    423(ptr) AccessChain 34(data) 47 58
+             478: 22(i16vec4) Load 477
+             479:422(i16vec2) VectorShuffle 478 478 0 1
+             480:422(i16vec2) GroupNonUniformSMin 43 ClusteredReduce 479 42
+             481:    423(ptr) AccessChain 34(data) 476 58
+             482: 22(i16vec4) Load 481
+             483: 22(i16vec4) VectorShuffle 482 480 4 5 2 3
+                              Store 481 483
+             484:      6(int) Load 8(invocation)
+             485:    423(ptr) AccessChain 34(data) 58 58
+             486: 22(i16vec4) Load 485
+             487:432(i16vec3) VectorShuffle 486 486 0 1 2
+             488:432(i16vec3) GroupNonUniformSMin 43 ClusteredReduce 487 42
+             489:    423(ptr) AccessChain 34(data) 484 58
+             490: 22(i16vec4) Load 489
+             491: 22(i16vec4) VectorShuffle 490 488 4 5 6 3
+                              Store 489 491
+             492:      6(int) Load 8(invocation)
+             493:    423(ptr) AccessChain 34(data) 68 58
+             494: 22(i16vec4) Load 493
+             495: 22(i16vec4) GroupNonUniformSMin 43 ClusteredReduce 494 42
+             496:    423(ptr) AccessChain 34(data) 492 58
+                              Store 496 495
+             497:      6(int) Load 8(invocation)
+             498:    416(ptr) AccessChain 34(data) 37 58 38
+             499: 21(int16_t) Load 498
+             500: 21(int16_t) GroupNonUniformSMax 43 ClusteredReduce 499 42
+             501:    416(ptr) AccessChain 34(data) 497 58 38
+                              Store 501 500
+             502:      6(int) Load 8(invocation)
+             503:    423(ptr) AccessChain 34(data) 47 58
+             504: 22(i16vec4) Load 503
+             505:422(i16vec2) VectorShuffle 504 504 0 1
+             506:422(i16vec2) GroupNonUniformSMax 43 ClusteredReduce 505 42
+             507:    423(ptr) AccessChain 34(data) 502 58
+             508: 22(i16vec4) Load 507
+             509: 22(i16vec4) VectorShuffle 508 506 4 5 2 3
+                              Store 507 509
+             510:      6(int) Load 8(invocation)
+             511:    423(ptr) AccessChain 34(data) 58 58
+             512: 22(i16vec4) Load 511
+             513:432(i16vec3) VectorShuffle 512 512 0 1 2
+             514:432(i16vec3) GroupNonUniformSMax 43 ClusteredReduce 513 42
+             515:    423(ptr) AccessChain 34(data) 510 58
+             516: 22(i16vec4) Load 515
+             517: 22(i16vec4) VectorShuffle 516 514 4 5 6 3
+                              Store 515 517
+             518:      6(int) Load 8(invocation)
+             519:    423(ptr) AccessChain 34(data) 68 58
+             520: 22(i16vec4) Load 519
+             521: 22(i16vec4) GroupNonUniformSMax 43 ClusteredReduce 520 42
+             522:    423(ptr) AccessChain 34(data) 518 58
+                              Store 522 521
+             523:      6(int) Load 8(invocation)
+             524:    416(ptr) AccessChain 34(data) 37 58 38
+             525: 21(int16_t) Load 524
+             526: 21(int16_t) GroupNonUniformBitwiseAnd 43 ClusteredReduce 525 42
+             527:    416(ptr) AccessChain 34(data) 523 58 38
+                              Store 527 526
+             528:      6(int) Load 8(invocation)
+             529:    423(ptr) AccessChain 34(data) 47 58
+             530: 22(i16vec4) Load 529
+             531:422(i16vec2) VectorShuffle 530 530 0 1
+             532:422(i16vec2) GroupNonUniformBitwiseAnd 43 ClusteredReduce 531 42
+             533:    423(ptr) AccessChain 34(data) 528 58
+             534: 22(i16vec4) Load 533
+             535: 22(i16vec4) VectorShuffle 534 532 4 5 2 3
+                              Store 533 535
+             536:      6(int) Load 8(invocation)
+             537:    423(ptr) AccessChain 34(data) 58 58
+             538: 22(i16vec4) Load 537
+             539:432(i16vec3) VectorShuffle 538 538 0 1 2
+             540:432(i16vec3) GroupNonUniformBitwiseAnd 43 ClusteredReduce 539 42
+             541:    423(ptr) AccessChain 34(data) 536 58
+             542: 22(i16vec4) Load 541
+             543: 22(i16vec4) VectorShuffle 542 540 4 5 6 3
+                              Store 541 543
+             544:      6(int) Load 8(invocation)
+             545:    423(ptr) AccessChain 34(data) 68 58
+             546: 22(i16vec4) Load 545
+             547: 22(i16vec4) GroupNonUniformBitwiseAnd 43 ClusteredReduce 546 42
+             548:    423(ptr) AccessChain 34(data) 544 58
+                              Store 548 547
+             549:      6(int) Load 8(invocation)
+             550:    416(ptr) AccessChain 34(data) 37 58 38
+             551: 21(int16_t) Load 550
+             552: 21(int16_t) GroupNonUniformBitwiseOr 43 ClusteredReduce 551 42
+             553:    416(ptr) AccessChain 34(data) 549 58 38
+                              Store 553 552
+             554:      6(int) Load 8(invocation)
+             555:    423(ptr) AccessChain 34(data) 47 58
+             556: 22(i16vec4) Load 555
+             557:422(i16vec2) VectorShuffle 556 556 0 1
+             558:422(i16vec2) GroupNonUniformBitwiseOr 43 ClusteredReduce 557 42
+             559:    423(ptr) AccessChain 34(data) 554 58
+             560: 22(i16vec4) Load 559
+             561: 22(i16vec4) VectorShuffle 560 558 4 5 2 3
+                              Store 559 561
+             562:      6(int) Load 8(invocation)
+             563:    423(ptr) AccessChain 34(data) 58 58
+             564: 22(i16vec4) Load 563
+             565:432(i16vec3) VectorShuffle 564 564 0 1 2
+             566:432(i16vec3) GroupNonUniformBitwiseOr 43 ClusteredReduce 565 42
+             567:    423(ptr) AccessChain 34(data) 562 58
+             568: 22(i16vec4) Load 567
+             569: 22(i16vec4) VectorShuffle 568 566 4 5 6 3
+                              Store 567 569
+             570:      6(int) Load 8(invocation)
+             571:    423(ptr) AccessChain 34(data) 68 58
+             572: 22(i16vec4) Load 571
+             573: 22(i16vec4) GroupNonUniformBitwiseOr 43 ClusteredReduce 572 42
+             574:    423(ptr) AccessChain 34(data) 570 58
+                              Store 574 573
+             575:      6(int) Load 8(invocation)
+             576:    416(ptr) AccessChain 34(data) 37 58 38
+             577: 21(int16_t) Load 576
+             578: 21(int16_t) GroupNonUniformBitwiseXor 43 ClusteredReduce 577 42
+             579:    416(ptr) AccessChain 34(data) 575 58 38
+                              Store 579 578
+             580:      6(int) Load 8(invocation)
+             581:    423(ptr) AccessChain 34(data) 47 58
+             582: 22(i16vec4) Load 581
+             583:422(i16vec2) VectorShuffle 582 582 0 1
+             584:422(i16vec2) GroupNonUniformBitwiseXor 43 ClusteredReduce 583 42
+             585:    423(ptr) AccessChain 34(data) 580 58
+             586: 22(i16vec4) Load 585
+             587: 22(i16vec4) VectorShuffle 586 584 4 5 2 3
+                              Store 585 587
+             588:      6(int) Load 8(invocation)
+             589:    423(ptr) AccessChain 34(data) 58 58
+             590: 22(i16vec4) Load 589
+             591:432(i16vec3) VectorShuffle 590 590 0 1 2
+             592:432(i16vec3) GroupNonUniformBitwiseXor 43 ClusteredReduce 591 42
+             593:    423(ptr) AccessChain 34(data) 588 58
+             594: 22(i16vec4) Load 593
+             595: 22(i16vec4) VectorShuffle 594 592 4 5 6 3
+                              Store 593 595
+             596:      6(int) Load 8(invocation)
+             597:    423(ptr) AccessChain 34(data) 68 58
+             598: 22(i16vec4) Load 597
+             599: 22(i16vec4) GroupNonUniformBitwiseXor 43 ClusteredReduce 598 42
+             600:    423(ptr) AccessChain 34(data) 596 58
+                              Store 600 599
+             601:      6(int) Load 8(invocation)
+             603:    602(ptr) AccessChain 34(data) 37 68 38
+             604: 23(int16_t) Load 603
+             605: 23(int16_t) GroupNonUniformIAdd 43 ClusteredReduce 604 42
+             606:    602(ptr) AccessChain 34(data) 601 68 38
+                              Store 606 605
+             607:      6(int) Load 8(invocation)
+             610:    609(ptr) AccessChain 34(data) 47 68
+             611: 24(i16vec4) Load 610
+             612:608(i16vec2) VectorShuffle 611 611 0 1
+             613:608(i16vec2) GroupNonUniformIAdd 43 ClusteredReduce 612 42
+             614:    609(ptr) AccessChain 34(data) 607 68
+             615: 24(i16vec4) Load 614
+             616: 24(i16vec4) VectorShuffle 615 613 4 5 2 3
+                              Store 614 616
+             617:      6(int) Load 8(invocation)
+             619:    609(ptr) AccessChain 34(data) 58 68
+             620: 24(i16vec4) Load 619
+             621:618(i16vec3) VectorShuffle 620 620 0 1 2
+             622:618(i16vec3) GroupNonUniformIAdd 43 ClusteredReduce 621 42
+             623:    609(ptr) AccessChain 34(data) 617 68
+             624: 24(i16vec4) Load 623
+             625: 24(i16vec4) VectorShuffle 624 622 4 5 6 3
+                              Store 623 625
+             626:      6(int) Load 8(invocation)
+             627:    609(ptr) AccessChain 34(data) 68 68
+             628: 24(i16vec4) Load 627
+             629: 24(i16vec4) GroupNonUniformIAdd 43 ClusteredReduce 628 42
+             630:    609(ptr) AccessChain 34(data) 626 68
+                              Store 630 629
+             631:      6(int) Load 8(invocation)
+             632:    602(ptr) AccessChain 34(data) 37 68 38
+             633: 23(int16_t) Load 632
+             634: 23(int16_t) GroupNonUniformIMul 43 ClusteredReduce 633 42
+             635:    602(ptr) AccessChain 34(data) 631 68 38
+                              Store 635 634
+             636:      6(int) Load 8(invocation)
+             637:    609(ptr) AccessChain 34(data) 47 68
+             638: 24(i16vec4) Load 637
+             639:608(i16vec2) VectorShuffle 638 638 0 1
+             640:608(i16vec2) GroupNonUniformIMul 43 ClusteredReduce 639 42
+             641:    609(ptr) AccessChain 34(data) 636 68
+             642: 24(i16vec4) Load 641
+             643: 24(i16vec4) VectorShuffle 642 640 4 5 2 3
+                              Store 641 643
+             644:      6(int) Load 8(invocation)
+             645:    609(ptr) AccessChain 34(data) 58 68
+             646: 24(i16vec4) Load 645
+             647:618(i16vec3) VectorShuffle 646 646 0 1 2
+             648:618(i16vec3) GroupNonUniformIMul 43 ClusteredReduce 647 42
+             649:    609(ptr) AccessChain 34(data) 644 68
+             650: 24(i16vec4) Load 649
+             651: 24(i16vec4) VectorShuffle 650 648 4 5 6 3
+                              Store 649 651
+             652:      6(int) Load 8(invocation)
+             653:    609(ptr) AccessChain 34(data) 68 68
+             654: 24(i16vec4) Load 653
+             655: 24(i16vec4) GroupNonUniformIMul 43 ClusteredReduce 654 42
+             656:    609(ptr) AccessChain 34(data) 652 68
+                              Store 656 655
+             657:      6(int) Load 8(invocation)
+             658:    602(ptr) AccessChain 34(data) 37 68 38
+             659: 23(int16_t) Load 658
+             660: 23(int16_t) GroupNonUniformUMin 43 ClusteredReduce 659 42
+             661:    602(ptr) AccessChain 34(data) 657 68 38
+                              Store 661 660
+             662:      6(int) Load 8(invocation)
+             663:    609(ptr) AccessChain 34(data) 47 68
+             664: 24(i16vec4) Load 663
+             665:608(i16vec2) VectorShuffle 664 664 0 1
+             666:608(i16vec2) GroupNonUniformUMin 43 ClusteredReduce 665 42
+             667:    609(ptr) AccessChain 34(data) 662 68
+             668: 24(i16vec4) Load 667
+             669: 24(i16vec4) VectorShuffle 668 666 4 5 2 3
+                              Store 667 669
+             670:      6(int) Load 8(invocation)
+             671:    609(ptr) AccessChain 34(data) 58 68
+             672: 24(i16vec4) Load 671
+             673:618(i16vec3) VectorShuffle 672 672 0 1 2
+             674:618(i16vec3) GroupNonUniformUMin 43 ClusteredReduce 673 42
+             675:    609(ptr) AccessChain 34(data) 670 68
+             676: 24(i16vec4) Load 675
+             677: 24(i16vec4) VectorShuffle 676 674 4 5 6 3
+                              Store 675 677
+             678:      6(int) Load 8(invocation)
+             679:    609(ptr) AccessChain 34(data) 68 68
+             680: 24(i16vec4) Load 679
+             681: 24(i16vec4) GroupNonUniformUMin 43 ClusteredReduce 680 42
+             682:    609(ptr) AccessChain 34(data) 678 68
+                              Store 682 681
+             683:      6(int) Load 8(invocation)
+             684:    602(ptr) AccessChain 34(data) 37 68 38
+             685: 23(int16_t) Load 684
+             686: 23(int16_t) GroupNonUniformUMax 43 ClusteredReduce 685 42
+             687:    602(ptr) AccessChain 34(data) 683 68 38
+                              Store 687 686
+             688:      6(int) Load 8(invocation)
+             689:    609(ptr) AccessChain 34(data) 47 68
+             690: 24(i16vec4) Load 689
+             691:608(i16vec2) VectorShuffle 690 690 0 1
+             692:608(i16vec2) GroupNonUniformUMax 43 ClusteredReduce 691 42
+             693:    609(ptr) AccessChain 34(data) 688 68
+             694: 24(i16vec4) Load 693
+             695: 24(i16vec4) VectorShuffle 694 692 4 5 2 3
+                              Store 693 695
+             696:      6(int) Load 8(invocation)
+             697:    609(ptr) AccessChain 34(data) 58 68
+             698: 24(i16vec4) Load 697
+             699:618(i16vec3) VectorShuffle 698 698 0 1 2
+             700:618(i16vec3) GroupNonUniformUMax 43 ClusteredReduce 699 42
+             701:    609(ptr) AccessChain 34(data) 696 68
+             702: 24(i16vec4) Load 701
+             703: 24(i16vec4) VectorShuffle 702 700 4 5 6 3
+                              Store 701 703
+             704:      6(int) Load 8(invocation)
+             705:    609(ptr) AccessChain 34(data) 68 68
+             706: 24(i16vec4) Load 705
+             707: 24(i16vec4) GroupNonUniformUMax 43 ClusteredReduce 706 42
+             708:    609(ptr) AccessChain 34(data) 704 68
+                              Store 708 707
+             709:      6(int) Load 8(invocation)
+             710:    602(ptr) AccessChain 34(data) 37 68 38
+             711: 23(int16_t) Load 710
+             712: 23(int16_t) GroupNonUniformBitwiseAnd 43 ClusteredReduce 711 42
+             713:    602(ptr) AccessChain 34(data) 709 68 38
+                              Store 713 712
+             714:      6(int) Load 8(invocation)
+             715:    609(ptr) AccessChain 34(data) 47 68
+             716: 24(i16vec4) Load 715
+             717:608(i16vec2) VectorShuffle 716 716 0 1
+             718:608(i16vec2) GroupNonUniformBitwiseAnd 43 ClusteredReduce 717 42
+             719:    609(ptr) AccessChain 34(data) 714 68
+             720: 24(i16vec4) Load 719
+             721: 24(i16vec4) VectorShuffle 720 718 4 5 2 3
+                              Store 719 721
+             722:      6(int) Load 8(invocation)
+             723:    609(ptr) AccessChain 34(data) 58 68
+             724: 24(i16vec4) Load 723
+             725:618(i16vec3) VectorShuffle 724 724 0 1 2
+             726:618(i16vec3) GroupNonUniformBitwiseAnd 43 ClusteredReduce 725 42
+             727:    609(ptr) AccessChain 34(data) 722 68
+             728: 24(i16vec4) Load 727
+             729: 24(i16vec4) VectorShuffle 728 726 4 5 6 3
+                              Store 727 729
+             730:      6(int) Load 8(invocation)
+             731:    609(ptr) AccessChain 34(data) 68 68
+             732: 24(i16vec4) Load 731
+             733: 24(i16vec4) GroupNonUniformBitwiseAnd 43 ClusteredReduce 732 42
+             734:    609(ptr) AccessChain 34(data) 730 68
+                              Store 734 733
+             735:      6(int) Load 8(invocation)
+             736:    602(ptr) AccessChain 34(data) 37 68 38
+             737: 23(int16_t) Load 736
+             738: 23(int16_t) GroupNonUniformBitwiseOr 43 ClusteredReduce 737 42
+             739:    602(ptr) AccessChain 34(data) 735 68 38
+                              Store 739 738
+             740:      6(int) Load 8(invocation)
+             741:    609(ptr) AccessChain 34(data) 47 68
+             742: 24(i16vec4) Load 741
+             743:608(i16vec2) VectorShuffle 742 742 0 1
+             744:608(i16vec2) GroupNonUniformBitwiseOr 43 ClusteredReduce 743 42
+             745:    609(ptr) AccessChain 34(data) 740 68
+             746: 24(i16vec4) Load 745
+             747: 24(i16vec4) VectorShuffle 746 744 4 5 2 3
+                              Store 745 747
+             748:      6(int) Load 8(invocation)
+             749:    609(ptr) AccessChain 34(data) 58 68
+             750: 24(i16vec4) Load 749
+             751:618(i16vec3) VectorShuffle 750 750 0 1 2
+             752:618(i16vec3) GroupNonUniformBitwiseOr 43 ClusteredReduce 751 42
+             753:    609(ptr) AccessChain 34(data) 748 68
+             754: 24(i16vec4) Load 753
+             755: 24(i16vec4) VectorShuffle 754 752 4 5 6 3
+                              Store 753 755
+             756:      6(int) Load 8(invocation)
+             757:    609(ptr) AccessChain 34(data) 68 68
+             758: 24(i16vec4) Load 757
+             759: 24(i16vec4) GroupNonUniformBitwiseOr 43 ClusteredReduce 758 42
+             760:    609(ptr) AccessChain 34(data) 756 68
+                              Store 760 759
+             761:      6(int) Load 8(invocation)
+             762:    602(ptr) AccessChain 34(data) 37 68 38
+             763: 23(int16_t) Load 762
+             764: 23(int16_t) GroupNonUniformBitwiseXor 43 ClusteredReduce 763 42
+             765:    602(ptr) AccessChain 34(data) 761 68 38
+                              Store 765 764
+             766:      6(int) Load 8(invocation)
+             767:    609(ptr) AccessChain 34(data) 47 68
+             768: 24(i16vec4) Load 767
+             769:608(i16vec2) VectorShuffle 768 768 0 1
+             770:608(i16vec2) GroupNonUniformBitwiseXor 43 ClusteredReduce 769 42
+             771:    609(ptr) AccessChain 34(data) 766 68
+             772: 24(i16vec4) Load 771
+             773: 24(i16vec4) VectorShuffle 772 770 4 5 2 3
+                              Store 771 773
+             774:      6(int) Load 8(invocation)
+             775:    609(ptr) AccessChain 34(data) 58 68
+             776: 24(i16vec4) Load 775
+             777:618(i16vec3) VectorShuffle 776 776 0 1 2
+             778:618(i16vec3) GroupNonUniformBitwiseXor 43 ClusteredReduce 777 42
+             779:    609(ptr) AccessChain 34(data) 774 68
+             780: 24(i16vec4) Load 779
+             781: 24(i16vec4) VectorShuffle 780 778 4 5 6 3
+                              Store 779 781
+             782:      6(int) Load 8(invocation)
+             783:    609(ptr) AccessChain 34(data) 68 68
+             784: 24(i16vec4) Load 783
+             785: 24(i16vec4) GroupNonUniformBitwiseXor 43 ClusteredReduce 784 42
+             786:    609(ptr) AccessChain 34(data) 782 68
+                              Store 786 785
+             787:      6(int) Load 8(invocation)
+             790:    789(ptr) AccessChain 34(data) 37 788 38
+             791: 25(int64_t) Load 790
+             792: 25(int64_t) GroupNonUniformIAdd 43 ClusteredReduce 791 42
+             793:    789(ptr) AccessChain 34(data) 787 788 38
+                              Store 793 792
+             794:      6(int) Load 8(invocation)
+             797:    796(ptr) AccessChain 34(data) 47 788
+             798: 26(i64vec4) Load 797
+             799:795(i64vec2) VectorShuffle 798 798 0 1
+             800:795(i64vec2) GroupNonUniformIAdd 43 ClusteredReduce 799 42
+             801:    796(ptr) AccessChain 34(data) 794 788
+             802: 26(i64vec4) Load 801
+             803: 26(i64vec4) VectorShuffle 802 800 4 5 2 3
+                              Store 801 803
+             804:      6(int) Load 8(invocation)
+             806:    796(ptr) AccessChain 34(data) 58 788
+             807: 26(i64vec4) Load 806
+             808:805(i64vec3) VectorShuffle 807 807 0 1 2
+             809:805(i64vec3) GroupNonUniformIAdd 43 ClusteredReduce 808 42
+             810:    796(ptr) AccessChain 34(data) 804 788
+             811: 26(i64vec4) Load 810
+             812: 26(i64vec4) VectorShuffle 811 809 4 5 6 3
+                              Store 810 812
+             813:      6(int) Load 8(invocation)
+             814:    796(ptr) AccessChain 34(data) 68 788
+             815: 26(i64vec4) Load 814
+             816: 26(i64vec4) GroupNonUniformIAdd 43 ClusteredReduce 815 42
+             817:    796(ptr) AccessChain 34(data) 813 788
+                              Store 817 816
+             818:      6(int) Load 8(invocation)
+             819:    789(ptr) AccessChain 34(data) 37 788 38
+             820: 25(int64_t) Load 819
+             821: 25(int64_t) GroupNonUniformIMul 43 ClusteredReduce 820 42
+             822:    789(ptr) AccessChain 34(data) 818 788 38
+                              Store 822 821
+             823:      6(int) Load 8(invocation)
+             824:    796(ptr) AccessChain 34(data) 47 788
+             825: 26(i64vec4) Load 824
+             826:795(i64vec2) VectorShuffle 825 825 0 1
+             827:795(i64vec2) GroupNonUniformIMul 43 ClusteredReduce 826 42
+             828:    796(ptr) AccessChain 34(data) 823 788
+             829: 26(i64vec4) Load 828
+             830: 26(i64vec4) VectorShuffle 829 827 4 5 2 3
+                              Store 828 830
+             831:      6(int) Load 8(invocation)
+             832:    796(ptr) AccessChain 34(data) 58 788
+             833: 26(i64vec4) Load 832
+             834:805(i64vec3) VectorShuffle 833 833 0 1 2
+             835:805(i64vec3) GroupNonUniformIMul 43 ClusteredReduce 834 42
+             836:    796(ptr) AccessChain 34(data) 831 788
+             837: 26(i64vec4) Load 836
+             838: 26(i64vec4) VectorShuffle 837 835 4 5 6 3
+                              Store 836 838
+             839:      6(int) Load 8(invocation)
+             840:    796(ptr) AccessChain 34(data) 68 788
+             841: 26(i64vec4) Load 840
+             842: 26(i64vec4) GroupNonUniformIMul 43 ClusteredReduce 841 42
+             843:    796(ptr) AccessChain 34(data) 839 788
+                              Store 843 842
+             844:      6(int) Load 8(invocation)
+             845:    789(ptr) AccessChain 34(data) 37 788 38
+             846: 25(int64_t) Load 845
+             847: 25(int64_t) GroupNonUniformSMin 43 ClusteredReduce 846 42
+             848:    789(ptr) AccessChain 34(data) 844 788 38
+                              Store 848 847
+             849:      6(int) Load 8(invocation)
+             850:    796(ptr) AccessChain 34(data) 47 788
+             851: 26(i64vec4) Load 850
+             852:795(i64vec2) VectorShuffle 851 851 0 1
+             853:795(i64vec2) GroupNonUniformSMin 43 ClusteredReduce 852 42
+             854:    796(ptr) AccessChain 34(data) 849 788
+             855: 26(i64vec4) Load 854
+             856: 26(i64vec4) VectorShuffle 855 853 4 5 2 3
+                              Store 854 856
+             857:      6(int) Load 8(invocation)
+             858:    796(ptr) AccessChain 34(data) 58 788
+             859: 26(i64vec4) Load 858
+             860:805(i64vec3) VectorShuffle 859 859 0 1 2
+             861:805(i64vec3) GroupNonUniformSMin 43 ClusteredReduce 860 42
+             862:    796(ptr) AccessChain 34(data) 857 788
+             863: 26(i64vec4) Load 862
+             864: 26(i64vec4) VectorShuffle 863 861 4 5 6 3
+                              Store 862 864
+             865:      6(int) Load 8(invocation)
+             866:    796(ptr) AccessChain 34(data) 68 788
+             867: 26(i64vec4) Load 866
+             868: 26(i64vec4) GroupNonUniformSMin 43 ClusteredReduce 867 42
+             869:    796(ptr) AccessChain 34(data) 865 788
+                              Store 869 868
+             870:      6(int) Load 8(invocation)
+             871:    789(ptr) AccessChain 34(data) 37 788 38
+             872: 25(int64_t) Load 871
+             873: 25(int64_t) GroupNonUniformSMax 43 ClusteredReduce 872 42
+             874:    789(ptr) AccessChain 34(data) 870 788 38
+                              Store 874 873
+             875:      6(int) Load 8(invocation)
+             876:    796(ptr) AccessChain 34(data) 47 788
+             877: 26(i64vec4) Load 876
+             878:795(i64vec2) VectorShuffle 877 877 0 1
+             879:795(i64vec2) GroupNonUniformSMax 43 ClusteredReduce 878 42
+             880:    796(ptr) AccessChain 34(data) 875 788
+             881: 26(i64vec4) Load 880
+             882: 26(i64vec4) VectorShuffle 881 879 4 5 2 3
+                              Store 880 882
+             883:      6(int) Load 8(invocation)
+             884:    796(ptr) AccessChain 34(data) 58 788
+             885: 26(i64vec4) Load 884
+             886:805(i64vec3) VectorShuffle 885 885 0 1 2
+             887:805(i64vec3) GroupNonUniformSMax 43 ClusteredReduce 886 42
+             888:    796(ptr) AccessChain 34(data) 883 788
+             889: 26(i64vec4) Load 888
+             890: 26(i64vec4) VectorShuffle 889 887 4 5 6 3
+                              Store 888 890
+             891:      6(int) Load 8(invocation)
+             892:    796(ptr) AccessChain 34(data) 68 788
+             893: 26(i64vec4) Load 892
+             894: 26(i64vec4) GroupNonUniformSMax 43 ClusteredReduce 893 42
+             895:    796(ptr) AccessChain 34(data) 891 788
+                              Store 895 894
+             896:      6(int) Load 8(invocation)
+             897:    789(ptr) AccessChain 34(data) 37 788 38
+             898: 25(int64_t) Load 897
+             899: 25(int64_t) GroupNonUniformBitwiseAnd 43 ClusteredReduce 898 42
+             900:    789(ptr) AccessChain 34(data) 896 788 38
+                              Store 900 899
+             901:      6(int) Load 8(invocation)
+             902:    796(ptr) AccessChain 34(data) 47 788
+             903: 26(i64vec4) Load 902
+             904:795(i64vec2) VectorShuffle 903 903 0 1
+             905:795(i64vec2) GroupNonUniformBitwiseAnd 43 ClusteredReduce 904 42
+             906:    796(ptr) AccessChain 34(data) 901 788
+             907: 26(i64vec4) Load 906
+             908: 26(i64vec4) VectorShuffle 907 905 4 5 2 3
+                              Store 906 908
+             909:      6(int) Load 8(invocation)
+             910:    796(ptr) AccessChain 34(data) 58 788
+             911: 26(i64vec4) Load 910
+             912:805(i64vec3) VectorShuffle 911 911 0 1 2
+             913:805(i64vec3) GroupNonUniformBitwiseAnd 43 ClusteredReduce 912 42
+             914:    796(ptr) AccessChain 34(data) 909 788
+             915: 26(i64vec4) Load 914
+             916: 26(i64vec4) VectorShuffle 915 913 4 5 6 3
+                              Store 914 916
+             917:      6(int) Load 8(invocation)
+             918:    796(ptr) AccessChain 34(data) 68 788
+             919: 26(i64vec4) Load 918
+             920: 26(i64vec4) GroupNonUniformBitwiseAnd 43 ClusteredReduce 919 42
+             921:    796(ptr) AccessChain 34(data) 917 788
+                              Store 921 920
+             922:      6(int) Load 8(invocation)
+             923:    789(ptr) AccessChain 34(data) 37 788 38
+             924: 25(int64_t) Load 923
+             925: 25(int64_t) GroupNonUniformBitwiseOr 43 ClusteredReduce 924 42
+             926:    789(ptr) AccessChain 34(data) 922 788 38
+                              Store 926 925
+             927:      6(int) Load 8(invocation)
+             928:    796(ptr) AccessChain 34(data) 47 788
+             929: 26(i64vec4) Load 928
+             930:795(i64vec2) VectorShuffle 929 929 0 1
+             931:795(i64vec2) GroupNonUniformBitwiseOr 43 ClusteredReduce 930 42
+             932:    796(ptr) AccessChain 34(data) 927 788
+             933: 26(i64vec4) Load 932
+             934: 26(i64vec4) VectorShuffle 933 931 4 5 2 3
+                              Store 932 934
+             935:      6(int) Load 8(invocation)
+             936:    796(ptr) AccessChain 34(data) 58 788
+             937: 26(i64vec4) Load 936
+             938:805(i64vec3) VectorShuffle 937 937 0 1 2
+             939:805(i64vec3) GroupNonUniformBitwiseOr 43 ClusteredReduce 938 42
+             940:    796(ptr) AccessChain 34(data) 935 788
+             941: 26(i64vec4) Load 940
+             942: 26(i64vec4) VectorShuffle 941 939 4 5 6 3
+                              Store 940 942
+             943:      6(int) Load 8(invocation)
+             944:    796(ptr) AccessChain 34(data) 68 788
+             945: 26(i64vec4) Load 944
+             946: 26(i64vec4) GroupNonUniformBitwiseOr 43 ClusteredReduce 945 42
+             947:    796(ptr) AccessChain 34(data) 943 788
+                              Store 947 946
+             948:      6(int) Load 8(invocation)
+             949:    789(ptr) AccessChain 34(data) 37 788 38
+             950: 25(int64_t) Load 949
+             951: 25(int64_t) GroupNonUniformBitwiseXor 43 ClusteredReduce 950 42
+             952:    789(ptr) AccessChain 34(data) 948 788 38
+                              Store 952 951
+             953:      6(int) Load 8(invocation)
+             954:    796(ptr) AccessChain 34(data) 47 788
+             955: 26(i64vec4) Load 954
+             956:795(i64vec2) VectorShuffle 955 955 0 1
+             957:795(i64vec2) GroupNonUniformBitwiseXor 43 ClusteredReduce 956 42
+             958:    796(ptr) AccessChain 34(data) 953 788
+             959: 26(i64vec4) Load 958
+             960: 26(i64vec4) VectorShuffle 959 957 4 5 2 3
+                              Store 958 960
+             961:      6(int) Load 8(invocation)
+             962:    796(ptr) AccessChain 34(data) 58 788
+             963: 26(i64vec4) Load 962
+             964:805(i64vec3) VectorShuffle 963 963 0 1 2
+             965:805(i64vec3) GroupNonUniformBitwiseXor 43 ClusteredReduce 964 42
+             966:    796(ptr) AccessChain 34(data) 961 788
+             967: 26(i64vec4) Load 966
+             968: 26(i64vec4) VectorShuffle 967 965 4 5 6 3
+                              Store 966 968
+             969:      6(int) Load 8(invocation)
+             970:    796(ptr) AccessChain 34(data) 68 788
+             971: 26(i64vec4) Load 970
+             972: 26(i64vec4) GroupNonUniformBitwiseXor 43 ClusteredReduce 971 42
+             973:    796(ptr) AccessChain 34(data) 969 788
+                              Store 973 972
+             974:      6(int) Load 8(invocation)
+             977:    976(ptr) AccessChain 34(data) 37 975 38
+             978: 27(int64_t) Load 977
+             979: 27(int64_t) GroupNonUniformIAdd 43 ClusteredReduce 978 42
+             980:    976(ptr) AccessChain 34(data) 974 975 38
+                              Store 980 979
+             981:      6(int) Load 8(invocation)
+             984:    983(ptr) AccessChain 34(data) 47 975
+             985: 28(i64vec4) Load 984
+             986:982(i64vec2) VectorShuffle 985 985 0 1
+             987:982(i64vec2) GroupNonUniformIAdd 43 ClusteredReduce 986 42
+             988:    983(ptr) AccessChain 34(data) 981 975
+             989: 28(i64vec4) Load 988
+             990: 28(i64vec4) VectorShuffle 989 987 4 5 2 3
+                              Store 988 990
+             991:      6(int) Load 8(invocation)
+             993:    983(ptr) AccessChain 34(data) 58 975
+             994: 28(i64vec4) Load 993
+             995:992(i64vec3) VectorShuffle 994 994 0 1 2
+             996:992(i64vec3) GroupNonUniformIAdd 43 ClusteredReduce 995 42
+             997:    983(ptr) AccessChain 34(data) 991 975
+             998: 28(i64vec4) Load 997
+             999: 28(i64vec4) VectorShuffle 998 996 4 5 6 3
+                              Store 997 999
+            1000:      6(int) Load 8(invocation)
+            1001:    983(ptr) AccessChain 34(data) 68 975
+            1002: 28(i64vec4) Load 1001
+            1003: 28(i64vec4) GroupNonUniformIAdd 43 ClusteredReduce 1002 42
+            1004:    983(ptr) AccessChain 34(data) 1000 975
+                              Store 1004 1003
+            1005:      6(int) Load 8(invocation)
+            1006:    976(ptr) AccessChain 34(data) 37 975 38
+            1007: 27(int64_t) Load 1006
+            1008: 27(int64_t) GroupNonUniformIMul 43 ClusteredReduce 1007 42
+            1009:    976(ptr) AccessChain 34(data) 1005 975 38
+                              Store 1009 1008
+            1010:      6(int) Load 8(invocation)
+            1011:    983(ptr) AccessChain 34(data) 47 975
+            1012: 28(i64vec4) Load 1011
+            1013:982(i64vec2) VectorShuffle 1012 1012 0 1
+            1014:982(i64vec2) GroupNonUniformIMul 43 ClusteredReduce 1013 42
+            1015:    983(ptr) AccessChain 34(data) 1010 975
+            1016: 28(i64vec4) Load 1015
+            1017: 28(i64vec4) VectorShuffle 1016 1014 4 5 2 3
+                              Store 1015 1017
+            1018:      6(int) Load 8(invocation)
+            1019:    983(ptr) AccessChain 34(data) 58 975
+            1020: 28(i64vec4) Load 1019
+            1021:992(i64vec3) VectorShuffle 1020 1020 0 1 2
+            1022:992(i64vec3) GroupNonUniformIMul 43 ClusteredReduce 1021 42
+            1023:    983(ptr) AccessChain 34(data) 1018 975
+            1024: 28(i64vec4) Load 1023
+            1025: 28(i64vec4) VectorShuffle 1024 1022 4 5 6 3
+                              Store 1023 1025
+            1026:      6(int) Load 8(invocation)
+            1027:    983(ptr) AccessChain 34(data) 68 975
+            1028: 28(i64vec4) Load 1027
+            1029: 28(i64vec4) GroupNonUniformIMul 43 ClusteredReduce 1028 42
+            1030:    983(ptr) AccessChain 34(data) 1026 975
+                              Store 1030 1029
+            1031:      6(int) Load 8(invocation)
+            1032:    976(ptr) AccessChain 34(data) 37 975 38
+            1033: 27(int64_t) Load 1032
+            1034: 27(int64_t) GroupNonUniformUMin 43 ClusteredReduce 1033 42
+            1035:    976(ptr) AccessChain 34(data) 1031 975 38
+                              Store 1035 1034
+            1036:      6(int) Load 8(invocation)
+            1037:    983(ptr) AccessChain 34(data) 47 975
+            1038: 28(i64vec4) Load 1037
+            1039:982(i64vec2) VectorShuffle 1038 1038 0 1
+            1040:982(i64vec2) GroupNonUniformUMin 43 ClusteredReduce 1039 42
+            1041:    983(ptr) AccessChain 34(data) 1036 975
+            1042: 28(i64vec4) Load 1041
+            1043: 28(i64vec4) VectorShuffle 1042 1040 4 5 2 3
+                              Store 1041 1043
+            1044:      6(int) Load 8(invocation)
+            1045:    983(ptr) AccessChain 34(data) 58 975
+            1046: 28(i64vec4) Load 1045
+            1047:992(i64vec3) VectorShuffle 1046 1046 0 1 2
+            1048:992(i64vec3) GroupNonUniformUMin 43 ClusteredReduce 1047 42
+            1049:    983(ptr) AccessChain 34(data) 1044 975
+            1050: 28(i64vec4) Load 1049
+            1051: 28(i64vec4) VectorShuffle 1050 1048 4 5 6 3
+                              Store 1049 1051
+            1052:      6(int) Load 8(invocation)
+            1053:    983(ptr) AccessChain 34(data) 68 975
+            1054: 28(i64vec4) Load 1053
+            1055: 28(i64vec4) GroupNonUniformUMin 43 ClusteredReduce 1054 42
+            1056:    983(ptr) AccessChain 34(data) 1052 975
+                              Store 1056 1055
+            1057:      6(int) Load 8(invocation)
+            1058:    976(ptr) AccessChain 34(data) 37 975 38
+            1059: 27(int64_t) Load 1058
+            1060: 27(int64_t) GroupNonUniformUMax 43 ClusteredReduce 1059 42
+            1061:    976(ptr) AccessChain 34(data) 1057 975 38
+                              Store 1061 1060
+            1062:      6(int) Load 8(invocation)
+            1063:    983(ptr) AccessChain 34(data) 47 975
+            1064: 28(i64vec4) Load 1063
+            1065:982(i64vec2) VectorShuffle 1064 1064 0 1
+            1066:982(i64vec2) GroupNonUniformUMax 43 ClusteredReduce 1065 42
+            1067:    983(ptr) AccessChain 34(data) 1062 975
+            1068: 28(i64vec4) Load 1067
+            1069: 28(i64vec4) VectorShuffle 1068 1066 4 5 2 3
+                              Store 1067 1069
+            1070:      6(int) Load 8(invocation)
+            1071:    983(ptr) AccessChain 34(data) 58 975
+            1072: 28(i64vec4) Load 1071
+            1073:992(i64vec3) VectorShuffle 1072 1072 0 1 2
+            1074:992(i64vec3) GroupNonUniformUMax 43 ClusteredReduce 1073 42
+            1075:    983(ptr) AccessChain 34(data) 1070 975
+            1076: 28(i64vec4) Load 1075
+            1077: 28(i64vec4) VectorShuffle 1076 1074 4 5 6 3
+                              Store 1075 1077
+            1078:      6(int) Load 8(invocation)
+            1079:    983(ptr) AccessChain 34(data) 68 975
+            1080: 28(i64vec4) Load 1079
+            1081: 28(i64vec4) GroupNonUniformUMax 43 ClusteredReduce 1080 42
+            1082:    983(ptr) AccessChain 34(data) 1078 975
+                              Store 1082 1081
+            1083:      6(int) Load 8(invocation)
+            1084:    976(ptr) AccessChain 34(data) 37 975 38
+            1085: 27(int64_t) Load 1084
+            1086: 27(int64_t) GroupNonUniformBitwiseAnd 43 ClusteredReduce 1085 42
+            1087:    976(ptr) AccessChain 34(data) 1083 975 38
+                              Store 1087 1086
+            1088:      6(int) Load 8(invocation)
+            1089:    983(ptr) AccessChain 34(data) 47 975
+            1090: 28(i64vec4) Load 1089
+            1091:982(i64vec2) VectorShuffle 1090 1090 0 1
+            1092:982(i64vec2) GroupNonUniformBitwiseAnd 43 ClusteredReduce 1091 42
+            1093:    983(ptr) AccessChain 34(data) 1088 975
+            1094: 28(i64vec4) Load 1093
+            1095: 28(i64vec4) VectorShuffle 1094 1092 4 5 2 3
+                              Store 1093 1095
+            1096:      6(int) Load 8(invocation)
+            1097:    983(ptr) AccessChain 34(data) 58 975
+            1098: 28(i64vec4) Load 1097
+            1099:992(i64vec3) VectorShuffle 1098 1098 0 1 2
+            1100:992(i64vec3) GroupNonUniformBitwiseAnd 43 ClusteredReduce 1099 42
+            1101:    983(ptr) AccessChain 34(data) 1096 975
+            1102: 28(i64vec4) Load 1101
+            1103: 28(i64vec4) VectorShuffle 1102 1100 4 5 6 3
+                              Store 1101 1103
+            1104:      6(int) Load 8(invocation)
+            1105:    983(ptr) AccessChain 34(data) 68 975
+            1106: 28(i64vec4) Load 1105
+            1107: 28(i64vec4) GroupNonUniformBitwiseAnd 43 ClusteredReduce 1106 42
+            1108:    983(ptr) AccessChain 34(data) 1104 975
+                              Store 1108 1107
+            1109:      6(int) Load 8(invocation)
+            1110:    976(ptr) AccessChain 34(data) 37 975 38
+            1111: 27(int64_t) Load 1110
+            1112: 27(int64_t) GroupNonUniformBitwiseOr 43 ClusteredReduce 1111 42
+            1113:    976(ptr) AccessChain 34(data) 1109 975 38
+                              Store 1113 1112
+            1114:      6(int) Load 8(invocation)
+            1115:    983(ptr) AccessChain 34(data) 47 975
+            1116: 28(i64vec4) Load 1115
+            1117:982(i64vec2) VectorShuffle 1116 1116 0 1
+            1118:982(i64vec2) GroupNonUniformBitwiseOr 43 ClusteredReduce 1117 42
+            1119:    983(ptr) AccessChain 34(data) 1114 975
+            1120: 28(i64vec4) Load 1119
+            1121: 28(i64vec4) VectorShuffle 1120 1118 4 5 2 3
+                              Store 1119 1121
+            1122:      6(int) Load 8(invocation)
+            1123:    983(ptr) AccessChain 34(data) 58 975
+            1124: 28(i64vec4) Load 1123
+            1125:992(i64vec3) VectorShuffle 1124 1124 0 1 2
+            1126:992(i64vec3) GroupNonUniformBitwiseOr 43 ClusteredReduce 1125 42
+            1127:    983(ptr) AccessChain 34(data) 1122 975
+            1128: 28(i64vec4) Load 1127
+            1129: 28(i64vec4) VectorShuffle 1128 1126 4 5 6 3
+                              Store 1127 1129
+            1130:      6(int) Load 8(invocation)
+            1131:    983(ptr) AccessChain 34(data) 68 975
+            1132: 28(i64vec4) Load 1131
+            1133: 28(i64vec4) GroupNonUniformBitwiseOr 43 ClusteredReduce 1132 42
+            1134:    983(ptr) AccessChain 34(data) 1130 975
+                              Store 1134 1133
+            1135:      6(int) Load 8(invocation)
+            1136:    976(ptr) AccessChain 34(data) 37 975 38
+            1137: 27(int64_t) Load 1136
+            1138: 27(int64_t) GroupNonUniformBitwiseXor 43 ClusteredReduce 1137 42
+            1139:    976(ptr) AccessChain 34(data) 1135 975 38
+                              Store 1139 1138
+            1140:      6(int) Load 8(invocation)
+            1141:    983(ptr) AccessChain 34(data) 47 975
+            1142: 28(i64vec4) Load 1141
+            1143:982(i64vec2) VectorShuffle 1142 1142 0 1
+            1144:982(i64vec2) GroupNonUniformBitwiseXor 43 ClusteredReduce 1143 42
+            1145:    983(ptr) AccessChain 34(data) 1140 975
+            1146: 28(i64vec4) Load 1145
+            1147: 28(i64vec4) VectorShuffle 1146 1144 4 5 2 3
+                              Store 1145 1147
+            1148:      6(int) Load 8(invocation)
+            1149:    983(ptr) AccessChain 34(data) 58 975
+            1150: 28(i64vec4) Load 1149
+            1151:992(i64vec3) VectorShuffle 1150 1150 0 1 2
+            1152:992(i64vec3) GroupNonUniformBitwiseXor 43 ClusteredReduce 1151 42
+            1153:    983(ptr) AccessChain 34(data) 1148 975
+            1154: 28(i64vec4) Load 1153
+            1155: 28(i64vec4) VectorShuffle 1154 1152 4 5 6 3
+                              Store 1153 1155
+            1156:      6(int) Load 8(invocation)
+            1157:    983(ptr) AccessChain 34(data) 68 975
+            1158: 28(i64vec4) Load 1157
+            1159: 28(i64vec4) GroupNonUniformBitwiseXor 43 ClusteredReduce 1158 42
+            1160:    983(ptr) AccessChain 34(data) 1156 975
+                              Store 1160 1159
+            1161:      6(int) Load 8(invocation)
+            1164:   1163(ptr) AccessChain 34(data) 37 1162 38
+            1165:29(float16_t) Load 1164
+            1166:29(float16_t) GroupNonUniformFAdd 43 ClusteredReduce 1165 42
+            1167:   1163(ptr) AccessChain 34(data) 1161 1162 38
+                              Store 1167 1166
+            1168:      6(int) Load 8(invocation)
+            1171:   1170(ptr) AccessChain 34(data) 47 1162
+            1172: 30(f16vec4) Load 1171
+            1173:1169(f16vec2) VectorShuffle 1172 1172 0 1
+            1174:1169(f16vec2) GroupNonUniformFAdd 43 ClusteredReduce 1173 42
+            1175:   1170(ptr) AccessChain 34(data) 1168 1162
+            1176: 30(f16vec4) Load 1175
+            1177: 30(f16vec4) VectorShuffle 1176 1174 4 5 2 3
+                              Store 1175 1177
+            1178:      6(int) Load 8(invocation)
+            1180:   1170(ptr) AccessChain 34(data) 58 1162
+            1181: 30(f16vec4) Load 1180
+            1182:1179(f16vec3) VectorShuffle 1181 1181 0 1 2
+            1183:1179(f16vec3) GroupNonUniformFAdd 43 ClusteredReduce 1182 42
+            1184:   1170(ptr) AccessChain 34(data) 1178 1162
+            1185: 30(f16vec4) Load 1184
+            1186: 30(f16vec4) VectorShuffle 1185 1183 4 5 6 3
+                              Store 1184 1186
+            1187:      6(int) Load 8(invocation)
+            1188:   1170(ptr) AccessChain 34(data) 68 1162
+            1189: 30(f16vec4) Load 1188
+            1190: 30(f16vec4) GroupNonUniformFAdd 43 ClusteredReduce 1189 42
+            1191:   1170(ptr) AccessChain 34(data) 1187 1162
+                              Store 1191 1190
+            1192:      6(int) Load 8(invocation)
+            1193:   1163(ptr) AccessChain 34(data) 37 1162 38
+            1194:29(float16_t) Load 1193
+            1195:29(float16_t) GroupNonUniformFMul 43 ClusteredReduce 1194 42
+            1196:   1163(ptr) AccessChain 34(data) 1192 1162 38
+                              Store 1196 1195
+            1197:      6(int) Load 8(invocation)
+            1198:   1170(ptr) AccessChain 34(data) 47 1162
+            1199: 30(f16vec4) Load 1198
+            1200:1169(f16vec2) VectorShuffle 1199 1199 0 1
+            1201:1169(f16vec2) GroupNonUniformFMul 43 ClusteredReduce 1200 42
+            1202:   1170(ptr) AccessChain 34(data) 1197 1162
+            1203: 30(f16vec4) Load 1202
+            1204: 30(f16vec4) VectorShuffle 1203 1201 4 5 2 3
+                              Store 1202 1204
+            1205:      6(int) Load 8(invocation)
+            1206:   1170(ptr) AccessChain 34(data) 58 1162
+            1207: 30(f16vec4) Load 1206
+            1208:1179(f16vec3) VectorShuffle 1207 1207 0 1 2
+            1209:1179(f16vec3) GroupNonUniformFMul 43 ClusteredReduce 1208 42
+            1210:   1170(ptr) AccessChain 34(data) 1205 1162
+            1211: 30(f16vec4) Load 1210
+            1212: 30(f16vec4) VectorShuffle 1211 1209 4 5 6 3
+                              Store 1210 1212
+            1213:      6(int) Load 8(invocation)
+            1214:   1170(ptr) AccessChain 34(data) 68 1162
+            1215: 30(f16vec4) Load 1214
+            1216: 30(f16vec4) GroupNonUniformFMul 43 ClusteredReduce 1215 42
+            1217:   1170(ptr) AccessChain 34(data) 1213 1162
+                              Store 1217 1216
+            1218:      6(int) Load 8(invocation)
+            1219:   1163(ptr) AccessChain 34(data) 37 1162 38
+            1220:29(float16_t) Load 1219
+            1221:29(float16_t) GroupNonUniformFMin 43 ClusteredReduce 1220 42
+            1222:   1163(ptr) AccessChain 34(data) 1218 1162 38
+                              Store 1222 1221
+            1223:      6(int) Load 8(invocation)
+            1224:   1170(ptr) AccessChain 34(data) 47 1162
+            1225: 30(f16vec4) Load 1224
+            1226:1169(f16vec2) VectorShuffle 1225 1225 0 1
+            1227:1169(f16vec2) GroupNonUniformFMin 43 ClusteredReduce 1226 42
+            1228:   1170(ptr) AccessChain 34(data) 1223 1162
+            1229: 30(f16vec4) Load 1228
+            1230: 30(f16vec4) VectorShuffle 1229 1227 4 5 2 3
+                              Store 1228 1230
+            1231:      6(int) Load 8(invocation)
+            1232:   1170(ptr) AccessChain 34(data) 58 1162
+            1233: 30(f16vec4) Load 1232
+            1234:1179(f16vec3) VectorShuffle 1233 1233 0 1 2
+            1235:1179(f16vec3) GroupNonUniformFMin 43 ClusteredReduce 1234 42
+            1236:   1170(ptr) AccessChain 34(data) 1231 1162
+            1237: 30(f16vec4) Load 1236
+            1238: 30(f16vec4) VectorShuffle 1237 1235 4 5 6 3
+                              Store 1236 1238
+            1239:      6(int) Load 8(invocation)
+            1240:   1170(ptr) AccessChain 34(data) 68 1162
+            1241: 30(f16vec4) Load 1240
+            1242: 30(f16vec4) GroupNonUniformFMin 43 ClusteredReduce 1241 42
+            1243:   1170(ptr) AccessChain 34(data) 1239 1162
+                              Store 1243 1242
+            1244:      6(int) Load 8(invocation)
+            1245:   1163(ptr) AccessChain 34(data) 37 1162 38
+            1246:29(float16_t) Load 1245
+            1247:29(float16_t) GroupNonUniformFMax 43 ClusteredReduce 1246 42
+            1248:   1163(ptr) AccessChain 34(data) 1244 1162 38
+                              Store 1248 1247
+            1249:      6(int) Load 8(invocation)
+            1250:   1170(ptr) AccessChain 34(data) 47 1162
+            1251: 30(f16vec4) Load 1250
+            1252:1169(f16vec2) VectorShuffle 1251 1251 0 1
+            1253:1169(f16vec2) GroupNonUniformFMax 43 ClusteredReduce 1252 42
+            1254:   1170(ptr) AccessChain 34(data) 1249 1162
+            1255: 30(f16vec4) Load 1254
+            1256: 30(f16vec4) VectorShuffle 1255 1253 4 5 2 3
+                              Store 1254 1256
+            1257:      6(int) Load 8(invocation)
+            1258:   1170(ptr) AccessChain 34(data) 58 1162
+            1259: 30(f16vec4) Load 1258
+            1260:1179(f16vec3) VectorShuffle 1259 1259 0 1 2
+            1261:1179(f16vec3) GroupNonUniformFMax 43 ClusteredReduce 1260 42
+            1262:   1170(ptr) AccessChain 34(data) 1257 1162
+            1263: 30(f16vec4) Load 1262
+            1264: 30(f16vec4) VectorShuffle 1263 1261 4 5 6 3
+                              Store 1262 1264
+            1265:      6(int) Load 8(invocation)
+            1266:   1170(ptr) AccessChain 34(data) 68 1162
+            1267: 30(f16vec4) Load 1266
+            1268: 30(f16vec4) GroupNonUniformFMax 43 ClusteredReduce 1267 42
+            1269:   1170(ptr) AccessChain 34(data) 1265 1162
+                              Store 1269 1268
+                              Return
+                              FunctionEnd
diff --git a/Test/baseResults/spv.subgroupExtendedTypesClusteredNeg.comp.out b/Test/baseResults/spv.subgroupExtendedTypesClusteredNeg.comp.out
new file mode 100644
index 0000000..fd01ef3
--- /dev/null
+++ b/Test/baseResults/spv.subgroupExtendedTypesClusteredNeg.comp.out
@@ -0,0 +1,189 @@
+spv.subgroupExtendedTypesClusteredNeg.comp
+ERROR: 0:26: ' temp int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:27: ' temp 2-component vector of int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:28: ' temp 3-component vector of int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:29: 'layout( column_major std430) buffer 4-component vector of int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:31: ' temp int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:32: ' temp 2-component vector of int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:33: ' temp 3-component vector of int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:34: 'layout( column_major std430) buffer 4-component vector of int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:36: ' temp int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:37: ' temp 2-component vector of int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:38: ' temp 3-component vector of int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:39: 'layout( column_major std430) buffer 4-component vector of int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:41: ' temp int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:42: ' temp 2-component vector of int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:43: ' temp 3-component vector of int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:44: 'layout( column_major std430) buffer 4-component vector of int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:46: ' temp int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:47: ' temp 2-component vector of int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:48: ' temp 3-component vector of int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:49: 'layout( column_major std430) buffer 4-component vector of int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:51: ' temp int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:52: ' temp 2-component vector of int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:53: ' temp 3-component vector of int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:54: 'layout( column_major std430) buffer 4-component vector of int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:56: ' temp int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:57: ' temp 2-component vector of int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:58: ' temp 3-component vector of int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:59: 'layout( column_major std430) buffer 4-component vector of int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:61: ' temp uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:62: ' temp 2-component vector of uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:63: ' temp 3-component vector of uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:64: 'layout( column_major std430) buffer 4-component vector of uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:66: ' temp uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:67: ' temp 2-component vector of uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:68: ' temp 3-component vector of uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:69: 'layout( column_major std430) buffer 4-component vector of uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:71: ' temp uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:72: ' temp 2-component vector of uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:73: ' temp 3-component vector of uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:74: 'layout( column_major std430) buffer 4-component vector of uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:76: ' temp uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:77: ' temp 2-component vector of uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:78: ' temp 3-component vector of uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:79: 'layout( column_major std430) buffer 4-component vector of uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:81: ' temp uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:82: ' temp 2-component vector of uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:83: ' temp 3-component vector of uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:84: 'layout( column_major std430) buffer 4-component vector of uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:86: ' temp uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:87: ' temp 2-component vector of uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:88: ' temp 3-component vector of uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:89: 'layout( column_major std430) buffer 4-component vector of uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:91: ' temp uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:92: ' temp 2-component vector of uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:93: ' temp 3-component vector of uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:94: 'layout( column_major std430) buffer 4-component vector of uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:96: ' temp int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:97: ' temp 2-component vector of int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:98: ' temp 3-component vector of int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:99: 'layout( column_major std430) buffer 4-component vector of int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:101: ' temp int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:102: ' temp 2-component vector of int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:103: ' temp 3-component vector of int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:104: 'layout( column_major std430) buffer 4-component vector of int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:106: ' temp int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:107: ' temp 2-component vector of int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:108: ' temp 3-component vector of int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:109: 'layout( column_major std430) buffer 4-component vector of int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:111: ' temp int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:112: ' temp 2-component vector of int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:113: ' temp 3-component vector of int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:114: 'layout( column_major std430) buffer 4-component vector of int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:116: ' temp int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:117: ' temp 2-component vector of int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:118: ' temp 3-component vector of int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:119: 'layout( column_major std430) buffer 4-component vector of int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:121: ' temp int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:122: ' temp 2-component vector of int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:123: ' temp 3-component vector of int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:124: 'layout( column_major std430) buffer 4-component vector of int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:126: ' temp int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:127: ' temp 2-component vector of int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:128: ' temp 3-component vector of int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:129: 'layout( column_major std430) buffer 4-component vector of int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:131: ' temp uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:132: ' temp 2-component vector of uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:133: ' temp 3-component vector of uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:134: 'layout( column_major std430) buffer 4-component vector of uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:136: ' temp uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:137: ' temp 2-component vector of uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:138: ' temp 3-component vector of uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:139: 'layout( column_major std430) buffer 4-component vector of uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:141: ' temp uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:142: ' temp 2-component vector of uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:143: ' temp 3-component vector of uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:144: 'layout( column_major std430) buffer 4-component vector of uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:146: ' temp uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:147: ' temp 2-component vector of uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:148: ' temp 3-component vector of uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:149: 'layout( column_major std430) buffer 4-component vector of uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:151: ' temp uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:152: ' temp 2-component vector of uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:153: ' temp 3-component vector of uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:154: 'layout( column_major std430) buffer 4-component vector of uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:156: ' temp uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:157: ' temp 2-component vector of uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:158: ' temp 3-component vector of uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:159: 'layout( column_major std430) buffer 4-component vector of uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:161: ' temp uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:162: ' temp 2-component vector of uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:163: ' temp 3-component vector of uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:164: 'layout( column_major std430) buffer 4-component vector of uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:166: ' temp int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:167: ' temp 2-component vector of int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:168: ' temp 3-component vector of int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:169: 'layout( column_major std430) buffer 4-component vector of int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:171: ' temp int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:172: ' temp 2-component vector of int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:173: ' temp 3-component vector of int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:174: 'layout( column_major std430) buffer 4-component vector of int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:176: ' temp int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:177: ' temp 2-component vector of int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:178: ' temp 3-component vector of int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:179: 'layout( column_major std430) buffer 4-component vector of int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:181: ' temp int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:182: ' temp 2-component vector of int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:183: ' temp 3-component vector of int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:184: 'layout( column_major std430) buffer 4-component vector of int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:186: ' temp int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:187: ' temp 2-component vector of int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:188: ' temp 3-component vector of int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:189: 'layout( column_major std430) buffer 4-component vector of int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:191: ' temp int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:192: ' temp 2-component vector of int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:193: ' temp 3-component vector of int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:194: 'layout( column_major std430) buffer 4-component vector of int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:196: ' temp int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:197: ' temp 2-component vector of int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:198: ' temp 3-component vector of int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:199: 'layout( column_major std430) buffer 4-component vector of int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:201: ' temp uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:202: ' temp 2-component vector of uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:203: ' temp 3-component vector of uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:204: 'layout( column_major std430) buffer 4-component vector of uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:206: ' temp uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:207: ' temp 2-component vector of uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:208: ' temp 3-component vector of uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:209: 'layout( column_major std430) buffer 4-component vector of uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:211: ' temp uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:212: ' temp 2-component vector of uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:213: ' temp 3-component vector of uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:214: 'layout( column_major std430) buffer 4-component vector of uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:216: ' temp uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:217: ' temp 2-component vector of uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:218: ' temp 3-component vector of uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:219: 'layout( column_major std430) buffer 4-component vector of uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:221: ' temp uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:222: ' temp 2-component vector of uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:223: ' temp 3-component vector of uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:224: 'layout( column_major std430) buffer 4-component vector of uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:226: ' temp uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:227: ' temp 2-component vector of uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:228: ' temp 3-component vector of uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:229: 'layout( column_major std430) buffer 4-component vector of uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:231: ' temp uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:232: ' temp 2-component vector of uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:233: ' temp 3-component vector of uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:234: 'layout( column_major std430) buffer 4-component vector of uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:236: ' temp float16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_float16
+ERROR: 0:237: ' temp 2-component vector of float16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_float16
+ERROR: 0:238: ' temp 3-component vector of float16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_float16
+ERROR: 0:239: 'layout( column_major std430) buffer 4-component vector of float16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_float16
+ERROR: 0:241: ' temp float16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_float16
+ERROR: 0:242: ' temp 2-component vector of float16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_float16
+ERROR: 0:243: ' temp 3-component vector of float16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_float16
+ERROR: 0:244: 'layout( column_major std430) buffer 4-component vector of float16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_float16
+ERROR: 0:246: ' temp float16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_float16
+ERROR: 0:247: ' temp 2-component vector of float16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_float16
+ERROR: 0:248: ' temp 3-component vector of float16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_float16
+ERROR: 0:249: 'layout( column_major std430) buffer 4-component vector of float16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_float16
+ERROR: 0:251: ' temp float16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_float16
+ERROR: 0:252: ' temp 2-component vector of float16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_float16
+ERROR: 0:253: ' temp 3-component vector of float16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_float16
+ERROR: 0:254: 'layout( column_major std430) buffer 4-component vector of float16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_float16
+ERROR: 184 compilation errors.  No code generated.
+
+
+SPIR-V is not generated for failed compile or link
diff --git a/Test/baseResults/spv.subgroupExtendedTypesPartitioned.comp.out b/Test/baseResults/spv.subgroupExtendedTypesPartitioned.comp.out
new file mode 100644
index 0000000..2f5a570
--- /dev/null
+++ b/Test/baseResults/spv.subgroupExtendedTypesPartitioned.comp.out
@@ -0,0 +1,1835 @@
+spv.subgroupExtendedTypesPartitioned.comp
+// Module Version 10300
+// Generated by (magic number): 80007
+// Id's are bound by 1558
+
+                              Capability Shader
+                              Capability Float16
+                              Capability Int64
+                              Capability Int16
+                              Capability Int8
+                              Capability GroupNonUniform
+                              Capability StorageUniformBufferBlock16
+                              Capability StorageBuffer8BitAccess
+                              Capability GroupNonUniformPartitionedNV
+                              Extension  "SPV_KHR_8bit_storage"
+                              Extension  "SPV_NV_shader_subgroup_partitioned"
+               1:             ExtInstImport  "GLSL.std.450"
+                              MemoryModel Logical GLSL450
+                              EntryPoint GLCompute 4  "main" 10 12
+                              ExecutionMode 4 LocalSize 8 1 1
+                              Source GLSL 450
+                              SourceExtension  "GL_EXT_shader_explicit_arithmetic_types_float16"
+                              SourceExtension  "GL_EXT_shader_explicit_arithmetic_types_int16"
+                              SourceExtension  "GL_EXT_shader_explicit_arithmetic_types_int64"
+                              SourceExtension  "GL_EXT_shader_explicit_arithmetic_types_int8"
+                              SourceExtension  "GL_EXT_shader_subgroup_extended_types_float16"
+                              SourceExtension  "GL_EXT_shader_subgroup_extended_types_int16"
+                              SourceExtension  "GL_EXT_shader_subgroup_extended_types_int64"
+                              SourceExtension  "GL_EXT_shader_subgroup_extended_types_int8"
+                              SourceExtension  "GL_KHR_shader_subgroup_basic"
+                              SourceExtension  "GL_NV_shader_subgroup_partitioned"
+                              Name 4  "main"
+                              Name 8  "invocation"
+                              Name 10  "gl_SubgroupInvocationID"
+                              Name 12  "gl_SubgroupSize"
+                              Name 19  "ballot"
+                              Name 34  "Buffers"
+                              MemberName 34(Buffers) 0  "i8"
+                              MemberName 34(Buffers) 1  "u8"
+                              MemberName 34(Buffers) 2  "i16"
+                              MemberName 34(Buffers) 3  "u16"
+                              MemberName 34(Buffers) 4  "i64"
+                              MemberName 34(Buffers) 5  "u64"
+                              MemberName 34(Buffers) 6  "f16"
+                              Name 37  "data"
+                              Decorate 10(gl_SubgroupInvocationID) RelaxedPrecision
+                              Decorate 10(gl_SubgroupInvocationID) BuiltIn SubgroupLocalInvocationId
+                              Decorate 11 RelaxedPrecision
+                              Decorate 12(gl_SubgroupSize) RelaxedPrecision
+                              Decorate 12(gl_SubgroupSize) BuiltIn SubgroupSize
+                              Decorate 13 RelaxedPrecision
+                              Decorate 14 RelaxedPrecision
+                              Decorate 16 RelaxedPrecision
+                              MemberDecorate 34(Buffers) 0 Offset 0
+                              MemberDecorate 34(Buffers) 1 Offset 4
+                              MemberDecorate 34(Buffers) 2 Offset 8
+                              MemberDecorate 34(Buffers) 3 Offset 16
+                              MemberDecorate 34(Buffers) 4 Offset 32
+                              MemberDecorate 34(Buffers) 5 Offset 64
+                              MemberDecorate 34(Buffers) 6 Offset 96
+                              Decorate 34(Buffers) Block
+                              Decorate 37(data) DescriptorSet 0
+                              Decorate 37(data) Binding 0
+                              Decorate 1557 BuiltIn WorkgroupSize
+               2:             TypeVoid
+               3:             TypeFunction 2
+               6:             TypeInt 32 0
+               7:             TypePointer Function 6(int)
+               9:             TypePointer Input 6(int)
+10(gl_SubgroupInvocationID):      9(ptr) Variable Input
+12(gl_SubgroupSize):      9(ptr) Variable Input
+              15:      6(int) Constant 4
+              17:             TypeVector 6(int) 4
+              18:             TypePointer Function 17(ivec4)
+              20:             TypeInt 8 1
+              21:             TypeVector 20(int8_t) 4
+              22:             TypeInt 8 0
+              23:             TypeVector 22(int8_t) 4
+              24:             TypeInt 16 1
+              25:             TypeVector 24(int16_t) 4
+              26:             TypeInt 16 0
+              27:             TypeVector 26(int16_t) 4
+              28:             TypeInt 64 1
+              29:             TypeVector 28(int64_t) 4
+              30:             TypeInt 64 0
+              31:             TypeVector 30(int64_t) 4
+              32:             TypeFloat 16
+              33:             TypeVector 32(float16_t) 4
+     34(Buffers):             TypeStruct 21(i8vec4) 23(i8vec4) 25(i16vec4) 27(i16vec4) 29(i64vec4) 31(i64vec4) 33(f16vec4)
+              35:             TypeArray 34(Buffers) 15
+              36:             TypePointer StorageBuffer 35
+        37(data):     36(ptr) Variable StorageBuffer
+              38:             TypeInt 32 1
+              39:     38(int) Constant 0
+              40:      6(int) Constant 0
+              41:             TypePointer StorageBuffer 20(int8_t)
+              45:     38(int) Constant 1
+              46:             TypeVector 20(int8_t) 2
+              47:             TypePointer StorageBuffer 21(i8vec4)
+              52:     38(int) Constant 2
+              53:             TypeVector 20(int8_t) 3
+              58:     38(int) Constant 3
+              62:             TypePointer StorageBuffer 22(int8_t)
+              66:             TypeVector 22(int8_t) 2
+              67:             TypePointer StorageBuffer 23(i8vec4)
+              72:             TypeVector 22(int8_t) 3
+              80:             TypePointer StorageBuffer 24(int16_t)
+              84:             TypeVector 24(int16_t) 2
+              85:             TypePointer StorageBuffer 25(i16vec4)
+              90:             TypeVector 24(int16_t) 3
+              98:             TypePointer StorageBuffer 26(int16_t)
+             102:             TypeVector 26(int16_t) 2
+             103:             TypePointer StorageBuffer 27(i16vec4)
+             108:             TypeVector 26(int16_t) 3
+             116:     38(int) Constant 4
+             117:             TypePointer StorageBuffer 28(int64_t)
+             121:             TypeVector 28(int64_t) 2
+             122:             TypePointer StorageBuffer 29(i64vec4)
+             127:             TypeVector 28(int64_t) 3
+             135:     38(int) Constant 5
+             136:             TypePointer StorageBuffer 30(int64_t)
+             140:             TypeVector 30(int64_t) 2
+             141:             TypePointer StorageBuffer 31(i64vec4)
+             146:             TypeVector 30(int64_t) 3
+             154:     38(int) Constant 6
+             155:             TypePointer StorageBuffer 32(float16_t)
+             159:             TypeVector 32(float16_t) 2
+             160:             TypePointer StorageBuffer 33(f16vec4)
+             165:             TypeVector 32(float16_t) 3
+             177:      6(int) Constant 3
+            1554:             TypeVector 6(int) 3
+            1555:      6(int) Constant 8
+            1556:      6(int) Constant 1
+            1557: 1554(ivec3) ConstantComposite 1555 1556 1556
+         4(main):           2 Function None 3
+               5:             Label
+   8(invocation):      7(ptr) Variable Function
+      19(ballot):     18(ptr) Variable Function
+              11:      6(int) Load 10(gl_SubgroupInvocationID)
+              13:      6(int) Load 12(gl_SubgroupSize)
+              14:      6(int) IAdd 11 13
+              16:      6(int) UMod 14 15
+                              Store 8(invocation) 16
+              42:     41(ptr) AccessChain 37(data) 39 39 40
+              43:  20(int8_t) Load 42
+              44:   17(ivec4) GroupNonUniformPartitionNV 43
+                              Store 19(ballot) 44
+              48:     47(ptr) AccessChain 37(data) 45 39
+              49:  21(i8vec4) Load 48
+              50:  46(i8vec2) VectorShuffle 49 49 0 1
+              51:   17(ivec4) GroupNonUniformPartitionNV 50
+                              Store 19(ballot) 51
+              54:     47(ptr) AccessChain 37(data) 52 39
+              55:  21(i8vec4) Load 54
+              56:  53(i8vec3) VectorShuffle 55 55 0 1 2
+              57:   17(ivec4) GroupNonUniformPartitionNV 56
+                              Store 19(ballot) 57
+              59:     47(ptr) AccessChain 37(data) 58 39
+              60:  21(i8vec4) Load 59
+              61:   17(ivec4) GroupNonUniformPartitionNV 60
+                              Store 19(ballot) 61
+              63:     62(ptr) AccessChain 37(data) 39 45 40
+              64:  22(int8_t) Load 63
+              65:   17(ivec4) GroupNonUniformPartitionNV 64
+                              Store 19(ballot) 65
+              68:     67(ptr) AccessChain 37(data) 45 45
+              69:  23(i8vec4) Load 68
+              70:  66(i8vec2) VectorShuffle 69 69 0 1
+              71:   17(ivec4) GroupNonUniformPartitionNV 70
+                              Store 19(ballot) 71
+              73:     67(ptr) AccessChain 37(data) 52 45
+              74:  23(i8vec4) Load 73
+              75:  72(i8vec3) VectorShuffle 74 74 0 1 2
+              76:   17(ivec4) GroupNonUniformPartitionNV 75
+                              Store 19(ballot) 76
+              77:     67(ptr) AccessChain 37(data) 58 45
+              78:  23(i8vec4) Load 77
+              79:   17(ivec4) GroupNonUniformPartitionNV 78
+                              Store 19(ballot) 79
+              81:     80(ptr) AccessChain 37(data) 39 52 40
+              82: 24(int16_t) Load 81
+              83:   17(ivec4) GroupNonUniformPartitionNV 82
+                              Store 19(ballot) 83
+              86:     85(ptr) AccessChain 37(data) 45 52
+              87: 25(i16vec4) Load 86
+              88: 84(i16vec2) VectorShuffle 87 87 0 1
+              89:   17(ivec4) GroupNonUniformPartitionNV 88
+                              Store 19(ballot) 89
+              91:     85(ptr) AccessChain 37(data) 52 52
+              92: 25(i16vec4) Load 91
+              93: 90(i16vec3) VectorShuffle 92 92 0 1 2
+              94:   17(ivec4) GroupNonUniformPartitionNV 93
+                              Store 19(ballot) 94
+              95:     85(ptr) AccessChain 37(data) 58 52
+              96: 25(i16vec4) Load 95
+              97:   17(ivec4) GroupNonUniformPartitionNV 96
+                              Store 19(ballot) 97
+              99:     98(ptr) AccessChain 37(data) 39 58 40
+             100: 26(int16_t) Load 99
+             101:   17(ivec4) GroupNonUniformPartitionNV 100
+                              Store 19(ballot) 101
+             104:    103(ptr) AccessChain 37(data) 45 58
+             105: 27(i16vec4) Load 104
+             106:102(i16vec2) VectorShuffle 105 105 0 1
+             107:   17(ivec4) GroupNonUniformPartitionNV 106
+                              Store 19(ballot) 107
+             109:    103(ptr) AccessChain 37(data) 52 58
+             110: 27(i16vec4) Load 109
+             111:108(i16vec3) VectorShuffle 110 110 0 1 2
+             112:   17(ivec4) GroupNonUniformPartitionNV 111
+                              Store 19(ballot) 112
+             113:    103(ptr) AccessChain 37(data) 58 58
+             114: 27(i16vec4) Load 113
+             115:   17(ivec4) GroupNonUniformPartitionNV 114
+                              Store 19(ballot) 115
+             118:    117(ptr) AccessChain 37(data) 39 116 40
+             119: 28(int64_t) Load 118
+             120:   17(ivec4) GroupNonUniformPartitionNV 119
+                              Store 19(ballot) 120
+             123:    122(ptr) AccessChain 37(data) 45 116
+             124: 29(i64vec4) Load 123
+             125:121(i64vec2) VectorShuffle 124 124 0 1
+             126:   17(ivec4) GroupNonUniformPartitionNV 125
+                              Store 19(ballot) 126
+             128:    122(ptr) AccessChain 37(data) 52 116
+             129: 29(i64vec4) Load 128
+             130:127(i64vec3) VectorShuffle 129 129 0 1 2
+             131:   17(ivec4) GroupNonUniformPartitionNV 130
+                              Store 19(ballot) 131
+             132:    122(ptr) AccessChain 37(data) 58 116
+             133: 29(i64vec4) Load 132
+             134:   17(ivec4) GroupNonUniformPartitionNV 133
+                              Store 19(ballot) 134
+             137:    136(ptr) AccessChain 37(data) 39 135 40
+             138: 30(int64_t) Load 137
+             139:   17(ivec4) GroupNonUniformPartitionNV 138
+                              Store 19(ballot) 139
+             142:    141(ptr) AccessChain 37(data) 45 135
+             143: 31(i64vec4) Load 142
+             144:140(i64vec2) VectorShuffle 143 143 0 1
+             145:   17(ivec4) GroupNonUniformPartitionNV 144
+                              Store 19(ballot) 145
+             147:    141(ptr) AccessChain 37(data) 52 135
+             148: 31(i64vec4) Load 147
+             149:146(i64vec3) VectorShuffle 148 148 0 1 2
+             150:   17(ivec4) GroupNonUniformPartitionNV 149
+                              Store 19(ballot) 150
+             151:    141(ptr) AccessChain 37(data) 58 135
+             152: 31(i64vec4) Load 151
+             153:   17(ivec4) GroupNonUniformPartitionNV 152
+                              Store 19(ballot) 153
+             156:    155(ptr) AccessChain 37(data) 39 154 40
+             157:32(float16_t) Load 156
+             158:   17(ivec4) GroupNonUniformPartitionNV 157
+                              Store 19(ballot) 158
+             161:    160(ptr) AccessChain 37(data) 45 154
+             162: 33(f16vec4) Load 161
+             163:159(f16vec2) VectorShuffle 162 162 0 1
+             164:   17(ivec4) GroupNonUniformPartitionNV 163
+                              Store 19(ballot) 164
+             166:    160(ptr) AccessChain 37(data) 52 154
+             167: 33(f16vec4) Load 166
+             168:165(f16vec3) VectorShuffle 167 167 0 1 2
+             169:   17(ivec4) GroupNonUniformPartitionNV 168
+                              Store 19(ballot) 169
+             170:    160(ptr) AccessChain 37(data) 58 154
+             171: 33(f16vec4) Load 170
+             172:   17(ivec4) GroupNonUniformPartitionNV 171
+                              Store 19(ballot) 172
+             173:      6(int) Load 8(invocation)
+             174:     41(ptr) AccessChain 37(data) 39 39 40
+             175:  20(int8_t) Load 174
+             176:   17(ivec4) Load 19(ballot)
+             178:  20(int8_t) GroupNonUniformIAdd 177 PartitionedReduceNV 175 176
+             179:     41(ptr) AccessChain 37(data) 173 39 40
+                              Store 179 178
+             180:      6(int) Load 8(invocation)
+             181:     47(ptr) AccessChain 37(data) 45 39
+             182:  21(i8vec4) Load 181
+             183:  46(i8vec2) VectorShuffle 182 182 0 1
+             184:   17(ivec4) Load 19(ballot)
+             185:  46(i8vec2) GroupNonUniformIAdd 177 PartitionedReduceNV 183 184
+             186:     47(ptr) AccessChain 37(data) 180 39
+             187:  21(i8vec4) Load 186
+             188:  21(i8vec4) VectorShuffle 187 185 4 5 2 3
+                              Store 186 188
+             189:      6(int) Load 8(invocation)
+             190:     47(ptr) AccessChain 37(data) 52 39
+             191:  21(i8vec4) Load 190
+             192:  53(i8vec3) VectorShuffle 191 191 0 1 2
+             193:   17(ivec4) Load 19(ballot)
+             194:  53(i8vec3) GroupNonUniformIAdd 177 PartitionedReduceNV 192 193
+             195:     47(ptr) AccessChain 37(data) 189 39
+             196:  21(i8vec4) Load 195
+             197:  21(i8vec4) VectorShuffle 196 194 4 5 6 3
+                              Store 195 197
+             198:      6(int) Load 8(invocation)
+             199:     47(ptr) AccessChain 37(data) 58 39
+             200:  21(i8vec4) Load 199
+             201:   17(ivec4) Load 19(ballot)
+             202:  21(i8vec4) GroupNonUniformIAdd 177 PartitionedReduceNV 200 201
+             203:     47(ptr) AccessChain 37(data) 198 39
+                              Store 203 202
+             204:      6(int) Load 8(invocation)
+             205:     41(ptr) AccessChain 37(data) 39 39 40
+             206:  20(int8_t) Load 205
+             207:   17(ivec4) Load 19(ballot)
+             208:  20(int8_t) GroupNonUniformIMul 177 PartitionedReduceNV 206 207
+             209:     41(ptr) AccessChain 37(data) 204 39 40
+                              Store 209 208
+             210:      6(int) Load 8(invocation)
+             211:     47(ptr) AccessChain 37(data) 45 39
+             212:  21(i8vec4) Load 211
+             213:  46(i8vec2) VectorShuffle 212 212 0 1
+             214:   17(ivec4) Load 19(ballot)
+             215:  46(i8vec2) GroupNonUniformIMul 177 PartitionedReduceNV 213 214
+             216:     47(ptr) AccessChain 37(data) 210 39
+             217:  21(i8vec4) Load 216
+             218:  21(i8vec4) VectorShuffle 217 215 4 5 2 3
+                              Store 216 218
+             219:      6(int) Load 8(invocation)
+             220:     47(ptr) AccessChain 37(data) 52 39
+             221:  21(i8vec4) Load 220
+             222:  53(i8vec3) VectorShuffle 221 221 0 1 2
+             223:   17(ivec4) Load 19(ballot)
+             224:  53(i8vec3) GroupNonUniformIMul 177 PartitionedReduceNV 222 223
+             225:     47(ptr) AccessChain 37(data) 219 39
+             226:  21(i8vec4) Load 225
+             227:  21(i8vec4) VectorShuffle 226 224 4 5 6 3
+                              Store 225 227
+             228:      6(int) Load 8(invocation)
+             229:     47(ptr) AccessChain 37(data) 58 39
+             230:  21(i8vec4) Load 229
+             231:   17(ivec4) Load 19(ballot)
+             232:  21(i8vec4) GroupNonUniformIMul 177 PartitionedReduceNV 230 231
+             233:     47(ptr) AccessChain 37(data) 228 39
+                              Store 233 232
+             234:      6(int) Load 8(invocation)
+             235:     41(ptr) AccessChain 37(data) 39 39 40
+             236:  20(int8_t) Load 235
+             237:   17(ivec4) Load 19(ballot)
+             238:  20(int8_t) GroupNonUniformSMin 177 PartitionedReduceNV 236 237
+             239:     41(ptr) AccessChain 37(data) 234 39 40
+                              Store 239 238
+             240:      6(int) Load 8(invocation)
+             241:     47(ptr) AccessChain 37(data) 45 39
+             242:  21(i8vec4) Load 241
+             243:  46(i8vec2) VectorShuffle 242 242 0 1
+             244:   17(ivec4) Load 19(ballot)
+             245:  46(i8vec2) GroupNonUniformSMin 177 PartitionedReduceNV 243 244
+             246:     47(ptr) AccessChain 37(data) 240 39
+             247:  21(i8vec4) Load 246
+             248:  21(i8vec4) VectorShuffle 247 245 4 5 2 3
+                              Store 246 248
+             249:      6(int) Load 8(invocation)
+             250:     47(ptr) AccessChain 37(data) 52 39
+             251:  21(i8vec4) Load 250
+             252:  53(i8vec3) VectorShuffle 251 251 0 1 2
+             253:   17(ivec4) Load 19(ballot)
+             254:  53(i8vec3) GroupNonUniformSMin 177 PartitionedReduceNV 252 253
+             255:     47(ptr) AccessChain 37(data) 249 39
+             256:  21(i8vec4) Load 255
+             257:  21(i8vec4) VectorShuffle 256 254 4 5 6 3
+                              Store 255 257
+             258:      6(int) Load 8(invocation)
+             259:     47(ptr) AccessChain 37(data) 58 39
+             260:  21(i8vec4) Load 259
+             261:   17(ivec4) Load 19(ballot)
+             262:  21(i8vec4) GroupNonUniformSMin 177 PartitionedReduceNV 260 261
+             263:     47(ptr) AccessChain 37(data) 258 39
+                              Store 263 262
+             264:      6(int) Load 8(invocation)
+             265:     41(ptr) AccessChain 37(data) 39 39 40
+             266:  20(int8_t) Load 265
+             267:   17(ivec4) Load 19(ballot)
+             268:  20(int8_t) GroupNonUniformSMax 177 PartitionedReduceNV 266 267
+             269:     41(ptr) AccessChain 37(data) 264 39 40
+                              Store 269 268
+             270:      6(int) Load 8(invocation)
+             271:     47(ptr) AccessChain 37(data) 45 39
+             272:  21(i8vec4) Load 271
+             273:  46(i8vec2) VectorShuffle 272 272 0 1
+             274:   17(ivec4) Load 19(ballot)
+             275:  46(i8vec2) GroupNonUniformSMax 177 PartitionedReduceNV 273 274
+             276:     47(ptr) AccessChain 37(data) 270 39
+             277:  21(i8vec4) Load 276
+             278:  21(i8vec4) VectorShuffle 277 275 4 5 2 3
+                              Store 276 278
+             279:      6(int) Load 8(invocation)
+             280:     47(ptr) AccessChain 37(data) 52 39
+             281:  21(i8vec4) Load 280
+             282:  53(i8vec3) VectorShuffle 281 281 0 1 2
+             283:   17(ivec4) Load 19(ballot)
+             284:  53(i8vec3) GroupNonUniformSMax 177 PartitionedReduceNV 282 283
+             285:     47(ptr) AccessChain 37(data) 279 39
+             286:  21(i8vec4) Load 285
+             287:  21(i8vec4) VectorShuffle 286 284 4 5 6 3
+                              Store 285 287
+             288:      6(int) Load 8(invocation)
+             289:     47(ptr) AccessChain 37(data) 58 39
+             290:  21(i8vec4) Load 289
+             291:   17(ivec4) Load 19(ballot)
+             292:  21(i8vec4) GroupNonUniformSMax 177 PartitionedReduceNV 290 291
+             293:     47(ptr) AccessChain 37(data) 288 39
+                              Store 293 292
+             294:      6(int) Load 8(invocation)
+             295:     41(ptr) AccessChain 37(data) 39 39 40
+             296:  20(int8_t) Load 295
+             297:   17(ivec4) Load 19(ballot)
+             298:  20(int8_t) GroupNonUniformBitwiseAnd 177 PartitionedReduceNV 296 297
+             299:     41(ptr) AccessChain 37(data) 294 39 40
+                              Store 299 298
+             300:      6(int) Load 8(invocation)
+             301:     47(ptr) AccessChain 37(data) 45 39
+             302:  21(i8vec4) Load 301
+             303:  46(i8vec2) VectorShuffle 302 302 0 1
+             304:   17(ivec4) Load 19(ballot)
+             305:  46(i8vec2) GroupNonUniformBitwiseAnd 177 PartitionedReduceNV 303 304
+             306:     47(ptr) AccessChain 37(data) 300 39
+             307:  21(i8vec4) Load 306
+             308:  21(i8vec4) VectorShuffle 307 305 4 5 2 3
+                              Store 306 308
+             309:      6(int) Load 8(invocation)
+             310:     47(ptr) AccessChain 37(data) 52 39
+             311:  21(i8vec4) Load 310
+             312:  53(i8vec3) VectorShuffle 311 311 0 1 2
+             313:   17(ivec4) Load 19(ballot)
+             314:  53(i8vec3) GroupNonUniformBitwiseAnd 177 PartitionedReduceNV 312 313
+             315:     47(ptr) AccessChain 37(data) 309 39
+             316:  21(i8vec4) Load 315
+             317:  21(i8vec4) VectorShuffle 316 314 4 5 6 3
+                              Store 315 317
+             318:      6(int) Load 8(invocation)
+             319:     47(ptr) AccessChain 37(data) 58 39
+             320:  21(i8vec4) Load 319
+             321:   17(ivec4) Load 19(ballot)
+             322:  21(i8vec4) GroupNonUniformBitwiseAnd 177 PartitionedReduceNV 320 321
+             323:     47(ptr) AccessChain 37(data) 318 39
+                              Store 323 322
+             324:      6(int) Load 8(invocation)
+             325:     41(ptr) AccessChain 37(data) 39 39 40
+             326:  20(int8_t) Load 325
+             327:   17(ivec4) Load 19(ballot)
+             328:  20(int8_t) GroupNonUniformBitwiseOr 177 PartitionedReduceNV 326 327
+             329:     41(ptr) AccessChain 37(data) 324 39 40
+                              Store 329 328
+             330:      6(int) Load 8(invocation)
+             331:     47(ptr) AccessChain 37(data) 45 39
+             332:  21(i8vec4) Load 331
+             333:  46(i8vec2) VectorShuffle 332 332 0 1
+             334:   17(ivec4) Load 19(ballot)
+             335:  46(i8vec2) GroupNonUniformBitwiseOr 177 PartitionedReduceNV 333 334
+             336:     47(ptr) AccessChain 37(data) 330 39
+             337:  21(i8vec4) Load 336
+             338:  21(i8vec4) VectorShuffle 337 335 4 5 2 3
+                              Store 336 338
+             339:      6(int) Load 8(invocation)
+             340:     47(ptr) AccessChain 37(data) 52 39
+             341:  21(i8vec4) Load 340
+             342:  53(i8vec3) VectorShuffle 341 341 0 1 2
+             343:   17(ivec4) Load 19(ballot)
+             344:  53(i8vec3) GroupNonUniformBitwiseOr 177 PartitionedReduceNV 342 343
+             345:     47(ptr) AccessChain 37(data) 339 39
+             346:  21(i8vec4) Load 345
+             347:  21(i8vec4) VectorShuffle 346 344 4 5 6 3
+                              Store 345 347
+             348:      6(int) Load 8(invocation)
+             349:     47(ptr) AccessChain 37(data) 58 39
+             350:  21(i8vec4) Load 349
+             351:   17(ivec4) Load 19(ballot)
+             352:  21(i8vec4) GroupNonUniformBitwiseOr 177 PartitionedReduceNV 350 351
+             353:     47(ptr) AccessChain 37(data) 348 39
+                              Store 353 352
+             354:      6(int) Load 8(invocation)
+             355:     41(ptr) AccessChain 37(data) 39 39 40
+             356:  20(int8_t) Load 355
+             357:   17(ivec4) Load 19(ballot)
+             358:  20(int8_t) GroupNonUniformBitwiseXor 177 PartitionedReduceNV 356 357
+             359:     41(ptr) AccessChain 37(data) 354 39 40
+                              Store 359 358
+             360:      6(int) Load 8(invocation)
+             361:     47(ptr) AccessChain 37(data) 45 39
+             362:  21(i8vec4) Load 361
+             363:  46(i8vec2) VectorShuffle 362 362 0 1
+             364:   17(ivec4) Load 19(ballot)
+             365:  46(i8vec2) GroupNonUniformBitwiseXor 177 PartitionedReduceNV 363 364
+             366:     47(ptr) AccessChain 37(data) 360 39
+             367:  21(i8vec4) Load 366
+             368:  21(i8vec4) VectorShuffle 367 365 4 5 2 3
+                              Store 366 368
+             369:      6(int) Load 8(invocation)
+             370:     47(ptr) AccessChain 37(data) 52 39
+             371:  21(i8vec4) Load 370
+             372:  53(i8vec3) VectorShuffle 371 371 0 1 2
+             373:   17(ivec4) Load 19(ballot)
+             374:  53(i8vec3) GroupNonUniformBitwiseXor 177 PartitionedReduceNV 372 373
+             375:     47(ptr) AccessChain 37(data) 369 39
+             376:  21(i8vec4) Load 375
+             377:  21(i8vec4) VectorShuffle 376 374 4 5 6 3
+                              Store 375 377
+             378:      6(int) Load 8(invocation)
+             379:     47(ptr) AccessChain 37(data) 58 39
+             380:  21(i8vec4) Load 379
+             381:   17(ivec4) Load 19(ballot)
+             382:  21(i8vec4) GroupNonUniformBitwiseXor 177 PartitionedReduceNV 380 381
+             383:     47(ptr) AccessChain 37(data) 378 39
+                              Store 383 382
+             384:      6(int) Load 8(invocation)
+             385:     62(ptr) AccessChain 37(data) 39 45 40
+             386:  22(int8_t) Load 385
+             387:   17(ivec4) Load 19(ballot)
+             388:  22(int8_t) GroupNonUniformIAdd 177 PartitionedReduceNV 386 387
+             389:     62(ptr) AccessChain 37(data) 384 45 40
+                              Store 389 388
+             390:      6(int) Load 8(invocation)
+             391:     67(ptr) AccessChain 37(data) 45 45
+             392:  23(i8vec4) Load 391
+             393:  66(i8vec2) VectorShuffle 392 392 0 1
+             394:   17(ivec4) Load 19(ballot)
+             395:  66(i8vec2) GroupNonUniformIAdd 177 PartitionedReduceNV 393 394
+             396:     67(ptr) AccessChain 37(data) 390 45
+             397:  23(i8vec4) Load 396
+             398:  23(i8vec4) VectorShuffle 397 395 4 5 2 3
+                              Store 396 398
+             399:      6(int) Load 8(invocation)
+             400:     67(ptr) AccessChain 37(data) 52 45
+             401:  23(i8vec4) Load 400
+             402:  72(i8vec3) VectorShuffle 401 401 0 1 2
+             403:   17(ivec4) Load 19(ballot)
+             404:  72(i8vec3) GroupNonUniformIAdd 177 PartitionedReduceNV 402 403
+             405:     67(ptr) AccessChain 37(data) 399 45
+             406:  23(i8vec4) Load 405
+             407:  23(i8vec4) VectorShuffle 406 404 4 5 6 3
+                              Store 405 407
+             408:      6(int) Load 8(invocation)
+             409:     67(ptr) AccessChain 37(data) 58 45
+             410:  23(i8vec4) Load 409
+             411:   17(ivec4) Load 19(ballot)
+             412:  23(i8vec4) GroupNonUniformIAdd 177 PartitionedReduceNV 410 411
+             413:     67(ptr) AccessChain 37(data) 408 45
+                              Store 413 412
+             414:      6(int) Load 8(invocation)
+             415:     62(ptr) AccessChain 37(data) 39 45 40
+             416:  22(int8_t) Load 415
+             417:   17(ivec4) Load 19(ballot)
+             418:  22(int8_t) GroupNonUniformIMul 177 PartitionedReduceNV 416 417
+             419:     62(ptr) AccessChain 37(data) 414 45 40
+                              Store 419 418
+             420:      6(int) Load 8(invocation)
+             421:     67(ptr) AccessChain 37(data) 45 45
+             422:  23(i8vec4) Load 421
+             423:  66(i8vec2) VectorShuffle 422 422 0 1
+             424:   17(ivec4) Load 19(ballot)
+             425:  66(i8vec2) GroupNonUniformIMul 177 PartitionedReduceNV 423 424
+             426:     67(ptr) AccessChain 37(data) 420 45
+             427:  23(i8vec4) Load 426
+             428:  23(i8vec4) VectorShuffle 427 425 4 5 2 3
+                              Store 426 428
+             429:      6(int) Load 8(invocation)
+             430:     67(ptr) AccessChain 37(data) 52 45
+             431:  23(i8vec4) Load 430
+             432:  72(i8vec3) VectorShuffle 431 431 0 1 2
+             433:   17(ivec4) Load 19(ballot)
+             434:  72(i8vec3) GroupNonUniformIMul 177 PartitionedReduceNV 432 433
+             435:     67(ptr) AccessChain 37(data) 429 45
+             436:  23(i8vec4) Load 435
+             437:  23(i8vec4) VectorShuffle 436 434 4 5 6 3
+                              Store 435 437
+             438:      6(int) Load 8(invocation)
+             439:     67(ptr) AccessChain 37(data) 58 45
+             440:  23(i8vec4) Load 439
+             441:   17(ivec4) Load 19(ballot)
+             442:  23(i8vec4) GroupNonUniformIMul 177 PartitionedReduceNV 440 441
+             443:     67(ptr) AccessChain 37(data) 438 45
+                              Store 443 442
+             444:      6(int) Load 8(invocation)
+             445:     62(ptr) AccessChain 37(data) 39 45 40
+             446:  22(int8_t) Load 445
+             447:   17(ivec4) Load 19(ballot)
+             448:  22(int8_t) GroupNonUniformUMin 177 PartitionedReduceNV 446 447
+             449:     62(ptr) AccessChain 37(data) 444 45 40
+                              Store 449 448
+             450:      6(int) Load 8(invocation)
+             451:     67(ptr) AccessChain 37(data) 45 45
+             452:  23(i8vec4) Load 451
+             453:  66(i8vec2) VectorShuffle 452 452 0 1
+             454:   17(ivec4) Load 19(ballot)
+             455:  66(i8vec2) GroupNonUniformUMin 177 PartitionedReduceNV 453 454
+             456:     67(ptr) AccessChain 37(data) 450 45
+             457:  23(i8vec4) Load 456
+             458:  23(i8vec4) VectorShuffle 457 455 4 5 2 3
+                              Store 456 458
+             459:      6(int) Load 8(invocation)
+             460:     67(ptr) AccessChain 37(data) 52 45
+             461:  23(i8vec4) Load 460
+             462:  72(i8vec3) VectorShuffle 461 461 0 1 2
+             463:   17(ivec4) Load 19(ballot)
+             464:  72(i8vec3) GroupNonUniformUMin 177 PartitionedReduceNV 462 463
+             465:     67(ptr) AccessChain 37(data) 459 45
+             466:  23(i8vec4) Load 465
+             467:  23(i8vec4) VectorShuffle 466 464 4 5 6 3
+                              Store 465 467
+             468:      6(int) Load 8(invocation)
+             469:     67(ptr) AccessChain 37(data) 58 45
+             470:  23(i8vec4) Load 469
+             471:   17(ivec4) Load 19(ballot)
+             472:  23(i8vec4) GroupNonUniformUMin 177 PartitionedReduceNV 470 471
+             473:     67(ptr) AccessChain 37(data) 468 45
+                              Store 473 472
+             474:      6(int) Load 8(invocation)
+             475:     62(ptr) AccessChain 37(data) 39 45 40
+             476:  22(int8_t) Load 475
+             477:   17(ivec4) Load 19(ballot)
+             478:  22(int8_t) GroupNonUniformUMax 177 PartitionedReduceNV 476 477
+             479:     62(ptr) AccessChain 37(data) 474 45 40
+                              Store 479 478
+             480:      6(int) Load 8(invocation)
+             481:     67(ptr) AccessChain 37(data) 45 45
+             482:  23(i8vec4) Load 481
+             483:  66(i8vec2) VectorShuffle 482 482 0 1
+             484:   17(ivec4) Load 19(ballot)
+             485:  66(i8vec2) GroupNonUniformUMax 177 PartitionedReduceNV 483 484
+             486:     67(ptr) AccessChain 37(data) 480 45
+             487:  23(i8vec4) Load 486
+             488:  23(i8vec4) VectorShuffle 487 485 4 5 2 3
+                              Store 486 488
+             489:      6(int) Load 8(invocation)
+             490:     67(ptr) AccessChain 37(data) 52 45
+             491:  23(i8vec4) Load 490
+             492:  72(i8vec3) VectorShuffle 491 491 0 1 2
+             493:   17(ivec4) Load 19(ballot)
+             494:  72(i8vec3) GroupNonUniformUMax 177 PartitionedReduceNV 492 493
+             495:     67(ptr) AccessChain 37(data) 489 45
+             496:  23(i8vec4) Load 495
+             497:  23(i8vec4) VectorShuffle 496 494 4 5 6 3
+                              Store 495 497
+             498:      6(int) Load 8(invocation)
+             499:     67(ptr) AccessChain 37(data) 58 45
+             500:  23(i8vec4) Load 499
+             501:   17(ivec4) Load 19(ballot)
+             502:  23(i8vec4) GroupNonUniformUMax 177 PartitionedReduceNV 500 501
+             503:     67(ptr) AccessChain 37(data) 498 45
+                              Store 503 502
+             504:      6(int) Load 8(invocation)
+             505:     62(ptr) AccessChain 37(data) 39 45 40
+             506:  22(int8_t) Load 505
+             507:   17(ivec4) Load 19(ballot)
+             508:  22(int8_t) GroupNonUniformBitwiseAnd 177 PartitionedReduceNV 506 507
+             509:     62(ptr) AccessChain 37(data) 504 45 40
+                              Store 509 508
+             510:      6(int) Load 8(invocation)
+             511:     67(ptr) AccessChain 37(data) 45 45
+             512:  23(i8vec4) Load 511
+             513:  66(i8vec2) VectorShuffle 512 512 0 1
+             514:   17(ivec4) Load 19(ballot)
+             515:  66(i8vec2) GroupNonUniformBitwiseAnd 177 PartitionedReduceNV 513 514
+             516:     67(ptr) AccessChain 37(data) 510 45
+             517:  23(i8vec4) Load 516
+             518:  23(i8vec4) VectorShuffle 517 515 4 5 2 3
+                              Store 516 518
+             519:      6(int) Load 8(invocation)
+             520:     67(ptr) AccessChain 37(data) 52 45
+             521:  23(i8vec4) Load 520
+             522:  72(i8vec3) VectorShuffle 521 521 0 1 2
+             523:   17(ivec4) Load 19(ballot)
+             524:  72(i8vec3) GroupNonUniformBitwiseAnd 177 PartitionedReduceNV 522 523
+             525:     67(ptr) AccessChain 37(data) 519 45
+             526:  23(i8vec4) Load 525
+             527:  23(i8vec4) VectorShuffle 526 524 4 5 6 3
+                              Store 525 527
+             528:      6(int) Load 8(invocation)
+             529:     67(ptr) AccessChain 37(data) 58 45
+             530:  23(i8vec4) Load 529
+             531:   17(ivec4) Load 19(ballot)
+             532:  23(i8vec4) GroupNonUniformBitwiseAnd 177 PartitionedReduceNV 530 531
+             533:     67(ptr) AccessChain 37(data) 528 45
+                              Store 533 532
+             534:      6(int) Load 8(invocation)
+             535:     62(ptr) AccessChain 37(data) 39 45 40
+             536:  22(int8_t) Load 535
+             537:   17(ivec4) Load 19(ballot)
+             538:  22(int8_t) GroupNonUniformBitwiseOr 177 PartitionedReduceNV 536 537
+             539:     62(ptr) AccessChain 37(data) 534 45 40
+                              Store 539 538
+             540:      6(int) Load 8(invocation)
+             541:     67(ptr) AccessChain 37(data) 45 45
+             542:  23(i8vec4) Load 541
+             543:  66(i8vec2) VectorShuffle 542 542 0 1
+             544:   17(ivec4) Load 19(ballot)
+             545:  66(i8vec2) GroupNonUniformBitwiseOr 177 PartitionedReduceNV 543 544
+             546:     67(ptr) AccessChain 37(data) 540 45
+             547:  23(i8vec4) Load 546
+             548:  23(i8vec4) VectorShuffle 547 545 4 5 2 3
+                              Store 546 548
+             549:      6(int) Load 8(invocation)
+             550:     67(ptr) AccessChain 37(data) 52 45
+             551:  23(i8vec4) Load 550
+             552:  72(i8vec3) VectorShuffle 551 551 0 1 2
+             553:   17(ivec4) Load 19(ballot)
+             554:  72(i8vec3) GroupNonUniformBitwiseOr 177 PartitionedReduceNV 552 553
+             555:     67(ptr) AccessChain 37(data) 549 45
+             556:  23(i8vec4) Load 555
+             557:  23(i8vec4) VectorShuffle 556 554 4 5 6 3
+                              Store 555 557
+             558:      6(int) Load 8(invocation)
+             559:     67(ptr) AccessChain 37(data) 58 45
+             560:  23(i8vec4) Load 559
+             561:   17(ivec4) Load 19(ballot)
+             562:  23(i8vec4) GroupNonUniformBitwiseOr 177 PartitionedReduceNV 560 561
+             563:     67(ptr) AccessChain 37(data) 558 45
+                              Store 563 562
+             564:      6(int) Load 8(invocation)
+             565:     62(ptr) AccessChain 37(data) 39 45 40
+             566:  22(int8_t) Load 565
+             567:   17(ivec4) Load 19(ballot)
+             568:  22(int8_t) GroupNonUniformBitwiseXor 177 PartitionedReduceNV 566 567
+             569:     62(ptr) AccessChain 37(data) 564 45 40
+                              Store 569 568
+             570:      6(int) Load 8(invocation)
+             571:     67(ptr) AccessChain 37(data) 45 45
+             572:  23(i8vec4) Load 571
+             573:  66(i8vec2) VectorShuffle 572 572 0 1
+             574:   17(ivec4) Load 19(ballot)
+             575:  66(i8vec2) GroupNonUniformBitwiseXor 177 PartitionedReduceNV 573 574
+             576:     67(ptr) AccessChain 37(data) 570 45
+             577:  23(i8vec4) Load 576
+             578:  23(i8vec4) VectorShuffle 577 575 4 5 2 3
+                              Store 576 578
+             579:      6(int) Load 8(invocation)
+             580:     67(ptr) AccessChain 37(data) 52 45
+             581:  23(i8vec4) Load 580
+             582:  72(i8vec3) VectorShuffle 581 581 0 1 2
+             583:   17(ivec4) Load 19(ballot)
+             584:  72(i8vec3) GroupNonUniformBitwiseXor 177 PartitionedReduceNV 582 583
+             585:     67(ptr) AccessChain 37(data) 579 45
+             586:  23(i8vec4) Load 585
+             587:  23(i8vec4) VectorShuffle 586 584 4 5 6 3
+                              Store 585 587
+             588:      6(int) Load 8(invocation)
+             589:     67(ptr) AccessChain 37(data) 58 45
+             590:  23(i8vec4) Load 589
+             591:   17(ivec4) Load 19(ballot)
+             592:  23(i8vec4) GroupNonUniformBitwiseXor 177 PartitionedReduceNV 590 591
+             593:     67(ptr) AccessChain 37(data) 588 45
+                              Store 593 592
+             594:      6(int) Load 8(invocation)
+             595:     80(ptr) AccessChain 37(data) 39 52 40
+             596: 24(int16_t) Load 595
+             597:   17(ivec4) Load 19(ballot)
+             598: 24(int16_t) GroupNonUniformIAdd 177 PartitionedReduceNV 596 597
+             599:     80(ptr) AccessChain 37(data) 594 52 40
+                              Store 599 598
+             600:      6(int) Load 8(invocation)
+             601:     85(ptr) AccessChain 37(data) 45 52
+             602: 25(i16vec4) Load 601
+             603: 84(i16vec2) VectorShuffle 602 602 0 1
+             604:   17(ivec4) Load 19(ballot)
+             605: 84(i16vec2) GroupNonUniformIAdd 177 PartitionedReduceNV 603 604
+             606:     85(ptr) AccessChain 37(data) 600 52
+             607: 25(i16vec4) Load 606
+             608: 25(i16vec4) VectorShuffle 607 605 4 5 2 3
+                              Store 606 608
+             609:      6(int) Load 8(invocation)
+             610:     85(ptr) AccessChain 37(data) 52 52
+             611: 25(i16vec4) Load 610
+             612: 90(i16vec3) VectorShuffle 611 611 0 1 2
+             613:   17(ivec4) Load 19(ballot)
+             614: 90(i16vec3) GroupNonUniformIAdd 177 PartitionedReduceNV 612 613
+             615:     85(ptr) AccessChain 37(data) 609 52
+             616: 25(i16vec4) Load 615
+             617: 25(i16vec4) VectorShuffle 616 614 4 5 6 3
+                              Store 615 617
+             618:      6(int) Load 8(invocation)
+             619:     85(ptr) AccessChain 37(data) 58 52
+             620: 25(i16vec4) Load 619
+             621:   17(ivec4) Load 19(ballot)
+             622: 25(i16vec4) GroupNonUniformIAdd 177 PartitionedReduceNV 620 621
+             623:     85(ptr) AccessChain 37(data) 618 52
+                              Store 623 622
+             624:      6(int) Load 8(invocation)
+             625:     80(ptr) AccessChain 37(data) 39 52 40
+             626: 24(int16_t) Load 625
+             627:   17(ivec4) Load 19(ballot)
+             628: 24(int16_t) GroupNonUniformIMul 177 PartitionedReduceNV 626 627
+             629:     80(ptr) AccessChain 37(data) 624 52 40
+                              Store 629 628
+             630:      6(int) Load 8(invocation)
+             631:     85(ptr) AccessChain 37(data) 45 52
+             632: 25(i16vec4) Load 631
+             633: 84(i16vec2) VectorShuffle 632 632 0 1
+             634:   17(ivec4) Load 19(ballot)
+             635: 84(i16vec2) GroupNonUniformIMul 177 PartitionedReduceNV 633 634
+             636:     85(ptr) AccessChain 37(data) 630 52
+             637: 25(i16vec4) Load 636
+             638: 25(i16vec4) VectorShuffle 637 635 4 5 2 3
+                              Store 636 638
+             639:      6(int) Load 8(invocation)
+             640:     85(ptr) AccessChain 37(data) 52 52
+             641: 25(i16vec4) Load 640
+             642: 90(i16vec3) VectorShuffle 641 641 0 1 2
+             643:   17(ivec4) Load 19(ballot)
+             644: 90(i16vec3) GroupNonUniformIMul 177 PartitionedReduceNV 642 643
+             645:     85(ptr) AccessChain 37(data) 639 52
+             646: 25(i16vec4) Load 645
+             647: 25(i16vec4) VectorShuffle 646 644 4 5 6 3
+                              Store 645 647
+             648:      6(int) Load 8(invocation)
+             649:     85(ptr) AccessChain 37(data) 58 52
+             650: 25(i16vec4) Load 649
+             651:   17(ivec4) Load 19(ballot)
+             652: 25(i16vec4) GroupNonUniformIMul 177 PartitionedReduceNV 650 651
+             653:     85(ptr) AccessChain 37(data) 648 52
+                              Store 653 652
+             654:      6(int) Load 8(invocation)
+             655:     80(ptr) AccessChain 37(data) 39 52 40
+             656: 24(int16_t) Load 655
+             657:   17(ivec4) Load 19(ballot)
+             658: 24(int16_t) GroupNonUniformSMin 177 PartitionedReduceNV 656 657
+             659:     80(ptr) AccessChain 37(data) 654 52 40
+                              Store 659 658
+             660:      6(int) Load 8(invocation)
+             661:     85(ptr) AccessChain 37(data) 45 52
+             662: 25(i16vec4) Load 661
+             663: 84(i16vec2) VectorShuffle 662 662 0 1
+             664:   17(ivec4) Load 19(ballot)
+             665: 84(i16vec2) GroupNonUniformSMin 177 PartitionedReduceNV 663 664
+             666:     85(ptr) AccessChain 37(data) 660 52
+             667: 25(i16vec4) Load 666
+             668: 25(i16vec4) VectorShuffle 667 665 4 5 2 3
+                              Store 666 668
+             669:      6(int) Load 8(invocation)
+             670:     85(ptr) AccessChain 37(data) 52 52
+             671: 25(i16vec4) Load 670
+             672: 90(i16vec3) VectorShuffle 671 671 0 1 2
+             673:   17(ivec4) Load 19(ballot)
+             674: 90(i16vec3) GroupNonUniformSMin 177 PartitionedReduceNV 672 673
+             675:     85(ptr) AccessChain 37(data) 669 52
+             676: 25(i16vec4) Load 675
+             677: 25(i16vec4) VectorShuffle 676 674 4 5 6 3
+                              Store 675 677
+             678:      6(int) Load 8(invocation)
+             679:     85(ptr) AccessChain 37(data) 58 52
+             680: 25(i16vec4) Load 679
+             681:   17(ivec4) Load 19(ballot)
+             682: 25(i16vec4) GroupNonUniformSMin 177 PartitionedReduceNV 680 681
+             683:     85(ptr) AccessChain 37(data) 678 52
+                              Store 683 682
+             684:      6(int) Load 8(invocation)
+             685:     80(ptr) AccessChain 37(data) 39 52 40
+             686: 24(int16_t) Load 685
+             687:   17(ivec4) Load 19(ballot)
+             688: 24(int16_t) GroupNonUniformSMax 177 PartitionedReduceNV 686 687
+             689:     80(ptr) AccessChain 37(data) 684 52 40
+                              Store 689 688
+             690:      6(int) Load 8(invocation)
+             691:     85(ptr) AccessChain 37(data) 45 52
+             692: 25(i16vec4) Load 691
+             693: 84(i16vec2) VectorShuffle 692 692 0 1
+             694:   17(ivec4) Load 19(ballot)
+             695: 84(i16vec2) GroupNonUniformSMax 177 PartitionedReduceNV 693 694
+             696:     85(ptr) AccessChain 37(data) 690 52
+             697: 25(i16vec4) Load 696
+             698: 25(i16vec4) VectorShuffle 697 695 4 5 2 3
+                              Store 696 698
+             699:      6(int) Load 8(invocation)
+             700:     85(ptr) AccessChain 37(data) 52 52
+             701: 25(i16vec4) Load 700
+             702: 90(i16vec3) VectorShuffle 701 701 0 1 2
+             703:   17(ivec4) Load 19(ballot)
+             704: 90(i16vec3) GroupNonUniformSMax 177 PartitionedReduceNV 702 703
+             705:     85(ptr) AccessChain 37(data) 699 52
+             706: 25(i16vec4) Load 705
+             707: 25(i16vec4) VectorShuffle 706 704 4 5 6 3
+                              Store 705 707
+             708:      6(int) Load 8(invocation)
+             709:     85(ptr) AccessChain 37(data) 58 52
+             710: 25(i16vec4) Load 709
+             711:   17(ivec4) Load 19(ballot)
+             712: 25(i16vec4) GroupNonUniformSMax 177 PartitionedReduceNV 710 711
+             713:     85(ptr) AccessChain 37(data) 708 52
+                              Store 713 712
+             714:      6(int) Load 8(invocation)
+             715:     80(ptr) AccessChain 37(data) 39 52 40
+             716: 24(int16_t) Load 715
+             717:   17(ivec4) Load 19(ballot)
+             718: 24(int16_t) GroupNonUniformBitwiseAnd 177 PartitionedReduceNV 716 717
+             719:     80(ptr) AccessChain 37(data) 714 52 40
+                              Store 719 718
+             720:      6(int) Load 8(invocation)
+             721:     85(ptr) AccessChain 37(data) 45 52
+             722: 25(i16vec4) Load 721
+             723: 84(i16vec2) VectorShuffle 722 722 0 1
+             724:   17(ivec4) Load 19(ballot)
+             725: 84(i16vec2) GroupNonUniformBitwiseAnd 177 PartitionedReduceNV 723 724
+             726:     85(ptr) AccessChain 37(data) 720 52
+             727: 25(i16vec4) Load 726
+             728: 25(i16vec4) VectorShuffle 727 725 4 5 2 3
+                              Store 726 728
+             729:      6(int) Load 8(invocation)
+             730:     85(ptr) AccessChain 37(data) 52 52
+             731: 25(i16vec4) Load 730
+             732: 90(i16vec3) VectorShuffle 731 731 0 1 2
+             733:   17(ivec4) Load 19(ballot)
+             734: 90(i16vec3) GroupNonUniformBitwiseAnd 177 PartitionedReduceNV 732 733
+             735:     85(ptr) AccessChain 37(data) 729 52
+             736: 25(i16vec4) Load 735
+             737: 25(i16vec4) VectorShuffle 736 734 4 5 6 3
+                              Store 735 737
+             738:      6(int) Load 8(invocation)
+             739:     85(ptr) AccessChain 37(data) 58 52
+             740: 25(i16vec4) Load 739
+             741:   17(ivec4) Load 19(ballot)
+             742: 25(i16vec4) GroupNonUniformBitwiseAnd 177 PartitionedReduceNV 740 741
+             743:     85(ptr) AccessChain 37(data) 738 52
+                              Store 743 742
+             744:      6(int) Load 8(invocation)
+             745:     80(ptr) AccessChain 37(data) 39 52 40
+             746: 24(int16_t) Load 745
+             747:   17(ivec4) Load 19(ballot)
+             748: 24(int16_t) GroupNonUniformBitwiseOr 177 PartitionedReduceNV 746 747
+             749:     80(ptr) AccessChain 37(data) 744 52 40
+                              Store 749 748
+             750:      6(int) Load 8(invocation)
+             751:     85(ptr) AccessChain 37(data) 45 52
+             752: 25(i16vec4) Load 751
+             753: 84(i16vec2) VectorShuffle 752 752 0 1
+             754:   17(ivec4) Load 19(ballot)
+             755: 84(i16vec2) GroupNonUniformBitwiseOr 177 PartitionedReduceNV 753 754
+             756:     85(ptr) AccessChain 37(data) 750 52
+             757: 25(i16vec4) Load 756
+             758: 25(i16vec4) VectorShuffle 757 755 4 5 2 3
+                              Store 756 758
+             759:      6(int) Load 8(invocation)
+             760:     85(ptr) AccessChain 37(data) 52 52
+             761: 25(i16vec4) Load 760
+             762: 90(i16vec3) VectorShuffle 761 761 0 1 2
+             763:   17(ivec4) Load 19(ballot)
+             764: 90(i16vec3) GroupNonUniformBitwiseOr 177 PartitionedReduceNV 762 763
+             765:     85(ptr) AccessChain 37(data) 759 52
+             766: 25(i16vec4) Load 765
+             767: 25(i16vec4) VectorShuffle 766 764 4 5 6 3
+                              Store 765 767
+             768:      6(int) Load 8(invocation)
+             769:     85(ptr) AccessChain 37(data) 58 52
+             770: 25(i16vec4) Load 769
+             771:   17(ivec4) Load 19(ballot)
+             772: 25(i16vec4) GroupNonUniformBitwiseOr 177 PartitionedReduceNV 770 771
+             773:     85(ptr) AccessChain 37(data) 768 52
+                              Store 773 772
+             774:      6(int) Load 8(invocation)
+             775:     80(ptr) AccessChain 37(data) 39 52 40
+             776: 24(int16_t) Load 775
+             777:   17(ivec4) Load 19(ballot)
+             778: 24(int16_t) GroupNonUniformBitwiseXor 177 PartitionedReduceNV 776 777
+             779:     80(ptr) AccessChain 37(data) 774 52 40
+                              Store 779 778
+             780:      6(int) Load 8(invocation)
+             781:     85(ptr) AccessChain 37(data) 45 52
+             782: 25(i16vec4) Load 781
+             783: 84(i16vec2) VectorShuffle 782 782 0 1
+             784:   17(ivec4) Load 19(ballot)
+             785: 84(i16vec2) GroupNonUniformBitwiseXor 177 PartitionedReduceNV 783 784
+             786:     85(ptr) AccessChain 37(data) 780 52
+             787: 25(i16vec4) Load 786
+             788: 25(i16vec4) VectorShuffle 787 785 4 5 2 3
+                              Store 786 788
+             789:      6(int) Load 8(invocation)
+             790:     85(ptr) AccessChain 37(data) 52 52
+             791: 25(i16vec4) Load 790
+             792: 90(i16vec3) VectorShuffle 791 791 0 1 2
+             793:   17(ivec4) Load 19(ballot)
+             794: 90(i16vec3) GroupNonUniformBitwiseXor 177 PartitionedReduceNV 792 793
+             795:     85(ptr) AccessChain 37(data) 789 52
+             796: 25(i16vec4) Load 795
+             797: 25(i16vec4) VectorShuffle 796 794 4 5 6 3
+                              Store 795 797
+             798:      6(int) Load 8(invocation)
+             799:     85(ptr) AccessChain 37(data) 58 52
+             800: 25(i16vec4) Load 799
+             801:   17(ivec4) Load 19(ballot)
+             802: 25(i16vec4) GroupNonUniformBitwiseXor 177 PartitionedReduceNV 800 801
+             803:     85(ptr) AccessChain 37(data) 798 52
+                              Store 803 802
+             804:      6(int) Load 8(invocation)
+             805:     98(ptr) AccessChain 37(data) 39 58 40
+             806: 26(int16_t) Load 805
+             807:   17(ivec4) Load 19(ballot)
+             808: 26(int16_t) GroupNonUniformIAdd 177 PartitionedReduceNV 806 807
+             809:     98(ptr) AccessChain 37(data) 804 58 40
+                              Store 809 808
+             810:      6(int) Load 8(invocation)
+             811:    103(ptr) AccessChain 37(data) 45 58
+             812: 27(i16vec4) Load 811
+             813:102(i16vec2) VectorShuffle 812 812 0 1
+             814:   17(ivec4) Load 19(ballot)
+             815:102(i16vec2) GroupNonUniformIAdd 177 PartitionedReduceNV 813 814
+             816:    103(ptr) AccessChain 37(data) 810 58
+             817: 27(i16vec4) Load 816
+             818: 27(i16vec4) VectorShuffle 817 815 4 5 2 3
+                              Store 816 818
+             819:      6(int) Load 8(invocation)
+             820:    103(ptr) AccessChain 37(data) 52 58
+             821: 27(i16vec4) Load 820
+             822:108(i16vec3) VectorShuffle 821 821 0 1 2
+             823:   17(ivec4) Load 19(ballot)
+             824:108(i16vec3) GroupNonUniformIAdd 177 PartitionedReduceNV 822 823
+             825:    103(ptr) AccessChain 37(data) 819 58
+             826: 27(i16vec4) Load 825
+             827: 27(i16vec4) VectorShuffle 826 824 4 5 6 3
+                              Store 825 827
+             828:      6(int) Load 8(invocation)
+             829:    103(ptr) AccessChain 37(data) 58 58
+             830: 27(i16vec4) Load 829
+             831:   17(ivec4) Load 19(ballot)
+             832: 27(i16vec4) GroupNonUniformIAdd 177 PartitionedReduceNV 830 831
+             833:    103(ptr) AccessChain 37(data) 828 58
+                              Store 833 832
+             834:      6(int) Load 8(invocation)
+             835:     98(ptr) AccessChain 37(data) 39 58 40
+             836: 26(int16_t) Load 835
+             837:   17(ivec4) Load 19(ballot)
+             838: 26(int16_t) GroupNonUniformIMul 177 PartitionedReduceNV 836 837
+             839:     98(ptr) AccessChain 37(data) 834 58 40
+                              Store 839 838
+             840:      6(int) Load 8(invocation)
+             841:    103(ptr) AccessChain 37(data) 45 58
+             842: 27(i16vec4) Load 841
+             843:102(i16vec2) VectorShuffle 842 842 0 1
+             844:   17(ivec4) Load 19(ballot)
+             845:102(i16vec2) GroupNonUniformIMul 177 PartitionedReduceNV 843 844
+             846:    103(ptr) AccessChain 37(data) 840 58
+             847: 27(i16vec4) Load 846
+             848: 27(i16vec4) VectorShuffle 847 845 4 5 2 3
+                              Store 846 848
+             849:      6(int) Load 8(invocation)
+             850:    103(ptr) AccessChain 37(data) 52 58
+             851: 27(i16vec4) Load 850
+             852:108(i16vec3) VectorShuffle 851 851 0 1 2
+             853:   17(ivec4) Load 19(ballot)
+             854:108(i16vec3) GroupNonUniformIMul 177 PartitionedReduceNV 852 853
+             855:    103(ptr) AccessChain 37(data) 849 58
+             856: 27(i16vec4) Load 855
+             857: 27(i16vec4) VectorShuffle 856 854 4 5 6 3
+                              Store 855 857
+             858:      6(int) Load 8(invocation)
+             859:    103(ptr) AccessChain 37(data) 58 58
+             860: 27(i16vec4) Load 859
+             861:   17(ivec4) Load 19(ballot)
+             862: 27(i16vec4) GroupNonUniformIMul 177 PartitionedReduceNV 860 861
+             863:    103(ptr) AccessChain 37(data) 858 58
+                              Store 863 862
+             864:      6(int) Load 8(invocation)
+             865:     98(ptr) AccessChain 37(data) 39 58 40
+             866: 26(int16_t) Load 865
+             867:   17(ivec4) Load 19(ballot)
+             868: 26(int16_t) GroupNonUniformUMin 177 PartitionedReduceNV 866 867
+             869:     98(ptr) AccessChain 37(data) 864 58 40
+                              Store 869 868
+             870:      6(int) Load 8(invocation)
+             871:    103(ptr) AccessChain 37(data) 45 58
+             872: 27(i16vec4) Load 871
+             873:102(i16vec2) VectorShuffle 872 872 0 1
+             874:   17(ivec4) Load 19(ballot)
+             875:102(i16vec2) GroupNonUniformUMin 177 PartitionedReduceNV 873 874
+             876:    103(ptr) AccessChain 37(data) 870 58
+             877: 27(i16vec4) Load 876
+             878: 27(i16vec4) VectorShuffle 877 875 4 5 2 3
+                              Store 876 878
+             879:      6(int) Load 8(invocation)
+             880:    103(ptr) AccessChain 37(data) 52 58
+             881: 27(i16vec4) Load 880
+             882:108(i16vec3) VectorShuffle 881 881 0 1 2
+             883:   17(ivec4) Load 19(ballot)
+             884:108(i16vec3) GroupNonUniformUMin 177 PartitionedReduceNV 882 883
+             885:    103(ptr) AccessChain 37(data) 879 58
+             886: 27(i16vec4) Load 885
+             887: 27(i16vec4) VectorShuffle 886 884 4 5 6 3
+                              Store 885 887
+             888:      6(int) Load 8(invocation)
+             889:    103(ptr) AccessChain 37(data) 58 58
+             890: 27(i16vec4) Load 889
+             891:   17(ivec4) Load 19(ballot)
+             892: 27(i16vec4) GroupNonUniformUMin 177 PartitionedReduceNV 890 891
+             893:    103(ptr) AccessChain 37(data) 888 58
+                              Store 893 892
+             894:      6(int) Load 8(invocation)
+             895:     98(ptr) AccessChain 37(data) 39 58 40
+             896: 26(int16_t) Load 895
+             897:   17(ivec4) Load 19(ballot)
+             898: 26(int16_t) GroupNonUniformUMax 177 PartitionedReduceNV 896 897
+             899:     98(ptr) AccessChain 37(data) 894 58 40
+                              Store 899 898
+             900:      6(int) Load 8(invocation)
+             901:    103(ptr) AccessChain 37(data) 45 58
+             902: 27(i16vec4) Load 901
+             903:102(i16vec2) VectorShuffle 902 902 0 1
+             904:   17(ivec4) Load 19(ballot)
+             905:102(i16vec2) GroupNonUniformUMax 177 PartitionedReduceNV 903 904
+             906:    103(ptr) AccessChain 37(data) 900 58
+             907: 27(i16vec4) Load 906
+             908: 27(i16vec4) VectorShuffle 907 905 4 5 2 3
+                              Store 906 908
+             909:      6(int) Load 8(invocation)
+             910:    103(ptr) AccessChain 37(data) 52 58
+             911: 27(i16vec4) Load 910
+             912:108(i16vec3) VectorShuffle 911 911 0 1 2
+             913:   17(ivec4) Load 19(ballot)
+             914:108(i16vec3) GroupNonUniformUMax 177 PartitionedReduceNV 912 913
+             915:    103(ptr) AccessChain 37(data) 909 58
+             916: 27(i16vec4) Load 915
+             917: 27(i16vec4) VectorShuffle 916 914 4 5 6 3
+                              Store 915 917
+             918:      6(int) Load 8(invocation)
+             919:    103(ptr) AccessChain 37(data) 58 58
+             920: 27(i16vec4) Load 919
+             921:   17(ivec4) Load 19(ballot)
+             922: 27(i16vec4) GroupNonUniformUMax 177 PartitionedReduceNV 920 921
+             923:    103(ptr) AccessChain 37(data) 918 58
+                              Store 923 922
+             924:      6(int) Load 8(invocation)
+             925:     98(ptr) AccessChain 37(data) 39 58 40
+             926: 26(int16_t) Load 925
+             927:   17(ivec4) Load 19(ballot)
+             928: 26(int16_t) GroupNonUniformBitwiseAnd 177 PartitionedReduceNV 926 927
+             929:     98(ptr) AccessChain 37(data) 924 58 40
+                              Store 929 928
+             930:      6(int) Load 8(invocation)
+             931:    103(ptr) AccessChain 37(data) 45 58
+             932: 27(i16vec4) Load 931
+             933:102(i16vec2) VectorShuffle 932 932 0 1
+             934:   17(ivec4) Load 19(ballot)
+             935:102(i16vec2) GroupNonUniformBitwiseAnd 177 PartitionedReduceNV 933 934
+             936:    103(ptr) AccessChain 37(data) 930 58
+             937: 27(i16vec4) Load 936
+             938: 27(i16vec4) VectorShuffle 937 935 4 5 2 3
+                              Store 936 938
+             939:      6(int) Load 8(invocation)
+             940:    103(ptr) AccessChain 37(data) 52 58
+             941: 27(i16vec4) Load 940
+             942:108(i16vec3) VectorShuffle 941 941 0 1 2
+             943:   17(ivec4) Load 19(ballot)
+             944:108(i16vec3) GroupNonUniformBitwiseAnd 177 PartitionedReduceNV 942 943
+             945:    103(ptr) AccessChain 37(data) 939 58
+             946: 27(i16vec4) Load 945
+             947: 27(i16vec4) VectorShuffle 946 944 4 5 6 3
+                              Store 945 947
+             948:      6(int) Load 8(invocation)
+             949:    103(ptr) AccessChain 37(data) 58 58
+             950: 27(i16vec4) Load 949
+             951:   17(ivec4) Load 19(ballot)
+             952: 27(i16vec4) GroupNonUniformBitwiseAnd 177 PartitionedReduceNV 950 951
+             953:    103(ptr) AccessChain 37(data) 948 58
+                              Store 953 952
+             954:      6(int) Load 8(invocation)
+             955:     98(ptr) AccessChain 37(data) 39 58 40
+             956: 26(int16_t) Load 955
+             957:   17(ivec4) Load 19(ballot)
+             958: 26(int16_t) GroupNonUniformBitwiseOr 177 PartitionedReduceNV 956 957
+             959:     98(ptr) AccessChain 37(data) 954 58 40
+                              Store 959 958
+             960:      6(int) Load 8(invocation)
+             961:    103(ptr) AccessChain 37(data) 45 58
+             962: 27(i16vec4) Load 961
+             963:102(i16vec2) VectorShuffle 962 962 0 1
+             964:   17(ivec4) Load 19(ballot)
+             965:102(i16vec2) GroupNonUniformBitwiseOr 177 PartitionedReduceNV 963 964
+             966:    103(ptr) AccessChain 37(data) 960 58
+             967: 27(i16vec4) Load 966
+             968: 27(i16vec4) VectorShuffle 967 965 4 5 2 3
+                              Store 966 968
+             969:      6(int) Load 8(invocation)
+             970:    103(ptr) AccessChain 37(data) 52 58
+             971: 27(i16vec4) Load 970
+             972:108(i16vec3) VectorShuffle 971 971 0 1 2
+             973:   17(ivec4) Load 19(ballot)
+             974:108(i16vec3) GroupNonUniformBitwiseOr 177 PartitionedReduceNV 972 973
+             975:    103(ptr) AccessChain 37(data) 969 58
+             976: 27(i16vec4) Load 975
+             977: 27(i16vec4) VectorShuffle 976 974 4 5 6 3
+                              Store 975 977
+             978:      6(int) Load 8(invocation)
+             979:    103(ptr) AccessChain 37(data) 58 58
+             980: 27(i16vec4) Load 979
+             981:   17(ivec4) Load 19(ballot)
+             982: 27(i16vec4) GroupNonUniformBitwiseOr 177 PartitionedReduceNV 980 981
+             983:    103(ptr) AccessChain 37(data) 978 58
+                              Store 983 982
+             984:      6(int) Load 8(invocation)
+             985:     98(ptr) AccessChain 37(data) 39 58 40
+             986: 26(int16_t) Load 985
+             987:   17(ivec4) Load 19(ballot)
+             988: 26(int16_t) GroupNonUniformBitwiseXor 177 PartitionedReduceNV 986 987
+             989:     98(ptr) AccessChain 37(data) 984 58 40
+                              Store 989 988
+             990:      6(int) Load 8(invocation)
+             991:    103(ptr) AccessChain 37(data) 45 58
+             992: 27(i16vec4) Load 991
+             993:102(i16vec2) VectorShuffle 992 992 0 1
+             994:   17(ivec4) Load 19(ballot)
+             995:102(i16vec2) GroupNonUniformBitwiseXor 177 PartitionedReduceNV 993 994
+             996:    103(ptr) AccessChain 37(data) 990 58
+             997: 27(i16vec4) Load 996
+             998: 27(i16vec4) VectorShuffle 997 995 4 5 2 3
+                              Store 996 998
+             999:      6(int) Load 8(invocation)
+            1000:    103(ptr) AccessChain 37(data) 52 58
+            1001: 27(i16vec4) Load 1000
+            1002:108(i16vec3) VectorShuffle 1001 1001 0 1 2
+            1003:   17(ivec4) Load 19(ballot)
+            1004:108(i16vec3) GroupNonUniformBitwiseXor 177 PartitionedReduceNV 1002 1003
+            1005:    103(ptr) AccessChain 37(data) 999 58
+            1006: 27(i16vec4) Load 1005
+            1007: 27(i16vec4) VectorShuffle 1006 1004 4 5 6 3
+                              Store 1005 1007
+            1008:      6(int) Load 8(invocation)
+            1009:    103(ptr) AccessChain 37(data) 58 58
+            1010: 27(i16vec4) Load 1009
+            1011:   17(ivec4) Load 19(ballot)
+            1012: 27(i16vec4) GroupNonUniformBitwiseXor 177 PartitionedReduceNV 1010 1011
+            1013:    103(ptr) AccessChain 37(data) 1008 58
+                              Store 1013 1012
+            1014:      6(int) Load 8(invocation)
+            1015:    117(ptr) AccessChain 37(data) 39 116 40
+            1016: 28(int64_t) Load 1015
+            1017:   17(ivec4) Load 19(ballot)
+            1018: 28(int64_t) GroupNonUniformIAdd 177 PartitionedReduceNV 1016 1017
+            1019:    117(ptr) AccessChain 37(data) 1014 116 40
+                              Store 1019 1018
+            1020:      6(int) Load 8(invocation)
+            1021:    122(ptr) AccessChain 37(data) 45 116
+            1022: 29(i64vec4) Load 1021
+            1023:121(i64vec2) VectorShuffle 1022 1022 0 1
+            1024:   17(ivec4) Load 19(ballot)
+            1025:121(i64vec2) GroupNonUniformIAdd 177 PartitionedReduceNV 1023 1024
+            1026:    122(ptr) AccessChain 37(data) 1020 116
+            1027: 29(i64vec4) Load 1026
+            1028: 29(i64vec4) VectorShuffle 1027 1025 4 5 2 3
+                              Store 1026 1028
+            1029:      6(int) Load 8(invocation)
+            1030:    122(ptr) AccessChain 37(data) 52 116
+            1031: 29(i64vec4) Load 1030
+            1032:127(i64vec3) VectorShuffle 1031 1031 0 1 2
+            1033:   17(ivec4) Load 19(ballot)
+            1034:127(i64vec3) GroupNonUniformIAdd 177 PartitionedReduceNV 1032 1033
+            1035:    122(ptr) AccessChain 37(data) 1029 116
+            1036: 29(i64vec4) Load 1035
+            1037: 29(i64vec4) VectorShuffle 1036 1034 4 5 6 3
+                              Store 1035 1037
+            1038:      6(int) Load 8(invocation)
+            1039:    122(ptr) AccessChain 37(data) 58 116
+            1040: 29(i64vec4) Load 1039
+            1041:   17(ivec4) Load 19(ballot)
+            1042: 29(i64vec4) GroupNonUniformIAdd 177 PartitionedReduceNV 1040 1041
+            1043:    122(ptr) AccessChain 37(data) 1038 116
+                              Store 1043 1042
+            1044:      6(int) Load 8(invocation)
+            1045:    117(ptr) AccessChain 37(data) 39 116 40
+            1046: 28(int64_t) Load 1045
+            1047:   17(ivec4) Load 19(ballot)
+            1048: 28(int64_t) GroupNonUniformIMul 177 PartitionedReduceNV 1046 1047
+            1049:    117(ptr) AccessChain 37(data) 1044 116 40
+                              Store 1049 1048
+            1050:      6(int) Load 8(invocation)
+            1051:    122(ptr) AccessChain 37(data) 45 116
+            1052: 29(i64vec4) Load 1051
+            1053:121(i64vec2) VectorShuffle 1052 1052 0 1
+            1054:   17(ivec4) Load 19(ballot)
+            1055:121(i64vec2) GroupNonUniformIMul 177 PartitionedReduceNV 1053 1054
+            1056:    122(ptr) AccessChain 37(data) 1050 116
+            1057: 29(i64vec4) Load 1056
+            1058: 29(i64vec4) VectorShuffle 1057 1055 4 5 2 3
+                              Store 1056 1058
+            1059:      6(int) Load 8(invocation)
+            1060:    122(ptr) AccessChain 37(data) 52 116
+            1061: 29(i64vec4) Load 1060
+            1062:127(i64vec3) VectorShuffle 1061 1061 0 1 2
+            1063:   17(ivec4) Load 19(ballot)
+            1064:127(i64vec3) GroupNonUniformIMul 177 PartitionedReduceNV 1062 1063
+            1065:    122(ptr) AccessChain 37(data) 1059 116
+            1066: 29(i64vec4) Load 1065
+            1067: 29(i64vec4) VectorShuffle 1066 1064 4 5 6 3
+                              Store 1065 1067
+            1068:      6(int) Load 8(invocation)
+            1069:    122(ptr) AccessChain 37(data) 58 116
+            1070: 29(i64vec4) Load 1069
+            1071:   17(ivec4) Load 19(ballot)
+            1072: 29(i64vec4) GroupNonUniformIMul 177 PartitionedReduceNV 1070 1071
+            1073:    122(ptr) AccessChain 37(data) 1068 116
+                              Store 1073 1072
+            1074:      6(int) Load 8(invocation)
+            1075:    117(ptr) AccessChain 37(data) 39 116 40
+            1076: 28(int64_t) Load 1075
+            1077:   17(ivec4) Load 19(ballot)
+            1078: 28(int64_t) GroupNonUniformSMin 177 PartitionedReduceNV 1076 1077
+            1079:    117(ptr) AccessChain 37(data) 1074 116 40
+                              Store 1079 1078
+            1080:      6(int) Load 8(invocation)
+            1081:    122(ptr) AccessChain 37(data) 45 116
+            1082: 29(i64vec4) Load 1081
+            1083:121(i64vec2) VectorShuffle 1082 1082 0 1
+            1084:   17(ivec4) Load 19(ballot)
+            1085:121(i64vec2) GroupNonUniformSMin 177 PartitionedReduceNV 1083 1084
+            1086:    122(ptr) AccessChain 37(data) 1080 116
+            1087: 29(i64vec4) Load 1086
+            1088: 29(i64vec4) VectorShuffle 1087 1085 4 5 2 3
+                              Store 1086 1088
+            1089:      6(int) Load 8(invocation)
+            1090:    122(ptr) AccessChain 37(data) 52 116
+            1091: 29(i64vec4) Load 1090
+            1092:127(i64vec3) VectorShuffle 1091 1091 0 1 2
+            1093:   17(ivec4) Load 19(ballot)
+            1094:127(i64vec3) GroupNonUniformSMin 177 PartitionedReduceNV 1092 1093
+            1095:    122(ptr) AccessChain 37(data) 1089 116
+            1096: 29(i64vec4) Load 1095
+            1097: 29(i64vec4) VectorShuffle 1096 1094 4 5 6 3
+                              Store 1095 1097
+            1098:      6(int) Load 8(invocation)
+            1099:    122(ptr) AccessChain 37(data) 58 116
+            1100: 29(i64vec4) Load 1099
+            1101:   17(ivec4) Load 19(ballot)
+            1102: 29(i64vec4) GroupNonUniformSMin 177 PartitionedReduceNV 1100 1101
+            1103:    122(ptr) AccessChain 37(data) 1098 116
+                              Store 1103 1102
+            1104:      6(int) Load 8(invocation)
+            1105:    117(ptr) AccessChain 37(data) 39 116 40
+            1106: 28(int64_t) Load 1105
+            1107:   17(ivec4) Load 19(ballot)
+            1108: 28(int64_t) GroupNonUniformSMax 177 PartitionedReduceNV 1106 1107
+            1109:    117(ptr) AccessChain 37(data) 1104 116 40
+                              Store 1109 1108
+            1110:      6(int) Load 8(invocation)
+            1111:    122(ptr) AccessChain 37(data) 45 116
+            1112: 29(i64vec4) Load 1111
+            1113:121(i64vec2) VectorShuffle 1112 1112 0 1
+            1114:   17(ivec4) Load 19(ballot)
+            1115:121(i64vec2) GroupNonUniformSMax 177 PartitionedReduceNV 1113 1114
+            1116:    122(ptr) AccessChain 37(data) 1110 116
+            1117: 29(i64vec4) Load 1116
+            1118: 29(i64vec4) VectorShuffle 1117 1115 4 5 2 3
+                              Store 1116 1118
+            1119:      6(int) Load 8(invocation)
+            1120:    122(ptr) AccessChain 37(data) 52 116
+            1121: 29(i64vec4) Load 1120
+            1122:127(i64vec3) VectorShuffle 1121 1121 0 1 2
+            1123:   17(ivec4) Load 19(ballot)
+            1124:127(i64vec3) GroupNonUniformSMax 177 PartitionedReduceNV 1122 1123
+            1125:    122(ptr) AccessChain 37(data) 1119 116
+            1126: 29(i64vec4) Load 1125
+            1127: 29(i64vec4) VectorShuffle 1126 1124 4 5 6 3
+                              Store 1125 1127
+            1128:      6(int) Load 8(invocation)
+            1129:    122(ptr) AccessChain 37(data) 58 116
+            1130: 29(i64vec4) Load 1129
+            1131:   17(ivec4) Load 19(ballot)
+            1132: 29(i64vec4) GroupNonUniformSMax 177 PartitionedReduceNV 1130 1131
+            1133:    122(ptr) AccessChain 37(data) 1128 116
+                              Store 1133 1132
+            1134:      6(int) Load 8(invocation)
+            1135:    117(ptr) AccessChain 37(data) 39 116 40
+            1136: 28(int64_t) Load 1135
+            1137:   17(ivec4) Load 19(ballot)
+            1138: 28(int64_t) GroupNonUniformBitwiseAnd 177 PartitionedReduceNV 1136 1137
+            1139:    117(ptr) AccessChain 37(data) 1134 116 40
+                              Store 1139 1138
+            1140:      6(int) Load 8(invocation)
+            1141:    122(ptr) AccessChain 37(data) 45 116
+            1142: 29(i64vec4) Load 1141
+            1143:121(i64vec2) VectorShuffle 1142 1142 0 1
+            1144:   17(ivec4) Load 19(ballot)
+            1145:121(i64vec2) GroupNonUniformBitwiseAnd 177 PartitionedReduceNV 1143 1144
+            1146:    122(ptr) AccessChain 37(data) 1140 116
+            1147: 29(i64vec4) Load 1146
+            1148: 29(i64vec4) VectorShuffle 1147 1145 4 5 2 3
+                              Store 1146 1148
+            1149:      6(int) Load 8(invocation)
+            1150:    122(ptr) AccessChain 37(data) 52 116
+            1151: 29(i64vec4) Load 1150
+            1152:127(i64vec3) VectorShuffle 1151 1151 0 1 2
+            1153:   17(ivec4) Load 19(ballot)
+            1154:127(i64vec3) GroupNonUniformBitwiseAnd 177 PartitionedReduceNV 1152 1153
+            1155:    122(ptr) AccessChain 37(data) 1149 116
+            1156: 29(i64vec4) Load 1155
+            1157: 29(i64vec4) VectorShuffle 1156 1154 4 5 6 3
+                              Store 1155 1157
+            1158:      6(int) Load 8(invocation)
+            1159:    122(ptr) AccessChain 37(data) 58 116
+            1160: 29(i64vec4) Load 1159
+            1161:   17(ivec4) Load 19(ballot)
+            1162: 29(i64vec4) GroupNonUniformBitwiseAnd 177 PartitionedReduceNV 1160 1161
+            1163:    122(ptr) AccessChain 37(data) 1158 116
+                              Store 1163 1162
+            1164:      6(int) Load 8(invocation)
+            1165:    117(ptr) AccessChain 37(data) 39 116 40
+            1166: 28(int64_t) Load 1165
+            1167:   17(ivec4) Load 19(ballot)
+            1168: 28(int64_t) GroupNonUniformBitwiseOr 177 PartitionedReduceNV 1166 1167
+            1169:    117(ptr) AccessChain 37(data) 1164 116 40
+                              Store 1169 1168
+            1170:      6(int) Load 8(invocation)
+            1171:    122(ptr) AccessChain 37(data) 45 116
+            1172: 29(i64vec4) Load 1171
+            1173:121(i64vec2) VectorShuffle 1172 1172 0 1
+            1174:   17(ivec4) Load 19(ballot)
+            1175:121(i64vec2) GroupNonUniformBitwiseOr 177 PartitionedReduceNV 1173 1174
+            1176:    122(ptr) AccessChain 37(data) 1170 116
+            1177: 29(i64vec4) Load 1176
+            1178: 29(i64vec4) VectorShuffle 1177 1175 4 5 2 3
+                              Store 1176 1178
+            1179:      6(int) Load 8(invocation)
+            1180:    122(ptr) AccessChain 37(data) 52 116
+            1181: 29(i64vec4) Load 1180
+            1182:127(i64vec3) VectorShuffle 1181 1181 0 1 2
+            1183:   17(ivec4) Load 19(ballot)
+            1184:127(i64vec3) GroupNonUniformBitwiseOr 177 PartitionedReduceNV 1182 1183
+            1185:    122(ptr) AccessChain 37(data) 1179 116
+            1186: 29(i64vec4) Load 1185
+            1187: 29(i64vec4) VectorShuffle 1186 1184 4 5 6 3
+                              Store 1185 1187
+            1188:      6(int) Load 8(invocation)
+            1189:    122(ptr) AccessChain 37(data) 58 116
+            1190: 29(i64vec4) Load 1189
+            1191:   17(ivec4) Load 19(ballot)
+            1192: 29(i64vec4) GroupNonUniformBitwiseOr 177 PartitionedReduceNV 1190 1191
+            1193:    122(ptr) AccessChain 37(data) 1188 116
+                              Store 1193 1192
+            1194:      6(int) Load 8(invocation)
+            1195:    117(ptr) AccessChain 37(data) 39 116 40
+            1196: 28(int64_t) Load 1195
+            1197:   17(ivec4) Load 19(ballot)
+            1198: 28(int64_t) GroupNonUniformBitwiseXor 177 PartitionedReduceNV 1196 1197
+            1199:    117(ptr) AccessChain 37(data) 1194 116 40
+                              Store 1199 1198
+            1200:      6(int) Load 8(invocation)
+            1201:    122(ptr) AccessChain 37(data) 45 116
+            1202: 29(i64vec4) Load 1201
+            1203:121(i64vec2) VectorShuffle 1202 1202 0 1
+            1204:   17(ivec4) Load 19(ballot)
+            1205:121(i64vec2) GroupNonUniformBitwiseXor 177 PartitionedReduceNV 1203 1204
+            1206:    122(ptr) AccessChain 37(data) 1200 116
+            1207: 29(i64vec4) Load 1206
+            1208: 29(i64vec4) VectorShuffle 1207 1205 4 5 2 3
+                              Store 1206 1208
+            1209:      6(int) Load 8(invocation)
+            1210:    122(ptr) AccessChain 37(data) 52 116
+            1211: 29(i64vec4) Load 1210
+            1212:127(i64vec3) VectorShuffle 1211 1211 0 1 2
+            1213:   17(ivec4) Load 19(ballot)
+            1214:127(i64vec3) GroupNonUniformBitwiseXor 177 PartitionedReduceNV 1212 1213
+            1215:    122(ptr) AccessChain 37(data) 1209 116
+            1216: 29(i64vec4) Load 1215
+            1217: 29(i64vec4) VectorShuffle 1216 1214 4 5 6 3
+                              Store 1215 1217
+            1218:      6(int) Load 8(invocation)
+            1219:    122(ptr) AccessChain 37(data) 58 116
+            1220: 29(i64vec4) Load 1219
+            1221:   17(ivec4) Load 19(ballot)
+            1222: 29(i64vec4) GroupNonUniformBitwiseXor 177 PartitionedReduceNV 1220 1221
+            1223:    122(ptr) AccessChain 37(data) 1218 116
+                              Store 1223 1222
+            1224:      6(int) Load 8(invocation)
+            1225:    136(ptr) AccessChain 37(data) 39 135 40
+            1226: 30(int64_t) Load 1225
+            1227:   17(ivec4) Load 19(ballot)
+            1228: 30(int64_t) GroupNonUniformIAdd 177 PartitionedReduceNV 1226 1227
+            1229:    136(ptr) AccessChain 37(data) 1224 135 40
+                              Store 1229 1228
+            1230:      6(int) Load 8(invocation)
+            1231:    141(ptr) AccessChain 37(data) 45 135
+            1232: 31(i64vec4) Load 1231
+            1233:140(i64vec2) VectorShuffle 1232 1232 0 1
+            1234:   17(ivec4) Load 19(ballot)
+            1235:140(i64vec2) GroupNonUniformIAdd 177 PartitionedReduceNV 1233 1234
+            1236:    141(ptr) AccessChain 37(data) 1230 135
+            1237: 31(i64vec4) Load 1236
+            1238: 31(i64vec4) VectorShuffle 1237 1235 4 5 2 3
+                              Store 1236 1238
+            1239:      6(int) Load 8(invocation)
+            1240:    141(ptr) AccessChain 37(data) 52 135
+            1241: 31(i64vec4) Load 1240
+            1242:146(i64vec3) VectorShuffle 1241 1241 0 1 2
+            1243:   17(ivec4) Load 19(ballot)
+            1244:146(i64vec3) GroupNonUniformIAdd 177 PartitionedReduceNV 1242 1243
+            1245:    141(ptr) AccessChain 37(data) 1239 135
+            1246: 31(i64vec4) Load 1245
+            1247: 31(i64vec4) VectorShuffle 1246 1244 4 5 6 3
+                              Store 1245 1247
+            1248:      6(int) Load 8(invocation)
+            1249:    141(ptr) AccessChain 37(data) 58 135
+            1250: 31(i64vec4) Load 1249
+            1251:   17(ivec4) Load 19(ballot)
+            1252: 31(i64vec4) GroupNonUniformIAdd 177 PartitionedReduceNV 1250 1251
+            1253:    141(ptr) AccessChain 37(data) 1248 135
+                              Store 1253 1252
+            1254:      6(int) Load 8(invocation)
+            1255:    136(ptr) AccessChain 37(data) 39 135 40
+            1256: 30(int64_t) Load 1255
+            1257:   17(ivec4) Load 19(ballot)
+            1258: 30(int64_t) GroupNonUniformIMul 177 PartitionedReduceNV 1256 1257
+            1259:    136(ptr) AccessChain 37(data) 1254 135 40
+                              Store 1259 1258
+            1260:      6(int) Load 8(invocation)
+            1261:    141(ptr) AccessChain 37(data) 45 135
+            1262: 31(i64vec4) Load 1261
+            1263:140(i64vec2) VectorShuffle 1262 1262 0 1
+            1264:   17(ivec4) Load 19(ballot)
+            1265:140(i64vec2) GroupNonUniformIMul 177 PartitionedReduceNV 1263 1264
+            1266:    141(ptr) AccessChain 37(data) 1260 135
+            1267: 31(i64vec4) Load 1266
+            1268: 31(i64vec4) VectorShuffle 1267 1265 4 5 2 3
+                              Store 1266 1268
+            1269:      6(int) Load 8(invocation)
+            1270:    141(ptr) AccessChain 37(data) 52 135
+            1271: 31(i64vec4) Load 1270
+            1272:146(i64vec3) VectorShuffle 1271 1271 0 1 2
+            1273:   17(ivec4) Load 19(ballot)
+            1274:146(i64vec3) GroupNonUniformIMul 177 PartitionedReduceNV 1272 1273
+            1275:    141(ptr) AccessChain 37(data) 1269 135
+            1276: 31(i64vec4) Load 1275
+            1277: 31(i64vec4) VectorShuffle 1276 1274 4 5 6 3
+                              Store 1275 1277
+            1278:      6(int) Load 8(invocation)
+            1279:    141(ptr) AccessChain 37(data) 58 135
+            1280: 31(i64vec4) Load 1279
+            1281:   17(ivec4) Load 19(ballot)
+            1282: 31(i64vec4) GroupNonUniformIMul 177 PartitionedReduceNV 1280 1281
+            1283:    141(ptr) AccessChain 37(data) 1278 135
+                              Store 1283 1282
+            1284:      6(int) Load 8(invocation)
+            1285:    136(ptr) AccessChain 37(data) 39 135 40
+            1286: 30(int64_t) Load 1285
+            1287:   17(ivec4) Load 19(ballot)
+            1288: 30(int64_t) GroupNonUniformUMin 177 PartitionedReduceNV 1286 1287
+            1289:    136(ptr) AccessChain 37(data) 1284 135 40
+                              Store 1289 1288
+            1290:      6(int) Load 8(invocation)
+            1291:    141(ptr) AccessChain 37(data) 45 135
+            1292: 31(i64vec4) Load 1291
+            1293:140(i64vec2) VectorShuffle 1292 1292 0 1
+            1294:   17(ivec4) Load 19(ballot)
+            1295:140(i64vec2) GroupNonUniformUMin 177 PartitionedReduceNV 1293 1294
+            1296:    141(ptr) AccessChain 37(data) 1290 135
+            1297: 31(i64vec4) Load 1296
+            1298: 31(i64vec4) VectorShuffle 1297 1295 4 5 2 3
+                              Store 1296 1298
+            1299:      6(int) Load 8(invocation)
+            1300:    141(ptr) AccessChain 37(data) 52 135
+            1301: 31(i64vec4) Load 1300
+            1302:146(i64vec3) VectorShuffle 1301 1301 0 1 2
+            1303:   17(ivec4) Load 19(ballot)
+            1304:146(i64vec3) GroupNonUniformUMin 177 PartitionedReduceNV 1302 1303
+            1305:    141(ptr) AccessChain 37(data) 1299 135
+            1306: 31(i64vec4) Load 1305
+            1307: 31(i64vec4) VectorShuffle 1306 1304 4 5 6 3
+                              Store 1305 1307
+            1308:      6(int) Load 8(invocation)
+            1309:    141(ptr) AccessChain 37(data) 58 135
+            1310: 31(i64vec4) Load 1309
+            1311:   17(ivec4) Load 19(ballot)
+            1312: 31(i64vec4) GroupNonUniformUMin 177 PartitionedReduceNV 1310 1311
+            1313:    141(ptr) AccessChain 37(data) 1308 135
+                              Store 1313 1312
+            1314:      6(int) Load 8(invocation)
+            1315:    136(ptr) AccessChain 37(data) 39 135 40
+            1316: 30(int64_t) Load 1315
+            1317:   17(ivec4) Load 19(ballot)
+            1318: 30(int64_t) GroupNonUniformUMax 177 PartitionedReduceNV 1316 1317
+            1319:    136(ptr) AccessChain 37(data) 1314 135 40
+                              Store 1319 1318
+            1320:      6(int) Load 8(invocation)
+            1321:    141(ptr) AccessChain 37(data) 45 135
+            1322: 31(i64vec4) Load 1321
+            1323:140(i64vec2) VectorShuffle 1322 1322 0 1
+            1324:   17(ivec4) Load 19(ballot)
+            1325:140(i64vec2) GroupNonUniformUMax 177 PartitionedReduceNV 1323 1324
+            1326:    141(ptr) AccessChain 37(data) 1320 135
+            1327: 31(i64vec4) Load 1326
+            1328: 31(i64vec4) VectorShuffle 1327 1325 4 5 2 3
+                              Store 1326 1328
+            1329:      6(int) Load 8(invocation)
+            1330:    141(ptr) AccessChain 37(data) 52 135
+            1331: 31(i64vec4) Load 1330
+            1332:146(i64vec3) VectorShuffle 1331 1331 0 1 2
+            1333:   17(ivec4) Load 19(ballot)
+            1334:146(i64vec3) GroupNonUniformUMax 177 PartitionedReduceNV 1332 1333
+            1335:    141(ptr) AccessChain 37(data) 1329 135
+            1336: 31(i64vec4) Load 1335
+            1337: 31(i64vec4) VectorShuffle 1336 1334 4 5 6 3
+                              Store 1335 1337
+            1338:      6(int) Load 8(invocation)
+            1339:    141(ptr) AccessChain 37(data) 58 135
+            1340: 31(i64vec4) Load 1339
+            1341:   17(ivec4) Load 19(ballot)
+            1342: 31(i64vec4) GroupNonUniformUMax 177 PartitionedReduceNV 1340 1341
+            1343:    141(ptr) AccessChain 37(data) 1338 135
+                              Store 1343 1342
+            1344:      6(int) Load 8(invocation)
+            1345:    136(ptr) AccessChain 37(data) 39 135 40
+            1346: 30(int64_t) Load 1345
+            1347:   17(ivec4) Load 19(ballot)
+            1348: 30(int64_t) GroupNonUniformBitwiseAnd 177 PartitionedReduceNV 1346 1347
+            1349:    136(ptr) AccessChain 37(data) 1344 135 40
+                              Store 1349 1348
+            1350:      6(int) Load 8(invocation)
+            1351:    141(ptr) AccessChain 37(data) 45 135
+            1352: 31(i64vec4) Load 1351
+            1353:140(i64vec2) VectorShuffle 1352 1352 0 1
+            1354:   17(ivec4) Load 19(ballot)
+            1355:140(i64vec2) GroupNonUniformBitwiseAnd 177 PartitionedReduceNV 1353 1354
+            1356:    141(ptr) AccessChain 37(data) 1350 135
+            1357: 31(i64vec4) Load 1356
+            1358: 31(i64vec4) VectorShuffle 1357 1355 4 5 2 3
+                              Store 1356 1358
+            1359:      6(int) Load 8(invocation)
+            1360:    141(ptr) AccessChain 37(data) 52 135
+            1361: 31(i64vec4) Load 1360
+            1362:146(i64vec3) VectorShuffle 1361 1361 0 1 2
+            1363:   17(ivec4) Load 19(ballot)
+            1364:146(i64vec3) GroupNonUniformBitwiseAnd 177 PartitionedReduceNV 1362 1363
+            1365:    141(ptr) AccessChain 37(data) 1359 135
+            1366: 31(i64vec4) Load 1365
+            1367: 31(i64vec4) VectorShuffle 1366 1364 4 5 6 3
+                              Store 1365 1367
+            1368:      6(int) Load 8(invocation)
+            1369:    141(ptr) AccessChain 37(data) 58 135
+            1370: 31(i64vec4) Load 1369
+            1371:   17(ivec4) Load 19(ballot)
+            1372: 31(i64vec4) GroupNonUniformBitwiseAnd 177 PartitionedReduceNV 1370 1371
+            1373:    141(ptr) AccessChain 37(data) 1368 135
+                              Store 1373 1372
+            1374:      6(int) Load 8(invocation)
+            1375:    136(ptr) AccessChain 37(data) 39 135 40
+            1376: 30(int64_t) Load 1375
+            1377:   17(ivec4) Load 19(ballot)
+            1378: 30(int64_t) GroupNonUniformBitwiseOr 177 PartitionedReduceNV 1376 1377
+            1379:    136(ptr) AccessChain 37(data) 1374 135 40
+                              Store 1379 1378
+            1380:      6(int) Load 8(invocation)
+            1381:    141(ptr) AccessChain 37(data) 45 135
+            1382: 31(i64vec4) Load 1381
+            1383:140(i64vec2) VectorShuffle 1382 1382 0 1
+            1384:   17(ivec4) Load 19(ballot)
+            1385:140(i64vec2) GroupNonUniformBitwiseOr 177 PartitionedReduceNV 1383 1384
+            1386:    141(ptr) AccessChain 37(data) 1380 135
+            1387: 31(i64vec4) Load 1386
+            1388: 31(i64vec4) VectorShuffle 1387 1385 4 5 2 3
+                              Store 1386 1388
+            1389:      6(int) Load 8(invocation)
+            1390:    141(ptr) AccessChain 37(data) 52 135
+            1391: 31(i64vec4) Load 1390
+            1392:146(i64vec3) VectorShuffle 1391 1391 0 1 2
+            1393:   17(ivec4) Load 19(ballot)
+            1394:146(i64vec3) GroupNonUniformBitwiseOr 177 PartitionedReduceNV 1392 1393
+            1395:    141(ptr) AccessChain 37(data) 1389 135
+            1396: 31(i64vec4) Load 1395
+            1397: 31(i64vec4) VectorShuffle 1396 1394 4 5 6 3
+                              Store 1395 1397
+            1398:      6(int) Load 8(invocation)
+            1399:    141(ptr) AccessChain 37(data) 58 135
+            1400: 31(i64vec4) Load 1399
+            1401:   17(ivec4) Load 19(ballot)
+            1402: 31(i64vec4) GroupNonUniformBitwiseOr 177 PartitionedReduceNV 1400 1401
+            1403:    141(ptr) AccessChain 37(data) 1398 135
+                              Store 1403 1402
+            1404:      6(int) Load 8(invocation)
+            1405:    136(ptr) AccessChain 37(data) 39 135 40
+            1406: 30(int64_t) Load 1405
+            1407:   17(ivec4) Load 19(ballot)
+            1408: 30(int64_t) GroupNonUniformBitwiseXor 177 PartitionedReduceNV 1406 1407
+            1409:    136(ptr) AccessChain 37(data) 1404 135 40
+                              Store 1409 1408
+            1410:      6(int) Load 8(invocation)
+            1411:    141(ptr) AccessChain 37(data) 45 135
+            1412: 31(i64vec4) Load 1411
+            1413:140(i64vec2) VectorShuffle 1412 1412 0 1
+            1414:   17(ivec4) Load 19(ballot)
+            1415:140(i64vec2) GroupNonUniformBitwiseXor 177 PartitionedReduceNV 1413 1414
+            1416:    141(ptr) AccessChain 37(data) 1410 135
+            1417: 31(i64vec4) Load 1416
+            1418: 31(i64vec4) VectorShuffle 1417 1415 4 5 2 3
+                              Store 1416 1418
+            1419:      6(int) Load 8(invocation)
+            1420:    141(ptr) AccessChain 37(data) 52 135
+            1421: 31(i64vec4) Load 1420
+            1422:146(i64vec3) VectorShuffle 1421 1421 0 1 2
+            1423:   17(ivec4) Load 19(ballot)
+            1424:146(i64vec3) GroupNonUniformBitwiseXor 177 PartitionedReduceNV 1422 1423
+            1425:    141(ptr) AccessChain 37(data) 1419 135
+            1426: 31(i64vec4) Load 1425
+            1427: 31(i64vec4) VectorShuffle 1426 1424 4 5 6 3
+                              Store 1425 1427
+            1428:      6(int) Load 8(invocation)
+            1429:    141(ptr) AccessChain 37(data) 58 135
+            1430: 31(i64vec4) Load 1429
+            1431:   17(ivec4) Load 19(ballot)
+            1432: 31(i64vec4) GroupNonUniformBitwiseXor 177 PartitionedReduceNV 1430 1431
+            1433:    141(ptr) AccessChain 37(data) 1428 135
+                              Store 1433 1432
+            1434:      6(int) Load 8(invocation)
+            1435:    155(ptr) AccessChain 37(data) 39 154 40
+            1436:32(float16_t) Load 1435
+            1437:   17(ivec4) Load 19(ballot)
+            1438:32(float16_t) GroupNonUniformFAdd 177 PartitionedReduceNV 1436 1437
+            1439:    155(ptr) AccessChain 37(data) 1434 154 40
+                              Store 1439 1438
+            1440:      6(int) Load 8(invocation)
+            1441:    160(ptr) AccessChain 37(data) 45 154
+            1442: 33(f16vec4) Load 1441
+            1443:159(f16vec2) VectorShuffle 1442 1442 0 1
+            1444:   17(ivec4) Load 19(ballot)
+            1445:159(f16vec2) GroupNonUniformFAdd 177 PartitionedReduceNV 1443 1444
+            1446:    160(ptr) AccessChain 37(data) 1440 154
+            1447: 33(f16vec4) Load 1446
+            1448: 33(f16vec4) VectorShuffle 1447 1445 4 5 2 3
+                              Store 1446 1448
+            1449:      6(int) Load 8(invocation)
+            1450:    160(ptr) AccessChain 37(data) 52 154
+            1451: 33(f16vec4) Load 1450
+            1452:165(f16vec3) VectorShuffle 1451 1451 0 1 2
+            1453:   17(ivec4) Load 19(ballot)
+            1454:165(f16vec3) GroupNonUniformFAdd 177 PartitionedReduceNV 1452 1453
+            1455:    160(ptr) AccessChain 37(data) 1449 154
+            1456: 33(f16vec4) Load 1455
+            1457: 33(f16vec4) VectorShuffle 1456 1454 4 5 6 3
+                              Store 1455 1457
+            1458:      6(int) Load 8(invocation)
+            1459:    160(ptr) AccessChain 37(data) 58 154
+            1460: 33(f16vec4) Load 1459
+            1461:   17(ivec4) Load 19(ballot)
+            1462: 33(f16vec4) GroupNonUniformFAdd 177 PartitionedReduceNV 1460 1461
+            1463:    160(ptr) AccessChain 37(data) 1458 154
+                              Store 1463 1462
+            1464:      6(int) Load 8(invocation)
+            1465:    155(ptr) AccessChain 37(data) 39 154 40
+            1466:32(float16_t) Load 1465
+            1467:   17(ivec4) Load 19(ballot)
+            1468:32(float16_t) GroupNonUniformFMul 177 PartitionedReduceNV 1466 1467
+            1469:    155(ptr) AccessChain 37(data) 1464 154 40
+                              Store 1469 1468
+            1470:      6(int) Load 8(invocation)
+            1471:    160(ptr) AccessChain 37(data) 45 154
+            1472: 33(f16vec4) Load 1471
+            1473:159(f16vec2) VectorShuffle 1472 1472 0 1
+            1474:   17(ivec4) Load 19(ballot)
+            1475:159(f16vec2) GroupNonUniformFMul 177 PartitionedReduceNV 1473 1474
+            1476:    160(ptr) AccessChain 37(data) 1470 154
+            1477: 33(f16vec4) Load 1476
+            1478: 33(f16vec4) VectorShuffle 1477 1475 4 5 2 3
+                              Store 1476 1478
+            1479:      6(int) Load 8(invocation)
+            1480:    160(ptr) AccessChain 37(data) 52 154
+            1481: 33(f16vec4) Load 1480
+            1482:165(f16vec3) VectorShuffle 1481 1481 0 1 2
+            1483:   17(ivec4) Load 19(ballot)
+            1484:165(f16vec3) GroupNonUniformFMul 177 PartitionedReduceNV 1482 1483
+            1485:    160(ptr) AccessChain 37(data) 1479 154
+            1486: 33(f16vec4) Load 1485
+            1487: 33(f16vec4) VectorShuffle 1486 1484 4 5 6 3
+                              Store 1485 1487
+            1488:      6(int) Load 8(invocation)
+            1489:    160(ptr) AccessChain 37(data) 58 154
+            1490: 33(f16vec4) Load 1489
+            1491:   17(ivec4) Load 19(ballot)
+            1492: 33(f16vec4) GroupNonUniformFMul 177 PartitionedReduceNV 1490 1491
+            1493:    160(ptr) AccessChain 37(data) 1488 154
+                              Store 1493 1492
+            1494:      6(int) Load 8(invocation)
+            1495:    155(ptr) AccessChain 37(data) 39 154 40
+            1496:32(float16_t) Load 1495
+            1497:   17(ivec4) Load 19(ballot)
+            1498:32(float16_t) GroupNonUniformFMin 177 PartitionedReduceNV 1496 1497
+            1499:    155(ptr) AccessChain 37(data) 1494 154 40
+                              Store 1499 1498
+            1500:      6(int) Load 8(invocation)
+            1501:    160(ptr) AccessChain 37(data) 45 154
+            1502: 33(f16vec4) Load 1501
+            1503:159(f16vec2) VectorShuffle 1502 1502 0 1
+            1504:   17(ivec4) Load 19(ballot)
+            1505:159(f16vec2) GroupNonUniformFMin 177 PartitionedReduceNV 1503 1504
+            1506:    160(ptr) AccessChain 37(data) 1500 154
+            1507: 33(f16vec4) Load 1506
+            1508: 33(f16vec4) VectorShuffle 1507 1505 4 5 2 3
+                              Store 1506 1508
+            1509:      6(int) Load 8(invocation)
+            1510:    160(ptr) AccessChain 37(data) 52 154
+            1511: 33(f16vec4) Load 1510
+            1512:165(f16vec3) VectorShuffle 1511 1511 0 1 2
+            1513:   17(ivec4) Load 19(ballot)
+            1514:165(f16vec3) GroupNonUniformFMin 177 PartitionedReduceNV 1512 1513
+            1515:    160(ptr) AccessChain 37(data) 1509 154
+            1516: 33(f16vec4) Load 1515
+            1517: 33(f16vec4) VectorShuffle 1516 1514 4 5 6 3
+                              Store 1515 1517
+            1518:      6(int) Load 8(invocation)
+            1519:    160(ptr) AccessChain 37(data) 58 154
+            1520: 33(f16vec4) Load 1519
+            1521:   17(ivec4) Load 19(ballot)
+            1522: 33(f16vec4) GroupNonUniformFMin 177 PartitionedReduceNV 1520 1521
+            1523:    160(ptr) AccessChain 37(data) 1518 154
+                              Store 1523 1522
+            1524:      6(int) Load 8(invocation)
+            1525:    155(ptr) AccessChain 37(data) 39 154 40
+            1526:32(float16_t) Load 1525
+            1527:   17(ivec4) Load 19(ballot)
+            1528:32(float16_t) GroupNonUniformFMax 177 PartitionedReduceNV 1526 1527
+            1529:    155(ptr) AccessChain 37(data) 1524 154 40
+                              Store 1529 1528
+            1530:      6(int) Load 8(invocation)
+            1531:    160(ptr) AccessChain 37(data) 45 154
+            1532: 33(f16vec4) Load 1531
+            1533:159(f16vec2) VectorShuffle 1532 1532 0 1
+            1534:   17(ivec4) Load 19(ballot)
+            1535:159(f16vec2) GroupNonUniformFMax 177 PartitionedReduceNV 1533 1534
+            1536:    160(ptr) AccessChain 37(data) 1530 154
+            1537: 33(f16vec4) Load 1536
+            1538: 33(f16vec4) VectorShuffle 1537 1535 4 5 2 3
+                              Store 1536 1538
+            1539:      6(int) Load 8(invocation)
+            1540:    160(ptr) AccessChain 37(data) 52 154
+            1541: 33(f16vec4) Load 1540
+            1542:165(f16vec3) VectorShuffle 1541 1541 0 1 2
+            1543:   17(ivec4) Load 19(ballot)
+            1544:165(f16vec3) GroupNonUniformFMax 177 PartitionedReduceNV 1542 1543
+            1545:    160(ptr) AccessChain 37(data) 1539 154
+            1546: 33(f16vec4) Load 1545
+            1547: 33(f16vec4) VectorShuffle 1546 1544 4 5 6 3
+                              Store 1545 1547
+            1548:      6(int) Load 8(invocation)
+            1549:    160(ptr) AccessChain 37(data) 58 154
+            1550: 33(f16vec4) Load 1549
+            1551:   17(ivec4) Load 19(ballot)
+            1552: 33(f16vec4) GroupNonUniformFMax 177 PartitionedReduceNV 1550 1551
+            1553:    160(ptr) AccessChain 37(data) 1548 154
+                              Store 1553 1552
+                              Return
+                              FunctionEnd
diff --git a/Test/baseResults/spv.subgroupExtendedTypesPartitionedNeg.comp.out b/Test/baseResults/spv.subgroupExtendedTypesPartitionedNeg.comp.out
new file mode 100644
index 0000000..c029617
--- /dev/null
+++ b/Test/baseResults/spv.subgroupExtendedTypesPartitionedNeg.comp.out
@@ -0,0 +1,217 @@
+spv.subgroupExtendedTypesPartitionedNeg.comp
+ERROR: 0:27: ' temp int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:28: ' temp 2-component vector of int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:29: ' temp 3-component vector of int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:30: 'layout( column_major std430) buffer 4-component vector of int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:32: ' temp uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:33: ' temp 2-component vector of uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:34: ' temp 3-component vector of uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:35: 'layout( column_major std430) buffer 4-component vector of uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:37: ' temp int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:38: ' temp 2-component vector of int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:39: ' temp 3-component vector of int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:40: 'layout( column_major std430) buffer 4-component vector of int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:42: ' temp uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:43: ' temp 2-component vector of uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:44: ' temp 3-component vector of uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:45: 'layout( column_major std430) buffer 4-component vector of uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:47: ' temp int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:48: ' temp 2-component vector of int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:49: ' temp 3-component vector of int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:50: 'layout( column_major std430) buffer 4-component vector of int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:52: ' temp uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:53: ' temp 2-component vector of uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:54: ' temp 3-component vector of uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:55: 'layout( column_major std430) buffer 4-component vector of uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:57: ' temp float16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_float16
+ERROR: 0:58: ' temp 2-component vector of float16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_float16
+ERROR: 0:59: ' temp 3-component vector of float16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_float16
+ERROR: 0:60: 'layout( column_major std430) buffer 4-component vector of float16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_float16
+ERROR: 0:62: ' temp int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:63: ' temp 2-component vector of int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:64: ' temp 3-component vector of int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:65: 'layout( column_major std430) buffer 4-component vector of int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:67: ' temp int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:68: ' temp 2-component vector of int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:69: ' temp 3-component vector of int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:70: 'layout( column_major std430) buffer 4-component vector of int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:72: ' temp int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:73: ' temp 2-component vector of int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:74: ' temp 3-component vector of int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:75: 'layout( column_major std430) buffer 4-component vector of int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:77: ' temp int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:78: ' temp 2-component vector of int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:79: ' temp 3-component vector of int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:80: 'layout( column_major std430) buffer 4-component vector of int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:82: ' temp int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:83: ' temp 2-component vector of int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:84: ' temp 3-component vector of int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:85: 'layout( column_major std430) buffer 4-component vector of int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:87: ' temp int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:88: ' temp 2-component vector of int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:89: ' temp 3-component vector of int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:90: 'layout( column_major std430) buffer 4-component vector of int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:92: ' temp int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:93: ' temp 2-component vector of int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:94: ' temp 3-component vector of int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:95: 'layout( column_major std430) buffer 4-component vector of int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:97: ' temp uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:98: ' temp 2-component vector of uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:99: ' temp 3-component vector of uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:100: 'layout( column_major std430) buffer 4-component vector of uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:102: ' temp uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:103: ' temp 2-component vector of uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:104: ' temp 3-component vector of uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:105: 'layout( column_major std430) buffer 4-component vector of uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:107: ' temp uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:108: ' temp 2-component vector of uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:109: ' temp 3-component vector of uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:110: 'layout( column_major std430) buffer 4-component vector of uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:112: ' temp uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:113: ' temp 2-component vector of uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:114: ' temp 3-component vector of uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:115: 'layout( column_major std430) buffer 4-component vector of uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:117: ' temp uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:118: ' temp 2-component vector of uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:119: ' temp 3-component vector of uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:120: 'layout( column_major std430) buffer 4-component vector of uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:122: ' temp uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:123: ' temp 2-component vector of uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:124: ' temp 3-component vector of uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:125: 'layout( column_major std430) buffer 4-component vector of uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:127: ' temp uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:128: ' temp 2-component vector of uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:129: ' temp 3-component vector of uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:130: 'layout( column_major std430) buffer 4-component vector of uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:132: ' temp int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:133: ' temp 2-component vector of int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:134: ' temp 3-component vector of int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:135: 'layout( column_major std430) buffer 4-component vector of int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:137: ' temp int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:138: ' temp 2-component vector of int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:139: ' temp 3-component vector of int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:140: 'layout( column_major std430) buffer 4-component vector of int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:142: ' temp int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:143: ' temp 2-component vector of int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:144: ' temp 3-component vector of int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:145: 'layout( column_major std430) buffer 4-component vector of int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:147: ' temp int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:148: ' temp 2-component vector of int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:149: ' temp 3-component vector of int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:150: 'layout( column_major std430) buffer 4-component vector of int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:152: ' temp int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:153: ' temp 2-component vector of int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:154: ' temp 3-component vector of int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:155: 'layout( column_major std430) buffer 4-component vector of int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:157: ' temp int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:158: ' temp 2-component vector of int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:159: ' temp 3-component vector of int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:160: 'layout( column_major std430) buffer 4-component vector of int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:162: ' temp int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:163: ' temp 2-component vector of int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:164: ' temp 3-component vector of int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:165: 'layout( column_major std430) buffer 4-component vector of int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:167: ' temp uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:168: ' temp 2-component vector of uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:169: ' temp 3-component vector of uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:170: 'layout( column_major std430) buffer 4-component vector of uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:172: ' temp uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:173: ' temp 2-component vector of uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:174: ' temp 3-component vector of uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:175: 'layout( column_major std430) buffer 4-component vector of uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:177: ' temp uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:178: ' temp 2-component vector of uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:179: ' temp 3-component vector of uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:180: 'layout( column_major std430) buffer 4-component vector of uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:182: ' temp uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:183: ' temp 2-component vector of uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:184: ' temp 3-component vector of uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:185: 'layout( column_major std430) buffer 4-component vector of uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:187: ' temp uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:188: ' temp 2-component vector of uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:189: ' temp 3-component vector of uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:190: 'layout( column_major std430) buffer 4-component vector of uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:192: ' temp uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:193: ' temp 2-component vector of uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:194: ' temp 3-component vector of uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:195: 'layout( column_major std430) buffer 4-component vector of uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:197: ' temp uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:198: ' temp 2-component vector of uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:199: ' temp 3-component vector of uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:200: 'layout( column_major std430) buffer 4-component vector of uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:202: ' temp int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:203: ' temp 2-component vector of int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:204: ' temp 3-component vector of int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:205: 'layout( column_major std430) buffer 4-component vector of int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:207: ' temp int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:208: ' temp 2-component vector of int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:209: ' temp 3-component vector of int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:210: 'layout( column_major std430) buffer 4-component vector of int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:212: ' temp int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:213: ' temp 2-component vector of int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:214: ' temp 3-component vector of int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:215: 'layout( column_major std430) buffer 4-component vector of int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:217: ' temp int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:218: ' temp 2-component vector of int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:219: ' temp 3-component vector of int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:220: 'layout( column_major std430) buffer 4-component vector of int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:222: ' temp int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:223: ' temp 2-component vector of int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:224: ' temp 3-component vector of int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:225: 'layout( column_major std430) buffer 4-component vector of int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:227: ' temp int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:228: ' temp 2-component vector of int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:229: ' temp 3-component vector of int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:230: 'layout( column_major std430) buffer 4-component vector of int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:232: ' temp int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:233: ' temp 2-component vector of int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:234: ' temp 3-component vector of int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:235: 'layout( column_major std430) buffer 4-component vector of int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:237: ' temp uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:238: ' temp 2-component vector of uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:239: ' temp 3-component vector of uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:240: 'layout( column_major std430) buffer 4-component vector of uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:242: ' temp uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:243: ' temp 2-component vector of uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:244: ' temp 3-component vector of uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:245: 'layout( column_major std430) buffer 4-component vector of uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:247: ' temp uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:248: ' temp 2-component vector of uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:249: ' temp 3-component vector of uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:250: 'layout( column_major std430) buffer 4-component vector of uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:252: ' temp uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:253: ' temp 2-component vector of uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:254: ' temp 3-component vector of uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:255: 'layout( column_major std430) buffer 4-component vector of uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:257: ' temp uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:258: ' temp 2-component vector of uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:259: ' temp 3-component vector of uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:260: 'layout( column_major std430) buffer 4-component vector of uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:262: ' temp uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:263: ' temp 2-component vector of uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:264: ' temp 3-component vector of uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:265: 'layout( column_major std430) buffer 4-component vector of uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:267: ' temp uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:268: ' temp 2-component vector of uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:269: ' temp 3-component vector of uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:270: 'layout( column_major std430) buffer 4-component vector of uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:272: ' temp highp float16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_float16
+ERROR: 0:273: ' temp highp 2-component vector of float16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_float16
+ERROR: 0:274: ' temp highp 3-component vector of float16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_float16
+ERROR: 0:275: 'layout( column_major std430) buffer highp 4-component vector of float16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_float16
+ERROR: 0:277: ' temp highp float16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_float16
+ERROR: 0:278: ' temp highp 2-component vector of float16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_float16
+ERROR: 0:279: ' temp highp 3-component vector of float16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_float16
+ERROR: 0:280: 'layout( column_major std430) buffer highp 4-component vector of float16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_float16
+ERROR: 0:282: ' temp highp float16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_float16
+ERROR: 0:283: ' temp highp 2-component vector of float16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_float16
+ERROR: 0:284: ' temp highp 3-component vector of float16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_float16
+ERROR: 0:285: 'layout( column_major std430) buffer highp 4-component vector of float16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_float16
+ERROR: 0:287: ' temp highp float16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_float16
+ERROR: 0:288: ' temp highp 2-component vector of float16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_float16
+ERROR: 0:289: ' temp highp 3-component vector of float16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_float16
+ERROR: 0:290: 'layout( column_major std430) buffer highp 4-component vector of float16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_float16
+ERROR: 212 compilation errors.  No code generated.
+
+
+SPIR-V is not generated for failed compile or link
diff --git a/Test/baseResults/spv.subgroupExtendedTypesQuad.comp.out b/Test/baseResults/spv.subgroupExtendedTypesQuad.comp.out
new file mode 100644
index 0000000..7d37a41
--- /dev/null
+++ b/Test/baseResults/spv.subgroupExtendedTypesQuad.comp.out
@@ -0,0 +1,981 @@
+spv.subgroupExtendedTypesQuad.comp
+// Module Version 10300
+// Generated by (magic number): 80007
+// Id's are bound by 806
+
+                              Capability Shader
+                              Capability Float16
+                              Capability Int64
+                              Capability Int16
+                              Capability Int8
+                              Capability GroupNonUniform
+                              Capability GroupNonUniformQuad
+                              Capability StorageUniformBufferBlock16
+                              Capability StorageBuffer8BitAccess
+                              Extension  "SPV_KHR_8bit_storage"
+               1:             ExtInstImport  "GLSL.std.450"
+                              MemoryModel Logical GLSL450
+                              EntryPoint GLCompute 4  "main" 10 12
+                              ExecutionMode 4 LocalSize 8 1 1
+                              Source GLSL 450
+                              SourceExtension  "GL_EXT_shader_explicit_arithmetic_types_float16"
+                              SourceExtension  "GL_EXT_shader_explicit_arithmetic_types_int16"
+                              SourceExtension  "GL_EXT_shader_explicit_arithmetic_types_int64"
+                              SourceExtension  "GL_EXT_shader_explicit_arithmetic_types_int8"
+                              SourceExtension  "GL_EXT_shader_subgroup_extended_types_float16"
+                              SourceExtension  "GL_EXT_shader_subgroup_extended_types_int16"
+                              SourceExtension  "GL_EXT_shader_subgroup_extended_types_int64"
+                              SourceExtension  "GL_EXT_shader_subgroup_extended_types_int8"
+                              SourceExtension  "GL_KHR_shader_subgroup_basic"
+                              SourceExtension  "GL_KHR_shader_subgroup_quad"
+                              Name 4  "main"
+                              Name 8  "invocation"
+                              Name 10  "gl_SubgroupInvocationID"
+                              Name 12  "gl_SubgroupSize"
+                              Name 31  "Buffers"
+                              MemberName 31(Buffers) 0  "i8"
+                              MemberName 31(Buffers) 1  "u8"
+                              MemberName 31(Buffers) 2  "i16"
+                              MemberName 31(Buffers) 3  "u16"
+                              MemberName 31(Buffers) 4  "i64"
+                              MemberName 31(Buffers) 5  "u64"
+                              MemberName 31(Buffers) 6  "f16"
+                              Name 34  "data"
+                              Decorate 10(gl_SubgroupInvocationID) RelaxedPrecision
+                              Decorate 10(gl_SubgroupInvocationID) BuiltIn SubgroupLocalInvocationId
+                              Decorate 11 RelaxedPrecision
+                              Decorate 12(gl_SubgroupSize) RelaxedPrecision
+                              Decorate 12(gl_SubgroupSize) BuiltIn SubgroupSize
+                              Decorate 13 RelaxedPrecision
+                              Decorate 14 RelaxedPrecision
+                              Decorate 16 RelaxedPrecision
+                              MemberDecorate 31(Buffers) 0 Offset 0
+                              MemberDecorate 31(Buffers) 1 Offset 4
+                              MemberDecorate 31(Buffers) 2 Offset 8
+                              MemberDecorate 31(Buffers) 3 Offset 16
+                              MemberDecorate 31(Buffers) 4 Offset 32
+                              MemberDecorate 31(Buffers) 5 Offset 64
+                              MemberDecorate 31(Buffers) 6 Offset 96
+                              Decorate 31(Buffers) Block
+                              Decorate 34(data) DescriptorSet 0
+                              Decorate 34(data) Binding 0
+                              Decorate 805 BuiltIn WorkgroupSize
+               2:             TypeVoid
+               3:             TypeFunction 2
+               6:             TypeInt 32 0
+               7:             TypePointer Function 6(int)
+               9:             TypePointer Input 6(int)
+10(gl_SubgroupInvocationID):      9(ptr) Variable Input
+12(gl_SubgroupSize):      9(ptr) Variable Input
+              15:      6(int) Constant 4
+              17:             TypeInt 8 1
+              18:             TypeVector 17(int8_t) 4
+              19:             TypeInt 8 0
+              20:             TypeVector 19(int8_t) 4
+              21:             TypeInt 16 1
+              22:             TypeVector 21(int16_t) 4
+              23:             TypeInt 16 0
+              24:             TypeVector 23(int16_t) 4
+              25:             TypeInt 64 1
+              26:             TypeVector 25(int64_t) 4
+              27:             TypeInt 64 0
+              28:             TypeVector 27(int64_t) 4
+              29:             TypeFloat 16
+              30:             TypeVector 29(float16_t) 4
+     31(Buffers):             TypeStruct 18(i8vec4) 20(i8vec4) 22(i16vec4) 24(i16vec4) 26(i64vec4) 28(i64vec4) 30(f16vec4)
+              32:             TypeArray 31(Buffers) 15
+              33:             TypePointer StorageBuffer 32
+        34(data):     33(ptr) Variable StorageBuffer
+              36:             TypeInt 32 1
+              37:     36(int) Constant 0
+              38:      6(int) Constant 0
+              39:             TypePointer StorageBuffer 17(int8_t)
+              42:      6(int) Constant 1
+              43:      6(int) Constant 3
+              47:     36(int) Constant 1
+              48:             TypeVector 17(int8_t) 2
+              49:             TypePointer StorageBuffer 18(i8vec4)
+              58:     36(int) Constant 2
+              59:             TypeVector 17(int8_t) 3
+              68:     36(int) Constant 3
+             128:      6(int) Constant 2
+             153:             TypePointer StorageBuffer 19(int8_t)
+             159:             TypeVector 19(int8_t) 2
+             160:             TypePointer StorageBuffer 20(i8vec4)
+             169:             TypeVector 19(int8_t) 3
+             261:             TypePointer StorageBuffer 21(int16_t)
+             267:             TypeVector 21(int16_t) 2
+             268:             TypePointer StorageBuffer 22(i16vec4)
+             277:             TypeVector 21(int16_t) 3
+             369:             TypePointer StorageBuffer 23(int16_t)
+             375:             TypeVector 23(int16_t) 2
+             376:             TypePointer StorageBuffer 24(i16vec4)
+             385:             TypeVector 23(int16_t) 3
+             477:     36(int) Constant 4
+             478:             TypePointer StorageBuffer 25(int64_t)
+             484:             TypeVector 25(int64_t) 2
+             485:             TypePointer StorageBuffer 26(i64vec4)
+             494:             TypeVector 25(int64_t) 3
+             586:     36(int) Constant 5
+             587:             TypePointer StorageBuffer 27(int64_t)
+             593:             TypeVector 27(int64_t) 2
+             594:             TypePointer StorageBuffer 28(i64vec4)
+             603:             TypeVector 27(int64_t) 3
+             695:     36(int) Constant 6
+             696:             TypePointer StorageBuffer 29(float16_t)
+             702:             TypeVector 29(float16_t) 2
+             703:             TypePointer StorageBuffer 30(f16vec4)
+             712:             TypeVector 29(float16_t) 3
+             803:             TypeVector 6(int) 3
+             804:      6(int) Constant 8
+             805:  803(ivec3) ConstantComposite 804 42 42
+         4(main):           2 Function None 3
+               5:             Label
+   8(invocation):      7(ptr) Variable Function
+              11:      6(int) Load 10(gl_SubgroupInvocationID)
+              13:      6(int) Load 12(gl_SubgroupSize)
+              14:      6(int) IAdd 11 13
+              16:      6(int) UMod 14 15
+                              Store 8(invocation) 16
+              35:      6(int) Load 8(invocation)
+              40:     39(ptr) AccessChain 34(data) 37 37 38
+              41:  17(int8_t) Load 40
+              44:  17(int8_t) GroupNonUniformQuadBroadcast 43 41 42
+              45:     39(ptr) AccessChain 34(data) 35 37 38
+                              Store 45 44
+              46:      6(int) Load 8(invocation)
+              50:     49(ptr) AccessChain 34(data) 47 37
+              51:  18(i8vec4) Load 50
+              52:  48(i8vec2) VectorShuffle 51 51 0 1
+              53:  48(i8vec2) GroupNonUniformQuadBroadcast 43 52 42
+              54:     49(ptr) AccessChain 34(data) 46 37
+              55:  18(i8vec4) Load 54
+              56:  18(i8vec4) VectorShuffle 55 53 4 5 2 3
+                              Store 54 56
+              57:      6(int) Load 8(invocation)
+              60:     49(ptr) AccessChain 34(data) 58 37
+              61:  18(i8vec4) Load 60
+              62:  59(i8vec3) VectorShuffle 61 61 0 1 2
+              63:  59(i8vec3) GroupNonUniformQuadBroadcast 43 62 42
+              64:     49(ptr) AccessChain 34(data) 57 37
+              65:  18(i8vec4) Load 64
+              66:  18(i8vec4) VectorShuffle 65 63 4 5 6 3
+                              Store 64 66
+              67:      6(int) Load 8(invocation)
+              69:     49(ptr) AccessChain 34(data) 68 37
+              70:  18(i8vec4) Load 69
+              71:  18(i8vec4) GroupNonUniformQuadBroadcast 43 70 42
+              72:     49(ptr) AccessChain 34(data) 67 37
+                              Store 72 71
+              73:      6(int) Load 8(invocation)
+              74:     39(ptr) AccessChain 34(data) 37 37 38
+              75:  17(int8_t) Load 74
+              76:  17(int8_t) GroupNonUniformQuadSwap 43 75 38
+              77:     39(ptr) AccessChain 34(data) 73 37 38
+                              Store 77 76
+              78:      6(int) Load 8(invocation)
+              79:     49(ptr) AccessChain 34(data) 47 37
+              80:  18(i8vec4) Load 79
+              81:  48(i8vec2) VectorShuffle 80 80 0 1
+              82:  48(i8vec2) GroupNonUniformQuadSwap 43 81 38
+              83:     49(ptr) AccessChain 34(data) 78 37
+              84:  18(i8vec4) Load 83
+              85:  18(i8vec4) VectorShuffle 84 82 4 5 2 3
+                              Store 83 85
+              86:      6(int) Load 8(invocation)
+              87:     49(ptr) AccessChain 34(data) 58 37
+              88:  18(i8vec4) Load 87
+              89:  59(i8vec3) VectorShuffle 88 88 0 1 2
+              90:  59(i8vec3) GroupNonUniformQuadSwap 43 89 38
+              91:     49(ptr) AccessChain 34(data) 86 37
+              92:  18(i8vec4) Load 91
+              93:  18(i8vec4) VectorShuffle 92 90 4 5 6 3
+                              Store 91 93
+              94:      6(int) Load 8(invocation)
+              95:     49(ptr) AccessChain 34(data) 68 37
+              96:  18(i8vec4) Load 95
+              97:  18(i8vec4) GroupNonUniformQuadSwap 43 96 38
+              98:     49(ptr) AccessChain 34(data) 94 37
+                              Store 98 97
+              99:      6(int) Load 8(invocation)
+             100:     39(ptr) AccessChain 34(data) 37 37 38
+             101:  17(int8_t) Load 100
+             102:  17(int8_t) GroupNonUniformQuadSwap 43 101 42
+             103:     39(ptr) AccessChain 34(data) 99 37 38
+                              Store 103 102
+             104:      6(int) Load 8(invocation)
+             105:     49(ptr) AccessChain 34(data) 47 37
+             106:  18(i8vec4) Load 105
+             107:  48(i8vec2) VectorShuffle 106 106 0 1
+             108:  48(i8vec2) GroupNonUniformQuadSwap 43 107 42
+             109:     49(ptr) AccessChain 34(data) 104 37
+             110:  18(i8vec4) Load 109
+             111:  18(i8vec4) VectorShuffle 110 108 4 5 2 3
+                              Store 109 111
+             112:      6(int) Load 8(invocation)
+             113:     49(ptr) AccessChain 34(data) 58 37
+             114:  18(i8vec4) Load 113
+             115:  59(i8vec3) VectorShuffle 114 114 0 1 2
+             116:  59(i8vec3) GroupNonUniformQuadSwap 43 115 42
+             117:     49(ptr) AccessChain 34(data) 112 37
+             118:  18(i8vec4) Load 117
+             119:  18(i8vec4) VectorShuffle 118 116 4 5 6 3
+                              Store 117 119
+             120:      6(int) Load 8(invocation)
+             121:     49(ptr) AccessChain 34(data) 68 37
+             122:  18(i8vec4) Load 121
+             123:  18(i8vec4) GroupNonUniformQuadSwap 43 122 42
+             124:     49(ptr) AccessChain 34(data) 120 37
+                              Store 124 123
+             125:      6(int) Load 8(invocation)
+             126:     39(ptr) AccessChain 34(data) 37 37 38
+             127:  17(int8_t) Load 126
+             129:  17(int8_t) GroupNonUniformQuadSwap 43 127 128
+             130:     39(ptr) AccessChain 34(data) 125 37 38
+                              Store 130 129
+             131:      6(int) Load 8(invocation)
+             132:     49(ptr) AccessChain 34(data) 47 37
+             133:  18(i8vec4) Load 132
+             134:  48(i8vec2) VectorShuffle 133 133 0 1
+             135:  48(i8vec2) GroupNonUniformQuadSwap 43 134 128
+             136:     49(ptr) AccessChain 34(data) 131 37
+             137:  18(i8vec4) Load 136
+             138:  18(i8vec4) VectorShuffle 137 135 4 5 2 3
+                              Store 136 138
+             139:      6(int) Load 8(invocation)
+             140:     49(ptr) AccessChain 34(data) 58 37
+             141:  18(i8vec4) Load 140
+             142:  59(i8vec3) VectorShuffle 141 141 0 1 2
+             143:  59(i8vec3) GroupNonUniformQuadSwap 43 142 128
+             144:     49(ptr) AccessChain 34(data) 139 37
+             145:  18(i8vec4) Load 144
+             146:  18(i8vec4) VectorShuffle 145 143 4 5 6 3
+                              Store 144 146
+             147:      6(int) Load 8(invocation)
+             148:     49(ptr) AccessChain 34(data) 68 37
+             149:  18(i8vec4) Load 148
+             150:  18(i8vec4) GroupNonUniformQuadSwap 43 149 128
+             151:     49(ptr) AccessChain 34(data) 147 37
+                              Store 151 150
+             152:      6(int) Load 8(invocation)
+             154:    153(ptr) AccessChain 34(data) 37 47 38
+             155:  19(int8_t) Load 154
+             156:  19(int8_t) GroupNonUniformQuadBroadcast 43 155 42
+             157:    153(ptr) AccessChain 34(data) 152 47 38
+                              Store 157 156
+             158:      6(int) Load 8(invocation)
+             161:    160(ptr) AccessChain 34(data) 47 47
+             162:  20(i8vec4) Load 161
+             163: 159(i8vec2) VectorShuffle 162 162 0 1
+             164: 159(i8vec2) GroupNonUniformQuadBroadcast 43 163 42
+             165:    160(ptr) AccessChain 34(data) 158 47
+             166:  20(i8vec4) Load 165
+             167:  20(i8vec4) VectorShuffle 166 164 4 5 2 3
+                              Store 165 167
+             168:      6(int) Load 8(invocation)
+             170:    160(ptr) AccessChain 34(data) 58 47
+             171:  20(i8vec4) Load 170
+             172: 169(i8vec3) VectorShuffle 171 171 0 1 2
+             173: 169(i8vec3) GroupNonUniformQuadBroadcast 43 172 42
+             174:    160(ptr) AccessChain 34(data) 168 47
+             175:  20(i8vec4) Load 174
+             176:  20(i8vec4) VectorShuffle 175 173 4 5 6 3
+                              Store 174 176
+             177:      6(int) Load 8(invocation)
+             178:    160(ptr) AccessChain 34(data) 68 47
+             179:  20(i8vec4) Load 178
+             180:  20(i8vec4) GroupNonUniformQuadBroadcast 43 179 42
+             181:    160(ptr) AccessChain 34(data) 177 47
+                              Store 181 180
+             182:      6(int) Load 8(invocation)
+             183:    153(ptr) AccessChain 34(data) 37 47 38
+             184:  19(int8_t) Load 183
+             185:  19(int8_t) GroupNonUniformQuadSwap 43 184 38
+             186:    153(ptr) AccessChain 34(data) 182 47 38
+                              Store 186 185
+             187:      6(int) Load 8(invocation)
+             188:    160(ptr) AccessChain 34(data) 47 47
+             189:  20(i8vec4) Load 188
+             190: 159(i8vec2) VectorShuffle 189 189 0 1
+             191: 159(i8vec2) GroupNonUniformQuadSwap 43 190 38
+             192:    160(ptr) AccessChain 34(data) 187 47
+             193:  20(i8vec4) Load 192
+             194:  20(i8vec4) VectorShuffle 193 191 4 5 2 3
+                              Store 192 194
+             195:      6(int) Load 8(invocation)
+             196:    160(ptr) AccessChain 34(data) 58 47
+             197:  20(i8vec4) Load 196
+             198: 169(i8vec3) VectorShuffle 197 197 0 1 2
+             199: 169(i8vec3) GroupNonUniformQuadSwap 43 198 38
+             200:    160(ptr) AccessChain 34(data) 195 47
+             201:  20(i8vec4) Load 200
+             202:  20(i8vec4) VectorShuffle 201 199 4 5 6 3
+                              Store 200 202
+             203:      6(int) Load 8(invocation)
+             204:    160(ptr) AccessChain 34(data) 68 47
+             205:  20(i8vec4) Load 204
+             206:  20(i8vec4) GroupNonUniformQuadSwap 43 205 38
+             207:    160(ptr) AccessChain 34(data) 203 47
+                              Store 207 206
+             208:      6(int) Load 8(invocation)
+             209:    153(ptr) AccessChain 34(data) 37 47 38
+             210:  19(int8_t) Load 209
+             211:  19(int8_t) GroupNonUniformQuadSwap 43 210 42
+             212:    153(ptr) AccessChain 34(data) 208 47 38
+                              Store 212 211
+             213:      6(int) Load 8(invocation)
+             214:    160(ptr) AccessChain 34(data) 47 47
+             215:  20(i8vec4) Load 214
+             216: 159(i8vec2) VectorShuffle 215 215 0 1
+             217: 159(i8vec2) GroupNonUniformQuadSwap 43 216 42
+             218:    160(ptr) AccessChain 34(data) 213 47
+             219:  20(i8vec4) Load 218
+             220:  20(i8vec4) VectorShuffle 219 217 4 5 2 3
+                              Store 218 220
+             221:      6(int) Load 8(invocation)
+             222:    160(ptr) AccessChain 34(data) 58 47
+             223:  20(i8vec4) Load 222
+             224: 169(i8vec3) VectorShuffle 223 223 0 1 2
+             225: 169(i8vec3) GroupNonUniformQuadSwap 43 224 42
+             226:    160(ptr) AccessChain 34(data) 221 47
+             227:  20(i8vec4) Load 226
+             228:  20(i8vec4) VectorShuffle 227 225 4 5 6 3
+                              Store 226 228
+             229:      6(int) Load 8(invocation)
+             230:    160(ptr) AccessChain 34(data) 68 47
+             231:  20(i8vec4) Load 230
+             232:  20(i8vec4) GroupNonUniformQuadSwap 43 231 42
+             233:    160(ptr) AccessChain 34(data) 229 47
+                              Store 233 232
+             234:      6(int) Load 8(invocation)
+             235:    153(ptr) AccessChain 34(data) 37 47 38
+             236:  19(int8_t) Load 235
+             237:  19(int8_t) GroupNonUniformQuadSwap 43 236 128
+             238:    153(ptr) AccessChain 34(data) 234 47 38
+                              Store 238 237
+             239:      6(int) Load 8(invocation)
+             240:    160(ptr) AccessChain 34(data) 47 47
+             241:  20(i8vec4) Load 240
+             242: 159(i8vec2) VectorShuffle 241 241 0 1
+             243: 159(i8vec2) GroupNonUniformQuadSwap 43 242 128
+             244:    160(ptr) AccessChain 34(data) 239 47
+             245:  20(i8vec4) Load 244
+             246:  20(i8vec4) VectorShuffle 245 243 4 5 2 3
+                              Store 244 246
+             247:      6(int) Load 8(invocation)
+             248:    160(ptr) AccessChain 34(data) 58 47
+             249:  20(i8vec4) Load 248
+             250: 169(i8vec3) VectorShuffle 249 249 0 1 2
+             251: 169(i8vec3) GroupNonUniformQuadSwap 43 250 128
+             252:    160(ptr) AccessChain 34(data) 247 47
+             253:  20(i8vec4) Load 252
+             254:  20(i8vec4) VectorShuffle 253 251 4 5 6 3
+                              Store 252 254
+             255:      6(int) Load 8(invocation)
+             256:    160(ptr) AccessChain 34(data) 68 47
+             257:  20(i8vec4) Load 256
+             258:  20(i8vec4) GroupNonUniformQuadSwap 43 257 128
+             259:    160(ptr) AccessChain 34(data) 255 47
+                              Store 259 258
+             260:      6(int) Load 8(invocation)
+             262:    261(ptr) AccessChain 34(data) 37 58 38
+             263: 21(int16_t) Load 262
+             264: 21(int16_t) GroupNonUniformQuadBroadcast 43 263 42
+             265:    261(ptr) AccessChain 34(data) 260 58 38
+                              Store 265 264
+             266:      6(int) Load 8(invocation)
+             269:    268(ptr) AccessChain 34(data) 47 58
+             270: 22(i16vec4) Load 269
+             271:267(i16vec2) VectorShuffle 270 270 0 1
+             272:267(i16vec2) GroupNonUniformQuadBroadcast 43 271 42
+             273:    268(ptr) AccessChain 34(data) 266 58
+             274: 22(i16vec4) Load 273
+             275: 22(i16vec4) VectorShuffle 274 272 4 5 2 3
+                              Store 273 275
+             276:      6(int) Load 8(invocation)
+             278:    268(ptr) AccessChain 34(data) 58 58
+             279: 22(i16vec4) Load 278
+             280:277(i16vec3) VectorShuffle 279 279 0 1 2
+             281:277(i16vec3) GroupNonUniformQuadBroadcast 43 280 42
+             282:    268(ptr) AccessChain 34(data) 276 58
+             283: 22(i16vec4) Load 282
+             284: 22(i16vec4) VectorShuffle 283 281 4 5 6 3
+                              Store 282 284
+             285:      6(int) Load 8(invocation)
+             286:    268(ptr) AccessChain 34(data) 68 58
+             287: 22(i16vec4) Load 286
+             288: 22(i16vec4) GroupNonUniformQuadBroadcast 43 287 42
+             289:    268(ptr) AccessChain 34(data) 285 58
+                              Store 289 288
+             290:      6(int) Load 8(invocation)
+             291:    261(ptr) AccessChain 34(data) 37 58 38
+             292: 21(int16_t) Load 291
+             293: 21(int16_t) GroupNonUniformQuadSwap 43 292 38
+             294:    261(ptr) AccessChain 34(data) 290 58 38
+                              Store 294 293
+             295:      6(int) Load 8(invocation)
+             296:    268(ptr) AccessChain 34(data) 47 58
+             297: 22(i16vec4) Load 296
+             298:267(i16vec2) VectorShuffle 297 297 0 1
+             299:267(i16vec2) GroupNonUniformQuadSwap 43 298 38
+             300:    268(ptr) AccessChain 34(data) 295 58
+             301: 22(i16vec4) Load 300
+             302: 22(i16vec4) VectorShuffle 301 299 4 5 2 3
+                              Store 300 302
+             303:      6(int) Load 8(invocation)
+             304:    268(ptr) AccessChain 34(data) 58 58
+             305: 22(i16vec4) Load 304
+             306:277(i16vec3) VectorShuffle 305 305 0 1 2
+             307:277(i16vec3) GroupNonUniformQuadSwap 43 306 38
+             308:    268(ptr) AccessChain 34(data) 303 58
+             309: 22(i16vec4) Load 308
+             310: 22(i16vec4) VectorShuffle 309 307 4 5 6 3
+                              Store 308 310
+             311:      6(int) Load 8(invocation)
+             312:    268(ptr) AccessChain 34(data) 68 58
+             313: 22(i16vec4) Load 312
+             314: 22(i16vec4) GroupNonUniformQuadSwap 43 313 38
+             315:    268(ptr) AccessChain 34(data) 311 58
+                              Store 315 314
+             316:      6(int) Load 8(invocation)
+             317:    261(ptr) AccessChain 34(data) 37 58 38
+             318: 21(int16_t) Load 317
+             319: 21(int16_t) GroupNonUniformQuadSwap 43 318 42
+             320:    261(ptr) AccessChain 34(data) 316 58 38
+                              Store 320 319
+             321:      6(int) Load 8(invocation)
+             322:    268(ptr) AccessChain 34(data) 47 58
+             323: 22(i16vec4) Load 322
+             324:267(i16vec2) VectorShuffle 323 323 0 1
+             325:267(i16vec2) GroupNonUniformQuadSwap 43 324 42
+             326:    268(ptr) AccessChain 34(data) 321 58
+             327: 22(i16vec4) Load 326
+             328: 22(i16vec4) VectorShuffle 327 325 4 5 2 3
+                              Store 326 328
+             329:      6(int) Load 8(invocation)
+             330:    268(ptr) AccessChain 34(data) 58 58
+             331: 22(i16vec4) Load 330
+             332:277(i16vec3) VectorShuffle 331 331 0 1 2
+             333:277(i16vec3) GroupNonUniformQuadSwap 43 332 42
+             334:    268(ptr) AccessChain 34(data) 329 58
+             335: 22(i16vec4) Load 334
+             336: 22(i16vec4) VectorShuffle 335 333 4 5 6 3
+                              Store 334 336
+             337:      6(int) Load 8(invocation)
+             338:    268(ptr) AccessChain 34(data) 68 58
+             339: 22(i16vec4) Load 338
+             340: 22(i16vec4) GroupNonUniformQuadSwap 43 339 42
+             341:    268(ptr) AccessChain 34(data) 337 58
+                              Store 341 340
+             342:      6(int) Load 8(invocation)
+             343:    261(ptr) AccessChain 34(data) 37 58 38
+             344: 21(int16_t) Load 343
+             345: 21(int16_t) GroupNonUniformQuadSwap 43 344 128
+             346:    261(ptr) AccessChain 34(data) 342 58 38
+                              Store 346 345
+             347:      6(int) Load 8(invocation)
+             348:    268(ptr) AccessChain 34(data) 47 58
+             349: 22(i16vec4) Load 348
+             350:267(i16vec2) VectorShuffle 349 349 0 1
+             351:267(i16vec2) GroupNonUniformQuadSwap 43 350 128
+             352:    268(ptr) AccessChain 34(data) 347 58
+             353: 22(i16vec4) Load 352
+             354: 22(i16vec4) VectorShuffle 353 351 4 5 2 3
+                              Store 352 354
+             355:      6(int) Load 8(invocation)
+             356:    268(ptr) AccessChain 34(data) 58 58
+             357: 22(i16vec4) Load 356
+             358:277(i16vec3) VectorShuffle 357 357 0 1 2
+             359:277(i16vec3) GroupNonUniformQuadSwap 43 358 128
+             360:    268(ptr) AccessChain 34(data) 355 58
+             361: 22(i16vec4) Load 360
+             362: 22(i16vec4) VectorShuffle 361 359 4 5 6 3
+                              Store 360 362
+             363:      6(int) Load 8(invocation)
+             364:    268(ptr) AccessChain 34(data) 68 58
+             365: 22(i16vec4) Load 364
+             366: 22(i16vec4) GroupNonUniformQuadSwap 43 365 128
+             367:    268(ptr) AccessChain 34(data) 363 58
+                              Store 367 366
+             368:      6(int) Load 8(invocation)
+             370:    369(ptr) AccessChain 34(data) 37 68 38
+             371: 23(int16_t) Load 370
+             372: 23(int16_t) GroupNonUniformQuadBroadcast 43 371 42
+             373:    369(ptr) AccessChain 34(data) 368 68 38
+                              Store 373 372
+             374:      6(int) Load 8(invocation)
+             377:    376(ptr) AccessChain 34(data) 47 68
+             378: 24(i16vec4) Load 377
+             379:375(i16vec2) VectorShuffle 378 378 0 1
+             380:375(i16vec2) GroupNonUniformQuadBroadcast 43 379 42
+             381:    376(ptr) AccessChain 34(data) 374 68
+             382: 24(i16vec4) Load 381
+             383: 24(i16vec4) VectorShuffle 382 380 4 5 2 3
+                              Store 381 383
+             384:      6(int) Load 8(invocation)
+             386:    376(ptr) AccessChain 34(data) 58 68
+             387: 24(i16vec4) Load 386
+             388:385(i16vec3) VectorShuffle 387 387 0 1 2
+             389:385(i16vec3) GroupNonUniformQuadBroadcast 43 388 42
+             390:    376(ptr) AccessChain 34(data) 384 68
+             391: 24(i16vec4) Load 390
+             392: 24(i16vec4) VectorShuffle 391 389 4 5 6 3
+                              Store 390 392
+             393:      6(int) Load 8(invocation)
+             394:    376(ptr) AccessChain 34(data) 68 68
+             395: 24(i16vec4) Load 394
+             396: 24(i16vec4) GroupNonUniformQuadBroadcast 43 395 42
+             397:    376(ptr) AccessChain 34(data) 393 68
+                              Store 397 396
+             398:      6(int) Load 8(invocation)
+             399:    369(ptr) AccessChain 34(data) 37 68 38
+             400: 23(int16_t) Load 399
+             401: 23(int16_t) GroupNonUniformQuadSwap 43 400 38
+             402:    369(ptr) AccessChain 34(data) 398 68 38
+                              Store 402 401
+             403:      6(int) Load 8(invocation)
+             404:    376(ptr) AccessChain 34(data) 47 68
+             405: 24(i16vec4) Load 404
+             406:375(i16vec2) VectorShuffle 405 405 0 1
+             407:375(i16vec2) GroupNonUniformQuadSwap 43 406 38
+             408:    376(ptr) AccessChain 34(data) 403 68
+             409: 24(i16vec4) Load 408
+             410: 24(i16vec4) VectorShuffle 409 407 4 5 2 3
+                              Store 408 410
+             411:      6(int) Load 8(invocation)
+             412:    376(ptr) AccessChain 34(data) 58 68
+             413: 24(i16vec4) Load 412
+             414:385(i16vec3) VectorShuffle 413 413 0 1 2
+             415:385(i16vec3) GroupNonUniformQuadSwap 43 414 38
+             416:    376(ptr) AccessChain 34(data) 411 68
+             417: 24(i16vec4) Load 416
+             418: 24(i16vec4) VectorShuffle 417 415 4 5 6 3
+                              Store 416 418
+             419:      6(int) Load 8(invocation)
+             420:    376(ptr) AccessChain 34(data) 68 68
+             421: 24(i16vec4) Load 420
+             422: 24(i16vec4) GroupNonUniformQuadSwap 43 421 38
+             423:    376(ptr) AccessChain 34(data) 419 68
+                              Store 423 422
+             424:      6(int) Load 8(invocation)
+             425:    369(ptr) AccessChain 34(data) 37 68 38
+             426: 23(int16_t) Load 425
+             427: 23(int16_t) GroupNonUniformQuadSwap 43 426 42
+             428:    369(ptr) AccessChain 34(data) 424 68 38
+                              Store 428 427
+             429:      6(int) Load 8(invocation)
+             430:    376(ptr) AccessChain 34(data) 47 68
+             431: 24(i16vec4) Load 430
+             432:375(i16vec2) VectorShuffle 431 431 0 1
+             433:375(i16vec2) GroupNonUniformQuadSwap 43 432 42
+             434:    376(ptr) AccessChain 34(data) 429 68
+             435: 24(i16vec4) Load 434
+             436: 24(i16vec4) VectorShuffle 435 433 4 5 2 3
+                              Store 434 436
+             437:      6(int) Load 8(invocation)
+             438:    376(ptr) AccessChain 34(data) 58 68
+             439: 24(i16vec4) Load 438
+             440:385(i16vec3) VectorShuffle 439 439 0 1 2
+             441:385(i16vec3) GroupNonUniformQuadSwap 43 440 42
+             442:    376(ptr) AccessChain 34(data) 437 68
+             443: 24(i16vec4) Load 442
+             444: 24(i16vec4) VectorShuffle 443 441 4 5 6 3
+                              Store 442 444
+             445:      6(int) Load 8(invocation)
+             446:    376(ptr) AccessChain 34(data) 68 68
+             447: 24(i16vec4) Load 446
+             448: 24(i16vec4) GroupNonUniformQuadSwap 43 447 42
+             449:    376(ptr) AccessChain 34(data) 445 68
+                              Store 449 448
+             450:      6(int) Load 8(invocation)
+             451:    369(ptr) AccessChain 34(data) 37 68 38
+             452: 23(int16_t) Load 451
+             453: 23(int16_t) GroupNonUniformQuadSwap 43 452 128
+             454:    369(ptr) AccessChain 34(data) 450 68 38
+                              Store 454 453
+             455:      6(int) Load 8(invocation)
+             456:    376(ptr) AccessChain 34(data) 47 68
+             457: 24(i16vec4) Load 456
+             458:375(i16vec2) VectorShuffle 457 457 0 1
+             459:375(i16vec2) GroupNonUniformQuadSwap 43 458 128
+             460:    376(ptr) AccessChain 34(data) 455 68
+             461: 24(i16vec4) Load 460
+             462: 24(i16vec4) VectorShuffle 461 459 4 5 2 3
+                              Store 460 462
+             463:      6(int) Load 8(invocation)
+             464:    376(ptr) AccessChain 34(data) 58 68
+             465: 24(i16vec4) Load 464
+             466:385(i16vec3) VectorShuffle 465 465 0 1 2
+             467:385(i16vec3) GroupNonUniformQuadSwap 43 466 128
+             468:    376(ptr) AccessChain 34(data) 463 68
+             469: 24(i16vec4) Load 468
+             470: 24(i16vec4) VectorShuffle 469 467 4 5 6 3
+                              Store 468 470
+             471:      6(int) Load 8(invocation)
+             472:    376(ptr) AccessChain 34(data) 68 68
+             473: 24(i16vec4) Load 472
+             474: 24(i16vec4) GroupNonUniformQuadSwap 43 473 128
+             475:    376(ptr) AccessChain 34(data) 471 68
+                              Store 475 474
+             476:      6(int) Load 8(invocation)
+             479:    478(ptr) AccessChain 34(data) 37 477 38
+             480: 25(int64_t) Load 479
+             481: 25(int64_t) GroupNonUniformQuadBroadcast 43 480 42
+             482:    478(ptr) AccessChain 34(data) 476 477 38
+                              Store 482 481
+             483:      6(int) Load 8(invocation)
+             486:    485(ptr) AccessChain 34(data) 47 477
+             487: 26(i64vec4) Load 486
+             488:484(i64vec2) VectorShuffle 487 487 0 1
+             489:484(i64vec2) GroupNonUniformQuadBroadcast 43 488 42
+             490:    485(ptr) AccessChain 34(data) 483 477
+             491: 26(i64vec4) Load 490
+             492: 26(i64vec4) VectorShuffle 491 489 4 5 2 3
+                              Store 490 492
+             493:      6(int) Load 8(invocation)
+             495:    485(ptr) AccessChain 34(data) 58 477
+             496: 26(i64vec4) Load 495
+             497:494(i64vec3) VectorShuffle 496 496 0 1 2
+             498:494(i64vec3) GroupNonUniformQuadBroadcast 43 497 42
+             499:    485(ptr) AccessChain 34(data) 493 477
+             500: 26(i64vec4) Load 499
+             501: 26(i64vec4) VectorShuffle 500 498 4 5 6 3
+                              Store 499 501
+             502:      6(int) Load 8(invocation)
+             503:    485(ptr) AccessChain 34(data) 68 477
+             504: 26(i64vec4) Load 503
+             505: 26(i64vec4) GroupNonUniformQuadBroadcast 43 504 42
+             506:    485(ptr) AccessChain 34(data) 502 477
+                              Store 506 505
+             507:      6(int) Load 8(invocation)
+             508:    478(ptr) AccessChain 34(data) 37 477 38
+             509: 25(int64_t) Load 508
+             510: 25(int64_t) GroupNonUniformQuadSwap 43 509 38
+             511:    478(ptr) AccessChain 34(data) 507 477 38
+                              Store 511 510
+             512:      6(int) Load 8(invocation)
+             513:    485(ptr) AccessChain 34(data) 47 477
+             514: 26(i64vec4) Load 513
+             515:484(i64vec2) VectorShuffle 514 514 0 1
+             516:484(i64vec2) GroupNonUniformQuadSwap 43 515 38
+             517:    485(ptr) AccessChain 34(data) 512 477
+             518: 26(i64vec4) Load 517
+             519: 26(i64vec4) VectorShuffle 518 516 4 5 2 3
+                              Store 517 519
+             520:      6(int) Load 8(invocation)
+             521:    485(ptr) AccessChain 34(data) 58 477
+             522: 26(i64vec4) Load 521
+             523:494(i64vec3) VectorShuffle 522 522 0 1 2
+             524:494(i64vec3) GroupNonUniformQuadSwap 43 523 38
+             525:    485(ptr) AccessChain 34(data) 520 477
+             526: 26(i64vec4) Load 525
+             527: 26(i64vec4) VectorShuffle 526 524 4 5 6 3
+                              Store 525 527
+             528:      6(int) Load 8(invocation)
+             529:    485(ptr) AccessChain 34(data) 68 477
+             530: 26(i64vec4) Load 529
+             531: 26(i64vec4) GroupNonUniformQuadSwap 43 530 38
+             532:    485(ptr) AccessChain 34(data) 528 477
+                              Store 532 531
+             533:      6(int) Load 8(invocation)
+             534:    478(ptr) AccessChain 34(data) 37 477 38
+             535: 25(int64_t) Load 534
+             536: 25(int64_t) GroupNonUniformQuadSwap 43 535 42
+             537:    478(ptr) AccessChain 34(data) 533 477 38
+                              Store 537 536
+             538:      6(int) Load 8(invocation)
+             539:    485(ptr) AccessChain 34(data) 47 477
+             540: 26(i64vec4) Load 539
+             541:484(i64vec2) VectorShuffle 540 540 0 1
+             542:484(i64vec2) GroupNonUniformQuadSwap 43 541 42
+             543:    485(ptr) AccessChain 34(data) 538 477
+             544: 26(i64vec4) Load 543
+             545: 26(i64vec4) VectorShuffle 544 542 4 5 2 3
+                              Store 543 545
+             546:      6(int) Load 8(invocation)
+             547:    485(ptr) AccessChain 34(data) 58 477
+             548: 26(i64vec4) Load 547
+             549:494(i64vec3) VectorShuffle 548 548 0 1 2
+             550:494(i64vec3) GroupNonUniformQuadSwap 43 549 42
+             551:    485(ptr) AccessChain 34(data) 546 477
+             552: 26(i64vec4) Load 551
+             553: 26(i64vec4) VectorShuffle 552 550 4 5 6 3
+                              Store 551 553
+             554:      6(int) Load 8(invocation)
+             555:    485(ptr) AccessChain 34(data) 68 477
+             556: 26(i64vec4) Load 555
+             557: 26(i64vec4) GroupNonUniformQuadSwap 43 556 42
+             558:    485(ptr) AccessChain 34(data) 554 477
+                              Store 558 557
+             559:      6(int) Load 8(invocation)
+             560:    478(ptr) AccessChain 34(data) 37 477 38
+             561: 25(int64_t) Load 560
+             562: 25(int64_t) GroupNonUniformQuadSwap 43 561 128
+             563:    478(ptr) AccessChain 34(data) 559 477 38
+                              Store 563 562
+             564:      6(int) Load 8(invocation)
+             565:    485(ptr) AccessChain 34(data) 47 477
+             566: 26(i64vec4) Load 565
+             567:484(i64vec2) VectorShuffle 566 566 0 1
+             568:484(i64vec2) GroupNonUniformQuadSwap 43 567 128
+             569:    485(ptr) AccessChain 34(data) 564 477
+             570: 26(i64vec4) Load 569
+             571: 26(i64vec4) VectorShuffle 570 568 4 5 2 3
+                              Store 569 571
+             572:      6(int) Load 8(invocation)
+             573:    485(ptr) AccessChain 34(data) 58 477
+             574: 26(i64vec4) Load 573
+             575:494(i64vec3) VectorShuffle 574 574 0 1 2
+             576:494(i64vec3) GroupNonUniformQuadSwap 43 575 128
+             577:    485(ptr) AccessChain 34(data) 572 477
+             578: 26(i64vec4) Load 577
+             579: 26(i64vec4) VectorShuffle 578 576 4 5 6 3
+                              Store 577 579
+             580:      6(int) Load 8(invocation)
+             581:    485(ptr) AccessChain 34(data) 68 477
+             582: 26(i64vec4) Load 581
+             583: 26(i64vec4) GroupNonUniformQuadSwap 43 582 128
+             584:    485(ptr) AccessChain 34(data) 580 477
+                              Store 584 583
+             585:      6(int) Load 8(invocation)
+             588:    587(ptr) AccessChain 34(data) 37 586 38
+             589: 27(int64_t) Load 588
+             590: 27(int64_t) GroupNonUniformQuadBroadcast 43 589 42
+             591:    587(ptr) AccessChain 34(data) 585 586 38
+                              Store 591 590
+             592:      6(int) Load 8(invocation)
+             595:    594(ptr) AccessChain 34(data) 47 586
+             596: 28(i64vec4) Load 595
+             597:593(i64vec2) VectorShuffle 596 596 0 1
+             598:593(i64vec2) GroupNonUniformQuadBroadcast 43 597 42
+             599:    594(ptr) AccessChain 34(data) 592 586
+             600: 28(i64vec4) Load 599
+             601: 28(i64vec4) VectorShuffle 600 598 4 5 2 3
+                              Store 599 601
+             602:      6(int) Load 8(invocation)
+             604:    594(ptr) AccessChain 34(data) 58 586
+             605: 28(i64vec4) Load 604
+             606:603(i64vec3) VectorShuffle 605 605 0 1 2
+             607:603(i64vec3) GroupNonUniformQuadBroadcast 43 606 42
+             608:    594(ptr) AccessChain 34(data) 602 586
+             609: 28(i64vec4) Load 608
+             610: 28(i64vec4) VectorShuffle 609 607 4 5 6 3
+                              Store 608 610
+             611:      6(int) Load 8(invocation)
+             612:    594(ptr) AccessChain 34(data) 68 586
+             613: 28(i64vec4) Load 612
+             614: 28(i64vec4) GroupNonUniformQuadBroadcast 43 613 42
+             615:    594(ptr) AccessChain 34(data) 611 586
+                              Store 615 614
+             616:      6(int) Load 8(invocation)
+             617:    587(ptr) AccessChain 34(data) 37 586 38
+             618: 27(int64_t) Load 617
+             619: 27(int64_t) GroupNonUniformQuadSwap 43 618 38
+             620:    587(ptr) AccessChain 34(data) 616 586 38
+                              Store 620 619
+             621:      6(int) Load 8(invocation)
+             622:    594(ptr) AccessChain 34(data) 47 586
+             623: 28(i64vec4) Load 622
+             624:593(i64vec2) VectorShuffle 623 623 0 1
+             625:593(i64vec2) GroupNonUniformQuadSwap 43 624 38
+             626:    594(ptr) AccessChain 34(data) 621 586
+             627: 28(i64vec4) Load 626
+             628: 28(i64vec4) VectorShuffle 627 625 4 5 2 3
+                              Store 626 628
+             629:      6(int) Load 8(invocation)
+             630:    594(ptr) AccessChain 34(data) 58 586
+             631: 28(i64vec4) Load 630
+             632:603(i64vec3) VectorShuffle 631 631 0 1 2
+             633:603(i64vec3) GroupNonUniformQuadSwap 43 632 38
+             634:    594(ptr) AccessChain 34(data) 629 586
+             635: 28(i64vec4) Load 634
+             636: 28(i64vec4) VectorShuffle 635 633 4 5 6 3
+                              Store 634 636
+             637:      6(int) Load 8(invocation)
+             638:    594(ptr) AccessChain 34(data) 68 586
+             639: 28(i64vec4) Load 638
+             640: 28(i64vec4) GroupNonUniformQuadSwap 43 639 38
+             641:    594(ptr) AccessChain 34(data) 637 586
+                              Store 641 640
+             642:      6(int) Load 8(invocation)
+             643:    587(ptr) AccessChain 34(data) 37 586 38
+             644: 27(int64_t) Load 643
+             645: 27(int64_t) GroupNonUniformQuadSwap 43 644 42
+             646:    587(ptr) AccessChain 34(data) 642 586 38
+                              Store 646 645
+             647:      6(int) Load 8(invocation)
+             648:    594(ptr) AccessChain 34(data) 47 586
+             649: 28(i64vec4) Load 648
+             650:593(i64vec2) VectorShuffle 649 649 0 1
+             651:593(i64vec2) GroupNonUniformQuadSwap 43 650 42
+             652:    594(ptr) AccessChain 34(data) 647 586
+             653: 28(i64vec4) Load 652
+             654: 28(i64vec4) VectorShuffle 653 651 4 5 2 3
+                              Store 652 654
+             655:      6(int) Load 8(invocation)
+             656:    594(ptr) AccessChain 34(data) 58 586
+             657: 28(i64vec4) Load 656
+             658:603(i64vec3) VectorShuffle 657 657 0 1 2
+             659:603(i64vec3) GroupNonUniformQuadSwap 43 658 42
+             660:    594(ptr) AccessChain 34(data) 655 586
+             661: 28(i64vec4) Load 660
+             662: 28(i64vec4) VectorShuffle 661 659 4 5 6 3
+                              Store 660 662
+             663:      6(int) Load 8(invocation)
+             664:    594(ptr) AccessChain 34(data) 68 586
+             665: 28(i64vec4) Load 664
+             666: 28(i64vec4) GroupNonUniformQuadSwap 43 665 42
+             667:    594(ptr) AccessChain 34(data) 663 586
+                              Store 667 666
+             668:      6(int) Load 8(invocation)
+             669:    587(ptr) AccessChain 34(data) 37 586 38
+             670: 27(int64_t) Load 669
+             671: 27(int64_t) GroupNonUniformQuadSwap 43 670 128
+             672:    587(ptr) AccessChain 34(data) 668 586 38
+                              Store 672 671
+             673:      6(int) Load 8(invocation)
+             674:    594(ptr) AccessChain 34(data) 47 586
+             675: 28(i64vec4) Load 674
+             676:593(i64vec2) VectorShuffle 675 675 0 1
+             677:593(i64vec2) GroupNonUniformQuadSwap 43 676 128
+             678:    594(ptr) AccessChain 34(data) 673 586
+             679: 28(i64vec4) Load 678
+             680: 28(i64vec4) VectorShuffle 679 677 4 5 2 3
+                              Store 678 680
+             681:      6(int) Load 8(invocation)
+             682:    594(ptr) AccessChain 34(data) 58 586
+             683: 28(i64vec4) Load 682
+             684:603(i64vec3) VectorShuffle 683 683 0 1 2
+             685:603(i64vec3) GroupNonUniformQuadSwap 43 684 128
+             686:    594(ptr) AccessChain 34(data) 681 586
+             687: 28(i64vec4) Load 686
+             688: 28(i64vec4) VectorShuffle 687 685 4 5 6 3
+                              Store 686 688
+             689:      6(int) Load 8(invocation)
+             690:    594(ptr) AccessChain 34(data) 68 586
+             691: 28(i64vec4) Load 690
+             692: 28(i64vec4) GroupNonUniformQuadSwap 43 691 128
+             693:    594(ptr) AccessChain 34(data) 689 586
+                              Store 693 692
+             694:      6(int) Load 8(invocation)
+             697:    696(ptr) AccessChain 34(data) 37 695 38
+             698:29(float16_t) Load 697
+             699:29(float16_t) GroupNonUniformQuadBroadcast 43 698 42
+             700:    696(ptr) AccessChain 34(data) 694 695 38
+                              Store 700 699
+             701:      6(int) Load 8(invocation)
+             704:    703(ptr) AccessChain 34(data) 47 695
+             705: 30(f16vec4) Load 704
+             706:702(f16vec2) VectorShuffle 705 705 0 1
+             707:702(f16vec2) GroupNonUniformQuadBroadcast 43 706 42
+             708:    703(ptr) AccessChain 34(data) 701 695
+             709: 30(f16vec4) Load 708
+             710: 30(f16vec4) VectorShuffle 709 707 4 5 2 3
+                              Store 708 710
+             711:      6(int) Load 8(invocation)
+             713:    703(ptr) AccessChain 34(data) 58 695
+             714: 30(f16vec4) Load 713
+             715:712(f16vec3) VectorShuffle 714 714 0 1 2
+             716:712(f16vec3) GroupNonUniformQuadBroadcast 43 715 42
+             717:    703(ptr) AccessChain 34(data) 711 695
+             718: 30(f16vec4) Load 717
+             719: 30(f16vec4) VectorShuffle 718 716 4 5 6 3
+                              Store 717 719
+             720:      6(int) Load 8(invocation)
+             721:    703(ptr) AccessChain 34(data) 68 695
+             722: 30(f16vec4) Load 721
+             723: 30(f16vec4) GroupNonUniformQuadBroadcast 43 722 42
+             724:    703(ptr) AccessChain 34(data) 720 695
+                              Store 724 723
+             725:      6(int) Load 8(invocation)
+             726:    696(ptr) AccessChain 34(data) 37 695 38
+             727:29(float16_t) Load 726
+             728:29(float16_t) GroupNonUniformQuadSwap 43 727 38
+             729:    696(ptr) AccessChain 34(data) 725 695 38
+                              Store 729 728
+             730:      6(int) Load 8(invocation)
+             731:    703(ptr) AccessChain 34(data) 47 695
+             732: 30(f16vec4) Load 731
+             733:702(f16vec2) VectorShuffle 732 732 0 1
+             734:702(f16vec2) GroupNonUniformQuadSwap 43 733 38
+             735:    703(ptr) AccessChain 34(data) 730 695
+             736: 30(f16vec4) Load 735
+             737: 30(f16vec4) VectorShuffle 736 734 4 5 2 3
+                              Store 735 737
+             738:      6(int) Load 8(invocation)
+             739:    703(ptr) AccessChain 34(data) 58 695
+             740: 30(f16vec4) Load 739
+             741:712(f16vec3) VectorShuffle 740 740 0 1 2
+             742:712(f16vec3) GroupNonUniformQuadSwap 43 741 38
+             743:    703(ptr) AccessChain 34(data) 738 695
+             744: 30(f16vec4) Load 743
+             745: 30(f16vec4) VectorShuffle 744 742 4 5 6 3
+                              Store 743 745
+             746:      6(int) Load 8(invocation)
+             747:    703(ptr) AccessChain 34(data) 68 695
+             748: 30(f16vec4) Load 747
+             749: 30(f16vec4) GroupNonUniformQuadSwap 43 748 38
+             750:    703(ptr) AccessChain 34(data) 746 695
+                              Store 750 749
+             751:      6(int) Load 8(invocation)
+             752:    696(ptr) AccessChain 34(data) 37 695 38
+             753:29(float16_t) Load 752
+             754:29(float16_t) GroupNonUniformQuadSwap 43 753 42
+             755:    696(ptr) AccessChain 34(data) 751 695 38
+                              Store 755 754
+             756:      6(int) Load 8(invocation)
+             757:    703(ptr) AccessChain 34(data) 47 695
+             758: 30(f16vec4) Load 757
+             759:702(f16vec2) VectorShuffle 758 758 0 1
+             760:702(f16vec2) GroupNonUniformQuadSwap 43 759 42
+             761:    703(ptr) AccessChain 34(data) 756 695
+             762: 30(f16vec4) Load 761
+             763: 30(f16vec4) VectorShuffle 762 760 4 5 2 3
+                              Store 761 763
+             764:      6(int) Load 8(invocation)
+             765:    703(ptr) AccessChain 34(data) 58 695
+             766: 30(f16vec4) Load 765
+             767:712(f16vec3) VectorShuffle 766 766 0 1 2
+             768:712(f16vec3) GroupNonUniformQuadSwap 43 767 42
+             769:    703(ptr) AccessChain 34(data) 764 695
+             770: 30(f16vec4) Load 769
+             771: 30(f16vec4) VectorShuffle 770 768 4 5 6 3
+                              Store 769 771
+             772:      6(int) Load 8(invocation)
+             773:    703(ptr) AccessChain 34(data) 68 695
+             774: 30(f16vec4) Load 773
+             775: 30(f16vec4) GroupNonUniformQuadSwap 43 774 42
+             776:    703(ptr) AccessChain 34(data) 772 695
+                              Store 776 775
+             777:      6(int) Load 8(invocation)
+             778:    696(ptr) AccessChain 34(data) 37 695 38
+             779:29(float16_t) Load 778
+             780:29(float16_t) GroupNonUniformQuadSwap 43 779 128
+             781:    696(ptr) AccessChain 34(data) 777 695 38
+                              Store 781 780
+             782:      6(int) Load 8(invocation)
+             783:    703(ptr) AccessChain 34(data) 47 695
+             784: 30(f16vec4) Load 783
+             785:702(f16vec2) VectorShuffle 784 784 0 1
+             786:702(f16vec2) GroupNonUniformQuadSwap 43 785 128
+             787:    703(ptr) AccessChain 34(data) 782 695
+             788: 30(f16vec4) Load 787
+             789: 30(f16vec4) VectorShuffle 788 786 4 5 2 3
+                              Store 787 789
+             790:      6(int) Load 8(invocation)
+             791:    703(ptr) AccessChain 34(data) 58 695
+             792: 30(f16vec4) Load 791
+             793:712(f16vec3) VectorShuffle 792 792 0 1 2
+             794:712(f16vec3) GroupNonUniformQuadSwap 43 793 128
+             795:    703(ptr) AccessChain 34(data) 790 695
+             796: 30(f16vec4) Load 795
+             797: 30(f16vec4) VectorShuffle 796 794 4 5 6 3
+                              Store 795 797
+             798:      6(int) Load 8(invocation)
+             799:    703(ptr) AccessChain 34(data) 68 695
+             800: 30(f16vec4) Load 799
+             801: 30(f16vec4) GroupNonUniformQuadSwap 43 800 128
+             802:    703(ptr) AccessChain 34(data) 798 695
+                              Store 802 801
+                              Return
+                              FunctionEnd
diff --git a/Test/baseResults/spv.subgroupExtendedTypesQuadNeg.comp.out b/Test/baseResults/spv.subgroupExtendedTypesQuadNeg.comp.out
new file mode 100644
index 0000000..73b1597
--- /dev/null
+++ b/Test/baseResults/spv.subgroupExtendedTypesQuadNeg.comp.out
@@ -0,0 +1,117 @@
+spv.subgroupExtendedTypesQuadNeg.comp
+ERROR: 0:26: ' temp int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:27: ' temp 2-component vector of int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:28: ' temp 3-component vector of int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:29: 'layout( column_major std430) buffer 4-component vector of int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:31: ' temp int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:32: ' temp 2-component vector of int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:33: ' temp 3-component vector of int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:34: 'layout( column_major std430) buffer 4-component vector of int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:36: ' temp int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:37: ' temp 2-component vector of int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:38: ' temp 3-component vector of int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:39: 'layout( column_major std430) buffer 4-component vector of int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:41: ' temp int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:42: ' temp 2-component vector of int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:43: ' temp 3-component vector of int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:44: 'layout( column_major std430) buffer 4-component vector of int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:46: ' temp uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:47: ' temp 2-component vector of uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:48: ' temp 3-component vector of uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:49: 'layout( column_major std430) buffer 4-component vector of uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:51: ' temp uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:52: ' temp 2-component vector of uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:53: ' temp 3-component vector of uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:54: 'layout( column_major std430) buffer 4-component vector of uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:56: ' temp uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:57: ' temp 2-component vector of uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:58: ' temp 3-component vector of uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:59: 'layout( column_major std430) buffer 4-component vector of uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:61: ' temp uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:62: ' temp 2-component vector of uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:63: ' temp 3-component vector of uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:64: 'layout( column_major std430) buffer 4-component vector of uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:66: ' temp int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:67: ' temp 2-component vector of int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:68: ' temp 3-component vector of int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:69: 'layout( column_major std430) buffer 4-component vector of int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:71: ' temp int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:72: ' temp 2-component vector of int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:73: ' temp 3-component vector of int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:74: 'layout( column_major std430) buffer 4-component vector of int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:76: ' temp int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:77: ' temp 2-component vector of int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:78: ' temp 3-component vector of int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:79: 'layout( column_major std430) buffer 4-component vector of int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:81: ' temp int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:82: ' temp 2-component vector of int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:83: ' temp 3-component vector of int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:84: 'layout( column_major std430) buffer 4-component vector of int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:86: ' temp uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:87: ' temp 2-component vector of uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:88: ' temp 3-component vector of uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:89: 'layout( column_major std430) buffer 4-component vector of uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:91: ' temp uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:92: ' temp 2-component vector of uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:93: ' temp 3-component vector of uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:94: 'layout( column_major std430) buffer 4-component vector of uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:96: ' temp uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:97: ' temp 2-component vector of uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:98: ' temp 3-component vector of uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:99: 'layout( column_major std430) buffer 4-component vector of uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:101: ' temp uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:102: ' temp 2-component vector of uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:103: ' temp 3-component vector of uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:104: 'layout( column_major std430) buffer 4-component vector of uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:106: ' temp int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:107: ' temp 2-component vector of int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:108: ' temp 3-component vector of int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:109: 'layout( column_major std430) buffer 4-component vector of int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:111: ' temp int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:112: ' temp 2-component vector of int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:113: ' temp 3-component vector of int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:114: 'layout( column_major std430) buffer 4-component vector of int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:116: ' temp int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:117: ' temp 2-component vector of int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:118: ' temp 3-component vector of int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:119: 'layout( column_major std430) buffer 4-component vector of int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:121: ' temp int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:122: ' temp 2-component vector of int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:123: ' temp 3-component vector of int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:124: 'layout( column_major std430) buffer 4-component vector of int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:126: ' temp uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:127: ' temp 2-component vector of uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:128: ' temp 3-component vector of uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:129: 'layout( column_major std430) buffer 4-component vector of uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:131: ' temp uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:132: ' temp 2-component vector of uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:133: ' temp 3-component vector of uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:134: 'layout( column_major std430) buffer 4-component vector of uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:136: ' temp uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:137: ' temp 2-component vector of uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:138: ' temp 3-component vector of uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:139: 'layout( column_major std430) buffer 4-component vector of uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:141: ' temp uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:142: ' temp 2-component vector of uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:143: ' temp 3-component vector of uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:144: 'layout( column_major std430) buffer 4-component vector of uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:146: ' temp float16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_float16
+ERROR: 0:147: ' temp 2-component vector of float16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_float16
+ERROR: 0:148: ' temp 3-component vector of float16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_float16
+ERROR: 0:149: 'layout( column_major std430) buffer 4-component vector of float16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_float16
+ERROR: 0:151: ' temp float16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_float16
+ERROR: 0:152: ' temp 2-component vector of float16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_float16
+ERROR: 0:153: ' temp 3-component vector of float16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_float16
+ERROR: 0:154: 'layout( column_major std430) buffer 4-component vector of float16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_float16
+ERROR: 0:156: ' temp float16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_float16
+ERROR: 0:157: ' temp 2-component vector of float16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_float16
+ERROR: 0:158: ' temp 3-component vector of float16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_float16
+ERROR: 0:159: 'layout( column_major std430) buffer 4-component vector of float16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_float16
+ERROR: 0:161: ' temp float16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_float16
+ERROR: 0:162: ' temp 2-component vector of float16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_float16
+ERROR: 0:163: ' temp 3-component vector of float16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_float16
+ERROR: 0:164: 'layout( column_major std430) buffer 4-component vector of float16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_float16
+ERROR: 112 compilation errors.  No code generated.
+
+
+SPIR-V is not generated for failed compile or link
diff --git a/Test/baseResults/spv.subgroupExtendedTypesShuffle.comp.out b/Test/baseResults/spv.subgroupExtendedTypesShuffle.comp.out
new file mode 100644
index 0000000..d647ded
--- /dev/null
+++ b/Test/baseResults/spv.subgroupExtendedTypesShuffle.comp.out
@@ -0,0 +1,616 @@
+spv.subgroupExtendedTypesShuffle.comp
+// Module Version 10300
+// Generated by (magic number): 80007
+// Id's are bound by 497
+
+                              Capability Shader
+                              Capability Float16
+                              Capability Int64
+                              Capability Int16
+                              Capability Int8
+                              Capability GroupNonUniform
+                              Capability GroupNonUniformShuffle
+                              Capability StorageUniformBufferBlock16
+                              Capability StorageBuffer8BitAccess
+                              Extension  "SPV_KHR_8bit_storage"
+               1:             ExtInstImport  "GLSL.std.450"
+                              MemoryModel Logical GLSL450
+                              EntryPoint GLCompute 4  "main" 10 12
+                              ExecutionMode 4 LocalSize 8 1 1
+                              Source GLSL 450
+                              SourceExtension  "GL_EXT_shader_explicit_arithmetic_types_float16"
+                              SourceExtension  "GL_EXT_shader_explicit_arithmetic_types_int16"
+                              SourceExtension  "GL_EXT_shader_explicit_arithmetic_types_int64"
+                              SourceExtension  "GL_EXT_shader_explicit_arithmetic_types_int8"
+                              SourceExtension  "GL_EXT_shader_subgroup_extended_types_float16"
+                              SourceExtension  "GL_EXT_shader_subgroup_extended_types_int16"
+                              SourceExtension  "GL_EXT_shader_subgroup_extended_types_int64"
+                              SourceExtension  "GL_EXT_shader_subgroup_extended_types_int8"
+                              SourceExtension  "GL_KHR_shader_subgroup_basic"
+                              SourceExtension  "GL_KHR_shader_subgroup_shuffle"
+                              Name 4  "main"
+                              Name 8  "invocation"
+                              Name 10  "gl_SubgroupInvocationID"
+                              Name 12  "gl_SubgroupSize"
+                              Name 31  "Buffers"
+                              MemberName 31(Buffers) 0  "i8"
+                              MemberName 31(Buffers) 1  "u8"
+                              MemberName 31(Buffers) 2  "i16"
+                              MemberName 31(Buffers) 3  "u16"
+                              MemberName 31(Buffers) 4  "i64"
+                              MemberName 31(Buffers) 5  "u64"
+                              MemberName 31(Buffers) 6  "f16"
+                              Name 34  "data"
+                              Decorate 10(gl_SubgroupInvocationID) RelaxedPrecision
+                              Decorate 10(gl_SubgroupInvocationID) BuiltIn SubgroupLocalInvocationId
+                              Decorate 11 RelaxedPrecision
+                              Decorate 12(gl_SubgroupSize) RelaxedPrecision
+                              Decorate 12(gl_SubgroupSize) BuiltIn SubgroupSize
+                              Decorate 13 RelaxedPrecision
+                              Decorate 14 RelaxedPrecision
+                              Decorate 16 RelaxedPrecision
+                              MemberDecorate 31(Buffers) 0 Offset 0
+                              MemberDecorate 31(Buffers) 1 Offset 4
+                              MemberDecorate 31(Buffers) 2 Offset 8
+                              MemberDecorate 31(Buffers) 3 Offset 16
+                              MemberDecorate 31(Buffers) 4 Offset 32
+                              MemberDecorate 31(Buffers) 5 Offset 64
+                              MemberDecorate 31(Buffers) 6 Offset 96
+                              Decorate 31(Buffers) Block
+                              Decorate 34(data) DescriptorSet 0
+                              Decorate 34(data) Binding 0
+                              Decorate 496 BuiltIn WorkgroupSize
+               2:             TypeVoid
+               3:             TypeFunction 2
+               6:             TypeInt 32 0
+               7:             TypePointer Function 6(int)
+               9:             TypePointer Input 6(int)
+10(gl_SubgroupInvocationID):      9(ptr) Variable Input
+12(gl_SubgroupSize):      9(ptr) Variable Input
+              15:      6(int) Constant 4
+              17:             TypeInt 8 1
+              18:             TypeVector 17(int8_t) 4
+              19:             TypeInt 8 0
+              20:             TypeVector 19(int8_t) 4
+              21:             TypeInt 16 1
+              22:             TypeVector 21(int16_t) 4
+              23:             TypeInt 16 0
+              24:             TypeVector 23(int16_t) 4
+              25:             TypeInt 64 1
+              26:             TypeVector 25(int64_t) 4
+              27:             TypeInt 64 0
+              28:             TypeVector 27(int64_t) 4
+              29:             TypeFloat 16
+              30:             TypeVector 29(float16_t) 4
+     31(Buffers):             TypeStruct 18(i8vec4) 20(i8vec4) 22(i16vec4) 24(i16vec4) 26(i64vec4) 28(i64vec4) 30(f16vec4)
+              32:             TypeArray 31(Buffers) 15
+              33:             TypePointer StorageBuffer 32
+        34(data):     33(ptr) Variable StorageBuffer
+              36:             TypeInt 32 1
+              37:     36(int) Constant 0
+              38:      6(int) Constant 0
+              39:             TypePointer StorageBuffer 17(int8_t)
+              43:      6(int) Constant 3
+              47:     36(int) Constant 1
+              48:             TypeVector 17(int8_t) 2
+              49:             TypePointer StorageBuffer 18(i8vec4)
+              59:     36(int) Constant 2
+              60:             TypeVector 17(int8_t) 3
+              70:     36(int) Constant 3
+             107:             TypePointer StorageBuffer 19(int8_t)
+             114:             TypeVector 19(int8_t) 2
+             115:             TypePointer StorageBuffer 20(i8vec4)
+             125:             TypeVector 19(int8_t) 3
+             171:             TypePointer StorageBuffer 21(int16_t)
+             178:             TypeVector 21(int16_t) 2
+             179:             TypePointer StorageBuffer 22(i16vec4)
+             189:             TypeVector 21(int16_t) 3
+             235:             TypePointer StorageBuffer 23(int16_t)
+             242:             TypeVector 23(int16_t) 2
+             243:             TypePointer StorageBuffer 24(i16vec4)
+             253:             TypeVector 23(int16_t) 3
+             299:     36(int) Constant 4
+             300:             TypePointer StorageBuffer 25(int64_t)
+             307:             TypeVector 25(int64_t) 2
+             308:             TypePointer StorageBuffer 26(i64vec4)
+             318:             TypeVector 25(int64_t) 3
+             364:     36(int) Constant 5
+             365:             TypePointer StorageBuffer 27(int64_t)
+             372:             TypeVector 27(int64_t) 2
+             373:             TypePointer StorageBuffer 28(i64vec4)
+             383:             TypeVector 27(int64_t) 3
+             429:     36(int) Constant 6
+             430:             TypePointer StorageBuffer 29(float16_t)
+             437:             TypeVector 29(float16_t) 2
+             438:             TypePointer StorageBuffer 30(f16vec4)
+             448:             TypeVector 29(float16_t) 3
+             493:             TypeVector 6(int) 3
+             494:      6(int) Constant 8
+             495:      6(int) Constant 1
+             496:  493(ivec3) ConstantComposite 494 495 495
+         4(main):           2 Function None 3
+               5:             Label
+   8(invocation):      7(ptr) Variable Function
+              11:      6(int) Load 10(gl_SubgroupInvocationID)
+              13:      6(int) Load 12(gl_SubgroupSize)
+              14:      6(int) IAdd 11 13
+              16:      6(int) UMod 14 15
+                              Store 8(invocation) 16
+              35:      6(int) Load 8(invocation)
+              40:     39(ptr) AccessChain 34(data) 37 37 38
+              41:  17(int8_t) Load 40
+              42:      6(int) Load 8(invocation)
+              44:  17(int8_t) GroupNonUniformShuffle 43 41 42
+              45:     39(ptr) AccessChain 34(data) 35 37 38
+                              Store 45 44
+              46:      6(int) Load 8(invocation)
+              50:     49(ptr) AccessChain 34(data) 47 37
+              51:  18(i8vec4) Load 50
+              52:  48(i8vec2) VectorShuffle 51 51 0 1
+              53:      6(int) Load 8(invocation)
+              54:  48(i8vec2) GroupNonUniformShuffle 43 52 53
+              55:     49(ptr) AccessChain 34(data) 46 37
+              56:  18(i8vec4) Load 55
+              57:  18(i8vec4) VectorShuffle 56 54 4 5 2 3
+                              Store 55 57
+              58:      6(int) Load 8(invocation)
+              61:     49(ptr) AccessChain 34(data) 59 37
+              62:  18(i8vec4) Load 61
+              63:  60(i8vec3) VectorShuffle 62 62 0 1 2
+              64:      6(int) Load 8(invocation)
+              65:  60(i8vec3) GroupNonUniformShuffle 43 63 64
+              66:     49(ptr) AccessChain 34(data) 58 37
+              67:  18(i8vec4) Load 66
+              68:  18(i8vec4) VectorShuffle 67 65 4 5 6 3
+                              Store 66 68
+              69:      6(int) Load 8(invocation)
+              71:     49(ptr) AccessChain 34(data) 70 37
+              72:  18(i8vec4) Load 71
+              73:      6(int) Load 8(invocation)
+              74:  18(i8vec4) GroupNonUniformShuffle 43 72 73
+              75:     49(ptr) AccessChain 34(data) 69 37
+                              Store 75 74
+              76:      6(int) Load 8(invocation)
+              77:     39(ptr) AccessChain 34(data) 37 37 38
+              78:  17(int8_t) Load 77
+              79:      6(int) Load 8(invocation)
+              80:  17(int8_t) GroupNonUniformShuffleXor 43 78 79
+              81:     39(ptr) AccessChain 34(data) 76 37 38
+                              Store 81 80
+              82:      6(int) Load 8(invocation)
+              83:     49(ptr) AccessChain 34(data) 47 37
+              84:  18(i8vec4) Load 83
+              85:  48(i8vec2) VectorShuffle 84 84 0 1
+              86:      6(int) Load 8(invocation)
+              87:  48(i8vec2) GroupNonUniformShuffleXor 43 85 86
+              88:     49(ptr) AccessChain 34(data) 82 37
+              89:  18(i8vec4) Load 88
+              90:  18(i8vec4) VectorShuffle 89 87 4 5 2 3
+                              Store 88 90
+              91:      6(int) Load 8(invocation)
+              92:     49(ptr) AccessChain 34(data) 59 37
+              93:  18(i8vec4) Load 92
+              94:  60(i8vec3) VectorShuffle 93 93 0 1 2
+              95:      6(int) Load 8(invocation)
+              96:  60(i8vec3) GroupNonUniformShuffleXor 43 94 95
+              97:     49(ptr) AccessChain 34(data) 91 37
+              98:  18(i8vec4) Load 97
+              99:  18(i8vec4) VectorShuffle 98 96 4 5 6 3
+                              Store 97 99
+             100:      6(int) Load 8(invocation)
+             101:     49(ptr) AccessChain 34(data) 70 37
+             102:  18(i8vec4) Load 101
+             103:      6(int) Load 8(invocation)
+             104:  18(i8vec4) GroupNonUniformShuffleXor 43 102 103
+             105:     49(ptr) AccessChain 34(data) 100 37
+                              Store 105 104
+             106:      6(int) Load 8(invocation)
+             108:    107(ptr) AccessChain 34(data) 37 47 38
+             109:  19(int8_t) Load 108
+             110:      6(int) Load 8(invocation)
+             111:  19(int8_t) GroupNonUniformShuffle 43 109 110
+             112:    107(ptr) AccessChain 34(data) 106 47 38
+                              Store 112 111
+             113:      6(int) Load 8(invocation)
+             116:    115(ptr) AccessChain 34(data) 47 47
+             117:  20(i8vec4) Load 116
+             118: 114(i8vec2) VectorShuffle 117 117 0 1
+             119:      6(int) Load 8(invocation)
+             120: 114(i8vec2) GroupNonUniformShuffle 43 118 119
+             121:    115(ptr) AccessChain 34(data) 113 47
+             122:  20(i8vec4) Load 121
+             123:  20(i8vec4) VectorShuffle 122 120 4 5 2 3
+                              Store 121 123
+             124:      6(int) Load 8(invocation)
+             126:    115(ptr) AccessChain 34(data) 59 47
+             127:  20(i8vec4) Load 126
+             128: 125(i8vec3) VectorShuffle 127 127 0 1 2
+             129:      6(int) Load 8(invocation)
+             130: 125(i8vec3) GroupNonUniformShuffle 43 128 129
+             131:    115(ptr) AccessChain 34(data) 124 47
+             132:  20(i8vec4) Load 131
+             133:  20(i8vec4) VectorShuffle 132 130 4 5 6 3
+                              Store 131 133
+             134:      6(int) Load 8(invocation)
+             135:    115(ptr) AccessChain 34(data) 70 47
+             136:  20(i8vec4) Load 135
+             137:      6(int) Load 8(invocation)
+             138:  20(i8vec4) GroupNonUniformShuffle 43 136 137
+             139:    115(ptr) AccessChain 34(data) 134 47
+                              Store 139 138
+             140:      6(int) Load 8(invocation)
+             141:    107(ptr) AccessChain 34(data) 37 47 38
+             142:  19(int8_t) Load 141
+             143:      6(int) Load 8(invocation)
+             144:  19(int8_t) GroupNonUniformShuffleXor 43 142 143
+             145:    107(ptr) AccessChain 34(data) 140 47 38
+                              Store 145 144
+             146:      6(int) Load 8(invocation)
+             147:    115(ptr) AccessChain 34(data) 47 47
+             148:  20(i8vec4) Load 147
+             149: 114(i8vec2) VectorShuffle 148 148 0 1
+             150:      6(int) Load 8(invocation)
+             151: 114(i8vec2) GroupNonUniformShuffleXor 43 149 150
+             152:    115(ptr) AccessChain 34(data) 146 47
+             153:  20(i8vec4) Load 152
+             154:  20(i8vec4) VectorShuffle 153 151 4 5 2 3
+                              Store 152 154
+             155:      6(int) Load 8(invocation)
+             156:    115(ptr) AccessChain 34(data) 59 47
+             157:  20(i8vec4) Load 156
+             158: 125(i8vec3) VectorShuffle 157 157 0 1 2
+             159:      6(int) Load 8(invocation)
+             160: 125(i8vec3) GroupNonUniformShuffleXor 43 158 159
+             161:    115(ptr) AccessChain 34(data) 155 47
+             162:  20(i8vec4) Load 161
+             163:  20(i8vec4) VectorShuffle 162 160 4 5 6 3
+                              Store 161 163
+             164:      6(int) Load 8(invocation)
+             165:    115(ptr) AccessChain 34(data) 70 47
+             166:  20(i8vec4) Load 165
+             167:      6(int) Load 8(invocation)
+             168:  20(i8vec4) GroupNonUniformShuffleXor 43 166 167
+             169:    115(ptr) AccessChain 34(data) 164 47
+                              Store 169 168
+             170:      6(int) Load 8(invocation)
+             172:    171(ptr) AccessChain 34(data) 37 59 38
+             173: 21(int16_t) Load 172
+             174:      6(int) Load 8(invocation)
+             175: 21(int16_t) GroupNonUniformShuffle 43 173 174
+             176:    171(ptr) AccessChain 34(data) 170 59 38
+                              Store 176 175
+             177:      6(int) Load 8(invocation)
+             180:    179(ptr) AccessChain 34(data) 47 59
+             181: 22(i16vec4) Load 180
+             182:178(i16vec2) VectorShuffle 181 181 0 1
+             183:      6(int) Load 8(invocation)
+             184:178(i16vec2) GroupNonUniformShuffle 43 182 183
+             185:    179(ptr) AccessChain 34(data) 177 59
+             186: 22(i16vec4) Load 185
+             187: 22(i16vec4) VectorShuffle 186 184 4 5 2 3
+                              Store 185 187
+             188:      6(int) Load 8(invocation)
+             190:    179(ptr) AccessChain 34(data) 59 59
+             191: 22(i16vec4) Load 190
+             192:189(i16vec3) VectorShuffle 191 191 0 1 2
+             193:      6(int) Load 8(invocation)
+             194:189(i16vec3) GroupNonUniformShuffle 43 192 193
+             195:    179(ptr) AccessChain 34(data) 188 59
+             196: 22(i16vec4) Load 195
+             197: 22(i16vec4) VectorShuffle 196 194 4 5 6 3
+                              Store 195 197
+             198:      6(int) Load 8(invocation)
+             199:    179(ptr) AccessChain 34(data) 70 59
+             200: 22(i16vec4) Load 199
+             201:      6(int) Load 8(invocation)
+             202: 22(i16vec4) GroupNonUniformShuffle 43 200 201
+             203:    179(ptr) AccessChain 34(data) 198 59
+                              Store 203 202
+             204:      6(int) Load 8(invocation)
+             205:    171(ptr) AccessChain 34(data) 37 59 38
+             206: 21(int16_t) Load 205
+             207:      6(int) Load 8(invocation)
+             208: 21(int16_t) GroupNonUniformShuffleXor 43 206 207
+             209:    171(ptr) AccessChain 34(data) 204 59 38
+                              Store 209 208
+             210:      6(int) Load 8(invocation)
+             211:    179(ptr) AccessChain 34(data) 47 59
+             212: 22(i16vec4) Load 211
+             213:178(i16vec2) VectorShuffle 212 212 0 1
+             214:      6(int) Load 8(invocation)
+             215:178(i16vec2) GroupNonUniformShuffleXor 43 213 214
+             216:    179(ptr) AccessChain 34(data) 210 59
+             217: 22(i16vec4) Load 216
+             218: 22(i16vec4) VectorShuffle 217 215 4 5 2 3
+                              Store 216 218
+             219:      6(int) Load 8(invocation)
+             220:    179(ptr) AccessChain 34(data) 59 59
+             221: 22(i16vec4) Load 220
+             222:189(i16vec3) VectorShuffle 221 221 0 1 2
+             223:      6(int) Load 8(invocation)
+             224:189(i16vec3) GroupNonUniformShuffleXor 43 222 223
+             225:    179(ptr) AccessChain 34(data) 219 59
+             226: 22(i16vec4) Load 225
+             227: 22(i16vec4) VectorShuffle 226 224 4 5 6 3
+                              Store 225 227
+             228:      6(int) Load 8(invocation)
+             229:    179(ptr) AccessChain 34(data) 70 59
+             230: 22(i16vec4) Load 229
+             231:      6(int) Load 8(invocation)
+             232: 22(i16vec4) GroupNonUniformShuffleXor 43 230 231
+             233:    179(ptr) AccessChain 34(data) 228 59
+                              Store 233 232
+             234:      6(int) Load 8(invocation)
+             236:    235(ptr) AccessChain 34(data) 37 70 38
+             237: 23(int16_t) Load 236
+             238:      6(int) Load 8(invocation)
+             239: 23(int16_t) GroupNonUniformShuffle 43 237 238
+             240:    235(ptr) AccessChain 34(data) 234 70 38
+                              Store 240 239
+             241:      6(int) Load 8(invocation)
+             244:    243(ptr) AccessChain 34(data) 47 70
+             245: 24(i16vec4) Load 244
+             246:242(i16vec2) VectorShuffle 245 245 0 1
+             247:      6(int) Load 8(invocation)
+             248:242(i16vec2) GroupNonUniformShuffle 43 246 247
+             249:    243(ptr) AccessChain 34(data) 241 70
+             250: 24(i16vec4) Load 249
+             251: 24(i16vec4) VectorShuffle 250 248 4 5 2 3
+                              Store 249 251
+             252:      6(int) Load 8(invocation)
+             254:    243(ptr) AccessChain 34(data) 59 70
+             255: 24(i16vec4) Load 254
+             256:253(i16vec3) VectorShuffle 255 255 0 1 2
+             257:      6(int) Load 8(invocation)
+             258:253(i16vec3) GroupNonUniformShuffle 43 256 257
+             259:    243(ptr) AccessChain 34(data) 252 70
+             260: 24(i16vec4) Load 259
+             261: 24(i16vec4) VectorShuffle 260 258 4 5 6 3
+                              Store 259 261
+             262:      6(int) Load 8(invocation)
+             263:    243(ptr) AccessChain 34(data) 70 70
+             264: 24(i16vec4) Load 263
+             265:      6(int) Load 8(invocation)
+             266: 24(i16vec4) GroupNonUniformShuffle 43 264 265
+             267:    243(ptr) AccessChain 34(data) 262 70
+                              Store 267 266
+             268:      6(int) Load 8(invocation)
+             269:    235(ptr) AccessChain 34(data) 37 70 38
+             270: 23(int16_t) Load 269
+             271:      6(int) Load 8(invocation)
+             272: 23(int16_t) GroupNonUniformShuffleXor 43 270 271
+             273:    235(ptr) AccessChain 34(data) 268 70 38
+                              Store 273 272
+             274:      6(int) Load 8(invocation)
+             275:    243(ptr) AccessChain 34(data) 47 70
+             276: 24(i16vec4) Load 275
+             277:242(i16vec2) VectorShuffle 276 276 0 1
+             278:      6(int) Load 8(invocation)
+             279:242(i16vec2) GroupNonUniformShuffleXor 43 277 278
+             280:    243(ptr) AccessChain 34(data) 274 70
+             281: 24(i16vec4) Load 280
+             282: 24(i16vec4) VectorShuffle 281 279 4 5 2 3
+                              Store 280 282
+             283:      6(int) Load 8(invocation)
+             284:    243(ptr) AccessChain 34(data) 59 70
+             285: 24(i16vec4) Load 284
+             286:253(i16vec3) VectorShuffle 285 285 0 1 2
+             287:      6(int) Load 8(invocation)
+             288:253(i16vec3) GroupNonUniformShuffleXor 43 286 287
+             289:    243(ptr) AccessChain 34(data) 283 70
+             290: 24(i16vec4) Load 289
+             291: 24(i16vec4) VectorShuffle 290 288 4 5 6 3
+                              Store 289 291
+             292:      6(int) Load 8(invocation)
+             293:    243(ptr) AccessChain 34(data) 70 70
+             294: 24(i16vec4) Load 293
+             295:      6(int) Load 8(invocation)
+             296: 24(i16vec4) GroupNonUniformShuffleXor 43 294 295
+             297:    243(ptr) AccessChain 34(data) 292 70
+                              Store 297 296
+             298:      6(int) Load 8(invocation)
+             301:    300(ptr) AccessChain 34(data) 37 299 38
+             302: 25(int64_t) Load 301
+             303:      6(int) Load 8(invocation)
+             304: 25(int64_t) GroupNonUniformShuffle 43 302 303
+             305:    300(ptr) AccessChain 34(data) 298 299 38
+                              Store 305 304
+             306:      6(int) Load 8(invocation)
+             309:    308(ptr) AccessChain 34(data) 47 299
+             310: 26(i64vec4) Load 309
+             311:307(i64vec2) VectorShuffle 310 310 0 1
+             312:      6(int) Load 8(invocation)
+             313:307(i64vec2) GroupNonUniformShuffle 43 311 312
+             314:    308(ptr) AccessChain 34(data) 306 299
+             315: 26(i64vec4) Load 314
+             316: 26(i64vec4) VectorShuffle 315 313 4 5 2 3
+                              Store 314 316
+             317:      6(int) Load 8(invocation)
+             319:    308(ptr) AccessChain 34(data) 59 299
+             320: 26(i64vec4) Load 319
+             321:318(i64vec3) VectorShuffle 320 320 0 1 2
+             322:      6(int) Load 8(invocation)
+             323:318(i64vec3) GroupNonUniformShuffle 43 321 322
+             324:    308(ptr) AccessChain 34(data) 317 299
+             325: 26(i64vec4) Load 324
+             326: 26(i64vec4) VectorShuffle 325 323 4 5 6 3
+                              Store 324 326
+             327:      6(int) Load 8(invocation)
+             328:    308(ptr) AccessChain 34(data) 70 299
+             329: 26(i64vec4) Load 328
+             330:      6(int) Load 8(invocation)
+             331: 26(i64vec4) GroupNonUniformShuffle 43 329 330
+             332:    308(ptr) AccessChain 34(data) 327 299
+                              Store 332 331
+             333:      6(int) Load 8(invocation)
+             334:    300(ptr) AccessChain 34(data) 37 299 38
+             335: 25(int64_t) Load 334
+             336:      6(int) Load 8(invocation)
+             337: 25(int64_t) GroupNonUniformShuffleXor 43 335 336
+             338:    300(ptr) AccessChain 34(data) 333 299 38
+                              Store 338 337
+             339:      6(int) Load 8(invocation)
+             340:    308(ptr) AccessChain 34(data) 47 299
+             341: 26(i64vec4) Load 340
+             342:307(i64vec2) VectorShuffle 341 341 0 1
+             343:      6(int) Load 8(invocation)
+             344:307(i64vec2) GroupNonUniformShuffleXor 43 342 343
+             345:    308(ptr) AccessChain 34(data) 339 299
+             346: 26(i64vec4) Load 345
+             347: 26(i64vec4) VectorShuffle 346 344 4 5 2 3
+                              Store 345 347
+             348:      6(int) Load 8(invocation)
+             349:    308(ptr) AccessChain 34(data) 59 299
+             350: 26(i64vec4) Load 349
+             351:318(i64vec3) VectorShuffle 350 350 0 1 2
+             352:      6(int) Load 8(invocation)
+             353:318(i64vec3) GroupNonUniformShuffleXor 43 351 352
+             354:    308(ptr) AccessChain 34(data) 348 299
+             355: 26(i64vec4) Load 354
+             356: 26(i64vec4) VectorShuffle 355 353 4 5 6 3
+                              Store 354 356
+             357:      6(int) Load 8(invocation)
+             358:    308(ptr) AccessChain 34(data) 70 299
+             359: 26(i64vec4) Load 358
+             360:      6(int) Load 8(invocation)
+             361: 26(i64vec4) GroupNonUniformShuffleXor 43 359 360
+             362:    308(ptr) AccessChain 34(data) 357 299
+                              Store 362 361
+             363:      6(int) Load 8(invocation)
+             366:    365(ptr) AccessChain 34(data) 37 364 38
+             367: 27(int64_t) Load 366
+             368:      6(int) Load 8(invocation)
+             369: 27(int64_t) GroupNonUniformShuffle 43 367 368
+             370:    365(ptr) AccessChain 34(data) 363 364 38
+                              Store 370 369
+             371:      6(int) Load 8(invocation)
+             374:    373(ptr) AccessChain 34(data) 47 364
+             375: 28(i64vec4) Load 374
+             376:372(i64vec2) VectorShuffle 375 375 0 1
+             377:      6(int) Load 8(invocation)
+             378:372(i64vec2) GroupNonUniformShuffle 43 376 377
+             379:    373(ptr) AccessChain 34(data) 371 364
+             380: 28(i64vec4) Load 379
+             381: 28(i64vec4) VectorShuffle 380 378 4 5 2 3
+                              Store 379 381
+             382:      6(int) Load 8(invocation)
+             384:    373(ptr) AccessChain 34(data) 59 364
+             385: 28(i64vec4) Load 384
+             386:383(i64vec3) VectorShuffle 385 385 0 1 2
+             387:      6(int) Load 8(invocation)
+             388:383(i64vec3) GroupNonUniformShuffle 43 386 387
+             389:    373(ptr) AccessChain 34(data) 382 364
+             390: 28(i64vec4) Load 389
+             391: 28(i64vec4) VectorShuffle 390 388 4 5 6 3
+                              Store 389 391
+             392:      6(int) Load 8(invocation)
+             393:    373(ptr) AccessChain 34(data) 70 364
+             394: 28(i64vec4) Load 393
+             395:      6(int) Load 8(invocation)
+             396: 28(i64vec4) GroupNonUniformShuffle 43 394 395
+             397:    373(ptr) AccessChain 34(data) 392 364
+                              Store 397 396
+             398:      6(int) Load 8(invocation)
+             399:    365(ptr) AccessChain 34(data) 37 364 38
+             400: 27(int64_t) Load 399
+             401:      6(int) Load 8(invocation)
+             402: 27(int64_t) GroupNonUniformShuffleXor 43 400 401
+             403:    365(ptr) AccessChain 34(data) 398 364 38
+                              Store 403 402
+             404:      6(int) Load 8(invocation)
+             405:    373(ptr) AccessChain 34(data) 47 364
+             406: 28(i64vec4) Load 405
+             407:372(i64vec2) VectorShuffle 406 406 0 1
+             408:      6(int) Load 8(invocation)
+             409:372(i64vec2) GroupNonUniformShuffleXor 43 407 408
+             410:    373(ptr) AccessChain 34(data) 404 364
+             411: 28(i64vec4) Load 410
+             412: 28(i64vec4) VectorShuffle 411 409 4 5 2 3
+                              Store 410 412
+             413:      6(int) Load 8(invocation)
+             414:    373(ptr) AccessChain 34(data) 59 364
+             415: 28(i64vec4) Load 414
+             416:383(i64vec3) VectorShuffle 415 415 0 1 2
+             417:      6(int) Load 8(invocation)
+             418:383(i64vec3) GroupNonUniformShuffleXor 43 416 417
+             419:    373(ptr) AccessChain 34(data) 413 364
+             420: 28(i64vec4) Load 419
+             421: 28(i64vec4) VectorShuffle 420 418 4 5 6 3
+                              Store 419 421
+             422:      6(int) Load 8(invocation)
+             423:    373(ptr) AccessChain 34(data) 70 364
+             424: 28(i64vec4) Load 423
+             425:      6(int) Load 8(invocation)
+             426: 28(i64vec4) GroupNonUniformShuffleXor 43 424 425
+             427:    373(ptr) AccessChain 34(data) 422 364
+                              Store 427 426
+             428:      6(int) Load 8(invocation)
+             431:    430(ptr) AccessChain 34(data) 37 429 38
+             432:29(float16_t) Load 431
+             433:      6(int) Load 8(invocation)
+             434:29(float16_t) GroupNonUniformShuffle 43 432 433
+             435:    430(ptr) AccessChain 34(data) 428 429 38
+                              Store 435 434
+             436:      6(int) Load 8(invocation)
+             439:    438(ptr) AccessChain 34(data) 47 429
+             440: 30(f16vec4) Load 439
+             441:437(f16vec2) VectorShuffle 440 440 0 1
+             442:      6(int) Load 8(invocation)
+             443:437(f16vec2) GroupNonUniformShuffle 43 441 442
+             444:    438(ptr) AccessChain 34(data) 436 429
+             445: 30(f16vec4) Load 444
+             446: 30(f16vec4) VectorShuffle 445 443 4 5 2 3
+                              Store 444 446
+             447:      6(int) Load 8(invocation)
+             449:    438(ptr) AccessChain 34(data) 59 429
+             450: 30(f16vec4) Load 449
+             451:448(f16vec3) VectorShuffle 450 450 0 1 2
+             452:      6(int) Load 8(invocation)
+             453:448(f16vec3) GroupNonUniformShuffle 43 451 452
+             454:    438(ptr) AccessChain 34(data) 447 429
+             455: 30(f16vec4) Load 454
+             456: 30(f16vec4) VectorShuffle 455 453 4 5 6 3
+                              Store 454 456
+             457:      6(int) Load 8(invocation)
+             458:    438(ptr) AccessChain 34(data) 70 429
+             459: 30(f16vec4) Load 458
+             460:      6(int) Load 8(invocation)
+             461: 30(f16vec4) GroupNonUniformShuffle 43 459 460
+             462:    438(ptr) AccessChain 34(data) 457 429
+                              Store 462 461
+             463:      6(int) Load 8(invocation)
+             464:    430(ptr) AccessChain 34(data) 37 429 38
+             465:29(float16_t) Load 464
+             466:      6(int) Load 8(invocation)
+             467:29(float16_t) GroupNonUniformShuffleXor 43 465 466
+             468:    430(ptr) AccessChain 34(data) 463 429 38
+                              Store 468 467
+             469:      6(int) Load 8(invocation)
+             470:    438(ptr) AccessChain 34(data) 47 429
+             471: 30(f16vec4) Load 470
+             472:437(f16vec2) VectorShuffle 471 471 0 1
+             473:      6(int) Load 8(invocation)
+             474:437(f16vec2) GroupNonUniformShuffleXor 43 472 473
+             475:    438(ptr) AccessChain 34(data) 469 429
+             476: 30(f16vec4) Load 475
+             477: 30(f16vec4) VectorShuffle 476 474 4 5 2 3
+                              Store 475 477
+             478:      6(int) Load 8(invocation)
+             479:    438(ptr) AccessChain 34(data) 59 429
+             480: 30(f16vec4) Load 479
+             481:448(f16vec3) VectorShuffle 480 480 0 1 2
+             482:      6(int) Load 8(invocation)
+             483:448(f16vec3) GroupNonUniformShuffleXor 43 481 482
+             484:    438(ptr) AccessChain 34(data) 478 429
+             485: 30(f16vec4) Load 484
+             486: 30(f16vec4) VectorShuffle 485 483 4 5 6 3
+                              Store 484 486
+             487:      6(int) Load 8(invocation)
+             488:    438(ptr) AccessChain 34(data) 70 429
+             489: 30(f16vec4) Load 488
+             490:      6(int) Load 8(invocation)
+             491: 30(f16vec4) GroupNonUniformShuffleXor 43 489 490
+             492:    438(ptr) AccessChain 34(data) 487 429
+                              Store 492 491
+                              Return
+                              FunctionEnd
diff --git a/Test/baseResults/spv.subgroupExtendedTypesShuffleNeg.comp.out b/Test/baseResults/spv.subgroupExtendedTypesShuffleNeg.comp.out
new file mode 100644
index 0000000..df1234b
--- /dev/null
+++ b/Test/baseResults/spv.subgroupExtendedTypesShuffleNeg.comp.out
@@ -0,0 +1,61 @@
+spv.subgroupExtendedTypesShuffleNeg.comp
+ERROR: 0:26: ' temp int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:27: ' temp 2-component vector of int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:28: ' temp 3-component vector of int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:29: 'layout( column_major std430) buffer 4-component vector of int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:31: ' temp int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:32: ' temp 2-component vector of int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:33: ' temp 3-component vector of int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:34: 'layout( column_major std430) buffer 4-component vector of int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:36: ' temp uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:37: ' temp 2-component vector of uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:38: ' temp 3-component vector of uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:39: 'layout( column_major std430) buffer 4-component vector of uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:41: ' temp uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:42: ' temp 2-component vector of uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:43: ' temp 3-component vector of uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:44: 'layout( column_major std430) buffer 4-component vector of uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:46: ' temp int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:47: ' temp 2-component vector of int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:48: ' temp 3-component vector of int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:49: 'layout( column_major std430) buffer 4-component vector of int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:51: ' temp int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:52: ' temp 2-component vector of int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:53: ' temp 3-component vector of int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:54: 'layout( column_major std430) buffer 4-component vector of int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:56: ' temp uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:57: ' temp 2-component vector of uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:58: ' temp 3-component vector of uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:59: 'layout( column_major std430) buffer 4-component vector of uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:61: ' temp uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:62: ' temp 2-component vector of uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:63: ' temp 3-component vector of uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:64: 'layout( column_major std430) buffer 4-component vector of uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:66: ' temp int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:67: ' temp 2-component vector of int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:68: ' temp 3-component vector of int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:69: 'layout( column_major std430) buffer 4-component vector of int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:71: ' temp int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:72: ' temp 2-component vector of int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:73: ' temp 3-component vector of int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:74: 'layout( column_major std430) buffer 4-component vector of int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:76: ' temp uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:77: ' temp 2-component vector of uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:78: ' temp 3-component vector of uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:79: 'layout( column_major std430) buffer 4-component vector of uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:81: ' temp uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:82: ' temp 2-component vector of uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:83: ' temp 3-component vector of uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:84: 'layout( column_major std430) buffer 4-component vector of uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:86: ' temp highp float16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_float16
+ERROR: 0:87: ' temp highp 2-component vector of float16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_float16
+ERROR: 0:88: ' temp highp 3-component vector of float16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_float16
+ERROR: 0:89: 'layout( column_major std430) buffer highp 4-component vector of float16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_float16
+ERROR: 0:91: ' temp highp float16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_float16
+ERROR: 0:92: ' temp highp 2-component vector of float16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_float16
+ERROR: 0:93: ' temp highp 3-component vector of float16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_float16
+ERROR: 0:94: 'layout( column_major std430) buffer highp 4-component vector of float16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_float16
+ERROR: 56 compilation errors.  No code generated.
+
+
+SPIR-V is not generated for failed compile or link
diff --git a/Test/baseResults/spv.subgroupExtendedTypesShuffleRelative.comp.out b/Test/baseResults/spv.subgroupExtendedTypesShuffleRelative.comp.out
new file mode 100644
index 0000000..ef5def5
--- /dev/null
+++ b/Test/baseResults/spv.subgroupExtendedTypesShuffleRelative.comp.out
@@ -0,0 +1,616 @@
+spv.subgroupExtendedTypesShuffleRelative.comp
+// Module Version 10300
+// Generated by (magic number): 80007
+// Id's are bound by 497
+
+                              Capability Shader
+                              Capability Float16
+                              Capability Int64
+                              Capability Int16
+                              Capability Int8
+                              Capability GroupNonUniform
+                              Capability GroupNonUniformShuffleRelative
+                              Capability StorageUniformBufferBlock16
+                              Capability StorageBuffer8BitAccess
+                              Extension  "SPV_KHR_8bit_storage"
+               1:             ExtInstImport  "GLSL.std.450"
+                              MemoryModel Logical GLSL450
+                              EntryPoint GLCompute 4  "main" 10 12
+                              ExecutionMode 4 LocalSize 8 1 1
+                              Source GLSL 450
+                              SourceExtension  "GL_EXT_shader_explicit_arithmetic_types_float16"
+                              SourceExtension  "GL_EXT_shader_explicit_arithmetic_types_int16"
+                              SourceExtension  "GL_EXT_shader_explicit_arithmetic_types_int64"
+                              SourceExtension  "GL_EXT_shader_explicit_arithmetic_types_int8"
+                              SourceExtension  "GL_EXT_shader_subgroup_extended_types_float16"
+                              SourceExtension  "GL_EXT_shader_subgroup_extended_types_int16"
+                              SourceExtension  "GL_EXT_shader_subgroup_extended_types_int64"
+                              SourceExtension  "GL_EXT_shader_subgroup_extended_types_int8"
+                              SourceExtension  "GL_KHR_shader_subgroup_basic"
+                              SourceExtension  "GL_KHR_shader_subgroup_shuffle_relative"
+                              Name 4  "main"
+                              Name 8  "invocation"
+                              Name 10  "gl_SubgroupInvocationID"
+                              Name 12  "gl_SubgroupSize"
+                              Name 31  "Buffers"
+                              MemberName 31(Buffers) 0  "i8"
+                              MemberName 31(Buffers) 1  "u8"
+                              MemberName 31(Buffers) 2  "i16"
+                              MemberName 31(Buffers) 3  "u16"
+                              MemberName 31(Buffers) 4  "i64"
+                              MemberName 31(Buffers) 5  "u64"
+                              MemberName 31(Buffers) 6  "f16"
+                              Name 34  "data"
+                              Decorate 10(gl_SubgroupInvocationID) RelaxedPrecision
+                              Decorate 10(gl_SubgroupInvocationID) BuiltIn SubgroupLocalInvocationId
+                              Decorate 11 RelaxedPrecision
+                              Decorate 12(gl_SubgroupSize) RelaxedPrecision
+                              Decorate 12(gl_SubgroupSize) BuiltIn SubgroupSize
+                              Decorate 13 RelaxedPrecision
+                              Decorate 14 RelaxedPrecision
+                              Decorate 16 RelaxedPrecision
+                              MemberDecorate 31(Buffers) 0 Offset 0
+                              MemberDecorate 31(Buffers) 1 Offset 4
+                              MemberDecorate 31(Buffers) 2 Offset 8
+                              MemberDecorate 31(Buffers) 3 Offset 16
+                              MemberDecorate 31(Buffers) 4 Offset 32
+                              MemberDecorate 31(Buffers) 5 Offset 64
+                              MemberDecorate 31(Buffers) 6 Offset 96
+                              Decorate 31(Buffers) Block
+                              Decorate 34(data) DescriptorSet 0
+                              Decorate 34(data) Binding 0
+                              Decorate 496 BuiltIn WorkgroupSize
+               2:             TypeVoid
+               3:             TypeFunction 2
+               6:             TypeInt 32 0
+               7:             TypePointer Function 6(int)
+               9:             TypePointer Input 6(int)
+10(gl_SubgroupInvocationID):      9(ptr) Variable Input
+12(gl_SubgroupSize):      9(ptr) Variable Input
+              15:      6(int) Constant 4
+              17:             TypeInt 8 1
+              18:             TypeVector 17(int8_t) 4
+              19:             TypeInt 8 0
+              20:             TypeVector 19(int8_t) 4
+              21:             TypeInt 16 1
+              22:             TypeVector 21(int16_t) 4
+              23:             TypeInt 16 0
+              24:             TypeVector 23(int16_t) 4
+              25:             TypeInt 64 1
+              26:             TypeVector 25(int64_t) 4
+              27:             TypeInt 64 0
+              28:             TypeVector 27(int64_t) 4
+              29:             TypeFloat 16
+              30:             TypeVector 29(float16_t) 4
+     31(Buffers):             TypeStruct 18(i8vec4) 20(i8vec4) 22(i16vec4) 24(i16vec4) 26(i64vec4) 28(i64vec4) 30(f16vec4)
+              32:             TypeArray 31(Buffers) 15
+              33:             TypePointer StorageBuffer 32
+        34(data):     33(ptr) Variable StorageBuffer
+              36:             TypeInt 32 1
+              37:     36(int) Constant 0
+              38:      6(int) Constant 0
+              39:             TypePointer StorageBuffer 17(int8_t)
+              43:      6(int) Constant 3
+              47:     36(int) Constant 1
+              48:             TypeVector 17(int8_t) 2
+              49:             TypePointer StorageBuffer 18(i8vec4)
+              59:     36(int) Constant 2
+              60:             TypeVector 17(int8_t) 3
+              70:     36(int) Constant 3
+             107:             TypePointer StorageBuffer 19(int8_t)
+             114:             TypeVector 19(int8_t) 2
+             115:             TypePointer StorageBuffer 20(i8vec4)
+             125:             TypeVector 19(int8_t) 3
+             171:             TypePointer StorageBuffer 21(int16_t)
+             178:             TypeVector 21(int16_t) 2
+             179:             TypePointer StorageBuffer 22(i16vec4)
+             189:             TypeVector 21(int16_t) 3
+             235:             TypePointer StorageBuffer 23(int16_t)
+             242:             TypeVector 23(int16_t) 2
+             243:             TypePointer StorageBuffer 24(i16vec4)
+             253:             TypeVector 23(int16_t) 3
+             299:     36(int) Constant 4
+             300:             TypePointer StorageBuffer 25(int64_t)
+             307:             TypeVector 25(int64_t) 2
+             308:             TypePointer StorageBuffer 26(i64vec4)
+             318:             TypeVector 25(int64_t) 3
+             364:     36(int) Constant 5
+             365:             TypePointer StorageBuffer 27(int64_t)
+             372:             TypeVector 27(int64_t) 2
+             373:             TypePointer StorageBuffer 28(i64vec4)
+             383:             TypeVector 27(int64_t) 3
+             429:     36(int) Constant 6
+             430:             TypePointer StorageBuffer 29(float16_t)
+             437:             TypeVector 29(float16_t) 2
+             438:             TypePointer StorageBuffer 30(f16vec4)
+             448:             TypeVector 29(float16_t) 3
+             493:             TypeVector 6(int) 3
+             494:      6(int) Constant 8
+             495:      6(int) Constant 1
+             496:  493(ivec3) ConstantComposite 494 495 495
+         4(main):           2 Function None 3
+               5:             Label
+   8(invocation):      7(ptr) Variable Function
+              11:      6(int) Load 10(gl_SubgroupInvocationID)
+              13:      6(int) Load 12(gl_SubgroupSize)
+              14:      6(int) IAdd 11 13
+              16:      6(int) UMod 14 15
+                              Store 8(invocation) 16
+              35:      6(int) Load 8(invocation)
+              40:     39(ptr) AccessChain 34(data) 37 37 38
+              41:  17(int8_t) Load 40
+              42:      6(int) Load 8(invocation)
+              44:  17(int8_t) GroupNonUniformShuffleUp 43 41 42
+              45:     39(ptr) AccessChain 34(data) 35 37 38
+                              Store 45 44
+              46:      6(int) Load 8(invocation)
+              50:     49(ptr) AccessChain 34(data) 47 37
+              51:  18(i8vec4) Load 50
+              52:  48(i8vec2) VectorShuffle 51 51 0 1
+              53:      6(int) Load 8(invocation)
+              54:  48(i8vec2) GroupNonUniformShuffleUp 43 52 53
+              55:     49(ptr) AccessChain 34(data) 46 37
+              56:  18(i8vec4) Load 55
+              57:  18(i8vec4) VectorShuffle 56 54 4 5 2 3
+                              Store 55 57
+              58:      6(int) Load 8(invocation)
+              61:     49(ptr) AccessChain 34(data) 59 37
+              62:  18(i8vec4) Load 61
+              63:  60(i8vec3) VectorShuffle 62 62 0 1 2
+              64:      6(int) Load 8(invocation)
+              65:  60(i8vec3) GroupNonUniformShuffleUp 43 63 64
+              66:     49(ptr) AccessChain 34(data) 58 37
+              67:  18(i8vec4) Load 66
+              68:  18(i8vec4) VectorShuffle 67 65 4 5 6 3
+                              Store 66 68
+              69:      6(int) Load 8(invocation)
+              71:     49(ptr) AccessChain 34(data) 70 37
+              72:  18(i8vec4) Load 71
+              73:      6(int) Load 8(invocation)
+              74:  18(i8vec4) GroupNonUniformShuffleUp 43 72 73
+              75:     49(ptr) AccessChain 34(data) 69 37
+                              Store 75 74
+              76:      6(int) Load 8(invocation)
+              77:     39(ptr) AccessChain 34(data) 37 37 38
+              78:  17(int8_t) Load 77
+              79:      6(int) Load 8(invocation)
+              80:  17(int8_t) GroupNonUniformShuffleDown 43 78 79
+              81:     39(ptr) AccessChain 34(data) 76 37 38
+                              Store 81 80
+              82:      6(int) Load 8(invocation)
+              83:     49(ptr) AccessChain 34(data) 47 37
+              84:  18(i8vec4) Load 83
+              85:  48(i8vec2) VectorShuffle 84 84 0 1
+              86:      6(int) Load 8(invocation)
+              87:  48(i8vec2) GroupNonUniformShuffleDown 43 85 86
+              88:     49(ptr) AccessChain 34(data) 82 37
+              89:  18(i8vec4) Load 88
+              90:  18(i8vec4) VectorShuffle 89 87 4 5 2 3
+                              Store 88 90
+              91:      6(int) Load 8(invocation)
+              92:     49(ptr) AccessChain 34(data) 59 37
+              93:  18(i8vec4) Load 92
+              94:  60(i8vec3) VectorShuffle 93 93 0 1 2
+              95:      6(int) Load 8(invocation)
+              96:  60(i8vec3) GroupNonUniformShuffleDown 43 94 95
+              97:     49(ptr) AccessChain 34(data) 91 37
+              98:  18(i8vec4) Load 97
+              99:  18(i8vec4) VectorShuffle 98 96 4 5 6 3
+                              Store 97 99
+             100:      6(int) Load 8(invocation)
+             101:     49(ptr) AccessChain 34(data) 70 37
+             102:  18(i8vec4) Load 101
+             103:      6(int) Load 8(invocation)
+             104:  18(i8vec4) GroupNonUniformShuffleDown 43 102 103
+             105:     49(ptr) AccessChain 34(data) 100 37
+                              Store 105 104
+             106:      6(int) Load 8(invocation)
+             108:    107(ptr) AccessChain 34(data) 37 47 38
+             109:  19(int8_t) Load 108
+             110:      6(int) Load 8(invocation)
+             111:  19(int8_t) GroupNonUniformShuffleUp 43 109 110
+             112:    107(ptr) AccessChain 34(data) 106 47 38
+                              Store 112 111
+             113:      6(int) Load 8(invocation)
+             116:    115(ptr) AccessChain 34(data) 47 47
+             117:  20(i8vec4) Load 116
+             118: 114(i8vec2) VectorShuffle 117 117 0 1
+             119:      6(int) Load 8(invocation)
+             120: 114(i8vec2) GroupNonUniformShuffleUp 43 118 119
+             121:    115(ptr) AccessChain 34(data) 113 47
+             122:  20(i8vec4) Load 121
+             123:  20(i8vec4) VectorShuffle 122 120 4 5 2 3
+                              Store 121 123
+             124:      6(int) Load 8(invocation)
+             126:    115(ptr) AccessChain 34(data) 59 47
+             127:  20(i8vec4) Load 126
+             128: 125(i8vec3) VectorShuffle 127 127 0 1 2
+             129:      6(int) Load 8(invocation)
+             130: 125(i8vec3) GroupNonUniformShuffleUp 43 128 129
+             131:    115(ptr) AccessChain 34(data) 124 47
+             132:  20(i8vec4) Load 131
+             133:  20(i8vec4) VectorShuffle 132 130 4 5 6 3
+                              Store 131 133
+             134:      6(int) Load 8(invocation)
+             135:    115(ptr) AccessChain 34(data) 70 47
+             136:  20(i8vec4) Load 135
+             137:      6(int) Load 8(invocation)
+             138:  20(i8vec4) GroupNonUniformShuffleUp 43 136 137
+             139:    115(ptr) AccessChain 34(data) 134 47
+                              Store 139 138
+             140:      6(int) Load 8(invocation)
+             141:    107(ptr) AccessChain 34(data) 37 47 38
+             142:  19(int8_t) Load 141
+             143:      6(int) Load 8(invocation)
+             144:  19(int8_t) GroupNonUniformShuffleDown 43 142 143
+             145:    107(ptr) AccessChain 34(data) 140 47 38
+                              Store 145 144
+             146:      6(int) Load 8(invocation)
+             147:    115(ptr) AccessChain 34(data) 47 47
+             148:  20(i8vec4) Load 147
+             149: 114(i8vec2) VectorShuffle 148 148 0 1
+             150:      6(int) Load 8(invocation)
+             151: 114(i8vec2) GroupNonUniformShuffleDown 43 149 150
+             152:    115(ptr) AccessChain 34(data) 146 47
+             153:  20(i8vec4) Load 152
+             154:  20(i8vec4) VectorShuffle 153 151 4 5 2 3
+                              Store 152 154
+             155:      6(int) Load 8(invocation)
+             156:    115(ptr) AccessChain 34(data) 59 47
+             157:  20(i8vec4) Load 156
+             158: 125(i8vec3) VectorShuffle 157 157 0 1 2
+             159:      6(int) Load 8(invocation)
+             160: 125(i8vec3) GroupNonUniformShuffleDown 43 158 159
+             161:    115(ptr) AccessChain 34(data) 155 47
+             162:  20(i8vec4) Load 161
+             163:  20(i8vec4) VectorShuffle 162 160 4 5 6 3
+                              Store 161 163
+             164:      6(int) Load 8(invocation)
+             165:    115(ptr) AccessChain 34(data) 70 47
+             166:  20(i8vec4) Load 165
+             167:      6(int) Load 8(invocation)
+             168:  20(i8vec4) GroupNonUniformShuffleDown 43 166 167
+             169:    115(ptr) AccessChain 34(data) 164 47
+                              Store 169 168
+             170:      6(int) Load 8(invocation)
+             172:    171(ptr) AccessChain 34(data) 37 59 38
+             173: 21(int16_t) Load 172
+             174:      6(int) Load 8(invocation)
+             175: 21(int16_t) GroupNonUniformShuffleUp 43 173 174
+             176:    171(ptr) AccessChain 34(data) 170 59 38
+                              Store 176 175
+             177:      6(int) Load 8(invocation)
+             180:    179(ptr) AccessChain 34(data) 47 59
+             181: 22(i16vec4) Load 180
+             182:178(i16vec2) VectorShuffle 181 181 0 1
+             183:      6(int) Load 8(invocation)
+             184:178(i16vec2) GroupNonUniformShuffleUp 43 182 183
+             185:    179(ptr) AccessChain 34(data) 177 59
+             186: 22(i16vec4) Load 185
+             187: 22(i16vec4) VectorShuffle 186 184 4 5 2 3
+                              Store 185 187
+             188:      6(int) Load 8(invocation)
+             190:    179(ptr) AccessChain 34(data) 59 59
+             191: 22(i16vec4) Load 190
+             192:189(i16vec3) VectorShuffle 191 191 0 1 2
+             193:      6(int) Load 8(invocation)
+             194:189(i16vec3) GroupNonUniformShuffleUp 43 192 193
+             195:    179(ptr) AccessChain 34(data) 188 59
+             196: 22(i16vec4) Load 195
+             197: 22(i16vec4) VectorShuffle 196 194 4 5 6 3
+                              Store 195 197
+             198:      6(int) Load 8(invocation)
+             199:    179(ptr) AccessChain 34(data) 70 59
+             200: 22(i16vec4) Load 199
+             201:      6(int) Load 8(invocation)
+             202: 22(i16vec4) GroupNonUniformShuffleUp 43 200 201
+             203:    179(ptr) AccessChain 34(data) 198 59
+                              Store 203 202
+             204:      6(int) Load 8(invocation)
+             205:    171(ptr) AccessChain 34(data) 37 59 38
+             206: 21(int16_t) Load 205
+             207:      6(int) Load 8(invocation)
+             208: 21(int16_t) GroupNonUniformShuffleDown 43 206 207
+             209:    171(ptr) AccessChain 34(data) 204 59 38
+                              Store 209 208
+             210:      6(int) Load 8(invocation)
+             211:    179(ptr) AccessChain 34(data) 47 59
+             212: 22(i16vec4) Load 211
+             213:178(i16vec2) VectorShuffle 212 212 0 1
+             214:      6(int) Load 8(invocation)
+             215:178(i16vec2) GroupNonUniformShuffleDown 43 213 214
+             216:    179(ptr) AccessChain 34(data) 210 59
+             217: 22(i16vec4) Load 216
+             218: 22(i16vec4) VectorShuffle 217 215 4 5 2 3
+                              Store 216 218
+             219:      6(int) Load 8(invocation)
+             220:    179(ptr) AccessChain 34(data) 59 59
+             221: 22(i16vec4) Load 220
+             222:189(i16vec3) VectorShuffle 221 221 0 1 2
+             223:      6(int) Load 8(invocation)
+             224:189(i16vec3) GroupNonUniformShuffleDown 43 222 223
+             225:    179(ptr) AccessChain 34(data) 219 59
+             226: 22(i16vec4) Load 225
+             227: 22(i16vec4) VectorShuffle 226 224 4 5 6 3
+                              Store 225 227
+             228:      6(int) Load 8(invocation)
+             229:    179(ptr) AccessChain 34(data) 70 59
+             230: 22(i16vec4) Load 229
+             231:      6(int) Load 8(invocation)
+             232: 22(i16vec4) GroupNonUniformShuffleDown 43 230 231
+             233:    179(ptr) AccessChain 34(data) 228 59
+                              Store 233 232
+             234:      6(int) Load 8(invocation)
+             236:    235(ptr) AccessChain 34(data) 37 70 38
+             237: 23(int16_t) Load 236
+             238:      6(int) Load 8(invocation)
+             239: 23(int16_t) GroupNonUniformShuffleUp 43 237 238
+             240:    235(ptr) AccessChain 34(data) 234 70 38
+                              Store 240 239
+             241:      6(int) Load 8(invocation)
+             244:    243(ptr) AccessChain 34(data) 47 70
+             245: 24(i16vec4) Load 244
+             246:242(i16vec2) VectorShuffle 245 245 0 1
+             247:      6(int) Load 8(invocation)
+             248:242(i16vec2) GroupNonUniformShuffleUp 43 246 247
+             249:    243(ptr) AccessChain 34(data) 241 70
+             250: 24(i16vec4) Load 249
+             251: 24(i16vec4) VectorShuffle 250 248 4 5 2 3
+                              Store 249 251
+             252:      6(int) Load 8(invocation)
+             254:    243(ptr) AccessChain 34(data) 59 70
+             255: 24(i16vec4) Load 254
+             256:253(i16vec3) VectorShuffle 255 255 0 1 2
+             257:      6(int) Load 8(invocation)
+             258:253(i16vec3) GroupNonUniformShuffleUp 43 256 257
+             259:    243(ptr) AccessChain 34(data) 252 70
+             260: 24(i16vec4) Load 259
+             261: 24(i16vec4) VectorShuffle 260 258 4 5 6 3
+                              Store 259 261
+             262:      6(int) Load 8(invocation)
+             263:    243(ptr) AccessChain 34(data) 70 70
+             264: 24(i16vec4) Load 263
+             265:      6(int) Load 8(invocation)
+             266: 24(i16vec4) GroupNonUniformShuffleUp 43 264 265
+             267:    243(ptr) AccessChain 34(data) 262 70
+                              Store 267 266
+             268:      6(int) Load 8(invocation)
+             269:    235(ptr) AccessChain 34(data) 37 70 38
+             270: 23(int16_t) Load 269
+             271:      6(int) Load 8(invocation)
+             272: 23(int16_t) GroupNonUniformShuffleDown 43 270 271
+             273:    235(ptr) AccessChain 34(data) 268 70 38
+                              Store 273 272
+             274:      6(int) Load 8(invocation)
+             275:    243(ptr) AccessChain 34(data) 47 70
+             276: 24(i16vec4) Load 275
+             277:242(i16vec2) VectorShuffle 276 276 0 1
+             278:      6(int) Load 8(invocation)
+             279:242(i16vec2) GroupNonUniformShuffleDown 43 277 278
+             280:    243(ptr) AccessChain 34(data) 274 70
+             281: 24(i16vec4) Load 280
+             282: 24(i16vec4) VectorShuffle 281 279 4 5 2 3
+                              Store 280 282
+             283:      6(int) Load 8(invocation)
+             284:    243(ptr) AccessChain 34(data) 59 70
+             285: 24(i16vec4) Load 284
+             286:253(i16vec3) VectorShuffle 285 285 0 1 2
+             287:      6(int) Load 8(invocation)
+             288:253(i16vec3) GroupNonUniformShuffleDown 43 286 287
+             289:    243(ptr) AccessChain 34(data) 283 70
+             290: 24(i16vec4) Load 289
+             291: 24(i16vec4) VectorShuffle 290 288 4 5 6 3
+                              Store 289 291
+             292:      6(int) Load 8(invocation)
+             293:    243(ptr) AccessChain 34(data) 70 70
+             294: 24(i16vec4) Load 293
+             295:      6(int) Load 8(invocation)
+             296: 24(i16vec4) GroupNonUniformShuffleDown 43 294 295
+             297:    243(ptr) AccessChain 34(data) 292 70
+                              Store 297 296
+             298:      6(int) Load 8(invocation)
+             301:    300(ptr) AccessChain 34(data) 37 299 38
+             302: 25(int64_t) Load 301
+             303:      6(int) Load 8(invocation)
+             304: 25(int64_t) GroupNonUniformShuffleUp 43 302 303
+             305:    300(ptr) AccessChain 34(data) 298 299 38
+                              Store 305 304
+             306:      6(int) Load 8(invocation)
+             309:    308(ptr) AccessChain 34(data) 47 299
+             310: 26(i64vec4) Load 309
+             311:307(i64vec2) VectorShuffle 310 310 0 1
+             312:      6(int) Load 8(invocation)
+             313:307(i64vec2) GroupNonUniformShuffleUp 43 311 312
+             314:    308(ptr) AccessChain 34(data) 306 299
+             315: 26(i64vec4) Load 314
+             316: 26(i64vec4) VectorShuffle 315 313 4 5 2 3
+                              Store 314 316
+             317:      6(int) Load 8(invocation)
+             319:    308(ptr) AccessChain 34(data) 59 299
+             320: 26(i64vec4) Load 319
+             321:318(i64vec3) VectorShuffle 320 320 0 1 2
+             322:      6(int) Load 8(invocation)
+             323:318(i64vec3) GroupNonUniformShuffleUp 43 321 322
+             324:    308(ptr) AccessChain 34(data) 317 299
+             325: 26(i64vec4) Load 324
+             326: 26(i64vec4) VectorShuffle 325 323 4 5 6 3
+                              Store 324 326
+             327:      6(int) Load 8(invocation)
+             328:    308(ptr) AccessChain 34(data) 70 299
+             329: 26(i64vec4) Load 328
+             330:      6(int) Load 8(invocation)
+             331: 26(i64vec4) GroupNonUniformShuffleUp 43 329 330
+             332:    308(ptr) AccessChain 34(data) 327 299
+                              Store 332 331
+             333:      6(int) Load 8(invocation)
+             334:    300(ptr) AccessChain 34(data) 37 299 38
+             335: 25(int64_t) Load 334
+             336:      6(int) Load 8(invocation)
+             337: 25(int64_t) GroupNonUniformShuffleDown 43 335 336
+             338:    300(ptr) AccessChain 34(data) 333 299 38
+                              Store 338 337
+             339:      6(int) Load 8(invocation)
+             340:    308(ptr) AccessChain 34(data) 47 299
+             341: 26(i64vec4) Load 340
+             342:307(i64vec2) VectorShuffle 341 341 0 1
+             343:      6(int) Load 8(invocation)
+             344:307(i64vec2) GroupNonUniformShuffleDown 43 342 343
+             345:    308(ptr) AccessChain 34(data) 339 299
+             346: 26(i64vec4) Load 345
+             347: 26(i64vec4) VectorShuffle 346 344 4 5 2 3
+                              Store 345 347
+             348:      6(int) Load 8(invocation)
+             349:    308(ptr) AccessChain 34(data) 59 299
+             350: 26(i64vec4) Load 349
+             351:318(i64vec3) VectorShuffle 350 350 0 1 2
+             352:      6(int) Load 8(invocation)
+             353:318(i64vec3) GroupNonUniformShuffleDown 43 351 352
+             354:    308(ptr) AccessChain 34(data) 348 299
+             355: 26(i64vec4) Load 354
+             356: 26(i64vec4) VectorShuffle 355 353 4 5 6 3
+                              Store 354 356
+             357:      6(int) Load 8(invocation)
+             358:    308(ptr) AccessChain 34(data) 70 299
+             359: 26(i64vec4) Load 358
+             360:      6(int) Load 8(invocation)
+             361: 26(i64vec4) GroupNonUniformShuffleDown 43 359 360
+             362:    308(ptr) AccessChain 34(data) 357 299
+                              Store 362 361
+             363:      6(int) Load 8(invocation)
+             366:    365(ptr) AccessChain 34(data) 37 364 38
+             367: 27(int64_t) Load 366
+             368:      6(int) Load 8(invocation)
+             369: 27(int64_t) GroupNonUniformShuffleUp 43 367 368
+             370:    365(ptr) AccessChain 34(data) 363 364 38
+                              Store 370 369
+             371:      6(int) Load 8(invocation)
+             374:    373(ptr) AccessChain 34(data) 47 364
+             375: 28(i64vec4) Load 374
+             376:372(i64vec2) VectorShuffle 375 375 0 1
+             377:      6(int) Load 8(invocation)
+             378:372(i64vec2) GroupNonUniformShuffleUp 43 376 377
+             379:    373(ptr) AccessChain 34(data) 371 364
+             380: 28(i64vec4) Load 379
+             381: 28(i64vec4) VectorShuffle 380 378 4 5 2 3
+                              Store 379 381
+             382:      6(int) Load 8(invocation)
+             384:    373(ptr) AccessChain 34(data) 59 364
+             385: 28(i64vec4) Load 384
+             386:383(i64vec3) VectorShuffle 385 385 0 1 2
+             387:      6(int) Load 8(invocation)
+             388:383(i64vec3) GroupNonUniformShuffleUp 43 386 387
+             389:    373(ptr) AccessChain 34(data) 382 364
+             390: 28(i64vec4) Load 389
+             391: 28(i64vec4) VectorShuffle 390 388 4 5 6 3
+                              Store 389 391
+             392:      6(int) Load 8(invocation)
+             393:    373(ptr) AccessChain 34(data) 70 364
+             394: 28(i64vec4) Load 393
+             395:      6(int) Load 8(invocation)
+             396: 28(i64vec4) GroupNonUniformShuffleUp 43 394 395
+             397:    373(ptr) AccessChain 34(data) 392 364
+                              Store 397 396
+             398:      6(int) Load 8(invocation)
+             399:    365(ptr) AccessChain 34(data) 37 364 38
+             400: 27(int64_t) Load 399
+             401:      6(int) Load 8(invocation)
+             402: 27(int64_t) GroupNonUniformShuffleDown 43 400 401
+             403:    365(ptr) AccessChain 34(data) 398 364 38
+                              Store 403 402
+             404:      6(int) Load 8(invocation)
+             405:    373(ptr) AccessChain 34(data) 47 364
+             406: 28(i64vec4) Load 405
+             407:372(i64vec2) VectorShuffle 406 406 0 1
+             408:      6(int) Load 8(invocation)
+             409:372(i64vec2) GroupNonUniformShuffleDown 43 407 408
+             410:    373(ptr) AccessChain 34(data) 404 364
+             411: 28(i64vec4) Load 410
+             412: 28(i64vec4) VectorShuffle 411 409 4 5 2 3
+                              Store 410 412
+             413:      6(int) Load 8(invocation)
+             414:    373(ptr) AccessChain 34(data) 59 364
+             415: 28(i64vec4) Load 414
+             416:383(i64vec3) VectorShuffle 415 415 0 1 2
+             417:      6(int) Load 8(invocation)
+             418:383(i64vec3) GroupNonUniformShuffleDown 43 416 417
+             419:    373(ptr) AccessChain 34(data) 413 364
+             420: 28(i64vec4) Load 419
+             421: 28(i64vec4) VectorShuffle 420 418 4 5 6 3
+                              Store 419 421
+             422:      6(int) Load 8(invocation)
+             423:    373(ptr) AccessChain 34(data) 70 364
+             424: 28(i64vec4) Load 423
+             425:      6(int) Load 8(invocation)
+             426: 28(i64vec4) GroupNonUniformShuffleDown 43 424 425
+             427:    373(ptr) AccessChain 34(data) 422 364
+                              Store 427 426
+             428:      6(int) Load 8(invocation)
+             431:    430(ptr) AccessChain 34(data) 37 429 38
+             432:29(float16_t) Load 431
+             433:      6(int) Load 8(invocation)
+             434:29(float16_t) GroupNonUniformShuffleUp 43 432 433
+             435:    430(ptr) AccessChain 34(data) 428 429 38
+                              Store 435 434
+             436:      6(int) Load 8(invocation)
+             439:    438(ptr) AccessChain 34(data) 47 429
+             440: 30(f16vec4) Load 439
+             441:437(f16vec2) VectorShuffle 440 440 0 1
+             442:      6(int) Load 8(invocation)
+             443:437(f16vec2) GroupNonUniformShuffleUp 43 441 442
+             444:    438(ptr) AccessChain 34(data) 436 429
+             445: 30(f16vec4) Load 444
+             446: 30(f16vec4) VectorShuffle 445 443 4 5 2 3
+                              Store 444 446
+             447:      6(int) Load 8(invocation)
+             449:    438(ptr) AccessChain 34(data) 59 429
+             450: 30(f16vec4) Load 449
+             451:448(f16vec3) VectorShuffle 450 450 0 1 2
+             452:      6(int) Load 8(invocation)
+             453:448(f16vec3) GroupNonUniformShuffleUp 43 451 452
+             454:    438(ptr) AccessChain 34(data) 447 429
+             455: 30(f16vec4) Load 454
+             456: 30(f16vec4) VectorShuffle 455 453 4 5 6 3
+                              Store 454 456
+             457:      6(int) Load 8(invocation)
+             458:    438(ptr) AccessChain 34(data) 70 429
+             459: 30(f16vec4) Load 458
+             460:      6(int) Load 8(invocation)
+             461: 30(f16vec4) GroupNonUniformShuffleUp 43 459 460
+             462:    438(ptr) AccessChain 34(data) 457 429
+                              Store 462 461
+             463:      6(int) Load 8(invocation)
+             464:    430(ptr) AccessChain 34(data) 37 429 38
+             465:29(float16_t) Load 464
+             466:      6(int) Load 8(invocation)
+             467:29(float16_t) GroupNonUniformShuffleDown 43 465 466
+             468:    430(ptr) AccessChain 34(data) 463 429 38
+                              Store 468 467
+             469:      6(int) Load 8(invocation)
+             470:    438(ptr) AccessChain 34(data) 47 429
+             471: 30(f16vec4) Load 470
+             472:437(f16vec2) VectorShuffle 471 471 0 1
+             473:      6(int) Load 8(invocation)
+             474:437(f16vec2) GroupNonUniformShuffleDown 43 472 473
+             475:    438(ptr) AccessChain 34(data) 469 429
+             476: 30(f16vec4) Load 475
+             477: 30(f16vec4) VectorShuffle 476 474 4 5 2 3
+                              Store 475 477
+             478:      6(int) Load 8(invocation)
+             479:    438(ptr) AccessChain 34(data) 59 429
+             480: 30(f16vec4) Load 479
+             481:448(f16vec3) VectorShuffle 480 480 0 1 2
+             482:      6(int) Load 8(invocation)
+             483:448(f16vec3) GroupNonUniformShuffleDown 43 481 482
+             484:    438(ptr) AccessChain 34(data) 478 429
+             485: 30(f16vec4) Load 484
+             486: 30(f16vec4) VectorShuffle 485 483 4 5 6 3
+                              Store 484 486
+             487:      6(int) Load 8(invocation)
+             488:    438(ptr) AccessChain 34(data) 70 429
+             489: 30(f16vec4) Load 488
+             490:      6(int) Load 8(invocation)
+             491: 30(f16vec4) GroupNonUniformShuffleDown 43 489 490
+             492:    438(ptr) AccessChain 34(data) 487 429
+                              Store 492 491
+                              Return
+                              FunctionEnd
diff --git a/Test/baseResults/spv.subgroupExtendedTypesShuffleRelativeNeg.comp.out b/Test/baseResults/spv.subgroupExtendedTypesShuffleRelativeNeg.comp.out
new file mode 100644
index 0000000..a043715
--- /dev/null
+++ b/Test/baseResults/spv.subgroupExtendedTypesShuffleRelativeNeg.comp.out
@@ -0,0 +1,61 @@
+spv.subgroupExtendedTypesShuffleRelativeNeg.comp
+ERROR: 0:26: ' temp int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:27: ' temp 2-component vector of int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:28: ' temp 3-component vector of int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:29: 'layout( column_major std430) buffer 4-component vector of int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:31: ' temp int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:32: ' temp 2-component vector of int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:33: ' temp 3-component vector of int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:34: 'layout( column_major std430) buffer 4-component vector of int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:36: ' temp uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:37: ' temp 2-component vector of uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:38: ' temp 3-component vector of uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:39: 'layout( column_major std430) buffer 4-component vector of uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:41: ' temp uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:42: ' temp 2-component vector of uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:43: ' temp 3-component vector of uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:44: 'layout( column_major std430) buffer 4-component vector of uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:46: ' temp int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:47: ' temp 2-component vector of int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:48: ' temp 3-component vector of int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:49: 'layout( column_major std430) buffer 4-component vector of int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:51: ' temp int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:52: ' temp 2-component vector of int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:53: ' temp 3-component vector of int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:54: 'layout( column_major std430) buffer 4-component vector of int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:56: ' temp uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:57: ' temp 2-component vector of uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:58: ' temp 3-component vector of uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:59: 'layout( column_major std430) buffer 4-component vector of uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:61: ' temp uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:62: ' temp 2-component vector of uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:63: ' temp 3-component vector of uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:64: 'layout( column_major std430) buffer 4-component vector of uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:66: ' temp int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:67: ' temp 2-component vector of int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:68: ' temp 3-component vector of int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:69: 'layout( column_major std430) buffer 4-component vector of int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:71: ' temp int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:72: ' temp 2-component vector of int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:73: ' temp 3-component vector of int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:74: 'layout( column_major std430) buffer 4-component vector of int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:76: ' temp uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:77: ' temp 2-component vector of uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:78: ' temp 3-component vector of uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:79: 'layout( column_major std430) buffer 4-component vector of uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:81: ' temp uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:82: ' temp 2-component vector of uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:83: ' temp 3-component vector of uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:84: 'layout( column_major std430) buffer 4-component vector of uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:86: ' temp highp float16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_float16
+ERROR: 0:87: ' temp highp 2-component vector of float16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_float16
+ERROR: 0:88: ' temp highp 3-component vector of float16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_float16
+ERROR: 0:89: 'layout( column_major std430) buffer highp 4-component vector of float16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_float16
+ERROR: 0:91: ' temp highp float16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_float16
+ERROR: 0:92: ' temp highp 2-component vector of float16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_float16
+ERROR: 0:93: ' temp highp 3-component vector of float16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_float16
+ERROR: 0:94: 'layout( column_major std430) buffer highp 4-component vector of float16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_float16
+ERROR: 56 compilation errors.  No code generated.
+
+
+SPIR-V is not generated for failed compile or link
diff --git a/Test/baseResults/spv.subgroupExtendedTypesVote.comp.out b/Test/baseResults/spv.subgroupExtendedTypesVote.comp.out
new file mode 100644
index 0000000..a32c25d
--- /dev/null
+++ b/Test/baseResults/spv.subgroupExtendedTypesVote.comp.out
@@ -0,0 +1,377 @@
+spv.subgroupExtendedTypesVote.comp
+// Module Version 10300
+// Generated by (magic number): 80007
+// Id's are bound by 277
+
+                              Capability Shader
+                              Capability Float16
+                              Capability Int64
+                              Capability Int16
+                              Capability Int8
+                              Capability GroupNonUniform
+                              Capability GroupNonUniformVote
+                              Capability StorageUniformBufferBlock16
+                              Capability StorageBuffer8BitAccess
+                              Extension  "SPV_KHR_8bit_storage"
+               1:             ExtInstImport  "GLSL.std.450"
+                              MemoryModel Logical GLSL450
+                              EntryPoint GLCompute 4  "main" 10 12
+                              ExecutionMode 4 LocalSize 8 1 1
+                              Source GLSL 450
+                              SourceExtension  "GL_EXT_shader_explicit_arithmetic_types_float16"
+                              SourceExtension  "GL_EXT_shader_explicit_arithmetic_types_int16"
+                              SourceExtension  "GL_EXT_shader_explicit_arithmetic_types_int64"
+                              SourceExtension  "GL_EXT_shader_explicit_arithmetic_types_int8"
+                              SourceExtension  "GL_EXT_shader_subgroup_extended_types_float16"
+                              SourceExtension  "GL_EXT_shader_subgroup_extended_types_int16"
+                              SourceExtension  "GL_EXT_shader_subgroup_extended_types_int64"
+                              SourceExtension  "GL_EXT_shader_subgroup_extended_types_int8"
+                              SourceExtension  "GL_KHR_shader_subgroup_basic"
+                              SourceExtension  "GL_KHR_shader_subgroup_vote"
+                              Name 4  "main"
+                              Name 8  "invocation"
+                              Name 10  "gl_SubgroupInvocationID"
+                              Name 12  "gl_SubgroupSize"
+                              Name 32  "Buffers"
+                              MemberName 32(Buffers) 0  "i8"
+                              MemberName 32(Buffers) 1  "u8"
+                              MemberName 32(Buffers) 2  "i16"
+                              MemberName 32(Buffers) 3  "u16"
+                              MemberName 32(Buffers) 4  "i64"
+                              MemberName 32(Buffers) 5  "u64"
+                              MemberName 32(Buffers) 6  "f16"
+                              MemberName 32(Buffers) 7  "r"
+                              Name 35  "data"
+                              Decorate 10(gl_SubgroupInvocationID) RelaxedPrecision
+                              Decorate 10(gl_SubgroupInvocationID) BuiltIn SubgroupLocalInvocationId
+                              Decorate 11 RelaxedPrecision
+                              Decorate 12(gl_SubgroupSize) RelaxedPrecision
+                              Decorate 12(gl_SubgroupSize) BuiltIn SubgroupSize
+                              Decorate 13 RelaxedPrecision
+                              Decorate 14 RelaxedPrecision
+                              Decorate 16 RelaxedPrecision
+                              MemberDecorate 32(Buffers) 0 Offset 0
+                              MemberDecorate 32(Buffers) 1 Offset 4
+                              MemberDecorate 32(Buffers) 2 Offset 8
+                              MemberDecorate 32(Buffers) 3 Offset 16
+                              MemberDecorate 32(Buffers) 4 Offset 32
+                              MemberDecorate 32(Buffers) 5 Offset 64
+                              MemberDecorate 32(Buffers) 6 Offset 96
+                              MemberDecorate 32(Buffers) 7 Offset 104
+                              Decorate 32(Buffers) Block
+                              Decorate 35(data) DescriptorSet 0
+                              Decorate 35(data) Binding 0
+                              Decorate 276 BuiltIn WorkgroupSize
+               2:             TypeVoid
+               3:             TypeFunction 2
+               6:             TypeInt 32 0
+               7:             TypePointer Function 6(int)
+               9:             TypePointer Input 6(int)
+10(gl_SubgroupInvocationID):      9(ptr) Variable Input
+12(gl_SubgroupSize):      9(ptr) Variable Input
+              15:      6(int) Constant 4
+              17:             TypeInt 8 1
+              18:             TypeVector 17(int8_t) 4
+              19:             TypeInt 8 0
+              20:             TypeVector 19(int8_t) 4
+              21:             TypeInt 16 1
+              22:             TypeVector 21(int16_t) 4
+              23:             TypeInt 16 0
+              24:             TypeVector 23(int16_t) 4
+              25:             TypeInt 64 1
+              26:             TypeVector 25(int64_t) 4
+              27:             TypeInt 64 0
+              28:             TypeVector 27(int64_t) 4
+              29:             TypeFloat 16
+              30:             TypeVector 29(float16_t) 4
+              31:             TypeInt 32 1
+     32(Buffers):             TypeStruct 18(i8vec4) 20(i8vec4) 22(i16vec4) 24(i16vec4) 26(i64vec4) 28(i64vec4) 30(f16vec4) 31(int)
+              33:             TypeArray 32(Buffers) 15
+              34:             TypePointer StorageBuffer 33
+        35(data):     34(ptr) Variable StorageBuffer
+              37:     31(int) Constant 7
+              38:             TypePointer StorageBuffer 31(int)
+              41:     31(int) Constant 0
+              42:             TypeBool
+              44:      6(int) Constant 3
+              49:      6(int) Constant 0
+              50:             TypePointer StorageBuffer 17(int8_t)
+              54:     31(int) Constant 1
+              58:             TypeVector 17(int8_t) 2
+              59:             TypePointer StorageBuffer 18(i8vec4)
+              67:     31(int) Constant 2
+              68:             TypeVector 17(int8_t) 3
+              76:     31(int) Constant 3
+              83:             TypePointer StorageBuffer 19(int8_t)
+              90:             TypeVector 19(int8_t) 2
+              91:             TypePointer StorageBuffer 20(i8vec4)
+              99:             TypeVector 19(int8_t) 3
+             113:             TypePointer StorageBuffer 21(int16_t)
+             120:             TypeVector 21(int16_t) 2
+             121:             TypePointer StorageBuffer 22(i16vec4)
+             129:             TypeVector 21(int16_t) 3
+             143:             TypePointer StorageBuffer 23(int16_t)
+             150:             TypeVector 23(int16_t) 2
+             151:             TypePointer StorageBuffer 24(i16vec4)
+             159:             TypeVector 23(int16_t) 3
+             181:     31(int) Constant 4
+             182:             TypePointer StorageBuffer 25(int64_t)
+             189:             TypeVector 25(int64_t) 2
+             190:             TypePointer StorageBuffer 26(i64vec4)
+             198:             TypeVector 25(int64_t) 3
+             212:     31(int) Constant 5
+             213:             TypePointer StorageBuffer 27(int64_t)
+             220:             TypeVector 27(int64_t) 2
+             221:             TypePointer StorageBuffer 28(i64vec4)
+             229:             TypeVector 27(int64_t) 3
+             243:     31(int) Constant 6
+             244:             TypePointer StorageBuffer 29(float16_t)
+             251:             TypeVector 29(float16_t) 2
+             252:             TypePointer StorageBuffer 30(f16vec4)
+             260:             TypeVector 29(float16_t) 3
+             273:             TypeVector 6(int) 3
+             274:      6(int) Constant 8
+             275:      6(int) Constant 1
+             276:  273(ivec3) ConstantComposite 274 275 275
+         4(main):           2 Function None 3
+               5:             Label
+   8(invocation):      7(ptr) Variable Function
+              11:      6(int) Load 10(gl_SubgroupInvocationID)
+              13:      6(int) Load 12(gl_SubgroupSize)
+              14:      6(int) IAdd 11 13
+              16:      6(int) UMod 14 15
+                              Store 8(invocation) 16
+              36:      6(int) Load 8(invocation)
+              39:     38(ptr) AccessChain 35(data) 36 37
+              40:     31(int) Load 39
+              43:    42(bool) SLessThan 40 41
+              45:    42(bool) GroupNonUniformAll 44 43
+                              SelectionMerge 47 None
+                              BranchConditional 45 46 172
+              46:               Label
+              48:      6(int)   Load 8(invocation)
+              51:     50(ptr)   AccessChain 35(data) 41 41 49
+              52:  17(int8_t)   Load 51
+              53:    42(bool)   GroupNonUniformAllEqual 44 52
+              55:     31(int)   Select 53 54 41
+              56:     38(ptr)   AccessChain 35(data) 48 37
+                                Store 56 55
+              57:      6(int)   Load 8(invocation)
+              60:     59(ptr)   AccessChain 35(data) 54 41
+              61:  18(i8vec4)   Load 60
+              62:  58(i8vec2)   VectorShuffle 61 61 0 1
+              63:    42(bool)   GroupNonUniformAllEqual 44 62
+              64:     31(int)   Select 63 54 41
+              65:     38(ptr)   AccessChain 35(data) 57 37
+                                Store 65 64
+              66:      6(int)   Load 8(invocation)
+              69:     59(ptr)   AccessChain 35(data) 67 41
+              70:  18(i8vec4)   Load 69
+              71:  68(i8vec3)   VectorShuffle 70 70 0 1 2
+              72:    42(bool)   GroupNonUniformAllEqual 44 71
+              73:     31(int)   Select 72 54 41
+              74:     38(ptr)   AccessChain 35(data) 66 37
+                                Store 74 73
+              75:      6(int)   Load 8(invocation)
+              77:     59(ptr)   AccessChain 35(data) 76 41
+              78:  18(i8vec4)   Load 77
+              79:    42(bool)   GroupNonUniformAllEqual 44 78
+              80:     31(int)   Select 79 54 41
+              81:     38(ptr)   AccessChain 35(data) 75 37
+                                Store 81 80
+              82:      6(int)   Load 8(invocation)
+              84:     83(ptr)   AccessChain 35(data) 41 54 49
+              85:  19(int8_t)   Load 84
+              86:    42(bool)   GroupNonUniformAllEqual 44 85
+              87:     31(int)   Select 86 54 41
+              88:     38(ptr)   AccessChain 35(data) 82 37
+                                Store 88 87
+              89:      6(int)   Load 8(invocation)
+              92:     91(ptr)   AccessChain 35(data) 54 54
+              93:  20(i8vec4)   Load 92
+              94:  90(i8vec2)   VectorShuffle 93 93 0 1
+              95:    42(bool)   GroupNonUniformAllEqual 44 94
+              96:     31(int)   Select 95 54 41
+              97:     38(ptr)   AccessChain 35(data) 89 37
+                                Store 97 96
+              98:      6(int)   Load 8(invocation)
+             100:     91(ptr)   AccessChain 35(data) 67 54
+             101:  20(i8vec4)   Load 100
+             102:  99(i8vec3)   VectorShuffle 101 101 0 1 2
+             103:    42(bool)   GroupNonUniformAllEqual 44 102
+             104:     31(int)   Select 103 54 41
+             105:     38(ptr)   AccessChain 35(data) 98 37
+                                Store 105 104
+             106:      6(int)   Load 8(invocation)
+             107:     91(ptr)   AccessChain 35(data) 76 54
+             108:  20(i8vec4)   Load 107
+             109:    42(bool)   GroupNonUniformAllEqual 44 108
+             110:     31(int)   Select 109 54 41
+             111:     38(ptr)   AccessChain 35(data) 106 37
+                                Store 111 110
+             112:      6(int)   Load 8(invocation)
+             114:    113(ptr)   AccessChain 35(data) 41 67 49
+             115: 21(int16_t)   Load 114
+             116:    42(bool)   GroupNonUniformAllEqual 44 115
+             117:     31(int)   Select 116 54 41
+             118:     38(ptr)   AccessChain 35(data) 112 37
+                                Store 118 117
+             119:      6(int)   Load 8(invocation)
+             122:    121(ptr)   AccessChain 35(data) 54 67
+             123: 22(i16vec4)   Load 122
+             124:120(i16vec2)   VectorShuffle 123 123 0 1
+             125:    42(bool)   GroupNonUniformAllEqual 44 124
+             126:     31(int)   Select 125 54 41
+             127:     38(ptr)   AccessChain 35(data) 119 37
+                                Store 127 126
+             128:      6(int)   Load 8(invocation)
+             130:    121(ptr)   AccessChain 35(data) 67 67
+             131: 22(i16vec4)   Load 130
+             132:129(i16vec3)   VectorShuffle 131 131 0 1 2
+             133:    42(bool)   GroupNonUniformAllEqual 44 132
+             134:     31(int)   Select 133 54 41
+             135:     38(ptr)   AccessChain 35(data) 128 37
+                                Store 135 134
+             136:      6(int)   Load 8(invocation)
+             137:    121(ptr)   AccessChain 35(data) 76 67
+             138: 22(i16vec4)   Load 137
+             139:    42(bool)   GroupNonUniformAllEqual 44 138
+             140:     31(int)   Select 139 54 41
+             141:     38(ptr)   AccessChain 35(data) 136 37
+                                Store 141 140
+             142:      6(int)   Load 8(invocation)
+             144:    143(ptr)   AccessChain 35(data) 41 76 49
+             145: 23(int16_t)   Load 144
+             146:    42(bool)   GroupNonUniformAllEqual 44 145
+             147:     31(int)   Select 146 54 41
+             148:     38(ptr)   AccessChain 35(data) 142 37
+                                Store 148 147
+             149:      6(int)   Load 8(invocation)
+             152:    151(ptr)   AccessChain 35(data) 54 76
+             153: 24(i16vec4)   Load 152
+             154:150(i16vec2)   VectorShuffle 153 153 0 1
+             155:    42(bool)   GroupNonUniformAllEqual 44 154
+             156:     31(int)   Select 155 54 41
+             157:     38(ptr)   AccessChain 35(data) 149 37
+                                Store 157 156
+             158:      6(int)   Load 8(invocation)
+             160:    151(ptr)   AccessChain 35(data) 67 76
+             161: 24(i16vec4)   Load 160
+             162:159(i16vec3)   VectorShuffle 161 161 0 1 2
+             163:    42(bool)   GroupNonUniformAllEqual 44 162
+             164:     31(int)   Select 163 54 41
+             165:     38(ptr)   AccessChain 35(data) 158 37
+                                Store 165 164
+             166:      6(int)   Load 8(invocation)
+             167:    151(ptr)   AccessChain 35(data) 76 76
+             168: 24(i16vec4)   Load 167
+             169:    42(bool)   GroupNonUniformAllEqual 44 168
+             170:     31(int)   Select 169 54 41
+             171:     38(ptr)   AccessChain 35(data) 166 37
+                                Store 171 170
+                                Branch 47
+             172:               Label
+             173:      6(int)   Load 8(invocation)
+             174:     38(ptr)   AccessChain 35(data) 173 37
+             175:     31(int)   Load 174
+             176:    42(bool)   SLessThan 175 41
+             177:    42(bool)   GroupNonUniformAny 44 176
+                                SelectionMerge 179 None
+                                BranchConditional 177 178 179
+             178:                 Label
+             180:      6(int)     Load 8(invocation)
+             183:    182(ptr)     AccessChain 35(data) 41 181 49
+             184: 25(int64_t)     Load 183
+             185:    42(bool)     GroupNonUniformAllEqual 44 184
+             186:     31(int)     Select 185 54 41
+             187:     38(ptr)     AccessChain 35(data) 180 37
+                                  Store 187 186
+             188:      6(int)     Load 8(invocation)
+             191:    190(ptr)     AccessChain 35(data) 54 181
+             192: 26(i64vec4)     Load 191
+             193:189(i64vec2)     VectorShuffle 192 192 0 1
+             194:    42(bool)     GroupNonUniformAllEqual 44 193
+             195:     31(int)     Select 194 54 41
+             196:     38(ptr)     AccessChain 35(data) 188 37
+                                  Store 196 195
+             197:      6(int)     Load 8(invocation)
+             199:    190(ptr)     AccessChain 35(data) 67 181
+             200: 26(i64vec4)     Load 199
+             201:198(i64vec3)     VectorShuffle 200 200 0 1 2
+             202:    42(bool)     GroupNonUniformAllEqual 44 201
+             203:     31(int)     Select 202 54 41
+             204:     38(ptr)     AccessChain 35(data) 197 37
+                                  Store 204 203
+             205:      6(int)     Load 8(invocation)
+             206:    190(ptr)     AccessChain 35(data) 76 181
+             207: 26(i64vec4)     Load 206
+             208:    42(bool)     GroupNonUniformAllEqual 44 207
+             209:     31(int)     Select 208 54 41
+             210:     38(ptr)     AccessChain 35(data) 205 37
+                                  Store 210 209
+             211:      6(int)     Load 8(invocation)
+             214:    213(ptr)     AccessChain 35(data) 41 212 49
+             215: 27(int64_t)     Load 214
+             216:    42(bool)     GroupNonUniformAllEqual 44 215
+             217:     31(int)     Select 216 54 41
+             218:     38(ptr)     AccessChain 35(data) 211 37
+                                  Store 218 217
+             219:      6(int)     Load 8(invocation)
+             222:    221(ptr)     AccessChain 35(data) 54 212
+             223: 28(i64vec4)     Load 222
+             224:220(i64vec2)     VectorShuffle 223 223 0 1
+             225:    42(bool)     GroupNonUniformAllEqual 44 224
+             226:     31(int)     Select 225 54 41
+             227:     38(ptr)     AccessChain 35(data) 219 37
+                                  Store 227 226
+             228:      6(int)     Load 8(invocation)
+             230:    221(ptr)     AccessChain 35(data) 67 212
+             231: 28(i64vec4)     Load 230
+             232:229(i64vec3)     VectorShuffle 231 231 0 1 2
+             233:    42(bool)     GroupNonUniformAllEqual 44 232
+             234:     31(int)     Select 233 54 41
+             235:     38(ptr)     AccessChain 35(data) 228 37
+                                  Store 235 234
+             236:      6(int)     Load 8(invocation)
+             237:    221(ptr)     AccessChain 35(data) 76 212
+             238: 28(i64vec4)     Load 237
+             239:    42(bool)     GroupNonUniformAllEqual 44 238
+             240:     31(int)     Select 239 54 41
+             241:     38(ptr)     AccessChain 35(data) 236 37
+                                  Store 241 240
+             242:      6(int)     Load 8(invocation)
+             245:    244(ptr)     AccessChain 35(data) 41 243 49
+             246:29(float16_t)     Load 245
+             247:    42(bool)     GroupNonUniformAllEqual 44 246
+             248:     31(int)     Select 247 54 41
+             249:     38(ptr)     AccessChain 35(data) 242 37
+                                  Store 249 248
+             250:      6(int)     Load 8(invocation)
+             253:    252(ptr)     AccessChain 35(data) 54 243
+             254: 30(f16vec4)     Load 253
+             255:251(f16vec2)     VectorShuffle 254 254 0 1
+             256:    42(bool)     GroupNonUniformAllEqual 44 255
+             257:     31(int)     Select 256 54 41
+             258:     38(ptr)     AccessChain 35(data) 250 37
+                                  Store 258 257
+             259:      6(int)     Load 8(invocation)
+             261:    252(ptr)     AccessChain 35(data) 67 243
+             262: 30(f16vec4)     Load 261
+             263:260(f16vec3)     VectorShuffle 262 262 0 1 2
+             264:    42(bool)     GroupNonUniformAllEqual 44 263
+             265:     31(int)     Select 264 54 41
+             266:     38(ptr)     AccessChain 35(data) 259 37
+                                  Store 266 265
+             267:      6(int)     Load 8(invocation)
+             268:    252(ptr)     AccessChain 35(data) 76 243
+             269: 30(f16vec4)     Load 268
+             270:    42(bool)     GroupNonUniformAllEqual 44 269
+             271:     31(int)     Select 270 54 41
+             272:     38(ptr)     AccessChain 35(data) 267 37
+                                  Store 272 271
+                                  Branch 179
+             179:               Label
+                                Branch 47
+              47:             Label
+                              Return
+                              FunctionEnd
diff --git a/Test/baseResults/spv.subgroupExtendedTypesVoteNeg.comp.out b/Test/baseResults/spv.subgroupExtendedTypesVoteNeg.comp.out
new file mode 100644
index 0000000..2197d41
--- /dev/null
+++ b/Test/baseResults/spv.subgroupExtendedTypesVoteNeg.comp.out
@@ -0,0 +1,33 @@
+spv.subgroupExtendedTypesVoteNeg.comp
+ERROR: 0:29: ' temp int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:30: ' temp 2-component vector of int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:31: ' temp 3-component vector of int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:32: 'layout( column_major std430) buffer 4-component vector of int8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:34: ' temp uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:35: ' temp 2-component vector of uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:36: ' temp 3-component vector of uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:37: 'layout( column_major std430) buffer 4-component vector of uint8_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int8
+ERROR: 0:39: ' temp int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:40: ' temp 2-component vector of int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:41: ' temp 3-component vector of int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:42: 'layout( column_major std430) buffer 4-component vector of int16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:44: ' temp uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:45: ' temp 2-component vector of uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:46: ' temp 3-component vector of uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:47: 'layout( column_major std430) buffer 4-component vector of uint16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int16
+ERROR: 0:51: ' temp int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:52: ' temp 2-component vector of int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:53: ' temp 3-component vector of int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:54: 'layout( column_major std430) buffer 4-component vector of int64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:56: ' temp uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:57: ' temp 2-component vector of uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:58: ' temp 3-component vector of uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:59: 'layout( column_major std430) buffer 4-component vector of uint64_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_int64
+ERROR: 0:61: ' temp float16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_float16
+ERROR: 0:62: ' temp 2-component vector of float16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_float16
+ERROR: 0:63: ' temp 3-component vector of float16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_float16
+ERROR: 0:64: 'layout( column_major std430) buffer 4-component vector of float16_t' : required extension not requested: GL_EXT_shader_subgroup_extended_types_float16
+ERROR: 28 compilation errors.  No code generated.
+
+
+SPIR-V is not generated for failed compile or link
diff --git a/Test/baseResults/spv.volatileAtomic.comp.out b/Test/baseResults/spv.volatileAtomic.comp.out
new file mode 100644
index 0000000..aa2d211
--- /dev/null
+++ b/Test/baseResults/spv.volatileAtomic.comp.out
@@ -0,0 +1,40 @@
+spv.volatileAtomic.comp
+// Module Version 10000
+// Generated by (magic number): 80007
+// Id's are bound by 18
+
+                              Capability Shader
+               1:             ExtInstImport  "GLSL.std.450"
+                              MemoryModel Logical GLSL450
+                              EntryPoint GLCompute 4  "main"
+                              ExecutionMode 4 LocalSize 1 1 1
+                              Source GLSL 450
+                              Name 4  "main"
+                              Name 8  "D"
+                              MemberName 8(D) 0  "d"
+                              Name 10  "d"
+                              Decorate 7 ArrayStride 4
+                              MemberDecorate 8(D) 0 Volatile
+                              MemberDecorate 8(D) 0 Coherent
+                              MemberDecorate 8(D) 0 Offset 0
+                              Decorate 8(D) BufferBlock
+                              Decorate 10(d) DescriptorSet 0
+                              Decorate 10(d) Binding 3
+               2:             TypeVoid
+               3:             TypeFunction 2
+               6:             TypeInt 32 0
+               7:             TypeRuntimeArray 6(int)
+            8(D):             TypeStruct 7
+               9:             TypePointer Uniform 8(D)
+           10(d):      9(ptr) Variable Uniform
+              11:             TypeInt 32 1
+              12:     11(int) Constant 0
+              13:             TypePointer Uniform 6(int)
+              15:      6(int) Constant 0
+              16:      6(int) Constant 1
+         4(main):           2 Function None 3
+               5:             Label
+              14:     13(ptr) AccessChain 10(d) 12 12
+              17:      6(int) AtomicExchange 14 16 15 15
+                              Return
+                              FunctionEnd
diff --git a/Test/glsl.450.subgroup.frag b/Test/glsl.450.subgroup.frag
index 61cfc8f..d0b9573 100644
--- a/Test/glsl.450.subgroup.frag
+++ b/Test/glsl.450.subgroup.frag
@@ -114,12 +114,14 @@
 
 #extension GL_KHR_shader_subgroup_ballot: enable
 void ballot_works(vec4 f4) {
+  int i;
   gl_SubgroupEqMask;
   gl_SubgroupGeMask;
   gl_SubgroupGtMask;
   gl_SubgroupLeMask;
   gl_SubgroupLtMask;
   subgroupBroadcast(f4, 0);
+  subgroupBroadcast(f4, i);
   subgroupBroadcastFirst(f4);
   uvec4 ballot = subgroupBallot(false);
   subgroupInverseBallot(uvec4(0x1));
@@ -192,7 +194,9 @@
 #extension GL_KHR_shader_subgroup_quad: enable
 void quad_works(vec4 f4)
 {
+  int i;
   subgroupQuadBroadcast(f4, 0);
+  subgroupQuadBroadcast(f4, i);
   subgroupQuadSwapHorizontal(f4);
   subgroupQuadSwapVertical(f4);
   subgroupQuadSwapDiagonal(f4);
diff --git a/Test/hlsl.format.rwtexture.frag b/Test/hlsl.format.rwtexture.frag
new file mode 100644
index 0000000..87ee7de
--- /dev/null
+++ b/Test/hlsl.format.rwtexture.frag
@@ -0,0 +1,63 @@
+SamplerState       g_sSamp : register(s0);
+
+[[spv::format_rgba32f]]    RWTexture1D <float4> g_tTex1df4 : register(t0);
+[[spv::format_rg32f]]      RWTexture1D <int4>   g_tTex1di4;
+[[spv::format_rgba8snorm]] RWTexture1D <uint4>  g_tTex1du4;
+
+[[spv::format_rgba8i]]       RWTexture2D <float4> g_tTex2df4;
+[[spv::format_r11fg11fb10f]] RWTexture2D <int4>   g_tTex2di4;
+[[spv::format_r8snorm]]      RWTexture2D <uint4>  g_tTex2du4;
+
+[[spv::format_rg8]] [[spv::nonwritable]]     RWTexture3D <float4> g_tTex3df4;
+[[spv::format_rgba16i]] [[spv::nonreadable]] RWTexture3D <int4>   g_tTex3di4;
+[[spv::format_r8i]] [[spv::nonwritable]] [[spv::nonreadable]] RWTexture3D <uint4>  g_tTex3du4;
+
+[[spv::format_rgba8ui]] RWTexture1DArray <float4> g_tTex1df4a;
+[[spv::format_rg32ui]]  RWTexture1DArray <int4>   g_tTex1di4a;
+[[spv::format_r16ui]]   RWTexture1DArray <uint4>  g_tTex1du4a;
+
+[[spv::format_rgb10a2ui]] RWTexture2DArray <float4> g_tTex2df4a;
+[[spv::format_r8ui]]      RWTexture2DArray <int4>   g_tTex2di4a;
+[[spv::format_rgba16f]]   RWTexture2DArray <uint4>  g_tTex2du4a;
+
+[[spv::format_rgba8         ]] RWTexture2DArray<int4> g_tTex01;
+[[spv::format_rg16f         ]] RWTexture2DArray<int4> g_tTex02;
+[[spv::format_r16f          ]] RWTexture2DArray<int4> g_tTex03;
+[[spv::format_rgb10a2       ]] RWTexture2DArray<int4> g_tTex04;
+[[spv::format_rg16          ]] RWTexture2DArray<int4> g_tTex05;
+[[spv::format_r32f          ]] RWTexture2DArray<int4> g_tTex06;
+[[spv::format_rgba16        ]] RWTexture2DArray<int4> g_tTex07;
+[[spv::format_r16           ]] RWTexture2DArray<int4> g_tTex08;
+[[spv::format_r8            ]] RWTexture2DArray<int4> g_tTex09;
+[[spv::format_rgba16snorm   ]] RWTexture2DArray<int4> g_tTex10;
+[[spv::format_rg16snorm     ]] RWTexture2DArray<int4> g_tTex11;
+[[spv::format_r16snorm      ]] RWTexture2DArray<int4> g_tTex12;
+[[spv::format_r8snorm       ]] RWTexture2DArray<int4> g_tTex13;
+[[spv::format_rgba32i       ]] RWTexture2DArray<int4> g_tTex14;
+[[spv::format_r32i          ]] RWTexture2DArray<int4> g_tTex15;
+[[spv::format_r32ui         ]] RWTexture2DArray<int4> g_tTex16;
+[[spv::format_rg16i         ]] RWTexture2DArray<int4> g_tTex17;
+[[spv::format_r16i          ]] RWTexture2DArray<int4> g_tTex18;
+[[spv::format_rg32i         ]] RWTexture2DArray<int4> g_tTex19;
+[[spv::format_rg8i          ]] RWTexture2DArray<int4> g_tTex20;
+[[spv::format_rg8ui         ]] RWTexture2DArray<int4> g_tTex21;
+[[spv::format_rgba32ui      ]] RWTexture2DArray<int4> g_tTex22;
+[[spv::format_rgba16ui      ]] RWTexture2DArray<int4> g_tTex23;
+[[spv::format_rg32ui        ]] RWTexture2DArray<int4> g_tTex24;
+[[spv::format_rg16ui        ]] RWTexture2DArray<int4> g_tTex25;
+
+struct PS_OUTPUT // value returned by main(): one color target plus depth
+{
+    float4 Color : SV_Target0;
+    float  Depth : SV_Depth;
+};
+
+PS_OUTPUT main() // entry point; its body references none of the RWTexture declarations above
+{
+   PS_OUTPUT psout;
+
+   psout.Color = 1.0; // scalar assigned to the float4 member
+   psout.Depth = 1.0;
+
+   return psout;
+}
diff --git a/Test/nonuniform.frag b/Test/nonuniform.frag
index 3f3dd67..e98aacc 100644
--- a/Test/nonuniform.frag
+++ b/Test/nonuniform.frag
@@ -22,12 +22,12 @@
     nonuniformEXT const int nu_ci = 2; // ERROR, const
 
     foo(nu_li, nu_li);
-
+    int table[5];
     int a;
     nu_li = nonuniformEXT(a) + nonuniformEXT(a * 2);
     nu_li = nonuniformEXT(a, a);       // ERROR, too many arguments
     nu_li = nonuniformEXT();           // ERROR, no arguments
+    nu_li = table[nonuniformEXT(3)];
 }
-
 layout(location=1) in struct S { float a; nonuniformEXT float b; } ins;  // ERROR, not on member
 layout(location=3) in inbName { float a; nonuniformEXT float b; } inb;   // ERROR, not on member
diff --git a/Test/runtests b/Test/runtests
index 8e31c06..23406dc 100755
--- a/Test/runtests
+++ b/Test/runtests
@@ -1,11 +1,16 @@
 #!/usr/bin/env bash
 
-TARGETDIR=localResults
+# Arguments (positional, all optional):
+#  1- TargetDirectory, where to write test results and intermediary files (default: localResults)
+#  2- Path to glslangValidator (default: ../build/install/bin/glslangValidator)
+#  3- Path to spirv-remap (default: ../build/install/bin/spirv-remap)
+
+TARGETDIR=${1:-localResults}
 BASEDIR=baseResults
-EXE=../build/install/bin/glslangValidator
-REMAPEXE=../build/install/bin/spirv-remap
+EXE=${2:-../build/install/bin/glslangValidator}
+REMAPEXE=${3:-../build/install/bin/spirv-remap}
 HASERROR=0
-mkdir -p localResults
+mkdir -p "$TARGETDIR"  # quoted: the caller-supplied directory may contain whitespace
 
 if [ -a localtestlist ]
   then
@@ -55,13 +60,13 @@
 # multi-threaded test
 #
 echo Comparing single thread to multithread for all tests in current directory...
-$EXE -i -C *.vert *.geom *.frag *.tesc *.tese *.comp > singleThread.out
-$EXE -i -C *.vert *.geom *.frag *.tesc *.tese *.comp -t > multiThread.out
-diff singleThread.out multiThread.out || HASERROR=1
+$EXE -i -C *.vert *.geom *.frag *.tesc *.tese *.comp > "$TARGETDIR/singleThread.out"  # quoted: an unquoted redirect target with spaces is an "ambiguous redirect"
+$EXE -i -C *.vert *.geom *.frag *.tesc *.tese *.comp -t > "$TARGETDIR/multiThread.out"
+diff "$TARGETDIR/singleThread.out" "$TARGETDIR/multiThread.out" || HASERROR=1
 if [ $HASERROR -eq 0 ]
 then
-    rm singleThread.out
-    rm multiThread.out
+    rm "$TARGETDIR/singleThread.out"
+    rm "$TARGETDIR/multiThread.out"
 fi
 fi
 
 #
diff --git a/Test/spv.bufferhandleUvec2.frag b/Test/spv.bufferhandleUvec2.frag
new file mode 100644
index 0000000..bc86822
--- /dev/null
+++ b/Test/spv.bufferhandleUvec2.frag
@@ -0,0 +1,32 @@
+#version 450
+
+#extension GL_EXT_buffer_reference_uvec2 : enable
+
+layout(buffer_reference, std430) buffer blockType {
+    layout(offset = 0)  int a;
+    layout(offset = 4)  int b;
+    layout(offset = 8)  int c;
+    layout(offset = 12) int d;
+    layout(offset = 16) int e;
+};
+
+layout(std430) buffer t2 {
+    blockType f;
+    blockType g;
+} t;
+
+flat in uvec2 h, i;
+
+void main() {
+
+    blockType b1[2] = blockType[2](blockType(h), blockType(i)); // array of references, each constructed from a uvec2 input
+    b1[0].a = b1[1].b;
+    blockType b2 = blockType(h); // uvec2 -> buffer reference
+    blockType b3 = blockType(i);
+    b2.a = b3.b;
+    uvec2 j = uvec2(b2); // buffer reference -> uvec2
+    uint carry;
+    j.x = uaddCarry(j.x, 256, carry); // add 256 to j.x; carry receives the overflow out of 32 bits
+    j.y += carry; // propagate the carry into j.y (presumably the high word of the address - confirm against the extension spec)
+    b2 = blockType(j); // uvec2 -> buffer reference again, now from the adjusted value
+}
diff --git a/Test/spv.subgroupExtendedTypesArithmetic.comp b/Test/spv.subgroupExtendedTypesArithmetic.comp
new file mode 100644
index 0000000..f58268f
--- /dev/null
+++ b/Test/spv.subgroupExtendedTypesArithmetic.comp
@@ -0,0 +1,715 @@
+#version 450
+
+#extension GL_KHR_shader_subgroup_arithmetic: enable
+#extension GL_EXT_shader_subgroup_extended_types_int8: enable
+#extension GL_EXT_shader_subgroup_extended_types_int16: enable
+#extension GL_EXT_shader_subgroup_extended_types_int64: enable
+#extension GL_EXT_shader_subgroup_extended_types_float16: enable
+
+layout (local_size_x = 8) in;
+
+layout(binding = 0) buffer Buffers
+{ // one 4-component vector per extended type: 8/16/64-bit ints (signed and unsigned) plus float16
+    i8vec4 i8;
+    u8vec4 u8;
+    i16vec4 i16;
+    u16vec4 u16;
+    i64vec4 i64;
+    u64vec4 u64;
+    f16vec4 f16;
+} data[4]; // four instances: main() reads data[0]..data[3] and writes data[invocation], with invocation computed modulo 4
+
+void main()
+{
+    uint invocation = (gl_SubgroupInvocationID + gl_SubgroupSize) % 4;
+
+    data[invocation].i8.x   = subgroupAdd(data[0].i8.x);
+    data[invocation].i8.xy  = subgroupAdd(data[1].i8.xy);
+    data[invocation].i8.xyz = subgroupAdd(data[2].i8.xyz);
+    data[invocation].i8     = subgroupAdd(data[3].i8);
+
+    data[invocation].i8.x   = subgroupMul(data[0].i8.x);
+    data[invocation].i8.xy  = subgroupMul(data[1].i8.xy);
+    data[invocation].i8.xyz = subgroupMul(data[2].i8.xyz);
+    data[invocation].i8     = subgroupMul(data[3].i8);
+
+    data[invocation].i8.x   = subgroupMin(data[0].i8.x);
+    data[invocation].i8.xy  = subgroupMin(data[1].i8.xy);
+    data[invocation].i8.xyz = subgroupMin(data[2].i8.xyz);
+    data[invocation].i8     = subgroupMin(data[3].i8);
+
+    data[invocation].i8.x   = subgroupMax(data[0].i8.x);
+    data[invocation].i8.xy  = subgroupMax(data[1].i8.xy);
+    data[invocation].i8.xyz = subgroupMax(data[2].i8.xyz);
+    data[invocation].i8     = subgroupMax(data[3].i8);
+
+    data[invocation].i8.x   = subgroupAnd(data[0].i8.x);
+    data[invocation].i8.xy  = subgroupAnd(data[1].i8.xy);
+    data[invocation].i8.xyz = subgroupAnd(data[2].i8.xyz);
+    data[invocation].i8     = subgroupAnd(data[3].i8);
+
+    data[invocation].i8.x   = subgroupOr(data[0].i8.x);
+    data[invocation].i8.xy  = subgroupOr(data[1].i8.xy);
+    data[invocation].i8.xyz = subgroupOr(data[2].i8.xyz);
+    data[invocation].i8     = subgroupOr(data[3].i8);
+
+    data[invocation].i8.x   = subgroupXor(data[0].i8.x);
+    data[invocation].i8.xy  = subgroupXor(data[1].i8.xy);
+    data[invocation].i8.xyz = subgroupXor(data[2].i8.xyz);
+    data[invocation].i8     = subgroupXor(data[3].i8);
+
+    data[invocation].i8.x   = subgroupInclusiveAdd(data[0].i8.x);
+    data[invocation].i8.xy  = subgroupInclusiveAdd(data[1].i8.xy);
+    data[invocation].i8.xyz = subgroupInclusiveAdd(data[2].i8.xyz);
+    data[invocation].i8     = subgroupInclusiveAdd(data[3].i8);
+
+    data[invocation].i8.x   = subgroupInclusiveMul(data[0].i8.x);
+    data[invocation].i8.xy  = subgroupInclusiveMul(data[1].i8.xy);
+    data[invocation].i8.xyz = subgroupInclusiveMul(data[2].i8.xyz);
+    data[invocation].i8     = subgroupInclusiveMul(data[3].i8);
+
+    data[invocation].i8.x   = subgroupInclusiveMin(data[0].i8.x);
+    data[invocation].i8.xy  = subgroupInclusiveMin(data[1].i8.xy);
+    data[invocation].i8.xyz = subgroupInclusiveMin(data[2].i8.xyz);
+    data[invocation].i8     = subgroupInclusiveMin(data[3].i8);
+
+    data[invocation].i8.x   = subgroupInclusiveMax(data[0].i8.x);
+    data[invocation].i8.xy  = subgroupInclusiveMax(data[1].i8.xy);
+    data[invocation].i8.xyz = subgroupInclusiveMax(data[2].i8.xyz);
+    data[invocation].i8     = subgroupInclusiveMax(data[3].i8);
+
+    data[invocation].i8.x   = subgroupInclusiveAnd(data[0].i8.x);
+    data[invocation].i8.xy  = subgroupInclusiveAnd(data[1].i8.xy);
+    data[invocation].i8.xyz = subgroupInclusiveAnd(data[2].i8.xyz);
+    data[invocation].i8     = subgroupInclusiveAnd(data[3].i8);
+
+    data[invocation].i8.x   = subgroupInclusiveOr(data[0].i8.x);
+    data[invocation].i8.xy  = subgroupInclusiveOr(data[1].i8.xy);
+    data[invocation].i8.xyz = subgroupInclusiveOr(data[2].i8.xyz);
+    data[invocation].i8     = subgroupInclusiveOr(data[3].i8);
+
+    data[invocation].i8.x   = subgroupInclusiveXor(data[0].i8.x);
+    data[invocation].i8.xy  = subgroupInclusiveXor(data[1].i8.xy);
+    data[invocation].i8.xyz = subgroupInclusiveXor(data[2].i8.xyz);
+    data[invocation].i8     = subgroupInclusiveXor(data[3].i8);
+
+    data[invocation].i8.x   = subgroupExclusiveAdd(data[0].i8.x);
+    data[invocation].i8.xy  = subgroupExclusiveAdd(data[1].i8.xy);
+    data[invocation].i8.xyz = subgroupExclusiveAdd(data[2].i8.xyz);
+    data[invocation].i8     = subgroupExclusiveAdd(data[3].i8);
+
+    data[invocation].i8.x   = subgroupExclusiveMul(data[0].i8.x);
+    data[invocation].i8.xy  = subgroupExclusiveMul(data[1].i8.xy);
+    data[invocation].i8.xyz = subgroupExclusiveMul(data[2].i8.xyz);
+    data[invocation].i8     = subgroupExclusiveMul(data[3].i8);
+
+    data[invocation].i8.x   = subgroupExclusiveMin(data[0].i8.x);
+    data[invocation].i8.xy  = subgroupExclusiveMin(data[1].i8.xy);
+    data[invocation].i8.xyz = subgroupExclusiveMin(data[2].i8.xyz);
+    data[invocation].i8     = subgroupExclusiveMin(data[3].i8);
+
+    data[invocation].i8.x   = subgroupExclusiveMax(data[0].i8.x);
+    data[invocation].i8.xy  = subgroupExclusiveMax(data[1].i8.xy);
+    data[invocation].i8.xyz = subgroupExclusiveMax(data[2].i8.xyz);
+    data[invocation].i8     = subgroupExclusiveMax(data[3].i8);
+
+    data[invocation].i8.x   = subgroupExclusiveAnd(data[0].i8.x);
+    data[invocation].i8.xy  = subgroupExclusiveAnd(data[1].i8.xy);
+    data[invocation].i8.xyz = subgroupExclusiveAnd(data[2].i8.xyz);
+    data[invocation].i8     = subgroupExclusiveAnd(data[3].i8);
+
+    data[invocation].i8.x   = subgroupExclusiveOr(data[0].i8.x);
+    data[invocation].i8.xy  = subgroupExclusiveOr(data[1].i8.xy);
+    data[invocation].i8.xyz = subgroupExclusiveOr(data[2].i8.xyz);
+    data[invocation].i8     = subgroupExclusiveOr(data[3].i8);
+
+    data[invocation].i8.x   = subgroupExclusiveXor(data[0].i8.x);
+    data[invocation].i8.xy  = subgroupExclusiveXor(data[1].i8.xy);
+    data[invocation].i8.xyz = subgroupExclusiveXor(data[2].i8.xyz);
+    data[invocation].i8     = subgroupExclusiveXor(data[3].i8);
+
+    data[invocation].u8.x   = subgroupAdd(data[0].u8.x);
+    data[invocation].u8.xy  = subgroupAdd(data[1].u8.xy);
+    data[invocation].u8.xyz = subgroupAdd(data[2].u8.xyz);
+    data[invocation].u8     = subgroupAdd(data[3].u8);
+
+    data[invocation].u8.x   = subgroupMul(data[0].u8.x);
+    data[invocation].u8.xy  = subgroupMul(data[1].u8.xy);
+    data[invocation].u8.xyz = subgroupMul(data[2].u8.xyz);
+    data[invocation].u8     = subgroupMul(data[3].u8);
+
+    data[invocation].u8.x   = subgroupMin(data[0].u8.x);
+    data[invocation].u8.xy  = subgroupMin(data[1].u8.xy);
+    data[invocation].u8.xyz = subgroupMin(data[2].u8.xyz);
+    data[invocation].u8     = subgroupMin(data[3].u8);
+
+    data[invocation].u8.x   = subgroupMax(data[0].u8.x);
+    data[invocation].u8.xy  = subgroupMax(data[1].u8.xy);
+    data[invocation].u8.xyz = subgroupMax(data[2].u8.xyz);
+    data[invocation].u8     = subgroupMax(data[3].u8);
+
+    data[invocation].u8.x   = subgroupAnd(data[0].u8.x);
+    data[invocation].u8.xy  = subgroupAnd(data[1].u8.xy);
+    data[invocation].u8.xyz = subgroupAnd(data[2].u8.xyz);
+    data[invocation].u8     = subgroupAnd(data[3].u8);
+
+    data[invocation].u8.x   = subgroupOr(data[0].u8.x);
+    data[invocation].u8.xy  = subgroupOr(data[1].u8.xy);
+    data[invocation].u8.xyz = subgroupOr(data[2].u8.xyz);
+    data[invocation].u8     = subgroupOr(data[3].u8);
+
+    data[invocation].u8.x   = subgroupXor(data[0].u8.x);
+    data[invocation].u8.xy  = subgroupXor(data[1].u8.xy);
+    data[invocation].u8.xyz = subgroupXor(data[2].u8.xyz);
+    data[invocation].u8     = subgroupXor(data[3].u8);
+
+    data[invocation].u8.x   = subgroupInclusiveAdd(data[0].u8.x);
+    data[invocation].u8.xy  = subgroupInclusiveAdd(data[1].u8.xy);
+    data[invocation].u8.xyz = subgroupInclusiveAdd(data[2].u8.xyz);
+    data[invocation].u8     = subgroupInclusiveAdd(data[3].u8);
+
+    data[invocation].u8.x   = subgroupInclusiveMul(data[0].u8.x);
+    data[invocation].u8.xy  = subgroupInclusiveMul(data[1].u8.xy);
+    data[invocation].u8.xyz = subgroupInclusiveMul(data[2].u8.xyz);
+    data[invocation].u8     = subgroupInclusiveMul(data[3].u8);
+
+    data[invocation].u8.x   = subgroupInclusiveMin(data[0].u8.x);
+    data[invocation].u8.xy  = subgroupInclusiveMin(data[1].u8.xy);
+    data[invocation].u8.xyz = subgroupInclusiveMin(data[2].u8.xyz);
+    data[invocation].u8     = subgroupInclusiveMin(data[3].u8);
+
+    data[invocation].u8.x   = subgroupInclusiveMax(data[0].u8.x);
+    data[invocation].u8.xy  = subgroupInclusiveMax(data[1].u8.xy);
+    data[invocation].u8.xyz = subgroupInclusiveMax(data[2].u8.xyz);
+    data[invocation].u8     = subgroupInclusiveMax(data[3].u8);
+
+    data[invocation].u8.x   = subgroupInclusiveAnd(data[0].u8.x);
+    data[invocation].u8.xy  = subgroupInclusiveAnd(data[1].u8.xy);
+    data[invocation].u8.xyz = subgroupInclusiveAnd(data[2].u8.xyz);
+    data[invocation].u8     = subgroupInclusiveAnd(data[3].u8);
+
+    data[invocation].u8.x   = subgroupInclusiveOr(data[0].u8.x);
+    data[invocation].u8.xy  = subgroupInclusiveOr(data[1].u8.xy);
+    data[invocation].u8.xyz = subgroupInclusiveOr(data[2].u8.xyz);
+    data[invocation].u8     = subgroupInclusiveOr(data[3].u8);
+
+    data[invocation].u8.x   = subgroupInclusiveXor(data[0].u8.x);
+    data[invocation].u8.xy  = subgroupInclusiveXor(data[1].u8.xy);
+    data[invocation].u8.xyz = subgroupInclusiveXor(data[2].u8.xyz);
+    data[invocation].u8     = subgroupInclusiveXor(data[3].u8);
+
+    data[invocation].u8.x   = subgroupExclusiveAdd(data[0].u8.x);
+    data[invocation].u8.xy  = subgroupExclusiveAdd(data[1].u8.xy);
+    data[invocation].u8.xyz = subgroupExclusiveAdd(data[2].u8.xyz);
+    data[invocation].u8     = subgroupExclusiveAdd(data[3].u8);
+
+    data[invocation].u8.x   = subgroupExclusiveMul(data[0].u8.x);
+    data[invocation].u8.xy  = subgroupExclusiveMul(data[1].u8.xy);
+    data[invocation].u8.xyz = subgroupExclusiveMul(data[2].u8.xyz);
+    data[invocation].u8     = subgroupExclusiveMul(data[3].u8);
+
+    data[invocation].u8.x   = subgroupExclusiveMin(data[0].u8.x);
+    data[invocation].u8.xy  = subgroupExclusiveMin(data[1].u8.xy);
+    data[invocation].u8.xyz = subgroupExclusiveMin(data[2].u8.xyz);
+    data[invocation].u8     = subgroupExclusiveMin(data[3].u8);
+
+    data[invocation].u8.x   = subgroupExclusiveMax(data[0].u8.x);
+    data[invocation].u8.xy  = subgroupExclusiveMax(data[1].u8.xy);
+    data[invocation].u8.xyz = subgroupExclusiveMax(data[2].u8.xyz);
+    data[invocation].u8     = subgroupExclusiveMax(data[3].u8);
+
+    data[invocation].u8.x   = subgroupExclusiveAnd(data[0].u8.x);
+    data[invocation].u8.xy  = subgroupExclusiveAnd(data[1].u8.xy);
+    data[invocation].u8.xyz = subgroupExclusiveAnd(data[2].u8.xyz);
+    data[invocation].u8     = subgroupExclusiveAnd(data[3].u8);
+
+    data[invocation].u8.x   = subgroupExclusiveOr(data[0].u8.x);
+    data[invocation].u8.xy  = subgroupExclusiveOr(data[1].u8.xy);
+    data[invocation].u8.xyz = subgroupExclusiveOr(data[2].u8.xyz);
+    data[invocation].u8     = subgroupExclusiveOr(data[3].u8);
+
+    data[invocation].u8.x   = subgroupExclusiveXor(data[0].u8.x);
+    data[invocation].u8.xy  = subgroupExclusiveXor(data[1].u8.xy);
+    data[invocation].u8.xyz = subgroupExclusiveXor(data[2].u8.xyz);
+    data[invocation].u8     = subgroupExclusiveXor(data[3].u8);
+
+    data[invocation].i16.x   = subgroupAdd(data[0].i16.x);
+    data[invocation].i16.xy  = subgroupAdd(data[1].i16.xy);
+    data[invocation].i16.xyz = subgroupAdd(data[2].i16.xyz);
+    data[invocation].i16     = subgroupAdd(data[3].i16);
+
+    data[invocation].i16.x   = subgroupMul(data[0].i16.x);
+    data[invocation].i16.xy  = subgroupMul(data[1].i16.xy);
+    data[invocation].i16.xyz = subgroupMul(data[2].i16.xyz);
+    data[invocation].i16     = subgroupMul(data[3].i16);
+
+    data[invocation].i16.x   = subgroupMin(data[0].i16.x);
+    data[invocation].i16.xy  = subgroupMin(data[1].i16.xy);
+    data[invocation].i16.xyz = subgroupMin(data[2].i16.xyz);
+    data[invocation].i16     = subgroupMin(data[3].i16);
+
+    data[invocation].i16.x   = subgroupMax(data[0].i16.x);
+    data[invocation].i16.xy  = subgroupMax(data[1].i16.xy);
+    data[invocation].i16.xyz = subgroupMax(data[2].i16.xyz);
+    data[invocation].i16     = subgroupMax(data[3].i16);
+
+    data[invocation].i16.x   = subgroupAnd(data[0].i16.x);
+    data[invocation].i16.xy  = subgroupAnd(data[1].i16.xy);
+    data[invocation].i16.xyz = subgroupAnd(data[2].i16.xyz);
+    data[invocation].i16     = subgroupAnd(data[3].i16);
+
+    data[invocation].i16.x   = subgroupOr(data[0].i16.x);
+    data[invocation].i16.xy  = subgroupOr(data[1].i16.xy);
+    data[invocation].i16.xyz = subgroupOr(data[2].i16.xyz);
+    data[invocation].i16     = subgroupOr(data[3].i16);
+
+    data[invocation].i16.x   = subgroupXor(data[0].i16.x);
+    data[invocation].i16.xy  = subgroupXor(data[1].i16.xy);
+    data[invocation].i16.xyz = subgroupXor(data[2].i16.xyz);
+    data[invocation].i16     = subgroupXor(data[3].i16);
+
+    data[invocation].i16.x   = subgroupInclusiveAdd(data[0].i16.x);
+    data[invocation].i16.xy  = subgroupInclusiveAdd(data[1].i16.xy);
+    data[invocation].i16.xyz = subgroupInclusiveAdd(data[2].i16.xyz);
+    data[invocation].i16     = subgroupInclusiveAdd(data[3].i16);
+
+    data[invocation].i16.x   = subgroupInclusiveMul(data[0].i16.x);
+    data[invocation].i16.xy  = subgroupInclusiveMul(data[1].i16.xy);
+    data[invocation].i16.xyz = subgroupInclusiveMul(data[2].i16.xyz);
+    data[invocation].i16     = subgroupInclusiveMul(data[3].i16);
+
+    data[invocation].i16.x   = subgroupInclusiveMin(data[0].i16.x);
+    data[invocation].i16.xy  = subgroupInclusiveMin(data[1].i16.xy);
+    data[invocation].i16.xyz = subgroupInclusiveMin(data[2].i16.xyz);
+    data[invocation].i16     = subgroupInclusiveMin(data[3].i16);
+
+    data[invocation].i16.x   = subgroupInclusiveMax(data[0].i16.x);
+    data[invocation].i16.xy  = subgroupInclusiveMax(data[1].i16.xy);
+    data[invocation].i16.xyz = subgroupInclusiveMax(data[2].i16.xyz);
+    data[invocation].i16     = subgroupInclusiveMax(data[3].i16);
+
+    data[invocation].i16.x   = subgroupInclusiveAnd(data[0].i16.x);
+    data[invocation].i16.xy  = subgroupInclusiveAnd(data[1].i16.xy);
+    data[invocation].i16.xyz = subgroupInclusiveAnd(data[2].i16.xyz);
+    data[invocation].i16     = subgroupInclusiveAnd(data[3].i16);
+
+    data[invocation].i16.x   = subgroupInclusiveOr(data[0].i16.x);
+    data[invocation].i16.xy  = subgroupInclusiveOr(data[1].i16.xy);
+    data[invocation].i16.xyz = subgroupInclusiveOr(data[2].i16.xyz);
+    data[invocation].i16     = subgroupInclusiveOr(data[3].i16);
+
+    data[invocation].i16.x   = subgroupInclusiveXor(data[0].i16.x);
+    data[invocation].i16.xy  = subgroupInclusiveXor(data[1].i16.xy);
+    data[invocation].i16.xyz = subgroupInclusiveXor(data[2].i16.xyz);
+    data[invocation].i16     = subgroupInclusiveXor(data[3].i16);
+
+    data[invocation].i16.x   = subgroupExclusiveAdd(data[0].i16.x);
+    data[invocation].i16.xy  = subgroupExclusiveAdd(data[1].i16.xy);
+    data[invocation].i16.xyz = subgroupExclusiveAdd(data[2].i16.xyz);
+    data[invocation].i16     = subgroupExclusiveAdd(data[3].i16);
+
+    data[invocation].i16.x   = subgroupExclusiveMul(data[0].i16.x);
+    data[invocation].i16.xy  = subgroupExclusiveMul(data[1].i16.xy);
+    data[invocation].i16.xyz = subgroupExclusiveMul(data[2].i16.xyz);
+    data[invocation].i16     = subgroupExclusiveMul(data[3].i16);
+
+    data[invocation].i16.x   = subgroupExclusiveMin(data[0].i16.x);
+    data[invocation].i16.xy  = subgroupExclusiveMin(data[1].i16.xy);
+    data[invocation].i16.xyz = subgroupExclusiveMin(data[2].i16.xyz);
+    data[invocation].i16     = subgroupExclusiveMin(data[3].i16);
+
+    data[invocation].i16.x   = subgroupExclusiveMax(data[0].i16.x);
+    data[invocation].i16.xy  = subgroupExclusiveMax(data[1].i16.xy);
+    data[invocation].i16.xyz = subgroupExclusiveMax(data[2].i16.xyz);
+    data[invocation].i16     = subgroupExclusiveMax(data[3].i16);
+
+    data[invocation].i16.x   = subgroupExclusiveAnd(data[0].i16.x);
+    data[invocation].i16.xy  = subgroupExclusiveAnd(data[1].i16.xy);
+    data[invocation].i16.xyz = subgroupExclusiveAnd(data[2].i16.xyz);
+    data[invocation].i16     = subgroupExclusiveAnd(data[3].i16);
+
+    data[invocation].i16.x   = subgroupExclusiveOr(data[0].i16.x);
+    data[invocation].i16.xy  = subgroupExclusiveOr(data[1].i16.xy);
+    data[invocation].i16.xyz = subgroupExclusiveOr(data[2].i16.xyz);
+    data[invocation].i16     = subgroupExclusiveOr(data[3].i16);
+
+    data[invocation].i16.x   = subgroupExclusiveXor(data[0].i16.x);
+    data[invocation].i16.xy  = subgroupExclusiveXor(data[1].i16.xy);
+    data[invocation].i16.xyz = subgroupExclusiveXor(data[2].i16.xyz);
+    data[invocation].i16     = subgroupExclusiveXor(data[3].i16);
+
+    data[invocation].u16.x   = subgroupAdd(data[0].u16.x);
+    data[invocation].u16.xy  = subgroupAdd(data[1].u16.xy);
+    data[invocation].u16.xyz = subgroupAdd(data[2].u16.xyz);
+    data[invocation].u16     = subgroupAdd(data[3].u16);
+
+    data[invocation].u16.x   = subgroupMul(data[0].u16.x);
+    data[invocation].u16.xy  = subgroupMul(data[1].u16.xy);
+    data[invocation].u16.xyz = subgroupMul(data[2].u16.xyz);
+    data[invocation].u16     = subgroupMul(data[3].u16);
+
+    data[invocation].u16.x   = subgroupMin(data[0].u16.x);
+    data[invocation].u16.xy  = subgroupMin(data[1].u16.xy);
+    data[invocation].u16.xyz = subgroupMin(data[2].u16.xyz);
+    data[invocation].u16     = subgroupMin(data[3].u16);
+
+    data[invocation].u16.x   = subgroupMax(data[0].u16.x);
+    data[invocation].u16.xy  = subgroupMax(data[1].u16.xy);
+    data[invocation].u16.xyz = subgroupMax(data[2].u16.xyz);
+    data[invocation].u16     = subgroupMax(data[3].u16);
+
+    data[invocation].u16.x   = subgroupAnd(data[0].u16.x);
+    data[invocation].u16.xy  = subgroupAnd(data[1].u16.xy);
+    data[invocation].u16.xyz = subgroupAnd(data[2].u16.xyz);
+    data[invocation].u16     = subgroupAnd(data[3].u16);
+
+    data[invocation].u16.x   = subgroupOr(data[0].u16.x);
+    data[invocation].u16.xy  = subgroupOr(data[1].u16.xy);
+    data[invocation].u16.xyz = subgroupOr(data[2].u16.xyz);
+    data[invocation].u16     = subgroupOr(data[3].u16);
+
+    data[invocation].u16.x   = subgroupXor(data[0].u16.x);
+    data[invocation].u16.xy  = subgroupXor(data[1].u16.xy);
+    data[invocation].u16.xyz = subgroupXor(data[2].u16.xyz);
+    data[invocation].u16     = subgroupXor(data[3].u16);
+
+    data[invocation].u16.x   = subgroupInclusiveAdd(data[0].u16.x);
+    data[invocation].u16.xy  = subgroupInclusiveAdd(data[1].u16.xy);
+    data[invocation].u16.xyz = subgroupInclusiveAdd(data[2].u16.xyz);
+    data[invocation].u16     = subgroupInclusiveAdd(data[3].u16);
+
+    data[invocation].u16.x   = subgroupInclusiveMul(data[0].u16.x);
+    data[invocation].u16.xy  = subgroupInclusiveMul(data[1].u16.xy);
+    data[invocation].u16.xyz = subgroupInclusiveMul(data[2].u16.xyz);
+    data[invocation].u16     = subgroupInclusiveMul(data[3].u16);
+
+    data[invocation].u16.x   = subgroupInclusiveMin(data[0].u16.x);
+    data[invocation].u16.xy  = subgroupInclusiveMin(data[1].u16.xy);
+    data[invocation].u16.xyz = subgroupInclusiveMin(data[2].u16.xyz);
+    data[invocation].u16     = subgroupInclusiveMin(data[3].u16);
+
+    data[invocation].u16.x   = subgroupInclusiveMax(data[0].u16.x);
+    data[invocation].u16.xy  = subgroupInclusiveMax(data[1].u16.xy);
+    data[invocation].u16.xyz = subgroupInclusiveMax(data[2].u16.xyz);
+    data[invocation].u16     = subgroupInclusiveMax(data[3].u16);
+
+    data[invocation].u16.x   = subgroupInclusiveAnd(data[0].u16.x);
+    data[invocation].u16.xy  = subgroupInclusiveAnd(data[1].u16.xy);
+    data[invocation].u16.xyz = subgroupInclusiveAnd(data[2].u16.xyz);
+    data[invocation].u16     = subgroupInclusiveAnd(data[3].u16);
+
+    data[invocation].u16.x   = subgroupInclusiveOr(data[0].u16.x);
+    data[invocation].u16.xy  = subgroupInclusiveOr(data[1].u16.xy);
+    data[invocation].u16.xyz = subgroupInclusiveOr(data[2].u16.xyz);
+    data[invocation].u16     = subgroupInclusiveOr(data[3].u16);
+
+    data[invocation].u16.x   = subgroupInclusiveXor(data[0].u16.x);
+    data[invocation].u16.xy  = subgroupInclusiveXor(data[1].u16.xy);
+    data[invocation].u16.xyz = subgroupInclusiveXor(data[2].u16.xyz);
+    data[invocation].u16     = subgroupInclusiveXor(data[3].u16);
+
+    data[invocation].u16.x   = subgroupExclusiveAdd(data[0].u16.x);
+    data[invocation].u16.xy  = subgroupExclusiveAdd(data[1].u16.xy);
+    data[invocation].u16.xyz = subgroupExclusiveAdd(data[2].u16.xyz);
+    data[invocation].u16     = subgroupExclusiveAdd(data[3].u16);
+
+    data[invocation].u16.x   = subgroupExclusiveMul(data[0].u16.x);
+    data[invocation].u16.xy  = subgroupExclusiveMul(data[1].u16.xy);
+    data[invocation].u16.xyz = subgroupExclusiveMul(data[2].u16.xyz);
+    data[invocation].u16     = subgroupExclusiveMul(data[3].u16);
+
+    data[invocation].u16.x   = subgroupExclusiveMin(data[0].u16.x);
+    data[invocation].u16.xy  = subgroupExclusiveMin(data[1].u16.xy);
+    data[invocation].u16.xyz = subgroupExclusiveMin(data[2].u16.xyz);
+    data[invocation].u16     = subgroupExclusiveMin(data[3].u16);
+
+    data[invocation].u16.x   = subgroupExclusiveMax(data[0].u16.x);
+    data[invocation].u16.xy  = subgroupExclusiveMax(data[1].u16.xy);
+    data[invocation].u16.xyz = subgroupExclusiveMax(data[2].u16.xyz);
+    data[invocation].u16     = subgroupExclusiveMax(data[3].u16);
+
+    data[invocation].u16.x   = subgroupExclusiveAnd(data[0].u16.x);
+    data[invocation].u16.xy  = subgroupExclusiveAnd(data[1].u16.xy);
+    data[invocation].u16.xyz = subgroupExclusiveAnd(data[2].u16.xyz);
+    data[invocation].u16     = subgroupExclusiveAnd(data[3].u16);
+
+    data[invocation].u16.x   = subgroupExclusiveOr(data[0].u16.x);
+    data[invocation].u16.xy  = subgroupExclusiveOr(data[1].u16.xy);
+    data[invocation].u16.xyz = subgroupExclusiveOr(data[2].u16.xyz);
+    data[invocation].u16     = subgroupExclusiveOr(data[3].u16);
+
+    data[invocation].u16.x   = subgroupExclusiveXor(data[0].u16.x);
+    data[invocation].u16.xy  = subgroupExclusiveXor(data[1].u16.xy);
+    data[invocation].u16.xyz = subgroupExclusiveXor(data[2].u16.xyz);
+    data[invocation].u16     = subgroupExclusiveXor(data[3].u16);
+
+    data[invocation].i64.x   = subgroupAdd(data[0].i64.x);
+    data[invocation].i64.xy  = subgroupAdd(data[1].i64.xy);
+    data[invocation].i64.xyz = subgroupAdd(data[2].i64.xyz);
+    data[invocation].i64     = subgroupAdd(data[3].i64);
+
+    data[invocation].i64.x   = subgroupMul(data[0].i64.x);
+    data[invocation].i64.xy  = subgroupMul(data[1].i64.xy);
+    data[invocation].i64.xyz = subgroupMul(data[2].i64.xyz);
+    data[invocation].i64     = subgroupMul(data[3].i64);
+
+    data[invocation].i64.x   = subgroupMin(data[0].i64.x);
+    data[invocation].i64.xy  = subgroupMin(data[1].i64.xy);
+    data[invocation].i64.xyz = subgroupMin(data[2].i64.xyz);
+    data[invocation].i64     = subgroupMin(data[3].i64);
+
+    data[invocation].i64.x   = subgroupMax(data[0].i64.x);
+    data[invocation].i64.xy  = subgroupMax(data[1].i64.xy);
+    data[invocation].i64.xyz = subgroupMax(data[2].i64.xyz);
+    data[invocation].i64     = subgroupMax(data[3].i64);
+
+    data[invocation].i64.x   = subgroupAnd(data[0].i64.x);
+    data[invocation].i64.xy  = subgroupAnd(data[1].i64.xy);
+    data[invocation].i64.xyz = subgroupAnd(data[2].i64.xyz);
+    data[invocation].i64     = subgroupAnd(data[3].i64);
+
+    data[invocation].i64.x   = subgroupOr(data[0].i64.x);
+    data[invocation].i64.xy  = subgroupOr(data[1].i64.xy);
+    data[invocation].i64.xyz = subgroupOr(data[2].i64.xyz);
+    data[invocation].i64     = subgroupOr(data[3].i64);
+
+    data[invocation].i64.x   = subgroupXor(data[0].i64.x);
+    data[invocation].i64.xy  = subgroupXor(data[1].i64.xy);
+    data[invocation].i64.xyz = subgroupXor(data[2].i64.xyz);
+    data[invocation].i64     = subgroupXor(data[3].i64);
+
+    data[invocation].i64.x   = subgroupInclusiveAdd(data[0].i64.x);
+    data[invocation].i64.xy  = subgroupInclusiveAdd(data[1].i64.xy);
+    data[invocation].i64.xyz = subgroupInclusiveAdd(data[2].i64.xyz);
+    data[invocation].i64     = subgroupInclusiveAdd(data[3].i64);
+
+    data[invocation].i64.x   = subgroupInclusiveMul(data[0].i64.x);
+    data[invocation].i64.xy  = subgroupInclusiveMul(data[1].i64.xy);
+    data[invocation].i64.xyz = subgroupInclusiveMul(data[2].i64.xyz);
+    data[invocation].i64     = subgroupInclusiveMul(data[3].i64);
+
+    data[invocation].i64.x   = subgroupInclusiveMin(data[0].i64.x);
+    data[invocation].i64.xy  = subgroupInclusiveMin(data[1].i64.xy);
+    data[invocation].i64.xyz = subgroupInclusiveMin(data[2].i64.xyz);
+    data[invocation].i64     = subgroupInclusiveMin(data[3].i64);
+
+    data[invocation].i64.x   = subgroupInclusiveMax(data[0].i64.x);
+    data[invocation].i64.xy  = subgroupInclusiveMax(data[1].i64.xy);
+    data[invocation].i64.xyz = subgroupInclusiveMax(data[2].i64.xyz);
+    data[invocation].i64     = subgroupInclusiveMax(data[3].i64);
+
+    data[invocation].i64.x   = subgroupInclusiveAnd(data[0].i64.x);
+    data[invocation].i64.xy  = subgroupInclusiveAnd(data[1].i64.xy);
+    data[invocation].i64.xyz = subgroupInclusiveAnd(data[2].i64.xyz);
+    data[invocation].i64     = subgroupInclusiveAnd(data[3].i64);
+
+    data[invocation].i64.x   = subgroupInclusiveOr(data[0].i64.x);
+    data[invocation].i64.xy  = subgroupInclusiveOr(data[1].i64.xy);
+    data[invocation].i64.xyz = subgroupInclusiveOr(data[2].i64.xyz);
+    data[invocation].i64     = subgroupInclusiveOr(data[3].i64);
+
+    data[invocation].i64.x   = subgroupInclusiveXor(data[0].i64.x);
+    data[invocation].i64.xy  = subgroupInclusiveXor(data[1].i64.xy);
+    data[invocation].i64.xyz = subgroupInclusiveXor(data[2].i64.xyz);
+    data[invocation].i64     = subgroupInclusiveXor(data[3].i64);
+
+    data[invocation].i64.x   = subgroupExclusiveAdd(data[0].i64.x);
+    data[invocation].i64.xy  = subgroupExclusiveAdd(data[1].i64.xy);
+    data[invocation].i64.xyz = subgroupExclusiveAdd(data[2].i64.xyz);
+    data[invocation].i64     = subgroupExclusiveAdd(data[3].i64);
+
+    data[invocation].i64.x   = subgroupExclusiveMul(data[0].i64.x);
+    data[invocation].i64.xy  = subgroupExclusiveMul(data[1].i64.xy);
+    data[invocation].i64.xyz = subgroupExclusiveMul(data[2].i64.xyz);
+    data[invocation].i64     = subgroupExclusiveMul(data[3].i64);
+
+    data[invocation].i64.x   = subgroupExclusiveMin(data[0].i64.x);
+    data[invocation].i64.xy  = subgroupExclusiveMin(data[1].i64.xy);
+    data[invocation].i64.xyz = subgroupExclusiveMin(data[2].i64.xyz);
+    data[invocation].i64     = subgroupExclusiveMin(data[3].i64);
+
+    data[invocation].i64.x   = subgroupExclusiveMax(data[0].i64.x);
+    data[invocation].i64.xy  = subgroupExclusiveMax(data[1].i64.xy);
+    data[invocation].i64.xyz = subgroupExclusiveMax(data[2].i64.xyz);
+    data[invocation].i64     = subgroupExclusiveMax(data[3].i64);
+
+    data[invocation].i64.x   = subgroupExclusiveAnd(data[0].i64.x);
+    data[invocation].i64.xy  = subgroupExclusiveAnd(data[1].i64.xy);
+    data[invocation].i64.xyz = subgroupExclusiveAnd(data[2].i64.xyz);
+    data[invocation].i64     = subgroupExclusiveAnd(data[3].i64);
+
+    data[invocation].i64.x   = subgroupExclusiveOr(data[0].i64.x);
+    data[invocation].i64.xy  = subgroupExclusiveOr(data[1].i64.xy);
+    data[invocation].i64.xyz = subgroupExclusiveOr(data[2].i64.xyz);
+    data[invocation].i64     = subgroupExclusiveOr(data[3].i64);
+
+    data[invocation].i64.x   = subgroupExclusiveXor(data[0].i64.x);
+    data[invocation].i64.xy  = subgroupExclusiveXor(data[1].i64.xy);
+    data[invocation].i64.xyz = subgroupExclusiveXor(data[2].i64.xyz);
+    data[invocation].i64     = subgroupExclusiveXor(data[3].i64);
+
+    data[invocation].u64.x   = subgroupAdd(data[0].u64.x);
+    data[invocation].u64.xy  = subgroupAdd(data[1].u64.xy);
+    data[invocation].u64.xyz = subgroupAdd(data[2].u64.xyz);
+    data[invocation].u64     = subgroupAdd(data[3].u64);
+
+    data[invocation].u64.x   = subgroupMul(data[0].u64.x);
+    data[invocation].u64.xy  = subgroupMul(data[1].u64.xy);
+    data[invocation].u64.xyz = subgroupMul(data[2].u64.xyz);
+    data[invocation].u64     = subgroupMul(data[3].u64);
+
+    data[invocation].u64.x   = subgroupMin(data[0].u64.x);
+    data[invocation].u64.xy  = subgroupMin(data[1].u64.xy);
+    data[invocation].u64.xyz = subgroupMin(data[2].u64.xyz);
+    data[invocation].u64     = subgroupMin(data[3].u64);
+
+    data[invocation].u64.x   = subgroupMax(data[0].u64.x);
+    data[invocation].u64.xy  = subgroupMax(data[1].u64.xy);
+    data[invocation].u64.xyz = subgroupMax(data[2].u64.xyz);
+    data[invocation].u64     = subgroupMax(data[3].u64);
+
+    data[invocation].u64.x   = subgroupAnd(data[0].u64.x);
+    data[invocation].u64.xy  = subgroupAnd(data[1].u64.xy);
+    data[invocation].u64.xyz = subgroupAnd(data[2].u64.xyz);
+    data[invocation].u64     = subgroupAnd(data[3].u64);
+
+    data[invocation].u64.x   = subgroupOr(data[0].u64.x);
+    data[invocation].u64.xy  = subgroupOr(data[1].u64.xy);
+    data[invocation].u64.xyz = subgroupOr(data[2].u64.xyz);
+    data[invocation].u64     = subgroupOr(data[3].u64);
+
+    data[invocation].u64.x   = subgroupXor(data[0].u64.x);
+    data[invocation].u64.xy  = subgroupXor(data[1].u64.xy);
+    data[invocation].u64.xyz = subgroupXor(data[2].u64.xyz);
+    data[invocation].u64     = subgroupXor(data[3].u64);
+
+    data[invocation].u64.x   = subgroupInclusiveAdd(data[0].u64.x);
+    data[invocation].u64.xy  = subgroupInclusiveAdd(data[1].u64.xy);
+    data[invocation].u64.xyz = subgroupInclusiveAdd(data[2].u64.xyz);
+    data[invocation].u64     = subgroupInclusiveAdd(data[3].u64);
+
+    data[invocation].u64.x   = subgroupInclusiveMul(data[0].u64.x);
+    data[invocation].u64.xy  = subgroupInclusiveMul(data[1].u64.xy);
+    data[invocation].u64.xyz = subgroupInclusiveMul(data[2].u64.xyz);
+    data[invocation].u64     = subgroupInclusiveMul(data[3].u64);
+
+    data[invocation].u64.x   = subgroupInclusiveMin(data[0].u64.x);
+    data[invocation].u64.xy  = subgroupInclusiveMin(data[1].u64.xy);
+    data[invocation].u64.xyz = subgroupInclusiveMin(data[2].u64.xyz);
+    data[invocation].u64     = subgroupInclusiveMin(data[3].u64);
+
+    data[invocation].u64.x   = subgroupInclusiveMax(data[0].u64.x);
+    data[invocation].u64.xy  = subgroupInclusiveMax(data[1].u64.xy);
+    data[invocation].u64.xyz = subgroupInclusiveMax(data[2].u64.xyz);
+    data[invocation].u64     = subgroupInclusiveMax(data[3].u64);
+
+    data[invocation].u64.x   = subgroupInclusiveAnd(data[0].u64.x);
+    data[invocation].u64.xy  = subgroupInclusiveAnd(data[1].u64.xy);
+    data[invocation].u64.xyz = subgroupInclusiveAnd(data[2].u64.xyz);
+    data[invocation].u64     = subgroupInclusiveAnd(data[3].u64);
+
+    data[invocation].u64.x   = subgroupInclusiveOr(data[0].u64.x);
+    data[invocation].u64.xy  = subgroupInclusiveOr(data[1].u64.xy);
+    data[invocation].u64.xyz = subgroupInclusiveOr(data[2].u64.xyz);
+    data[invocation].u64     = subgroupInclusiveOr(data[3].u64);
+
+    data[invocation].u64.x   = subgroupInclusiveXor(data[0].u64.x);
+    data[invocation].u64.xy  = subgroupInclusiveXor(data[1].u64.xy);
+    data[invocation].u64.xyz = subgroupInclusiveXor(data[2].u64.xyz);
+    data[invocation].u64     = subgroupInclusiveXor(data[3].u64);
+
+    data[invocation].u64.x   = subgroupExclusiveAdd(data[0].u64.x);
+    data[invocation].u64.xy  = subgroupExclusiveAdd(data[1].u64.xy);
+    data[invocation].u64.xyz = subgroupExclusiveAdd(data[2].u64.xyz);
+    data[invocation].u64     = subgroupExclusiveAdd(data[3].u64);
+
+    data[invocation].u64.x   = subgroupExclusiveMul(data[0].u64.x);
+    data[invocation].u64.xy  = subgroupExclusiveMul(data[1].u64.xy);
+    data[invocation].u64.xyz = subgroupExclusiveMul(data[2].u64.xyz);
+    data[invocation].u64     = subgroupExclusiveMul(data[3].u64);
+
+    data[invocation].u64.x   = subgroupExclusiveMin(data[0].u64.x);
+    data[invocation].u64.xy  = subgroupExclusiveMin(data[1].u64.xy);
+    data[invocation].u64.xyz = subgroupExclusiveMin(data[2].u64.xyz);
+    data[invocation].u64     = subgroupExclusiveMin(data[3].u64);
+
+    data[invocation].u64.x   = subgroupExclusiveMax(data[0].u64.x);
+    data[invocation].u64.xy  = subgroupExclusiveMax(data[1].u64.xy);
+    data[invocation].u64.xyz = subgroupExclusiveMax(data[2].u64.xyz);
+    data[invocation].u64     = subgroupExclusiveMax(data[3].u64);
+
+    data[invocation].u64.x   = subgroupExclusiveAnd(data[0].u64.x);
+    data[invocation].u64.xy  = subgroupExclusiveAnd(data[1].u64.xy);
+    data[invocation].u64.xyz = subgroupExclusiveAnd(data[2].u64.xyz);
+    data[invocation].u64     = subgroupExclusiveAnd(data[3].u64);
+
+    data[invocation].u64.x   = subgroupExclusiveOr(data[0].u64.x);
+    data[invocation].u64.xy  = subgroupExclusiveOr(data[1].u64.xy);
+    data[invocation].u64.xyz = subgroupExclusiveOr(data[2].u64.xyz);
+    data[invocation].u64     = subgroupExclusiveOr(data[3].u64);
+
+    data[invocation].u64.x   = subgroupExclusiveXor(data[0].u64.x);
+    data[invocation].u64.xy  = subgroupExclusiveXor(data[1].u64.xy);
+    data[invocation].u64.xyz = subgroupExclusiveXor(data[2].u64.xyz);
+    data[invocation].u64     = subgroupExclusiveXor(data[3].u64);
+
+    data[invocation].f16.x   = subgroupAdd(data[0].f16.x);
+    data[invocation].f16.xy  = subgroupAdd(data[1].f16.xy);
+    data[invocation].f16.xyz = subgroupAdd(data[2].f16.xyz);
+    data[invocation].f16     = subgroupAdd(data[3].f16);
+
+    data[invocation].f16.x   = subgroupMul(data[0].f16.x);
+    data[invocation].f16.xy  = subgroupMul(data[1].f16.xy);
+    data[invocation].f16.xyz = subgroupMul(data[2].f16.xyz);
+    data[invocation].f16     = subgroupMul(data[3].f16);
+
+    data[invocation].f16.x   = subgroupMin(data[0].f16.x);
+    data[invocation].f16.xy  = subgroupMin(data[1].f16.xy);
+    data[invocation].f16.xyz = subgroupMin(data[2].f16.xyz);
+    data[invocation].f16     = subgroupMin(data[3].f16);
+
+    data[invocation].f16.x   = subgroupMax(data[0].f16.x);
+    data[invocation].f16.xy  = subgroupMax(data[1].f16.xy);
+    data[invocation].f16.xyz = subgroupMax(data[2].f16.xyz);
+    data[invocation].f16     = subgroupMax(data[3].f16);
+
+    data[invocation].f16.x   = subgroupInclusiveAdd(data[0].f16.x);
+    data[invocation].f16.xy  = subgroupInclusiveAdd(data[1].f16.xy);
+    data[invocation].f16.xyz = subgroupInclusiveAdd(data[2].f16.xyz);
+    data[invocation].f16     = subgroupInclusiveAdd(data[3].f16);
+
+    data[invocation].f16.x   = subgroupInclusiveMul(data[0].f16.x);
+    data[invocation].f16.xy  = subgroupInclusiveMul(data[1].f16.xy);
+    data[invocation].f16.xyz = subgroupInclusiveMul(data[2].f16.xyz);
+    data[invocation].f16     = subgroupInclusiveMul(data[3].f16);
+
+    data[invocation].f16.x   = subgroupInclusiveMin(data[0].f16.x);
+    data[invocation].f16.xy  = subgroupInclusiveMin(data[1].f16.xy);
+    data[invocation].f16.xyz = subgroupInclusiveMin(data[2].f16.xyz);
+    data[invocation].f16     = subgroupInclusiveMin(data[3].f16);
+
+    data[invocation].f16.x   = subgroupInclusiveMax(data[0].f16.x);
+    data[invocation].f16.xy  = subgroupInclusiveMax(data[1].f16.xy);
+    data[invocation].f16.xyz = subgroupInclusiveMax(data[2].f16.xyz);
+    data[invocation].f16     = subgroupInclusiveMax(data[3].f16);
+
+    data[invocation].f16.x   = subgroupExclusiveAdd(data[0].f16.x);
+    data[invocation].f16.xy  = subgroupExclusiveAdd(data[1].f16.xy);
+    data[invocation].f16.xyz = subgroupExclusiveAdd(data[2].f16.xyz);
+    data[invocation].f16     = subgroupExclusiveAdd(data[3].f16);
+
+    data[invocation].f16.x   = subgroupExclusiveMul(data[0].f16.x);
+    data[invocation].f16.xy  = subgroupExclusiveMul(data[1].f16.xy);
+    data[invocation].f16.xyz = subgroupExclusiveMul(data[2].f16.xyz);
+    data[invocation].f16     = subgroupExclusiveMul(data[3].f16);
+
+    data[invocation].f16.x   = subgroupExclusiveMin(data[0].f16.x);
+    data[invocation].f16.xy  = subgroupExclusiveMin(data[1].f16.xy);
+    data[invocation].f16.xyz = subgroupExclusiveMin(data[2].f16.xyz);
+    data[invocation].f16     = subgroupExclusiveMin(data[3].f16);
+
+    data[invocation].f16.x   = subgroupExclusiveMax(data[0].f16.x);
+    data[invocation].f16.xy  = subgroupExclusiveMax(data[1].f16.xy);
+    data[invocation].f16.xyz = subgroupExclusiveMax(data[2].f16.xyz);
+    data[invocation].f16     = subgroupExclusiveMax(data[3].f16);
+}
diff --git a/Test/spv.subgroupExtendedTypesArithmeticNeg.comp b/Test/spv.subgroupExtendedTypesArithmeticNeg.comp
new file mode 100644
index 0000000..eb22cab
--- /dev/null
+++ b/Test/spv.subgroupExtendedTypesArithmeticNeg.comp
@@ -0,0 +1,715 @@
+#version 450
+
+#extension GL_KHR_shader_subgroup_arithmetic: enable  // subgroupAdd/Mul/Min/Max/And/Or/Xor plus the Inclusive*/Exclusive* scans used in main()
+#extension GL_EXT_shader_explicit_arithmetic_types_int8: enable  // needed for the i8vec4/u8vec4 members below
+#extension GL_EXT_shader_explicit_arithmetic_types_int16: enable  // needed for the i16vec4/u16vec4 members below
+#extension GL_EXT_shader_explicit_arithmetic_types_int64: enable  // needed for the i64vec4/u64vec4 members below
+#extension GL_EXT_shader_explicit_arithmetic_types_float16: enable  // needed for the f16vec4 member below
+// NOTE(review): no GL_EXT_shader_subgroup_extended_types_* extension is enabled here; presumably that is the point of this "Neg" test - confirm against the expected output.
+layout (local_size_x = 8) in;  // compute workgroup size: 8 invocations along x
+
+layout(binding = 0) buffer Buffers  // storage block holding one 4-component vector per extended scalar type
+{
+    i8vec4 i8;    // 8-bit signed integer vector
+    u8vec4 u8;    // 8-bit unsigned integer vector
+    i16vec4 i16;  // 16-bit signed integer vector
+    u16vec4 u16;  // 16-bit unsigned integer vector
+    i64vec4 i64;  // 64-bit signed integer vector
+    u64vec4 u64;  // 64-bit unsigned integer vector
+    f16vec4 f16;  // 16-bit floating-point vector
+} data[4];  // four instances: main() reads data[0]..data[3] and writes data[invocation], with invocation in 0..3
+
+void main()
+{
+    uint invocation = (gl_SubgroupInvocationID + gl_SubgroupSize) % 4;
+
+    data[invocation].i8.x   = subgroupAdd(data[0].i8.x);
+    data[invocation].i8.xy  = subgroupAdd(data[1].i8.xy);
+    data[invocation].i8.xyz = subgroupAdd(data[2].i8.xyz);
+    data[invocation].i8     = subgroupAdd(data[3].i8);
+
+    data[invocation].i8.x   = subgroupMul(data[0].i8.x);
+    data[invocation].i8.xy  = subgroupMul(data[1].i8.xy);
+    data[invocation].i8.xyz = subgroupMul(data[2].i8.xyz);
+    data[invocation].i8     = subgroupMul(data[3].i8);
+
+    data[invocation].i8.x   = subgroupMin(data[0].i8.x);
+    data[invocation].i8.xy  = subgroupMin(data[1].i8.xy);
+    data[invocation].i8.xyz = subgroupMin(data[2].i8.xyz);
+    data[invocation].i8     = subgroupMin(data[3].i8);
+
+    data[invocation].i8.x   = subgroupMax(data[0].i8.x);
+    data[invocation].i8.xy  = subgroupMax(data[1].i8.xy);
+    data[invocation].i8.xyz = subgroupMax(data[2].i8.xyz);
+    data[invocation].i8     = subgroupMax(data[3].i8);
+
+    data[invocation].i8.x   = subgroupAnd(data[0].i8.x);
+    data[invocation].i8.xy  = subgroupAnd(data[1].i8.xy);
+    data[invocation].i8.xyz = subgroupAnd(data[2].i8.xyz);
+    data[invocation].i8     = subgroupAnd(data[3].i8);
+
+    data[invocation].i8.x   = subgroupOr(data[0].i8.x);
+    data[invocation].i8.xy  = subgroupOr(data[1].i8.xy);
+    data[invocation].i8.xyz = subgroupOr(data[2].i8.xyz);
+    data[invocation].i8     = subgroupOr(data[3].i8);
+
+    data[invocation].i8.x   = subgroupXor(data[0].i8.x);
+    data[invocation].i8.xy  = subgroupXor(data[1].i8.xy);
+    data[invocation].i8.xyz = subgroupXor(data[2].i8.xyz);
+    data[invocation].i8     = subgroupXor(data[3].i8);
+
+    data[invocation].i8.x   = subgroupInclusiveAdd(data[0].i8.x);
+    data[invocation].i8.xy  = subgroupInclusiveAdd(data[1].i8.xy);
+    data[invocation].i8.xyz = subgroupInclusiveAdd(data[2].i8.xyz);
+    data[invocation].i8     = subgroupInclusiveAdd(data[3].i8);
+
+    data[invocation].i8.x   = subgroupInclusiveMul(data[0].i8.x);
+    data[invocation].i8.xy  = subgroupInclusiveMul(data[1].i8.xy);
+    data[invocation].i8.xyz = subgroupInclusiveMul(data[2].i8.xyz);
+    data[invocation].i8     = subgroupInclusiveMul(data[3].i8);
+
+    data[invocation].i8.x   = subgroupInclusiveMin(data[0].i8.x);
+    data[invocation].i8.xy  = subgroupInclusiveMin(data[1].i8.xy);
+    data[invocation].i8.xyz = subgroupInclusiveMin(data[2].i8.xyz);
+    data[invocation].i8     = subgroupInclusiveMin(data[3].i8);
+
+    data[invocation].i8.x   = subgroupInclusiveMax(data[0].i8.x);
+    data[invocation].i8.xy  = subgroupInclusiveMax(data[1].i8.xy);
+    data[invocation].i8.xyz = subgroupInclusiveMax(data[2].i8.xyz);
+    data[invocation].i8     = subgroupInclusiveMax(data[3].i8);
+
+    data[invocation].i8.x   = subgroupInclusiveAnd(data[0].i8.x);
+    data[invocation].i8.xy  = subgroupInclusiveAnd(data[1].i8.xy);
+    data[invocation].i8.xyz = subgroupInclusiveAnd(data[2].i8.xyz);
+    data[invocation].i8     = subgroupInclusiveAnd(data[3].i8);
+
+    data[invocation].i8.x   = subgroupInclusiveOr(data[0].i8.x);
+    data[invocation].i8.xy  = subgroupInclusiveOr(data[1].i8.xy);
+    data[invocation].i8.xyz = subgroupInclusiveOr(data[2].i8.xyz);
+    data[invocation].i8     = subgroupInclusiveOr(data[3].i8);
+
+    data[invocation].i8.x   = subgroupInclusiveXor(data[0].i8.x);
+    data[invocation].i8.xy  = subgroupInclusiveXor(data[1].i8.xy);
+    data[invocation].i8.xyz = subgroupInclusiveXor(data[2].i8.xyz);
+    data[invocation].i8     = subgroupInclusiveXor(data[3].i8);
+
+    data[invocation].i8.x   = subgroupExclusiveAdd(data[0].i8.x);
+    data[invocation].i8.xy  = subgroupExclusiveAdd(data[1].i8.xy);
+    data[invocation].i8.xyz = subgroupExclusiveAdd(data[2].i8.xyz);
+    data[invocation].i8     = subgroupExclusiveAdd(data[3].i8);
+
+    data[invocation].i8.x   = subgroupExclusiveMul(data[0].i8.x);
+    data[invocation].i8.xy  = subgroupExclusiveMul(data[1].i8.xy);
+    data[invocation].i8.xyz = subgroupExclusiveMul(data[2].i8.xyz);
+    data[invocation].i8     = subgroupExclusiveMul(data[3].i8);
+
+    data[invocation].i8.x   = subgroupExclusiveMin(data[0].i8.x);
+    data[invocation].i8.xy  = subgroupExclusiveMin(data[1].i8.xy);
+    data[invocation].i8.xyz = subgroupExclusiveMin(data[2].i8.xyz);
+    data[invocation].i8     = subgroupExclusiveMin(data[3].i8);
+
+    data[invocation].i8.x   = subgroupExclusiveMax(data[0].i8.x);
+    data[invocation].i8.xy  = subgroupExclusiveMax(data[1].i8.xy);
+    data[invocation].i8.xyz = subgroupExclusiveMax(data[2].i8.xyz);
+    data[invocation].i8     = subgroupExclusiveMax(data[3].i8);
+
+    data[invocation].i8.x   = subgroupExclusiveAnd(data[0].i8.x);
+    data[invocation].i8.xy  = subgroupExclusiveAnd(data[1].i8.xy);
+    data[invocation].i8.xyz = subgroupExclusiveAnd(data[2].i8.xyz);
+    data[invocation].i8     = subgroupExclusiveAnd(data[3].i8);
+
+    data[invocation].i8.x   = subgroupExclusiveOr(data[0].i8.x);
+    data[invocation].i8.xy  = subgroupExclusiveOr(data[1].i8.xy);
+    data[invocation].i8.xyz = subgroupExclusiveOr(data[2].i8.xyz);
+    data[invocation].i8     = subgroupExclusiveOr(data[3].i8);
+
+    data[invocation].i8.x   = subgroupExclusiveXor(data[0].i8.x);
+    data[invocation].i8.xy  = subgroupExclusiveXor(data[1].i8.xy);
+    data[invocation].i8.xyz = subgroupExclusiveXor(data[2].i8.xyz);
+    data[invocation].i8     = subgroupExclusiveXor(data[3].i8);
+
+    data[invocation].u8.x   = subgroupAdd(data[0].u8.x);
+    data[invocation].u8.xy  = subgroupAdd(data[1].u8.xy);
+    data[invocation].u8.xyz = subgroupAdd(data[2].u8.xyz);
+    data[invocation].u8     = subgroupAdd(data[3].u8);
+
+    data[invocation].u8.x   = subgroupMul(data[0].u8.x);
+    data[invocation].u8.xy  = subgroupMul(data[1].u8.xy);
+    data[invocation].u8.xyz = subgroupMul(data[2].u8.xyz);
+    data[invocation].u8     = subgroupMul(data[3].u8);
+
+    data[invocation].u8.x   = subgroupMin(data[0].u8.x);
+    data[invocation].u8.xy  = subgroupMin(data[1].u8.xy);
+    data[invocation].u8.xyz = subgroupMin(data[2].u8.xyz);
+    data[invocation].u8     = subgroupMin(data[3].u8);
+
+    data[invocation].u8.x   = subgroupMax(data[0].u8.x);
+    data[invocation].u8.xy  = subgroupMax(data[1].u8.xy);
+    data[invocation].u8.xyz = subgroupMax(data[2].u8.xyz);
+    data[invocation].u8     = subgroupMax(data[3].u8);
+
+    data[invocation].u8.x   = subgroupAnd(data[0].u8.x);
+    data[invocation].u8.xy  = subgroupAnd(data[1].u8.xy);
+    data[invocation].u8.xyz = subgroupAnd(data[2].u8.xyz);
+    data[invocation].u8     = subgroupAnd(data[3].u8);
+
+    data[invocation].u8.x   = subgroupOr(data[0].u8.x);
+    data[invocation].u8.xy  = subgroupOr(data[1].u8.xy);
+    data[invocation].u8.xyz = subgroupOr(data[2].u8.xyz);
+    data[invocation].u8     = subgroupOr(data[3].u8);
+
+    data[invocation].u8.x   = subgroupXor(data[0].u8.x);
+    data[invocation].u8.xy  = subgroupXor(data[1].u8.xy);
+    data[invocation].u8.xyz = subgroupXor(data[2].u8.xyz);
+    data[invocation].u8     = subgroupXor(data[3].u8);
+
+    data[invocation].u8.x   = subgroupInclusiveAdd(data[0].u8.x);
+    data[invocation].u8.xy  = subgroupInclusiveAdd(data[1].u8.xy);
+    data[invocation].u8.xyz = subgroupInclusiveAdd(data[2].u8.xyz);
+    data[invocation].u8     = subgroupInclusiveAdd(data[3].u8);
+
+    data[invocation].u8.x   = subgroupInclusiveMul(data[0].u8.x);
+    data[invocation].u8.xy  = subgroupInclusiveMul(data[1].u8.xy);
+    data[invocation].u8.xyz = subgroupInclusiveMul(data[2].u8.xyz);
+    data[invocation].u8     = subgroupInclusiveMul(data[3].u8);
+
+    data[invocation].u8.x   = subgroupInclusiveMin(data[0].u8.x);
+    data[invocation].u8.xy  = subgroupInclusiveMin(data[1].u8.xy);
+    data[invocation].u8.xyz = subgroupInclusiveMin(data[2].u8.xyz);
+    data[invocation].u8     = subgroupInclusiveMin(data[3].u8);
+
+    data[invocation].u8.x   = subgroupInclusiveMax(data[0].u8.x);
+    data[invocation].u8.xy  = subgroupInclusiveMax(data[1].u8.xy);
+    data[invocation].u8.xyz = subgroupInclusiveMax(data[2].u8.xyz);
+    data[invocation].u8     = subgroupInclusiveMax(data[3].u8);
+
+    data[invocation].u8.x   = subgroupInclusiveAnd(data[0].u8.x);
+    data[invocation].u8.xy  = subgroupInclusiveAnd(data[1].u8.xy);
+    data[invocation].u8.xyz = subgroupInclusiveAnd(data[2].u8.xyz);
+    data[invocation].u8     = subgroupInclusiveAnd(data[3].u8);
+
+    data[invocation].u8.x   = subgroupInclusiveOr(data[0].u8.x);
+    data[invocation].u8.xy  = subgroupInclusiveOr(data[1].u8.xy);
+    data[invocation].u8.xyz = subgroupInclusiveOr(data[2].u8.xyz);
+    data[invocation].u8     = subgroupInclusiveOr(data[3].u8);
+
+    data[invocation].u8.x   = subgroupInclusiveXor(data[0].u8.x);
+    data[invocation].u8.xy  = subgroupInclusiveXor(data[1].u8.xy);
+    data[invocation].u8.xyz = subgroupInclusiveXor(data[2].u8.xyz);
+    data[invocation].u8     = subgroupInclusiveXor(data[3].u8);
+
+    data[invocation].u8.x   = subgroupExclusiveAdd(data[0].u8.x);
+    data[invocation].u8.xy  = subgroupExclusiveAdd(data[1].u8.xy);
+    data[invocation].u8.xyz = subgroupExclusiveAdd(data[2].u8.xyz);
+    data[invocation].u8     = subgroupExclusiveAdd(data[3].u8);
+
+    data[invocation].u8.x   = subgroupExclusiveMul(data[0].u8.x);
+    data[invocation].u8.xy  = subgroupExclusiveMul(data[1].u8.xy);
+    data[invocation].u8.xyz = subgroupExclusiveMul(data[2].u8.xyz);
+    data[invocation].u8     = subgroupExclusiveMul(data[3].u8);
+
+    data[invocation].u8.x   = subgroupExclusiveMin(data[0].u8.x);
+    data[invocation].u8.xy  = subgroupExclusiveMin(data[1].u8.xy);
+    data[invocation].u8.xyz = subgroupExclusiveMin(data[2].u8.xyz);
+    data[invocation].u8     = subgroupExclusiveMin(data[3].u8);
+
+    data[invocation].u8.x   = subgroupExclusiveMax(data[0].u8.x);
+    data[invocation].u8.xy  = subgroupExclusiveMax(data[1].u8.xy);
+    data[invocation].u8.xyz = subgroupExclusiveMax(data[2].u8.xyz);
+    data[invocation].u8     = subgroupExclusiveMax(data[3].u8);
+
+    data[invocation].u8.x   = subgroupExclusiveAnd(data[0].u8.x);
+    data[invocation].u8.xy  = subgroupExclusiveAnd(data[1].u8.xy);
+    data[invocation].u8.xyz = subgroupExclusiveAnd(data[2].u8.xyz);
+    data[invocation].u8     = subgroupExclusiveAnd(data[3].u8);
+
+    data[invocation].u8.x   = subgroupExclusiveOr(data[0].u8.x);
+    data[invocation].u8.xy  = subgroupExclusiveOr(data[1].u8.xy);
+    data[invocation].u8.xyz = subgroupExclusiveOr(data[2].u8.xyz);
+    data[invocation].u8     = subgroupExclusiveOr(data[3].u8);
+
+    data[invocation].u8.x   = subgroupExclusiveXor(data[0].u8.x);
+    data[invocation].u8.xy  = subgroupExclusiveXor(data[1].u8.xy);
+    data[invocation].u8.xyz = subgroupExclusiveXor(data[2].u8.xyz);
+    data[invocation].u8     = subgroupExclusiveXor(data[3].u8);
+
+    data[invocation].i16.x   = subgroupAdd(data[0].i16.x);
+    data[invocation].i16.xy  = subgroupAdd(data[1].i16.xy);
+    data[invocation].i16.xyz = subgroupAdd(data[2].i16.xyz);
+    data[invocation].i16     = subgroupAdd(data[3].i16);
+
+    data[invocation].i16.x   = subgroupMul(data[0].i16.x);
+    data[invocation].i16.xy  = subgroupMul(data[1].i16.xy);
+    data[invocation].i16.xyz = subgroupMul(data[2].i16.xyz);
+    data[invocation].i16     = subgroupMul(data[3].i16);
+
+    data[invocation].i16.x   = subgroupMin(data[0].i16.x);
+    data[invocation].i16.xy  = subgroupMin(data[1].i16.xy);
+    data[invocation].i16.xyz = subgroupMin(data[2].i16.xyz);
+    data[invocation].i16     = subgroupMin(data[3].i16);
+
+    data[invocation].i16.x   = subgroupMax(data[0].i16.x);
+    data[invocation].i16.xy  = subgroupMax(data[1].i16.xy);
+    data[invocation].i16.xyz = subgroupMax(data[2].i16.xyz);
+    data[invocation].i16     = subgroupMax(data[3].i16);
+
+    data[invocation].i16.x   = subgroupAnd(data[0].i16.x);
+    data[invocation].i16.xy  = subgroupAnd(data[1].i16.xy);
+    data[invocation].i16.xyz = subgroupAnd(data[2].i16.xyz);
+    data[invocation].i16     = subgroupAnd(data[3].i16);
+
+    data[invocation].i16.x   = subgroupOr(data[0].i16.x);
+    data[invocation].i16.xy  = subgroupOr(data[1].i16.xy);
+    data[invocation].i16.xyz = subgroupOr(data[2].i16.xyz);
+    data[invocation].i16     = subgroupOr(data[3].i16);
+
+    data[invocation].i16.x   = subgroupXor(data[0].i16.x);
+    data[invocation].i16.xy  = subgroupXor(data[1].i16.xy);
+    data[invocation].i16.xyz = subgroupXor(data[2].i16.xyz);
+    data[invocation].i16     = subgroupXor(data[3].i16);
+
+    data[invocation].i16.x   = subgroupInclusiveAdd(data[0].i16.x);
+    data[invocation].i16.xy  = subgroupInclusiveAdd(data[1].i16.xy);
+    data[invocation].i16.xyz = subgroupInclusiveAdd(data[2].i16.xyz);
+    data[invocation].i16     = subgroupInclusiveAdd(data[3].i16);
+
+    data[invocation].i16.x   = subgroupInclusiveMul(data[0].i16.x);
+    data[invocation].i16.xy  = subgroupInclusiveMul(data[1].i16.xy);
+    data[invocation].i16.xyz = subgroupInclusiveMul(data[2].i16.xyz);
+    data[invocation].i16     = subgroupInclusiveMul(data[3].i16);
+
+    data[invocation].i16.x   = subgroupInclusiveMin(data[0].i16.x);
+    data[invocation].i16.xy  = subgroupInclusiveMin(data[1].i16.xy);
+    data[invocation].i16.xyz = subgroupInclusiveMin(data[2].i16.xyz);
+    data[invocation].i16     = subgroupInclusiveMin(data[3].i16);
+
+    data[invocation].i16.x   = subgroupInclusiveMax(data[0].i16.x);
+    data[invocation].i16.xy  = subgroupInclusiveMax(data[1].i16.xy);
+    data[invocation].i16.xyz = subgroupInclusiveMax(data[2].i16.xyz);
+    data[invocation].i16     = subgroupInclusiveMax(data[3].i16);
+
+    data[invocation].i16.x   = subgroupInclusiveAnd(data[0].i16.x);
+    data[invocation].i16.xy  = subgroupInclusiveAnd(data[1].i16.xy);
+    data[invocation].i16.xyz = subgroupInclusiveAnd(data[2].i16.xyz);
+    data[invocation].i16     = subgroupInclusiveAnd(data[3].i16);
+
+    data[invocation].i16.x   = subgroupInclusiveOr(data[0].i16.x);
+    data[invocation].i16.xy  = subgroupInclusiveOr(data[1].i16.xy);
+    data[invocation].i16.xyz = subgroupInclusiveOr(data[2].i16.xyz);
+    data[invocation].i16     = subgroupInclusiveOr(data[3].i16);
+
+    data[invocation].i16.x   = subgroupInclusiveXor(data[0].i16.x);
+    data[invocation].i16.xy  = subgroupInclusiveXor(data[1].i16.xy);
+    data[invocation].i16.xyz = subgroupInclusiveXor(data[2].i16.xyz);
+    data[invocation].i16     = subgroupInclusiveXor(data[3].i16);
+
+    data[invocation].i16.x   = subgroupExclusiveAdd(data[0].i16.x);
+    data[invocation].i16.xy  = subgroupExclusiveAdd(data[1].i16.xy);
+    data[invocation].i16.xyz = subgroupExclusiveAdd(data[2].i16.xyz);
+    data[invocation].i16     = subgroupExclusiveAdd(data[3].i16);
+
+    data[invocation].i16.x   = subgroupExclusiveMul(data[0].i16.x);
+    data[invocation].i16.xy  = subgroupExclusiveMul(data[1].i16.xy);
+    data[invocation].i16.xyz = subgroupExclusiveMul(data[2].i16.xyz);
+    data[invocation].i16     = subgroupExclusiveMul(data[3].i16);
+
+    data[invocation].i16.x   = subgroupExclusiveMin(data[0].i16.x);
+    data[invocation].i16.xy  = subgroupExclusiveMin(data[1].i16.xy);
+    data[invocation].i16.xyz = subgroupExclusiveMin(data[2].i16.xyz);
+    data[invocation].i16     = subgroupExclusiveMin(data[3].i16);
+
+    data[invocation].i16.x   = subgroupExclusiveMax(data[0].i16.x);
+    data[invocation].i16.xy  = subgroupExclusiveMax(data[1].i16.xy);
+    data[invocation].i16.xyz = subgroupExclusiveMax(data[2].i16.xyz);
+    data[invocation].i16     = subgroupExclusiveMax(data[3].i16);
+
+    data[invocation].i16.x   = subgroupExclusiveAnd(data[0].i16.x);
+    data[invocation].i16.xy  = subgroupExclusiveAnd(data[1].i16.xy);
+    data[invocation].i16.xyz = subgroupExclusiveAnd(data[2].i16.xyz);
+    data[invocation].i16     = subgroupExclusiveAnd(data[3].i16);
+
+    data[invocation].i16.x   = subgroupExclusiveOr(data[0].i16.x);
+    data[invocation].i16.xy  = subgroupExclusiveOr(data[1].i16.xy);
+    data[invocation].i16.xyz = subgroupExclusiveOr(data[2].i16.xyz);
+    data[invocation].i16     = subgroupExclusiveOr(data[3].i16);
+
+    data[invocation].i16.x   = subgroupExclusiveXor(data[0].i16.x);
+    data[invocation].i16.xy  = subgroupExclusiveXor(data[1].i16.xy);
+    data[invocation].i16.xyz = subgroupExclusiveXor(data[2].i16.xyz);
+    data[invocation].i16     = subgroupExclusiveXor(data[3].i16);
+
+    data[invocation].u16.x   = subgroupAdd(data[0].u16.x);
+    data[invocation].u16.xy  = subgroupAdd(data[1].u16.xy);
+    data[invocation].u16.xyz = subgroupAdd(data[2].u16.xyz);
+    data[invocation].u16     = subgroupAdd(data[3].u16);
+
+    data[invocation].u16.x   = subgroupMul(data[0].u16.x);
+    data[invocation].u16.xy  = subgroupMul(data[1].u16.xy);
+    data[invocation].u16.xyz = subgroupMul(data[2].u16.xyz);
+    data[invocation].u16     = subgroupMul(data[3].u16);
+
+    data[invocation].u16.x   = subgroupMin(data[0].u16.x);
+    data[invocation].u16.xy  = subgroupMin(data[1].u16.xy);
+    data[invocation].u16.xyz = subgroupMin(data[2].u16.xyz);
+    data[invocation].u16     = subgroupMin(data[3].u16);
+
+    data[invocation].u16.x   = subgroupMax(data[0].u16.x);
+    data[invocation].u16.xy  = subgroupMax(data[1].u16.xy);
+    data[invocation].u16.xyz = subgroupMax(data[2].u16.xyz);
+    data[invocation].u16     = subgroupMax(data[3].u16);
+
+    data[invocation].u16.x   = subgroupAnd(data[0].u16.x);
+    data[invocation].u16.xy  = subgroupAnd(data[1].u16.xy);
+    data[invocation].u16.xyz = subgroupAnd(data[2].u16.xyz);
+    data[invocation].u16     = subgroupAnd(data[3].u16);
+
+    data[invocation].u16.x   = subgroupOr(data[0].u16.x);
+    data[invocation].u16.xy  = subgroupOr(data[1].u16.xy);
+    data[invocation].u16.xyz = subgroupOr(data[2].u16.xyz);
+    data[invocation].u16     = subgroupOr(data[3].u16);
+
+    data[invocation].u16.x   = subgroupXor(data[0].u16.x);
+    data[invocation].u16.xy  = subgroupXor(data[1].u16.xy);
+    data[invocation].u16.xyz = subgroupXor(data[2].u16.xyz);
+    data[invocation].u16     = subgroupXor(data[3].u16);
+
+    data[invocation].u16.x   = subgroupInclusiveAdd(data[0].u16.x);
+    data[invocation].u16.xy  = subgroupInclusiveAdd(data[1].u16.xy);
+    data[invocation].u16.xyz = subgroupInclusiveAdd(data[2].u16.xyz);
+    data[invocation].u16     = subgroupInclusiveAdd(data[3].u16);
+
+    data[invocation].u16.x   = subgroupInclusiveMul(data[0].u16.x);
+    data[invocation].u16.xy  = subgroupInclusiveMul(data[1].u16.xy);
+    data[invocation].u16.xyz = subgroupInclusiveMul(data[2].u16.xyz);
+    data[invocation].u16     = subgroupInclusiveMul(data[3].u16);
+
+    data[invocation].u16.x   = subgroupInclusiveMin(data[0].u16.x);
+    data[invocation].u16.xy  = subgroupInclusiveMin(data[1].u16.xy);
+    data[invocation].u16.xyz = subgroupInclusiveMin(data[2].u16.xyz);
+    data[invocation].u16     = subgroupInclusiveMin(data[3].u16);
+
+    data[invocation].u16.x   = subgroupInclusiveMax(data[0].u16.x);
+    data[invocation].u16.xy  = subgroupInclusiveMax(data[1].u16.xy);
+    data[invocation].u16.xyz = subgroupInclusiveMax(data[2].u16.xyz);
+    data[invocation].u16     = subgroupInclusiveMax(data[3].u16);
+
+    data[invocation].u16.x   = subgroupInclusiveAnd(data[0].u16.x);
+    data[invocation].u16.xy  = subgroupInclusiveAnd(data[1].u16.xy);
+    data[invocation].u16.xyz = subgroupInclusiveAnd(data[2].u16.xyz);
+    data[invocation].u16     = subgroupInclusiveAnd(data[3].u16);
+
+    data[invocation].u16.x   = subgroupInclusiveOr(data[0].u16.x);
+    data[invocation].u16.xy  = subgroupInclusiveOr(data[1].u16.xy);
+    data[invocation].u16.xyz = subgroupInclusiveOr(data[2].u16.xyz);
+    data[invocation].u16     = subgroupInclusiveOr(data[3].u16);
+
+    data[invocation].u16.x   = subgroupInclusiveXor(data[0].u16.x);
+    data[invocation].u16.xy  = subgroupInclusiveXor(data[1].u16.xy);
+    data[invocation].u16.xyz = subgroupInclusiveXor(data[2].u16.xyz);
+    data[invocation].u16     = subgroupInclusiveXor(data[3].u16);
+
+    data[invocation].u16.x   = subgroupExclusiveAdd(data[0].u16.x);
+    data[invocation].u16.xy  = subgroupExclusiveAdd(data[1].u16.xy);
+    data[invocation].u16.xyz = subgroupExclusiveAdd(data[2].u16.xyz);
+    data[invocation].u16     = subgroupExclusiveAdd(data[3].u16);
+
+    data[invocation].u16.x   = subgroupExclusiveMul(data[0].u16.x);
+    data[invocation].u16.xy  = subgroupExclusiveMul(data[1].u16.xy);
+    data[invocation].u16.xyz = subgroupExclusiveMul(data[2].u16.xyz);
+    data[invocation].u16     = subgroupExclusiveMul(data[3].u16);
+
+    data[invocation].u16.x   = subgroupExclusiveMin(data[0].u16.x);
+    data[invocation].u16.xy  = subgroupExclusiveMin(data[1].u16.xy);
+    data[invocation].u16.xyz = subgroupExclusiveMin(data[2].u16.xyz);
+    data[invocation].u16     = subgroupExclusiveMin(data[3].u16);
+
+    data[invocation].u16.x   = subgroupExclusiveMax(data[0].u16.x);
+    data[invocation].u16.xy  = subgroupExclusiveMax(data[1].u16.xy);
+    data[invocation].u16.xyz = subgroupExclusiveMax(data[2].u16.xyz);
+    data[invocation].u16     = subgroupExclusiveMax(data[3].u16);
+
+    data[invocation].u16.x   = subgroupExclusiveAnd(data[0].u16.x);
+    data[invocation].u16.xy  = subgroupExclusiveAnd(data[1].u16.xy);
+    data[invocation].u16.xyz = subgroupExclusiveAnd(data[2].u16.xyz);
+    data[invocation].u16     = subgroupExclusiveAnd(data[3].u16);
+
+    data[invocation].u16.x   = subgroupExclusiveOr(data[0].u16.x);
+    data[invocation].u16.xy  = subgroupExclusiveOr(data[1].u16.xy);
+    data[invocation].u16.xyz = subgroupExclusiveOr(data[2].u16.xyz);
+    data[invocation].u16     = subgroupExclusiveOr(data[3].u16);
+
+    data[invocation].u16.x   = subgroupExclusiveXor(data[0].u16.x);
+    data[invocation].u16.xy  = subgroupExclusiveXor(data[1].u16.xy);
+    data[invocation].u16.xyz = subgroupExclusiveXor(data[2].u16.xyz);
+    data[invocation].u16     = subgroupExclusiveXor(data[3].u16);
+
+    data[invocation].i64.x   = subgroupAdd(data[0].i64.x);
+    data[invocation].i64.xy  = subgroupAdd(data[1].i64.xy);
+    data[invocation].i64.xyz = subgroupAdd(data[2].i64.xyz);
+    data[invocation].i64     = subgroupAdd(data[3].i64);
+
+    data[invocation].i64.x   = subgroupMul(data[0].i64.x);
+    data[invocation].i64.xy  = subgroupMul(data[1].i64.xy);
+    data[invocation].i64.xyz = subgroupMul(data[2].i64.xyz);
+    data[invocation].i64     = subgroupMul(data[3].i64);
+
+    data[invocation].i64.x   = subgroupMin(data[0].i64.x);
+    data[invocation].i64.xy  = subgroupMin(data[1].i64.xy);
+    data[invocation].i64.xyz = subgroupMin(data[2].i64.xyz);
+    data[invocation].i64     = subgroupMin(data[3].i64);
+
+    data[invocation].i64.x   = subgroupMax(data[0].i64.x);
+    data[invocation].i64.xy  = subgroupMax(data[1].i64.xy);
+    data[invocation].i64.xyz = subgroupMax(data[2].i64.xyz);
+    data[invocation].i64     = subgroupMax(data[3].i64);
+
+    data[invocation].i64.x   = subgroupAnd(data[0].i64.x);
+    data[invocation].i64.xy  = subgroupAnd(data[1].i64.xy);
+    data[invocation].i64.xyz = subgroupAnd(data[2].i64.xyz);
+    data[invocation].i64     = subgroupAnd(data[3].i64);
+
+    data[invocation].i64.x   = subgroupOr(data[0].i64.x);
+    data[invocation].i64.xy  = subgroupOr(data[1].i64.xy);
+    data[invocation].i64.xyz = subgroupOr(data[2].i64.xyz);
+    data[invocation].i64     = subgroupOr(data[3].i64);
+
+    data[invocation].i64.x   = subgroupXor(data[0].i64.x);
+    data[invocation].i64.xy  = subgroupXor(data[1].i64.xy);
+    data[invocation].i64.xyz = subgroupXor(data[2].i64.xyz);
+    data[invocation].i64     = subgroupXor(data[3].i64);
+
+    data[invocation].i64.x   = subgroupInclusiveAdd(data[0].i64.x);
+    data[invocation].i64.xy  = subgroupInclusiveAdd(data[1].i64.xy);
+    data[invocation].i64.xyz = subgroupInclusiveAdd(data[2].i64.xyz);
+    data[invocation].i64     = subgroupInclusiveAdd(data[3].i64);
+
+    data[invocation].i64.x   = subgroupInclusiveMul(data[0].i64.x);
+    data[invocation].i64.xy  = subgroupInclusiveMul(data[1].i64.xy);
+    data[invocation].i64.xyz = subgroupInclusiveMul(data[2].i64.xyz);
+    data[invocation].i64     = subgroupInclusiveMul(data[3].i64);
+
+    data[invocation].i64.x   = subgroupInclusiveMin(data[0].i64.x);
+    data[invocation].i64.xy  = subgroupInclusiveMin(data[1].i64.xy);
+    data[invocation].i64.xyz = subgroupInclusiveMin(data[2].i64.xyz);
+    data[invocation].i64     = subgroupInclusiveMin(data[3].i64);
+
+    data[invocation].i64.x   = subgroupInclusiveMax(data[0].i64.x);
+    data[invocation].i64.xy  = subgroupInclusiveMax(data[1].i64.xy);
+    data[invocation].i64.xyz = subgroupInclusiveMax(data[2].i64.xyz);
+    data[invocation].i64     = subgroupInclusiveMax(data[3].i64);
+
+    data[invocation].i64.x   = subgroupInclusiveAnd(data[0].i64.x);
+    data[invocation].i64.xy  = subgroupInclusiveAnd(data[1].i64.xy);
+    data[invocation].i64.xyz = subgroupInclusiveAnd(data[2].i64.xyz);
+    data[invocation].i64     = subgroupInclusiveAnd(data[3].i64);
+
+    data[invocation].i64.x   = subgroupInclusiveOr(data[0].i64.x);
+    data[invocation].i64.xy  = subgroupInclusiveOr(data[1].i64.xy);
+    data[invocation].i64.xyz = subgroupInclusiveOr(data[2].i64.xyz);
+    data[invocation].i64     = subgroupInclusiveOr(data[3].i64);
+
+    data[invocation].i64.x   = subgroupInclusiveXor(data[0].i64.x);
+    data[invocation].i64.xy  = subgroupInclusiveXor(data[1].i64.xy);
+    data[invocation].i64.xyz = subgroupInclusiveXor(data[2].i64.xyz);
+    data[invocation].i64     = subgroupInclusiveXor(data[3].i64);
+
+    data[invocation].i64.x   = subgroupExclusiveAdd(data[0].i64.x);
+    data[invocation].i64.xy  = subgroupExclusiveAdd(data[1].i64.xy);
+    data[invocation].i64.xyz = subgroupExclusiveAdd(data[2].i64.xyz);
+    data[invocation].i64     = subgroupExclusiveAdd(data[3].i64);
+
+    data[invocation].i64.x   = subgroupExclusiveMul(data[0].i64.x);
+    data[invocation].i64.xy  = subgroupExclusiveMul(data[1].i64.xy);
+    data[invocation].i64.xyz = subgroupExclusiveMul(data[2].i64.xyz);
+    data[invocation].i64     = subgroupExclusiveMul(data[3].i64);
+
+    data[invocation].i64.x   = subgroupExclusiveMin(data[0].i64.x);
+    data[invocation].i64.xy  = subgroupExclusiveMin(data[1].i64.xy);
+    data[invocation].i64.xyz = subgroupExclusiveMin(data[2].i64.xyz);
+    data[invocation].i64     = subgroupExclusiveMin(data[3].i64);
+
+    data[invocation].i64.x   = subgroupExclusiveMax(data[0].i64.x);
+    data[invocation].i64.xy  = subgroupExclusiveMax(data[1].i64.xy);
+    data[invocation].i64.xyz = subgroupExclusiveMax(data[2].i64.xyz);
+    data[invocation].i64     = subgroupExclusiveMax(data[3].i64);
+
+    data[invocation].i64.x   = subgroupExclusiveAnd(data[0].i64.x);
+    data[invocation].i64.xy  = subgroupExclusiveAnd(data[1].i64.xy);
+    data[invocation].i64.xyz = subgroupExclusiveAnd(data[2].i64.xyz);
+    data[invocation].i64     = subgroupExclusiveAnd(data[3].i64);
+
+    data[invocation].i64.x   = subgroupExclusiveOr(data[0].i64.x);
+    data[invocation].i64.xy  = subgroupExclusiveOr(data[1].i64.xy);
+    data[invocation].i64.xyz = subgroupExclusiveOr(data[2].i64.xyz);
+    data[invocation].i64     = subgroupExclusiveOr(data[3].i64);
+
+    data[invocation].i64.x   = subgroupExclusiveXor(data[0].i64.x);
+    data[invocation].i64.xy  = subgroupExclusiveXor(data[1].i64.xy);
+    data[invocation].i64.xyz = subgroupExclusiveXor(data[2].i64.xyz);
+    data[invocation].i64     = subgroupExclusiveXor(data[3].i64);
+
+    data[invocation].u64.x   = subgroupAdd(data[0].u64.x);
+    data[invocation].u64.xy  = subgroupAdd(data[1].u64.xy);
+    data[invocation].u64.xyz = subgroupAdd(data[2].u64.xyz);
+    data[invocation].u64     = subgroupAdd(data[3].u64);
+
+    data[invocation].u64.x   = subgroupMul(data[0].u64.x);
+    data[invocation].u64.xy  = subgroupMul(data[1].u64.xy);
+    data[invocation].u64.xyz = subgroupMul(data[2].u64.xyz);
+    data[invocation].u64     = subgroupMul(data[3].u64);
+
+    data[invocation].u64.x   = subgroupMin(data[0].u64.x);
+    data[invocation].u64.xy  = subgroupMin(data[1].u64.xy);
+    data[invocation].u64.xyz = subgroupMin(data[2].u64.xyz);
+    data[invocation].u64     = subgroupMin(data[3].u64);
+
+    data[invocation].u64.x   = subgroupMax(data[0].u64.x);
+    data[invocation].u64.xy  = subgroupMax(data[1].u64.xy);
+    data[invocation].u64.xyz = subgroupMax(data[2].u64.xyz);
+    data[invocation].u64     = subgroupMax(data[3].u64);
+
+    data[invocation].u64.x   = subgroupAnd(data[0].u64.x);
+    data[invocation].u64.xy  = subgroupAnd(data[1].u64.xy);
+    data[invocation].u64.xyz = subgroupAnd(data[2].u64.xyz);
+    data[invocation].u64     = subgroupAnd(data[3].u64);
+
+    data[invocation].u64.x   = subgroupOr(data[0].u64.x);
+    data[invocation].u64.xy  = subgroupOr(data[1].u64.xy);
+    data[invocation].u64.xyz = subgroupOr(data[2].u64.xyz);
+    data[invocation].u64     = subgroupOr(data[3].u64);
+
+    data[invocation].u64.x   = subgroupXor(data[0].u64.x);
+    data[invocation].u64.xy  = subgroupXor(data[1].u64.xy);
+    data[invocation].u64.xyz = subgroupXor(data[2].u64.xyz);
+    data[invocation].u64     = subgroupXor(data[3].u64);
+
+    data[invocation].u64.x   = subgroupInclusiveAdd(data[0].u64.x);
+    data[invocation].u64.xy  = subgroupInclusiveAdd(data[1].u64.xy);
+    data[invocation].u64.xyz = subgroupInclusiveAdd(data[2].u64.xyz);
+    data[invocation].u64     = subgroupInclusiveAdd(data[3].u64);
+
+    data[invocation].u64.x   = subgroupInclusiveMul(data[0].u64.x);
+    data[invocation].u64.xy  = subgroupInclusiveMul(data[1].u64.xy);
+    data[invocation].u64.xyz = subgroupInclusiveMul(data[2].u64.xyz);
+    data[invocation].u64     = subgroupInclusiveMul(data[3].u64);
+
+    data[invocation].u64.x   = subgroupInclusiveMin(data[0].u64.x);
+    data[invocation].u64.xy  = subgroupInclusiveMin(data[1].u64.xy);
+    data[invocation].u64.xyz = subgroupInclusiveMin(data[2].u64.xyz);
+    data[invocation].u64     = subgroupInclusiveMin(data[3].u64);
+
+    data[invocation].u64.x   = subgroupInclusiveMax(data[0].u64.x);
+    data[invocation].u64.xy  = subgroupInclusiveMax(data[1].u64.xy);
+    data[invocation].u64.xyz = subgroupInclusiveMax(data[2].u64.xyz);
+    data[invocation].u64     = subgroupInclusiveMax(data[3].u64);
+
+    data[invocation].u64.x   = subgroupInclusiveAnd(data[0].u64.x);
+    data[invocation].u64.xy  = subgroupInclusiveAnd(data[1].u64.xy);
+    data[invocation].u64.xyz = subgroupInclusiveAnd(data[2].u64.xyz);
+    data[invocation].u64     = subgroupInclusiveAnd(data[3].u64);
+
+    data[invocation].u64.x   = subgroupInclusiveOr(data[0].u64.x);
+    data[invocation].u64.xy  = subgroupInclusiveOr(data[1].u64.xy);
+    data[invocation].u64.xyz = subgroupInclusiveOr(data[2].u64.xyz);
+    data[invocation].u64     = subgroupInclusiveOr(data[3].u64);
+
+    data[invocation].u64.x   = subgroupInclusiveXor(data[0].u64.x);
+    data[invocation].u64.xy  = subgroupInclusiveXor(data[1].u64.xy);
+    data[invocation].u64.xyz = subgroupInclusiveXor(data[2].u64.xyz);
+    data[invocation].u64     = subgroupInclusiveXor(data[3].u64);
+
+    data[invocation].u64.x   = subgroupExclusiveAdd(data[0].u64.x);
+    data[invocation].u64.xy  = subgroupExclusiveAdd(data[1].u64.xy);
+    data[invocation].u64.xyz = subgroupExclusiveAdd(data[2].u64.xyz);
+    data[invocation].u64     = subgroupExclusiveAdd(data[3].u64);
+
+    data[invocation].u64.x   = subgroupExclusiveMul(data[0].u64.x);
+    data[invocation].u64.xy  = subgroupExclusiveMul(data[1].u64.xy);
+    data[invocation].u64.xyz = subgroupExclusiveMul(data[2].u64.xyz);
+    data[invocation].u64     = subgroupExclusiveMul(data[3].u64);
+
+    data[invocation].u64.x   = subgroupExclusiveMin(data[0].u64.x);
+    data[invocation].u64.xy  = subgroupExclusiveMin(data[1].u64.xy);
+    data[invocation].u64.xyz = subgroupExclusiveMin(data[2].u64.xyz);
+    data[invocation].u64     = subgroupExclusiveMin(data[3].u64);
+
+    data[invocation].u64.x   = subgroupExclusiveMax(data[0].u64.x);
+    data[invocation].u64.xy  = subgroupExclusiveMax(data[1].u64.xy);
+    data[invocation].u64.xyz = subgroupExclusiveMax(data[2].u64.xyz);
+    data[invocation].u64     = subgroupExclusiveMax(data[3].u64);
+
+    data[invocation].u64.x   = subgroupExclusiveAnd(data[0].u64.x);
+    data[invocation].u64.xy  = subgroupExclusiveAnd(data[1].u64.xy);
+    data[invocation].u64.xyz = subgroupExclusiveAnd(data[2].u64.xyz);
+    data[invocation].u64     = subgroupExclusiveAnd(data[3].u64);
+
+    data[invocation].u64.x   = subgroupExclusiveOr(data[0].u64.x);
+    data[invocation].u64.xy  = subgroupExclusiveOr(data[1].u64.xy);
+    data[invocation].u64.xyz = subgroupExclusiveOr(data[2].u64.xyz);
+    data[invocation].u64     = subgroupExclusiveOr(data[3].u64);
+
+    data[invocation].u64.x   = subgroupExclusiveXor(data[0].u64.x);
+    data[invocation].u64.xy  = subgroupExclusiveXor(data[1].u64.xy);
+    data[invocation].u64.xyz = subgroupExclusiveXor(data[2].u64.xyz);
+    data[invocation].u64     = subgroupExclusiveXor(data[3].u64);
+
+    data[invocation].f16.x   = subgroupAdd(data[0].f16.x);
+    data[invocation].f16.xy  = subgroupAdd(data[1].f16.xy);
+    data[invocation].f16.xyz = subgroupAdd(data[2].f16.xyz);
+    data[invocation].f16     = subgroupAdd(data[3].f16);
+
+    data[invocation].f16.x   = subgroupMul(data[0].f16.x);
+    data[invocation].f16.xy  = subgroupMul(data[1].f16.xy);
+    data[invocation].f16.xyz = subgroupMul(data[2].f16.xyz);
+    data[invocation].f16     = subgroupMul(data[3].f16);
+
+    data[invocation].f16.x   = subgroupMin(data[0].f16.x);
+    data[invocation].f16.xy  = subgroupMin(data[1].f16.xy);
+    data[invocation].f16.xyz = subgroupMin(data[2].f16.xyz);
+    data[invocation].f16     = subgroupMin(data[3].f16);
+
+    data[invocation].f16.x   = subgroupMax(data[0].f16.x);
+    data[invocation].f16.xy  = subgroupMax(data[1].f16.xy);
+    data[invocation].f16.xyz = subgroupMax(data[2].f16.xyz);
+    data[invocation].f16     = subgroupMax(data[3].f16);
+
+    data[invocation].f16.x   = subgroupInclusiveAdd(data[0].f16.x);
+    data[invocation].f16.xy  = subgroupInclusiveAdd(data[1].f16.xy);
+    data[invocation].f16.xyz = subgroupInclusiveAdd(data[2].f16.xyz);
+    data[invocation].f16     = subgroupInclusiveAdd(data[3].f16);
+
+    data[invocation].f16.x   = subgroupInclusiveMul(data[0].f16.x);
+    data[invocation].f16.xy  = subgroupInclusiveMul(data[1].f16.xy);
+    data[invocation].f16.xyz = subgroupInclusiveMul(data[2].f16.xyz);
+    data[invocation].f16     = subgroupInclusiveMul(data[3].f16);
+
+    data[invocation].f16.x   = subgroupInclusiveMin(data[0].f16.x);
+    data[invocation].f16.xy  = subgroupInclusiveMin(data[1].f16.xy);
+    data[invocation].f16.xyz = subgroupInclusiveMin(data[2].f16.xyz);
+    data[invocation].f16     = subgroupInclusiveMin(data[3].f16);
+
+    data[invocation].f16.x   = subgroupInclusiveMax(data[0].f16.x);
+    data[invocation].f16.xy  = subgroupInclusiveMax(data[1].f16.xy);
+    data[invocation].f16.xyz = subgroupInclusiveMax(data[2].f16.xyz);
+    data[invocation].f16     = subgroupInclusiveMax(data[3].f16);
+
+    data[invocation].f16.x   = subgroupExclusiveAdd(data[0].f16.x);
+    data[invocation].f16.xy  = subgroupExclusiveAdd(data[1].f16.xy);
+    data[invocation].f16.xyz = subgroupExclusiveAdd(data[2].f16.xyz);
+    data[invocation].f16     = subgroupExclusiveAdd(data[3].f16);
+
+    data[invocation].f16.x   = subgroupExclusiveMul(data[0].f16.x);
+    data[invocation].f16.xy  = subgroupExclusiveMul(data[1].f16.xy);
+    data[invocation].f16.xyz = subgroupExclusiveMul(data[2].f16.xyz);
+    data[invocation].f16     = subgroupExclusiveMul(data[3].f16);
+
+    data[invocation].f16.x   = subgroupExclusiveMin(data[0].f16.x);
+    data[invocation].f16.xy  = subgroupExclusiveMin(data[1].f16.xy);
+    data[invocation].f16.xyz = subgroupExclusiveMin(data[2].f16.xyz);
+    data[invocation].f16     = subgroupExclusiveMin(data[3].f16);
+
+    data[invocation].f16.x   = subgroupExclusiveMax(data[0].f16.x);
+    data[invocation].f16.xy  = subgroupExclusiveMax(data[1].f16.xy);
+    data[invocation].f16.xyz = subgroupExclusiveMax(data[2].f16.xyz);
+    data[invocation].f16     = subgroupExclusiveMax(data[3].f16);
+}
diff --git a/Test/spv.subgroupExtendedTypesBallot.comp b/Test/spv.subgroupExtendedTypesBallot.comp
new file mode 100644
index 0000000..22d29cb
--- /dev/null
+++ b/Test/spv.subgroupExtendedTypesBallot.comp
@@ -0,0 +1,88 @@
+#version 450
+
+#extension GL_KHR_shader_subgroup_ballot: enable
+#extension GL_EXT_shader_subgroup_extended_types_int8: enable
+#extension GL_EXT_shader_subgroup_extended_types_int16: enable
+#extension GL_EXT_shader_subgroup_extended_types_int64: enable
+#extension GL_EXT_shader_subgroup_extended_types_float16: enable
+
+layout (local_size_x = 8) in; // compute work-group size: 8 invocations along x
+
+layout(binding = 0) buffer Buffers // buffer block at binding 0: one 4-component vector per extended type under test
+{
+    i8vec4 i8;   // signed 8-bit integer vector
+    u8vec4 u8;   // unsigned 8-bit integer vector
+    i16vec4 i16; // signed 16-bit integer vector
+    u16vec4 u16; // unsigned 16-bit integer vector
+    i64vec4 i64; // signed 64-bit integer vector
+    u64vec4 u64; // unsigned 64-bit integer vector
+    f16vec4 f16; // 16-bit floating-point vector
+} data[4]; // four block instances; main() reads data[0..3] and writes data[invocation]
+
+void main() // calls subgroupBroadcast / subgroupBroadcastFirst on every member of Buffers at scalar, vec2, vec3 and vec4 width
+{
+    uint invocation = (gl_SubgroupInvocationID + gl_SubgroupSize) % 4; // destination block index, reduced modulo 4 to stay inside data[4]
+
+    data[invocation].i8.x   = subgroupBroadcast(data[0].i8.x,    3); // i8 group; the literal second argument 3 is the invocation id operand of subgroupBroadcast
+    data[invocation].i8.xy  = subgroupBroadcast(data[1].i8.xy,   3);
+    data[invocation].i8.xyz = subgroupBroadcast(data[2].i8.xyz,  3);
+    data[invocation].i8     = subgroupBroadcast(data[3].i8,      3);
+    data[invocation].i8.x   = subgroupBroadcastFirst(data[0].i8.x);
+    data[invocation].i8.xy  = subgroupBroadcastFirst(data[1].i8.xy);
+    data[invocation].i8.xyz = subgroupBroadcastFirst(data[2].i8.xyz);
+    data[invocation].i8     = subgroupBroadcastFirst(data[3].i8);
+
+    data[invocation].u8.x   = subgroupBroadcast(data[0].u8.x,    3); // u8 group, same eight calls
+    data[invocation].u8.xy  = subgroupBroadcast(data[1].u8.xy,   3);
+    data[invocation].u8.xyz = subgroupBroadcast(data[2].u8.xyz,  3);
+    data[invocation].u8     = subgroupBroadcast(data[3].u8,      3);
+    data[invocation].u8.x   = subgroupBroadcastFirst(data[0].u8.x);
+    data[invocation].u8.xy  = subgroupBroadcastFirst(data[1].u8.xy);
+    data[invocation].u8.xyz = subgroupBroadcastFirst(data[2].u8.xyz);
+    data[invocation].u8     = subgroupBroadcastFirst(data[3].u8);
+
+    data[invocation].i16.x   = subgroupBroadcast(data[0].i16.x,    3); // i16 group
+    data[invocation].i16.xy  = subgroupBroadcast(data[1].i16.xy,   3);
+    data[invocation].i16.xyz = subgroupBroadcast(data[2].i16.xyz,  3);
+    data[invocation].i16     = subgroupBroadcast(data[3].i16,      3);
+    data[invocation].i16.x   = subgroupBroadcastFirst(data[0].i16.x);
+    data[invocation].i16.xy  = subgroupBroadcastFirst(data[1].i16.xy);
+    data[invocation].i16.xyz = subgroupBroadcastFirst(data[2].i16.xyz);
+    data[invocation].i16     = subgroupBroadcastFirst(data[3].i16);
+
+    data[invocation].u16.x   = subgroupBroadcast(data[0].u16.x,    3); // u16 group
+    data[invocation].u16.xy  = subgroupBroadcast(data[1].u16.xy,   3);
+    data[invocation].u16.xyz = subgroupBroadcast(data[2].u16.xyz,  3);
+    data[invocation].u16     = subgroupBroadcast(data[3].u16,      3);
+    data[invocation].u16.x   = subgroupBroadcastFirst(data[0].u16.x);
+    data[invocation].u16.xy  = subgroupBroadcastFirst(data[1].u16.xy);
+    data[invocation].u16.xyz = subgroupBroadcastFirst(data[2].u16.xyz);
+    data[invocation].u16     = subgroupBroadcastFirst(data[3].u16);
+
+    data[invocation].i64.x   = subgroupBroadcast(data[0].i64.x,    3); // i64 group
+    data[invocation].i64.xy  = subgroupBroadcast(data[1].i64.xy,   3);
+    data[invocation].i64.xyz = subgroupBroadcast(data[2].i64.xyz,  3);
+    data[invocation].i64     = subgroupBroadcast(data[3].i64,      3);
+    data[invocation].i64.x   = subgroupBroadcastFirst(data[0].i64.x);
+    data[invocation].i64.xy  = subgroupBroadcastFirst(data[1].i64.xy);
+    data[invocation].i64.xyz = subgroupBroadcastFirst(data[2].i64.xyz);
+    data[invocation].i64     = subgroupBroadcastFirst(data[3].i64);
+
+    data[invocation].u64.x   = subgroupBroadcast(data[0].u64.x,    3); // u64 group
+    data[invocation].u64.xy  = subgroupBroadcast(data[1].u64.xy,   3);
+    data[invocation].u64.xyz = subgroupBroadcast(data[2].u64.xyz,  3);
+    data[invocation].u64     = subgroupBroadcast(data[3].u64,      3);
+    data[invocation].u64.x   = subgroupBroadcastFirst(data[0].u64.x);
+    data[invocation].u64.xy  = subgroupBroadcastFirst(data[1].u64.xy);
+    data[invocation].u64.xyz = subgroupBroadcastFirst(data[2].u64.xyz);
+    data[invocation].u64     = subgroupBroadcastFirst(data[3].u64);
+
+    data[invocation].f16.x   = subgroupBroadcast(data[0].f16.x,    3); // f16 group
+    data[invocation].f16.xy  = subgroupBroadcast(data[1].f16.xy,   3);
+    data[invocation].f16.xyz = subgroupBroadcast(data[2].f16.xyz,  3);
+    data[invocation].f16     = subgroupBroadcast(data[3].f16,      3);
+    data[invocation].f16.x   = subgroupBroadcastFirst(data[0].f16.x);
+    data[invocation].f16.xy  = subgroupBroadcastFirst(data[1].f16.xy);
+    data[invocation].f16.xyz = subgroupBroadcastFirst(data[2].f16.xyz);
+    data[invocation].f16     = subgroupBroadcastFirst(data[3].f16);
+}
diff --git a/Test/spv.subgroupExtendedTypesBallotNeg.comp b/Test/spv.subgroupExtendedTypesBallotNeg.comp
new file mode 100644
index 0000000..240ab59
--- /dev/null
+++ b/Test/spv.subgroupExtendedTypesBallotNeg.comp
@@ -0,0 +1,88 @@
+#version 450
+
+#extension GL_KHR_shader_subgroup_ballot: enable
+#extension GL_EXT_shader_explicit_arithmetic_types_int8: enable
+#extension GL_EXT_shader_explicit_arithmetic_types_int16: enable
+#extension GL_EXT_shader_explicit_arithmetic_types_int64: enable
+#extension GL_EXT_shader_explicit_arithmetic_types_float16: enable
+
+layout (local_size_x = 8) in; // compute work-group size: 8 invocations along x
+
+layout(binding = 0) buffer Buffers // buffer block at binding 0: one 4-component vector per extended type under test
+{
+    i8vec4 i8;   // signed 8-bit integer vector
+    u8vec4 u8;   // unsigned 8-bit integer vector
+    i16vec4 i16; // signed 16-bit integer vector
+    u16vec4 u16; // unsigned 16-bit integer vector
+    i64vec4 i64; // signed 64-bit integer vector
+    u64vec4 u64; // unsigned 64-bit integer vector
+    f16vec4 f16; // 16-bit floating-point vector
+} data[4]; // four block instances; main() reads data[0..3] and writes data[invocation]
+
+void main() // NOTE(review): this file enables GL_EXT_shader_explicit_arithmetic_types_* but not GL_EXT_shader_subgroup_extended_types_*; presumably every call below is expected to be rejected - confirm against the expected results
+{
+    uint invocation = (gl_SubgroupInvocationID + gl_SubgroupSize) % 4; // destination block index, reduced modulo 4 to stay inside data[4]
+
+    data[invocation].i8.x   = subgroupBroadcast(data[0].i8.x,    3); // i8 group; the literal second argument 3 is the invocation id operand of subgroupBroadcast
+    data[invocation].i8.xy  = subgroupBroadcast(data[1].i8.xy,   3);
+    data[invocation].i8.xyz = subgroupBroadcast(data[2].i8.xyz,  3);
+    data[invocation].i8     = subgroupBroadcast(data[3].i8,      3);
+    data[invocation].i8.x   = subgroupBroadcastFirst(data[0].i8.x);
+    data[invocation].i8.xy  = subgroupBroadcastFirst(data[1].i8.xy);
+    data[invocation].i8.xyz = subgroupBroadcastFirst(data[2].i8.xyz);
+    data[invocation].i8     = subgroupBroadcastFirst(data[3].i8);
+
+    data[invocation].u8.x   = subgroupBroadcast(data[0].u8.x,    3); // u8 group, same eight calls
+    data[invocation].u8.xy  = subgroupBroadcast(data[1].u8.xy,   3);
+    data[invocation].u8.xyz = subgroupBroadcast(data[2].u8.xyz,  3);
+    data[invocation].u8     = subgroupBroadcast(data[3].u8,      3);
+    data[invocation].u8.x   = subgroupBroadcastFirst(data[0].u8.x);
+    data[invocation].u8.xy  = subgroupBroadcastFirst(data[1].u8.xy);
+    data[invocation].u8.xyz = subgroupBroadcastFirst(data[2].u8.xyz);
+    data[invocation].u8     = subgroupBroadcastFirst(data[3].u8);
+
+    data[invocation].i16.x   = subgroupBroadcast(data[0].i16.x,    3); // i16 group
+    data[invocation].i16.xy  = subgroupBroadcast(data[1].i16.xy,   3);
+    data[invocation].i16.xyz = subgroupBroadcast(data[2].i16.xyz,  3);
+    data[invocation].i16     = subgroupBroadcast(data[3].i16,      3);
+    data[invocation].i16.x   = subgroupBroadcastFirst(data[0].i16.x);
+    data[invocation].i16.xy  = subgroupBroadcastFirst(data[1].i16.xy);
+    data[invocation].i16.xyz = subgroupBroadcastFirst(data[2].i16.xyz);
+    data[invocation].i16     = subgroupBroadcastFirst(data[3].i16);
+
+    data[invocation].u16.x   = subgroupBroadcast(data[0].u16.x,    3); // u16 group
+    data[invocation].u16.xy  = subgroupBroadcast(data[1].u16.xy,   3);
+    data[invocation].u16.xyz = subgroupBroadcast(data[2].u16.xyz,  3);
+    data[invocation].u16     = subgroupBroadcast(data[3].u16,      3);
+    data[invocation].u16.x   = subgroupBroadcastFirst(data[0].u16.x);
+    data[invocation].u16.xy  = subgroupBroadcastFirst(data[1].u16.xy);
+    data[invocation].u16.xyz = subgroupBroadcastFirst(data[2].u16.xyz);
+    data[invocation].u16     = subgroupBroadcastFirst(data[3].u16);
+
+    data[invocation].i64.x   = subgroupBroadcast(data[0].i64.x,    3); // i64 group
+    data[invocation].i64.xy  = subgroupBroadcast(data[1].i64.xy,   3);
+    data[invocation].i64.xyz = subgroupBroadcast(data[2].i64.xyz,  3);
+    data[invocation].i64     = subgroupBroadcast(data[3].i64,      3);
+    data[invocation].i64.x   = subgroupBroadcastFirst(data[0].i64.x);
+    data[invocation].i64.xy  = subgroupBroadcastFirst(data[1].i64.xy);
+    data[invocation].i64.xyz = subgroupBroadcastFirst(data[2].i64.xyz);
+    data[invocation].i64     = subgroupBroadcastFirst(data[3].i64);
+
+    data[invocation].u64.x   = subgroupBroadcast(data[0].u64.x,    3); // u64 group
+    data[invocation].u64.xy  = subgroupBroadcast(data[1].u64.xy,   3);
+    data[invocation].u64.xyz = subgroupBroadcast(data[2].u64.xyz,  3);
+    data[invocation].u64     = subgroupBroadcast(data[3].u64,      3);
+    data[invocation].u64.x   = subgroupBroadcastFirst(data[0].u64.x);
+    data[invocation].u64.xy  = subgroupBroadcastFirst(data[1].u64.xy);
+    data[invocation].u64.xyz = subgroupBroadcastFirst(data[2].u64.xyz);
+    data[invocation].u64     = subgroupBroadcastFirst(data[3].u64);
+
+    data[invocation].f16.x   = subgroupBroadcast(data[0].f16.x,    3); // f16 group
+    data[invocation].f16.xy  = subgroupBroadcast(data[1].f16.xy,   3);
+    data[invocation].f16.xyz = subgroupBroadcast(data[2].f16.xyz,  3);
+    data[invocation].f16     = subgroupBroadcast(data[3].f16,      3);
+    data[invocation].f16.x   = subgroupBroadcastFirst(data[0].f16.x);
+    data[invocation].f16.xy  = subgroupBroadcastFirst(data[1].f16.xy);
+    data[invocation].f16.xyz = subgroupBroadcastFirst(data[2].f16.xyz);
+    data[invocation].f16     = subgroupBroadcastFirst(data[3].f16);
+}
diff --git a/Test/spv.subgroupExtendedTypesClustered.comp b/Test/spv.subgroupExtendedTypesClustered.comp
new file mode 100644
index 0000000..a215cbf
--- /dev/null
+++ b/Test/spv.subgroupExtendedTypesClustered.comp
@@ -0,0 +1,255 @@
+#version 450
+
+#extension GL_KHR_shader_subgroup_clustered: enable
+#extension GL_EXT_shader_subgroup_extended_types_int8: enable
+#extension GL_EXT_shader_subgroup_extended_types_int16: enable
+#extension GL_EXT_shader_subgroup_extended_types_int64: enable
+#extension GL_EXT_shader_subgroup_extended_types_float16: enable
+
+layout (local_size_x = 8) in; // compute work-group size: 8 invocations along x
+
+layout(binding = 0) buffer Buffers // buffer block at binding 0: one 4-component vector per extended type under test
+{
+    i8vec4 i8;   // signed 8-bit integer vector
+    u8vec4 u8;   // unsigned 8-bit integer vector
+    i16vec4 i16; // signed 16-bit integer vector
+    u16vec4 u16; // unsigned 16-bit integer vector
+    i64vec4 i64; // signed 64-bit integer vector
+    u64vec4 u64; // unsigned 64-bit integer vector
+    f16vec4 f16; // 16-bit floating-point vector
+} data[4]; // four block instances; main() reads data[0..3] and writes data[invocation]
+
+void main() // calls the subgroupClustered* built-ins on every member of Buffers at scalar, vec2, vec3 and vec4 width
+{
+    uint invocation = (gl_SubgroupInvocationID + gl_SubgroupSize) % 4; // destination block index, reduced modulo 4 to stay inside data[4]
+
+    data[invocation].i8.x   = subgroupClusteredAdd(data[0].i8.x, 1); // i8: Add, Mul, Min, Max, And, Or, Xor; the literal second argument 1 is the cluster size operand
+    data[invocation].i8.xy  = subgroupClusteredAdd(data[1].i8.xy, 1);
+    data[invocation].i8.xyz = subgroupClusteredAdd(data[2].i8.xyz, 1);
+    data[invocation].i8     = subgroupClusteredAdd(data[3].i8, 1);
+
+    data[invocation].i8.x   = subgroupClusteredMul(data[0].i8.x, 1);
+    data[invocation].i8.xy  = subgroupClusteredMul(data[1].i8.xy, 1);
+    data[invocation].i8.xyz = subgroupClusteredMul(data[2].i8.xyz, 1);
+    data[invocation].i8     = subgroupClusteredMul(data[3].i8, 1);
+
+    data[invocation].i8.x   = subgroupClusteredMin(data[0].i8.x, 1);
+    data[invocation].i8.xy  = subgroupClusteredMin(data[1].i8.xy, 1);
+    data[invocation].i8.xyz = subgroupClusteredMin(data[2].i8.xyz, 1);
+    data[invocation].i8     = subgroupClusteredMin(data[3].i8, 1);
+
+    data[invocation].i8.x   = subgroupClusteredMax(data[0].i8.x, 1);
+    data[invocation].i8.xy  = subgroupClusteredMax(data[1].i8.xy, 1);
+    data[invocation].i8.xyz = subgroupClusteredMax(data[2].i8.xyz, 1);
+    data[invocation].i8     = subgroupClusteredMax(data[3].i8, 1);
+
+    data[invocation].i8.x   = subgroupClusteredAnd(data[0].i8.x, 1);
+    data[invocation].i8.xy  = subgroupClusteredAnd(data[1].i8.xy, 1);
+    data[invocation].i8.xyz = subgroupClusteredAnd(data[2].i8.xyz, 1);
+    data[invocation].i8     = subgroupClusteredAnd(data[3].i8, 1);
+
+    data[invocation].i8.x   = subgroupClusteredOr(data[0].i8.x, 1);
+    data[invocation].i8.xy  = subgroupClusteredOr(data[1].i8.xy, 1);
+    data[invocation].i8.xyz = subgroupClusteredOr(data[2].i8.xyz, 1);
+    data[invocation].i8     = subgroupClusteredOr(data[3].i8, 1);
+
+    data[invocation].i8.x   = subgroupClusteredXor(data[0].i8.x, 1);
+    data[invocation].i8.xy  = subgroupClusteredXor(data[1].i8.xy, 1);
+    data[invocation].i8.xyz = subgroupClusteredXor(data[2].i8.xyz, 1);
+    data[invocation].i8     = subgroupClusteredXor(data[3].i8, 1);
+
+    data[invocation].u8.x   = subgroupClusteredAdd(data[0].u8.x, 1); // u8: same seven operations
+    data[invocation].u8.xy  = subgroupClusteredAdd(data[1].u8.xy, 1);
+    data[invocation].u8.xyz = subgroupClusteredAdd(data[2].u8.xyz, 1);
+    data[invocation].u8     = subgroupClusteredAdd(data[3].u8, 1);
+
+    data[invocation].u8.x   = subgroupClusteredMul(data[0].u8.x, 1);
+    data[invocation].u8.xy  = subgroupClusteredMul(data[1].u8.xy, 1);
+    data[invocation].u8.xyz = subgroupClusteredMul(data[2].u8.xyz, 1);
+    data[invocation].u8     = subgroupClusteredMul(data[3].u8, 1);
+
+    data[invocation].u8.x   = subgroupClusteredMin(data[0].u8.x, 1);
+    data[invocation].u8.xy  = subgroupClusteredMin(data[1].u8.xy, 1);
+    data[invocation].u8.xyz = subgroupClusteredMin(data[2].u8.xyz, 1);
+    data[invocation].u8     = subgroupClusteredMin(data[3].u8, 1);
+
+    data[invocation].u8.x   = subgroupClusteredMax(data[0].u8.x, 1);
+    data[invocation].u8.xy  = subgroupClusteredMax(data[1].u8.xy, 1);
+    data[invocation].u8.xyz = subgroupClusteredMax(data[2].u8.xyz, 1);
+    data[invocation].u8     = subgroupClusteredMax(data[3].u8, 1);
+
+    data[invocation].u8.x   = subgroupClusteredAnd(data[0].u8.x, 1);
+    data[invocation].u8.xy  = subgroupClusteredAnd(data[1].u8.xy, 1);
+    data[invocation].u8.xyz = subgroupClusteredAnd(data[2].u8.xyz, 1);
+    data[invocation].u8     = subgroupClusteredAnd(data[3].u8, 1);
+
+    data[invocation].u8.x   = subgroupClusteredOr(data[0].u8.x, 1);
+    data[invocation].u8.xy  = subgroupClusteredOr(data[1].u8.xy, 1);
+    data[invocation].u8.xyz = subgroupClusteredOr(data[2].u8.xyz, 1);
+    data[invocation].u8     = subgroupClusteredOr(data[3].u8, 1);
+
+    data[invocation].u8.x   = subgroupClusteredXor(data[0].u8.x, 1);
+    data[invocation].u8.xy  = subgroupClusteredXor(data[1].u8.xy, 1);
+    data[invocation].u8.xyz = subgroupClusteredXor(data[2].u8.xyz, 1);
+    data[invocation].u8     = subgroupClusteredXor(data[3].u8, 1);
+
+    data[invocation].i16.x   = subgroupClusteredAdd(data[0].i16.x, 1); // i16: same seven operations
+    data[invocation].i16.xy  = subgroupClusteredAdd(data[1].i16.xy, 1);
+    data[invocation].i16.xyz = subgroupClusteredAdd(data[2].i16.xyz, 1);
+    data[invocation].i16     = subgroupClusteredAdd(data[3].i16, 1);
+
+    data[invocation].i16.x   = subgroupClusteredMul(data[0].i16.x, 1);
+    data[invocation].i16.xy  = subgroupClusteredMul(data[1].i16.xy, 1);
+    data[invocation].i16.xyz = subgroupClusteredMul(data[2].i16.xyz, 1);
+    data[invocation].i16     = subgroupClusteredMul(data[3].i16, 1);
+
+    data[invocation].i16.x   = subgroupClusteredMin(data[0].i16.x, 1);
+    data[invocation].i16.xy  = subgroupClusteredMin(data[1].i16.xy, 1);
+    data[invocation].i16.xyz = subgroupClusteredMin(data[2].i16.xyz, 1);
+    data[invocation].i16     = subgroupClusteredMin(data[3].i16, 1);
+
+    data[invocation].i16.x   = subgroupClusteredMax(data[0].i16.x, 1);
+    data[invocation].i16.xy  = subgroupClusteredMax(data[1].i16.xy, 1);
+    data[invocation].i16.xyz = subgroupClusteredMax(data[2].i16.xyz, 1);
+    data[invocation].i16     = subgroupClusteredMax(data[3].i16, 1);
+
+    data[invocation].i16.x   = subgroupClusteredAnd(data[0].i16.x, 1);
+    data[invocation].i16.xy  = subgroupClusteredAnd(data[1].i16.xy, 1);
+    data[invocation].i16.xyz = subgroupClusteredAnd(data[2].i16.xyz, 1);
+    data[invocation].i16     = subgroupClusteredAnd(data[3].i16, 1);
+
+    data[invocation].i16.x   = subgroupClusteredOr(data[0].i16.x, 1);
+    data[invocation].i16.xy  = subgroupClusteredOr(data[1].i16.xy, 1);
+    data[invocation].i16.xyz = subgroupClusteredOr(data[2].i16.xyz, 1);
+    data[invocation].i16     = subgroupClusteredOr(data[3].i16, 1);
+
+    data[invocation].i16.x   = subgroupClusteredXor(data[0].i16.x, 1);
+    data[invocation].i16.xy  = subgroupClusteredXor(data[1].i16.xy, 1);
+    data[invocation].i16.xyz = subgroupClusteredXor(data[2].i16.xyz, 1);
+    data[invocation].i16     = subgroupClusteredXor(data[3].i16, 1);
+
+    data[invocation].u16.x   = subgroupClusteredAdd(data[0].u16.x, 1); // u16: same seven operations
+    data[invocation].u16.xy  = subgroupClusteredAdd(data[1].u16.xy, 1);
+    data[invocation].u16.xyz = subgroupClusteredAdd(data[2].u16.xyz, 1);
+    data[invocation].u16     = subgroupClusteredAdd(data[3].u16, 1);
+
+    data[invocation].u16.x   = subgroupClusteredMul(data[0].u16.x, 1);
+    data[invocation].u16.xy  = subgroupClusteredMul(data[1].u16.xy, 1);
+    data[invocation].u16.xyz = subgroupClusteredMul(data[2].u16.xyz, 1);
+    data[invocation].u16     = subgroupClusteredMul(data[3].u16, 1);
+
+    data[invocation].u16.x   = subgroupClusteredMin(data[0].u16.x, 1);
+    data[invocation].u16.xy  = subgroupClusteredMin(data[1].u16.xy, 1);
+    data[invocation].u16.xyz = subgroupClusteredMin(data[2].u16.xyz, 1);
+    data[invocation].u16     = subgroupClusteredMin(data[3].u16, 1);
+
+    data[invocation].u16.x   = subgroupClusteredMax(data[0].u16.x, 1);
+    data[invocation].u16.xy  = subgroupClusteredMax(data[1].u16.xy, 1);
+    data[invocation].u16.xyz = subgroupClusteredMax(data[2].u16.xyz, 1);
+    data[invocation].u16     = subgroupClusteredMax(data[3].u16, 1);
+
+    data[invocation].u16.x   = subgroupClusteredAnd(data[0].u16.x, 1);
+    data[invocation].u16.xy  = subgroupClusteredAnd(data[1].u16.xy, 1);
+    data[invocation].u16.xyz = subgroupClusteredAnd(data[2].u16.xyz, 1);
+    data[invocation].u16     = subgroupClusteredAnd(data[3].u16, 1);
+
+    data[invocation].u16.x   = subgroupClusteredOr(data[0].u16.x, 1);
+    data[invocation].u16.xy  = subgroupClusteredOr(data[1].u16.xy, 1);
+    data[invocation].u16.xyz = subgroupClusteredOr(data[2].u16.xyz, 1);
+    data[invocation].u16     = subgroupClusteredOr(data[3].u16, 1);
+
+    data[invocation].u16.x   = subgroupClusteredXor(data[0].u16.x, 1);
+    data[invocation].u16.xy  = subgroupClusteredXor(data[1].u16.xy, 1);
+    data[invocation].u16.xyz = subgroupClusteredXor(data[2].u16.xyz, 1);
+    data[invocation].u16     = subgroupClusteredXor(data[3].u16, 1);
+
+    data[invocation].i64.x   = subgroupClusteredAdd(data[0].i64.x, 1); // i64: same seven operations
+    data[invocation].i64.xy  = subgroupClusteredAdd(data[1].i64.xy, 1);
+    data[invocation].i64.xyz = subgroupClusteredAdd(data[2].i64.xyz, 1);
+    data[invocation].i64     = subgroupClusteredAdd(data[3].i64, 1);
+
+    data[invocation].i64.x   = subgroupClusteredMul(data[0].i64.x, 1);
+    data[invocation].i64.xy  = subgroupClusteredMul(data[1].i64.xy, 1);
+    data[invocation].i64.xyz = subgroupClusteredMul(data[2].i64.xyz, 1);
+    data[invocation].i64     = subgroupClusteredMul(data[3].i64, 1);
+
+    data[invocation].i64.x   = subgroupClusteredMin(data[0].i64.x, 1);
+    data[invocation].i64.xy  = subgroupClusteredMin(data[1].i64.xy, 1);
+    data[invocation].i64.xyz = subgroupClusteredMin(data[2].i64.xyz, 1);
+    data[invocation].i64     = subgroupClusteredMin(data[3].i64, 1);
+
+    data[invocation].i64.x   = subgroupClusteredMax(data[0].i64.x, 1);
+    data[invocation].i64.xy  = subgroupClusteredMax(data[1].i64.xy, 1);
+    data[invocation].i64.xyz = subgroupClusteredMax(data[2].i64.xyz, 1);
+    data[invocation].i64     = subgroupClusteredMax(data[3].i64, 1);
+
+    data[invocation].i64.x   = subgroupClusteredAnd(data[0].i64.x, 1);
+    data[invocation].i64.xy  = subgroupClusteredAnd(data[1].i64.xy, 1);
+    data[invocation].i64.xyz = subgroupClusteredAnd(data[2].i64.xyz, 1);
+    data[invocation].i64     = subgroupClusteredAnd(data[3].i64, 1);
+
+    data[invocation].i64.x   = subgroupClusteredOr(data[0].i64.x, 1);
+    data[invocation].i64.xy  = subgroupClusteredOr(data[1].i64.xy, 1);
+    data[invocation].i64.xyz = subgroupClusteredOr(data[2].i64.xyz, 1);
+    data[invocation].i64     = subgroupClusteredOr(data[3].i64, 1);
+
+    data[invocation].i64.x   = subgroupClusteredXor(data[0].i64.x, 1);
+    data[invocation].i64.xy  = subgroupClusteredXor(data[1].i64.xy, 1);
+    data[invocation].i64.xyz = subgroupClusteredXor(data[2].i64.xyz, 1);
+    data[invocation].i64     = subgroupClusteredXor(data[3].i64, 1);
+
+    data[invocation].u64.x   = subgroupClusteredAdd(data[0].u64.x, 1); // u64: same seven operations
+    data[invocation].u64.xy  = subgroupClusteredAdd(data[1].u64.xy, 1);
+    data[invocation].u64.xyz = subgroupClusteredAdd(data[2].u64.xyz, 1);
+    data[invocation].u64     = subgroupClusteredAdd(data[3].u64, 1);
+
+    data[invocation].u64.x   = subgroupClusteredMul(data[0].u64.x, 1);
+    data[invocation].u64.xy  = subgroupClusteredMul(data[1].u64.xy, 1);
+    data[invocation].u64.xyz = subgroupClusteredMul(data[2].u64.xyz, 1);
+    data[invocation].u64     = subgroupClusteredMul(data[3].u64, 1);
+
+    data[invocation].u64.x   = subgroupClusteredMin(data[0].u64.x, 1);
+    data[invocation].u64.xy  = subgroupClusteredMin(data[1].u64.xy, 1);
+    data[invocation].u64.xyz = subgroupClusteredMin(data[2].u64.xyz, 1);
+    data[invocation].u64     = subgroupClusteredMin(data[3].u64, 1);
+
+    data[invocation].u64.x   = subgroupClusteredMax(data[0].u64.x, 1);
+    data[invocation].u64.xy  = subgroupClusteredMax(data[1].u64.xy, 1);
+    data[invocation].u64.xyz = subgroupClusteredMax(data[2].u64.xyz, 1);
+    data[invocation].u64     = subgroupClusteredMax(data[3].u64, 1);
+
+    data[invocation].u64.x   = subgroupClusteredAnd(data[0].u64.x, 1);
+    data[invocation].u64.xy  = subgroupClusteredAnd(data[1].u64.xy, 1);
+    data[invocation].u64.xyz = subgroupClusteredAnd(data[2].u64.xyz, 1);
+    data[invocation].u64     = subgroupClusteredAnd(data[3].u64, 1);
+
+    data[invocation].u64.x   = subgroupClusteredOr(data[0].u64.x, 1);
+    data[invocation].u64.xy  = subgroupClusteredOr(data[1].u64.xy, 1);
+    data[invocation].u64.xyz = subgroupClusteredOr(data[2].u64.xyz, 1);
+    data[invocation].u64     = subgroupClusteredOr(data[3].u64, 1);
+
+    data[invocation].u64.x   = subgroupClusteredXor(data[0].u64.x, 1);
+    data[invocation].u64.xy  = subgroupClusteredXor(data[1].u64.xy, 1);
+    data[invocation].u64.xyz = subgroupClusteredXor(data[2].u64.xyz, 1);
+    data[invocation].u64     = subgroupClusteredXor(data[3].u64, 1);
+
+    data[invocation].f16.x   = subgroupClusteredAdd(data[0].f16.x, 1); // f16: only Add, Mul, Min, Max are exercised (no And/Or/Xor calls follow)
+    data[invocation].f16.xy  = subgroupClusteredAdd(data[1].f16.xy, 1);
+    data[invocation].f16.xyz = subgroupClusteredAdd(data[2].f16.xyz, 1);
+    data[invocation].f16     = subgroupClusteredAdd(data[3].f16, 1);
+
+    data[invocation].f16.x   = subgroupClusteredMul(data[0].f16.x, 1);
+    data[invocation].f16.xy  = subgroupClusteredMul(data[1].f16.xy, 1);
+    data[invocation].f16.xyz = subgroupClusteredMul(data[2].f16.xyz, 1);
+    data[invocation].f16     = subgroupClusteredMul(data[3].f16, 1);
+
+    data[invocation].f16.x   = subgroupClusteredMin(data[0].f16.x, 1);
+    data[invocation].f16.xy  = subgroupClusteredMin(data[1].f16.xy, 1);
+    data[invocation].f16.xyz = subgroupClusteredMin(data[2].f16.xyz, 1);
+    data[invocation].f16     = subgroupClusteredMin(data[3].f16, 1);
+
+    data[invocation].f16.x   = subgroupClusteredMax(data[0].f16.x, 1);
+    data[invocation].f16.xy  = subgroupClusteredMax(data[1].f16.xy, 1);
+    data[invocation].f16.xyz = subgroupClusteredMax(data[2].f16.xyz, 1);
+    data[invocation].f16     = subgroupClusteredMax(data[3].f16, 1);
+}
diff --git a/Test/spv.subgroupExtendedTypesClusteredNeg.comp b/Test/spv.subgroupExtendedTypesClusteredNeg.comp
new file mode 100644
index 0000000..d521511
--- /dev/null
+++ b/Test/spv.subgroupExtendedTypesClusteredNeg.comp
@@ -0,0 +1,255 @@
+#version 450
+// NOTE(review): only the explicit_arithmetic_types extensions are enabled below, no GL_EXT_shader_subgroup_extended_types_* one; presumably the subgroup calls in main() are expected to be rejected -- confirm against the expected results.
+#extension GL_KHR_shader_subgroup_clustered: enable
+#extension GL_EXT_shader_explicit_arithmetic_types_int8: enable
+#extension GL_EXT_shader_explicit_arithmetic_types_int16: enable
+#extension GL_EXT_shader_explicit_arithmetic_types_int64: enable
+#extension GL_EXT_shader_explicit_arithmetic_types_float16: enable
+
+layout (local_size_x = 8) in; // work-group size: 8 invocations in x
+
+layout(binding = 0) buffer Buffers // one 4-component member per 8/16/64-bit integer type (signed and unsigned) plus float16
+{
+    i8vec4 i8;
+    u8vec4 u8;
+    i16vec4 i16;
+    u16vec4 u16;
+    i64vec4 i64;
+    u64vec4 u64;
+    f16vec4 f16;
+} data[4]; // array of four blocks; main() reads data[0]..data[3] and writes data[invocation]
+
+void main() // Calls the subgroupClustered* built-ins on every Buffers member at 1, 2, 3 and 4 components, always with a constant second argument of 1.
+{
+    uint invocation = (gl_SubgroupInvocationID + gl_SubgroupSize) % 4; // destination index into data[], always in 0..3
+
+    data[invocation].i8.x   = subgroupClusteredAdd(data[0].i8.x, 1); // i8: Add, Mul, Min, Max, And, Or, Xor
+    data[invocation].i8.xy  = subgroupClusteredAdd(data[1].i8.xy, 1);
+    data[invocation].i8.xyz = subgroupClusteredAdd(data[2].i8.xyz, 1);
+    data[invocation].i8     = subgroupClusteredAdd(data[3].i8, 1);
+
+    data[invocation].i8.x   = subgroupClusteredMul(data[0].i8.x, 1);
+    data[invocation].i8.xy  = subgroupClusteredMul(data[1].i8.xy, 1);
+    data[invocation].i8.xyz = subgroupClusteredMul(data[2].i8.xyz, 1);
+    data[invocation].i8     = subgroupClusteredMul(data[3].i8, 1);
+
+    data[invocation].i8.x   = subgroupClusteredMin(data[0].i8.x, 1);
+    data[invocation].i8.xy  = subgroupClusteredMin(data[1].i8.xy, 1);
+    data[invocation].i8.xyz = subgroupClusteredMin(data[2].i8.xyz, 1);
+    data[invocation].i8     = subgroupClusteredMin(data[3].i8, 1);
+
+    data[invocation].i8.x   = subgroupClusteredMax(data[0].i8.x, 1);
+    data[invocation].i8.xy  = subgroupClusteredMax(data[1].i8.xy, 1);
+    data[invocation].i8.xyz = subgroupClusteredMax(data[2].i8.xyz, 1);
+    data[invocation].i8     = subgroupClusteredMax(data[3].i8, 1);
+
+    data[invocation].i8.x   = subgroupClusteredAnd(data[0].i8.x, 1);
+    data[invocation].i8.xy  = subgroupClusteredAnd(data[1].i8.xy, 1);
+    data[invocation].i8.xyz = subgroupClusteredAnd(data[2].i8.xyz, 1);
+    data[invocation].i8     = subgroupClusteredAnd(data[3].i8, 1);
+
+    data[invocation].i8.x   = subgroupClusteredOr(data[0].i8.x, 1);
+    data[invocation].i8.xy  = subgroupClusteredOr(data[1].i8.xy, 1);
+    data[invocation].i8.xyz = subgroupClusteredOr(data[2].i8.xyz, 1);
+    data[invocation].i8     = subgroupClusteredOr(data[3].i8, 1);
+
+    data[invocation].i8.x   = subgroupClusteredXor(data[0].i8.x, 1);
+    data[invocation].i8.xy  = subgroupClusteredXor(data[1].i8.xy, 1);
+    data[invocation].i8.xyz = subgroupClusteredXor(data[2].i8.xyz, 1);
+    data[invocation].i8     = subgroupClusteredXor(data[3].i8, 1);
+
+    data[invocation].u8.x   = subgroupClusteredAdd(data[0].u8.x, 1); // u8: Add, Mul, Min, Max, And, Or, Xor
+    data[invocation].u8.xy  = subgroupClusteredAdd(data[1].u8.xy, 1);
+    data[invocation].u8.xyz = subgroupClusteredAdd(data[2].u8.xyz, 1);
+    data[invocation].u8     = subgroupClusteredAdd(data[3].u8, 1);
+
+    data[invocation].u8.x   = subgroupClusteredMul(data[0].u8.x, 1);
+    data[invocation].u8.xy  = subgroupClusteredMul(data[1].u8.xy, 1);
+    data[invocation].u8.xyz = subgroupClusteredMul(data[2].u8.xyz, 1);
+    data[invocation].u8     = subgroupClusteredMul(data[3].u8, 1);
+
+    data[invocation].u8.x   = subgroupClusteredMin(data[0].u8.x, 1);
+    data[invocation].u8.xy  = subgroupClusteredMin(data[1].u8.xy, 1);
+    data[invocation].u8.xyz = subgroupClusteredMin(data[2].u8.xyz, 1);
+    data[invocation].u8     = subgroupClusteredMin(data[3].u8, 1);
+
+    data[invocation].u8.x   = subgroupClusteredMax(data[0].u8.x, 1);
+    data[invocation].u8.xy  = subgroupClusteredMax(data[1].u8.xy, 1);
+    data[invocation].u8.xyz = subgroupClusteredMax(data[2].u8.xyz, 1);
+    data[invocation].u8     = subgroupClusteredMax(data[3].u8, 1);
+
+    data[invocation].u8.x   = subgroupClusteredAnd(data[0].u8.x, 1);
+    data[invocation].u8.xy  = subgroupClusteredAnd(data[1].u8.xy, 1);
+    data[invocation].u8.xyz = subgroupClusteredAnd(data[2].u8.xyz, 1);
+    data[invocation].u8     = subgroupClusteredAnd(data[3].u8, 1);
+
+    data[invocation].u8.x   = subgroupClusteredOr(data[0].u8.x, 1);
+    data[invocation].u8.xy  = subgroupClusteredOr(data[1].u8.xy, 1);
+    data[invocation].u8.xyz = subgroupClusteredOr(data[2].u8.xyz, 1);
+    data[invocation].u8     = subgroupClusteredOr(data[3].u8, 1);
+
+    data[invocation].u8.x   = subgroupClusteredXor(data[0].u8.x, 1);
+    data[invocation].u8.xy  = subgroupClusteredXor(data[1].u8.xy, 1);
+    data[invocation].u8.xyz = subgroupClusteredXor(data[2].u8.xyz, 1);
+    data[invocation].u8     = subgroupClusteredXor(data[3].u8, 1);
+
+    data[invocation].i16.x   = subgroupClusteredAdd(data[0].i16.x, 1); // i16: Add, Mul, Min, Max, And, Or, Xor
+    data[invocation].i16.xy  = subgroupClusteredAdd(data[1].i16.xy, 1);
+    data[invocation].i16.xyz = subgroupClusteredAdd(data[2].i16.xyz, 1);
+    data[invocation].i16     = subgroupClusteredAdd(data[3].i16, 1);
+
+    data[invocation].i16.x   = subgroupClusteredMul(data[0].i16.x, 1);
+    data[invocation].i16.xy  = subgroupClusteredMul(data[1].i16.xy, 1);
+    data[invocation].i16.xyz = subgroupClusteredMul(data[2].i16.xyz, 1);
+    data[invocation].i16     = subgroupClusteredMul(data[3].i16, 1);
+
+    data[invocation].i16.x   = subgroupClusteredMin(data[0].i16.x, 1);
+    data[invocation].i16.xy  = subgroupClusteredMin(data[1].i16.xy, 1);
+    data[invocation].i16.xyz = subgroupClusteredMin(data[2].i16.xyz, 1);
+    data[invocation].i16     = subgroupClusteredMin(data[3].i16, 1);
+
+    data[invocation].i16.x   = subgroupClusteredMax(data[0].i16.x, 1);
+    data[invocation].i16.xy  = subgroupClusteredMax(data[1].i16.xy, 1);
+    data[invocation].i16.xyz = subgroupClusteredMax(data[2].i16.xyz, 1);
+    data[invocation].i16     = subgroupClusteredMax(data[3].i16, 1);
+
+    data[invocation].i16.x   = subgroupClusteredAnd(data[0].i16.x, 1);
+    data[invocation].i16.xy  = subgroupClusteredAnd(data[1].i16.xy, 1);
+    data[invocation].i16.xyz = subgroupClusteredAnd(data[2].i16.xyz, 1);
+    data[invocation].i16     = subgroupClusteredAnd(data[3].i16, 1);
+
+    data[invocation].i16.x   = subgroupClusteredOr(data[0].i16.x, 1);
+    data[invocation].i16.xy  = subgroupClusteredOr(data[1].i16.xy, 1);
+    data[invocation].i16.xyz = subgroupClusteredOr(data[2].i16.xyz, 1);
+    data[invocation].i16     = subgroupClusteredOr(data[3].i16, 1);
+
+    data[invocation].i16.x   = subgroupClusteredXor(data[0].i16.x, 1);
+    data[invocation].i16.xy  = subgroupClusteredXor(data[1].i16.xy, 1);
+    data[invocation].i16.xyz = subgroupClusteredXor(data[2].i16.xyz, 1);
+    data[invocation].i16     = subgroupClusteredXor(data[3].i16, 1);
+
+    data[invocation].u16.x   = subgroupClusteredAdd(data[0].u16.x, 1); // u16: Add, Mul, Min, Max, And, Or, Xor
+    data[invocation].u16.xy  = subgroupClusteredAdd(data[1].u16.xy, 1);
+    data[invocation].u16.xyz = subgroupClusteredAdd(data[2].u16.xyz, 1);
+    data[invocation].u16     = subgroupClusteredAdd(data[3].u16, 1);
+
+    data[invocation].u16.x   = subgroupClusteredMul(data[0].u16.x, 1);
+    data[invocation].u16.xy  = subgroupClusteredMul(data[1].u16.xy, 1);
+    data[invocation].u16.xyz = subgroupClusteredMul(data[2].u16.xyz, 1);
+    data[invocation].u16     = subgroupClusteredMul(data[3].u16, 1);
+
+    data[invocation].u16.x   = subgroupClusteredMin(data[0].u16.x, 1);
+    data[invocation].u16.xy  = subgroupClusteredMin(data[1].u16.xy, 1);
+    data[invocation].u16.xyz = subgroupClusteredMin(data[2].u16.xyz, 1);
+    data[invocation].u16     = subgroupClusteredMin(data[3].u16, 1);
+
+    data[invocation].u16.x   = subgroupClusteredMax(data[0].u16.x, 1);
+    data[invocation].u16.xy  = subgroupClusteredMax(data[1].u16.xy, 1);
+    data[invocation].u16.xyz = subgroupClusteredMax(data[2].u16.xyz, 1);
+    data[invocation].u16     = subgroupClusteredMax(data[3].u16, 1);
+
+    data[invocation].u16.x   = subgroupClusteredAnd(data[0].u16.x, 1);
+    data[invocation].u16.xy  = subgroupClusteredAnd(data[1].u16.xy, 1);
+    data[invocation].u16.xyz = subgroupClusteredAnd(data[2].u16.xyz, 1);
+    data[invocation].u16     = subgroupClusteredAnd(data[3].u16, 1);
+
+    data[invocation].u16.x   = subgroupClusteredOr(data[0].u16.x, 1);
+    data[invocation].u16.xy  = subgroupClusteredOr(data[1].u16.xy, 1);
+    data[invocation].u16.xyz = subgroupClusteredOr(data[2].u16.xyz, 1);
+    data[invocation].u16     = subgroupClusteredOr(data[3].u16, 1);
+
+    data[invocation].u16.x   = subgroupClusteredXor(data[0].u16.x, 1);
+    data[invocation].u16.xy  = subgroupClusteredXor(data[1].u16.xy, 1);
+    data[invocation].u16.xyz = subgroupClusteredXor(data[2].u16.xyz, 1);
+    data[invocation].u16     = subgroupClusteredXor(data[3].u16, 1);
+
+    data[invocation].i64.x   = subgroupClusteredAdd(data[0].i64.x, 1); // i64: Add, Mul, Min, Max, And, Or, Xor
+    data[invocation].i64.xy  = subgroupClusteredAdd(data[1].i64.xy, 1);
+    data[invocation].i64.xyz = subgroupClusteredAdd(data[2].i64.xyz, 1);
+    data[invocation].i64     = subgroupClusteredAdd(data[3].i64, 1);
+
+    data[invocation].i64.x   = subgroupClusteredMul(data[0].i64.x, 1);
+    data[invocation].i64.xy  = subgroupClusteredMul(data[1].i64.xy, 1);
+    data[invocation].i64.xyz = subgroupClusteredMul(data[2].i64.xyz, 1);
+    data[invocation].i64     = subgroupClusteredMul(data[3].i64, 1);
+
+    data[invocation].i64.x   = subgroupClusteredMin(data[0].i64.x, 1);
+    data[invocation].i64.xy  = subgroupClusteredMin(data[1].i64.xy, 1);
+    data[invocation].i64.xyz = subgroupClusteredMin(data[2].i64.xyz, 1);
+    data[invocation].i64     = subgroupClusteredMin(data[3].i64, 1);
+
+    data[invocation].i64.x   = subgroupClusteredMax(data[0].i64.x, 1);
+    data[invocation].i64.xy  = subgroupClusteredMax(data[1].i64.xy, 1);
+    data[invocation].i64.xyz = subgroupClusteredMax(data[2].i64.xyz, 1);
+    data[invocation].i64     = subgroupClusteredMax(data[3].i64, 1);
+
+    data[invocation].i64.x   = subgroupClusteredAnd(data[0].i64.x, 1);
+    data[invocation].i64.xy  = subgroupClusteredAnd(data[1].i64.xy, 1);
+    data[invocation].i64.xyz = subgroupClusteredAnd(data[2].i64.xyz, 1);
+    data[invocation].i64     = subgroupClusteredAnd(data[3].i64, 1);
+
+    data[invocation].i64.x   = subgroupClusteredOr(data[0].i64.x, 1);
+    data[invocation].i64.xy  = subgroupClusteredOr(data[1].i64.xy, 1);
+    data[invocation].i64.xyz = subgroupClusteredOr(data[2].i64.xyz, 1);
+    data[invocation].i64     = subgroupClusteredOr(data[3].i64, 1);
+
+    data[invocation].i64.x   = subgroupClusteredXor(data[0].i64.x, 1);
+    data[invocation].i64.xy  = subgroupClusteredXor(data[1].i64.xy, 1);
+    data[invocation].i64.xyz = subgroupClusteredXor(data[2].i64.xyz, 1);
+    data[invocation].i64     = subgroupClusteredXor(data[3].i64, 1);
+
+    data[invocation].u64.x   = subgroupClusteredAdd(data[0].u64.x, 1); // u64: Add, Mul, Min, Max, And, Or, Xor
+    data[invocation].u64.xy  = subgroupClusteredAdd(data[1].u64.xy, 1);
+    data[invocation].u64.xyz = subgroupClusteredAdd(data[2].u64.xyz, 1);
+    data[invocation].u64     = subgroupClusteredAdd(data[3].u64, 1);
+
+    data[invocation].u64.x   = subgroupClusteredMul(data[0].u64.x, 1);
+    data[invocation].u64.xy  = subgroupClusteredMul(data[1].u64.xy, 1);
+    data[invocation].u64.xyz = subgroupClusteredMul(data[2].u64.xyz, 1);
+    data[invocation].u64     = subgroupClusteredMul(data[3].u64, 1);
+
+    data[invocation].u64.x   = subgroupClusteredMin(data[0].u64.x, 1);
+    data[invocation].u64.xy  = subgroupClusteredMin(data[1].u64.xy, 1);
+    data[invocation].u64.xyz = subgroupClusteredMin(data[2].u64.xyz, 1);
+    data[invocation].u64     = subgroupClusteredMin(data[3].u64, 1);
+
+    data[invocation].u64.x   = subgroupClusteredMax(data[0].u64.x, 1);
+    data[invocation].u64.xy  = subgroupClusteredMax(data[1].u64.xy, 1);
+    data[invocation].u64.xyz = subgroupClusteredMax(data[2].u64.xyz, 1);
+    data[invocation].u64     = subgroupClusteredMax(data[3].u64, 1);
+
+    data[invocation].u64.x   = subgroupClusteredAnd(data[0].u64.x, 1);
+    data[invocation].u64.xy  = subgroupClusteredAnd(data[1].u64.xy, 1);
+    data[invocation].u64.xyz = subgroupClusteredAnd(data[2].u64.xyz, 1);
+    data[invocation].u64     = subgroupClusteredAnd(data[3].u64, 1);
+
+    data[invocation].u64.x   = subgroupClusteredOr(data[0].u64.x, 1);
+    data[invocation].u64.xy  = subgroupClusteredOr(data[1].u64.xy, 1);
+    data[invocation].u64.xyz = subgroupClusteredOr(data[2].u64.xyz, 1);
+    data[invocation].u64     = subgroupClusteredOr(data[3].u64, 1);
+
+    data[invocation].u64.x   = subgroupClusteredXor(data[0].u64.x, 1);
+    data[invocation].u64.xy  = subgroupClusteredXor(data[1].u64.xy, 1);
+    data[invocation].u64.xyz = subgroupClusteredXor(data[2].u64.xyz, 1);
+    data[invocation].u64     = subgroupClusteredXor(data[3].u64, 1);
+
+    data[invocation].f16.x   = subgroupClusteredAdd(data[0].f16.x, 1); // f16: Add, Mul, Min, Max only (no And/Or/Xor calls)
+    data[invocation].f16.xy  = subgroupClusteredAdd(data[1].f16.xy, 1);
+    data[invocation].f16.xyz = subgroupClusteredAdd(data[2].f16.xyz, 1);
+    data[invocation].f16     = subgroupClusteredAdd(data[3].f16, 1);
+
+    data[invocation].f16.x   = subgroupClusteredMul(data[0].f16.x, 1);
+    data[invocation].f16.xy  = subgroupClusteredMul(data[1].f16.xy, 1);
+    data[invocation].f16.xyz = subgroupClusteredMul(data[2].f16.xyz, 1);
+    data[invocation].f16     = subgroupClusteredMul(data[3].f16, 1);
+
+    data[invocation].f16.x   = subgroupClusteredMin(data[0].f16.x, 1);
+    data[invocation].f16.xy  = subgroupClusteredMin(data[1].f16.xy, 1);
+    data[invocation].f16.xyz = subgroupClusteredMin(data[2].f16.xyz, 1);
+    data[invocation].f16     = subgroupClusteredMin(data[3].f16, 1);
+
+    data[invocation].f16.x   = subgroupClusteredMax(data[0].f16.x, 1);
+    data[invocation].f16.xy  = subgroupClusteredMax(data[1].f16.xy, 1);
+    data[invocation].f16.xyz = subgroupClusteredMax(data[2].f16.xyz, 1);
+    data[invocation].f16     = subgroupClusteredMax(data[3].f16, 1);
+}
diff --git a/Test/spv.subgroupExtendedTypesPartitioned.comp b/Test/spv.subgroupExtendedTypesPartitioned.comp
new file mode 100644
index 0000000..382a5b3
--- /dev/null
+++ b/Test/spv.subgroupExtendedTypesPartitioned.comp
@@ -0,0 +1,291 @@
+#version 450
+// Enables GL_NV_shader_subgroup_partitioned together with the int8, int16, int64 and float16 subgroup extended-types extensions.
+#extension GL_NV_shader_subgroup_partitioned: enable
+#extension GL_EXT_shader_subgroup_extended_types_int8: enable
+#extension GL_EXT_shader_subgroup_extended_types_int16: enable
+#extension GL_EXT_shader_subgroup_extended_types_int64: enable
+#extension GL_EXT_shader_subgroup_extended_types_float16: enable
+
+layout (local_size_x = 8) in; // work-group size: 8 invocations in x
+
+layout(binding = 0) buffer Buffers // one 4-component member per 8/16/64-bit integer type (signed and unsigned) plus float16
+{
+    i8vec4 i8;
+    u8vec4 u8;
+    i16vec4 i16;
+    u16vec4 u16;
+    i64vec4 i64;
+    u64vec4 u64;
+    f16vec4 f16;
+} data[4]; // array of four blocks; main() reads data[0]..data[3] and writes data[invocation]
+
+void main() // Calls subgroupPartitionNV and the subgroupPartitioned*NV built-ins on every Buffers member at 1, 2, 3 and 4 components.
+{
+    uint invocation = (gl_SubgroupInvocationID + gl_SubgroupSize) % 4; // destination index into data[], always in 0..3
+
+    uvec4 ballot; // overwritten by each subgroupPartitionNV call; only the last value (from data[3].f16) is passed to the partitioned ops below
+    ballot = subgroupPartitionNV(data[0].i8.x);
+    ballot = subgroupPartitionNV(data[1].i8.xy);
+    ballot = subgroupPartitionNV(data[2].i8.xyz);
+    ballot = subgroupPartitionNV(data[3].i8);
+
+    ballot = subgroupPartitionNV(data[0].u8.x);
+    ballot = subgroupPartitionNV(data[1].u8.xy);
+    ballot = subgroupPartitionNV(data[2].u8.xyz);
+    ballot = subgroupPartitionNV(data[3].u8);
+
+    ballot = subgroupPartitionNV(data[0].i16.x);
+    ballot = subgroupPartitionNV(data[1].i16.xy);
+    ballot = subgroupPartitionNV(data[2].i16.xyz);
+    ballot = subgroupPartitionNV(data[3].i16);
+
+    ballot = subgroupPartitionNV(data[0].u16.x);
+    ballot = subgroupPartitionNV(data[1].u16.xy);
+    ballot = subgroupPartitionNV(data[2].u16.xyz);
+    ballot = subgroupPartitionNV(data[3].u16);
+
+    ballot = subgroupPartitionNV(data[0].i64.x);
+    ballot = subgroupPartitionNV(data[1].i64.xy);
+    ballot = subgroupPartitionNV(data[2].i64.xyz);
+    ballot = subgroupPartitionNV(data[3].i64);
+
+    ballot = subgroupPartitionNV(data[0].u64.x);
+    ballot = subgroupPartitionNV(data[1].u64.xy);
+    ballot = subgroupPartitionNV(data[2].u64.xyz);
+    ballot = subgroupPartitionNV(data[3].u64);
+
+    ballot = subgroupPartitionNV(data[0].f16.x);
+    ballot = subgroupPartitionNV(data[1].f16.xy);
+    ballot = subgroupPartitionNV(data[2].f16.xyz);
+    ballot = subgroupPartitionNV(data[3].f16);
+
+    data[invocation].i8.x   = subgroupPartitionedAddNV(data[0].i8.x, ballot); // i8: Add, Mul, Min, Max, And, Or, Xor
+    data[invocation].i8.xy  = subgroupPartitionedAddNV(data[1].i8.xy, ballot);
+    data[invocation].i8.xyz = subgroupPartitionedAddNV(data[2].i8.xyz, ballot);
+    data[invocation].i8     = subgroupPartitionedAddNV(data[3].i8, ballot);
+
+    data[invocation].i8.x   = subgroupPartitionedMulNV(data[0].i8.x, ballot);
+    data[invocation].i8.xy  = subgroupPartitionedMulNV(data[1].i8.xy, ballot);
+    data[invocation].i8.xyz = subgroupPartitionedMulNV(data[2].i8.xyz, ballot);
+    data[invocation].i8     = subgroupPartitionedMulNV(data[3].i8, ballot);
+
+    data[invocation].i8.x   = subgroupPartitionedMinNV(data[0].i8.x, ballot);
+    data[invocation].i8.xy  = subgroupPartitionedMinNV(data[1].i8.xy, ballot);
+    data[invocation].i8.xyz = subgroupPartitionedMinNV(data[2].i8.xyz, ballot);
+    data[invocation].i8     = subgroupPartitionedMinNV(data[3].i8, ballot);
+
+    data[invocation].i8.x   = subgroupPartitionedMaxNV(data[0].i8.x, ballot);
+    data[invocation].i8.xy  = subgroupPartitionedMaxNV(data[1].i8.xy, ballot);
+    data[invocation].i8.xyz = subgroupPartitionedMaxNV(data[2].i8.xyz, ballot);
+    data[invocation].i8     = subgroupPartitionedMaxNV(data[3].i8, ballot);
+
+    data[invocation].i8.x   = subgroupPartitionedAndNV(data[0].i8.x, ballot);
+    data[invocation].i8.xy  = subgroupPartitionedAndNV(data[1].i8.xy, ballot);
+    data[invocation].i8.xyz = subgroupPartitionedAndNV(data[2].i8.xyz, ballot);
+    data[invocation].i8     = subgroupPartitionedAndNV(data[3].i8, ballot);
+
+    data[invocation].i8.x   = subgroupPartitionedOrNV(data[0].i8.x, ballot);
+    data[invocation].i8.xy  = subgroupPartitionedOrNV(data[1].i8.xy, ballot);
+    data[invocation].i8.xyz = subgroupPartitionedOrNV(data[2].i8.xyz, ballot);
+    data[invocation].i8     = subgroupPartitionedOrNV(data[3].i8, ballot);
+
+    data[invocation].i8.x   = subgroupPartitionedXorNV(data[0].i8.x, ballot);
+    data[invocation].i8.xy  = subgroupPartitionedXorNV(data[1].i8.xy, ballot);
+    data[invocation].i8.xyz = subgroupPartitionedXorNV(data[2].i8.xyz, ballot);
+    data[invocation].i8     = subgroupPartitionedXorNV(data[3].i8, ballot);
+
+    data[invocation].u8.x   = subgroupPartitionedAddNV(data[0].u8.x, ballot); // u8: Add, Mul, Min, Max, And, Or, Xor
+    data[invocation].u8.xy  = subgroupPartitionedAddNV(data[1].u8.xy, ballot);
+    data[invocation].u8.xyz = subgroupPartitionedAddNV(data[2].u8.xyz, ballot);
+    data[invocation].u8     = subgroupPartitionedAddNV(data[3].u8, ballot);
+
+    data[invocation].u8.x   = subgroupPartitionedMulNV(data[0].u8.x, ballot);
+    data[invocation].u8.xy  = subgroupPartitionedMulNV(data[1].u8.xy, ballot);
+    data[invocation].u8.xyz = subgroupPartitionedMulNV(data[2].u8.xyz, ballot);
+    data[invocation].u8     = subgroupPartitionedMulNV(data[3].u8, ballot);
+
+    data[invocation].u8.x   = subgroupPartitionedMinNV(data[0].u8.x, ballot);
+    data[invocation].u8.xy  = subgroupPartitionedMinNV(data[1].u8.xy, ballot);
+    data[invocation].u8.xyz = subgroupPartitionedMinNV(data[2].u8.xyz, ballot);
+    data[invocation].u8     = subgroupPartitionedMinNV(data[3].u8, ballot);
+
+    data[invocation].u8.x   = subgroupPartitionedMaxNV(data[0].u8.x, ballot);
+    data[invocation].u8.xy  = subgroupPartitionedMaxNV(data[1].u8.xy, ballot);
+    data[invocation].u8.xyz = subgroupPartitionedMaxNV(data[2].u8.xyz, ballot);
+    data[invocation].u8     = subgroupPartitionedMaxNV(data[3].u8, ballot);
+
+    data[invocation].u8.x   = subgroupPartitionedAndNV(data[0].u8.x, ballot);
+    data[invocation].u8.xy  = subgroupPartitionedAndNV(data[1].u8.xy, ballot);
+    data[invocation].u8.xyz = subgroupPartitionedAndNV(data[2].u8.xyz, ballot);
+    data[invocation].u8     = subgroupPartitionedAndNV(data[3].u8, ballot);
+
+    data[invocation].u8.x   = subgroupPartitionedOrNV(data[0].u8.x, ballot);
+    data[invocation].u8.xy  = subgroupPartitionedOrNV(data[1].u8.xy, ballot);
+    data[invocation].u8.xyz = subgroupPartitionedOrNV(data[2].u8.xyz, ballot);
+    data[invocation].u8     = subgroupPartitionedOrNV(data[3].u8, ballot);
+
+    data[invocation].u8.x   = subgroupPartitionedXorNV(data[0].u8.x, ballot);
+    data[invocation].u8.xy  = subgroupPartitionedXorNV(data[1].u8.xy, ballot);
+    data[invocation].u8.xyz = subgroupPartitionedXorNV(data[2].u8.xyz, ballot);
+    data[invocation].u8     = subgroupPartitionedXorNV(data[3].u8, ballot);
+
+    data[invocation].i16.x   = subgroupPartitionedAddNV(data[0].i16.x, ballot); // i16: Add, Mul, Min, Max, And, Or, Xor
+    data[invocation].i16.xy  = subgroupPartitionedAddNV(data[1].i16.xy, ballot);
+    data[invocation].i16.xyz = subgroupPartitionedAddNV(data[2].i16.xyz, ballot);
+    data[invocation].i16     = subgroupPartitionedAddNV(data[3].i16, ballot);
+
+    data[invocation].i16.x   = subgroupPartitionedMulNV(data[0].i16.x, ballot);
+    data[invocation].i16.xy  = subgroupPartitionedMulNV(data[1].i16.xy, ballot);
+    data[invocation].i16.xyz = subgroupPartitionedMulNV(data[2].i16.xyz, ballot);
+    data[invocation].i16     = subgroupPartitionedMulNV(data[3].i16, ballot);
+
+    data[invocation].i16.x   = subgroupPartitionedMinNV(data[0].i16.x, ballot);
+    data[invocation].i16.xy  = subgroupPartitionedMinNV(data[1].i16.xy, ballot);
+    data[invocation].i16.xyz = subgroupPartitionedMinNV(data[2].i16.xyz, ballot);
+    data[invocation].i16     = subgroupPartitionedMinNV(data[3].i16, ballot);
+
+    data[invocation].i16.x   = subgroupPartitionedMaxNV(data[0].i16.x, ballot);
+    data[invocation].i16.xy  = subgroupPartitionedMaxNV(data[1].i16.xy, ballot);
+    data[invocation].i16.xyz = subgroupPartitionedMaxNV(data[2].i16.xyz, ballot);
+    data[invocation].i16     = subgroupPartitionedMaxNV(data[3].i16, ballot);
+
+    data[invocation].i16.x   = subgroupPartitionedAndNV(data[0].i16.x, ballot);
+    data[invocation].i16.xy  = subgroupPartitionedAndNV(data[1].i16.xy, ballot);
+    data[invocation].i16.xyz = subgroupPartitionedAndNV(data[2].i16.xyz, ballot);
+    data[invocation].i16     = subgroupPartitionedAndNV(data[3].i16, ballot);
+
+    data[invocation].i16.x   = subgroupPartitionedOrNV(data[0].i16.x, ballot);
+    data[invocation].i16.xy  = subgroupPartitionedOrNV(data[1].i16.xy, ballot);
+    data[invocation].i16.xyz = subgroupPartitionedOrNV(data[2].i16.xyz, ballot);
+    data[invocation].i16     = subgroupPartitionedOrNV(data[3].i16, ballot);
+
+    data[invocation].i16.x   = subgroupPartitionedXorNV(data[0].i16.x, ballot);
+    data[invocation].i16.xy  = subgroupPartitionedXorNV(data[1].i16.xy, ballot);
+    data[invocation].i16.xyz = subgroupPartitionedXorNV(data[2].i16.xyz, ballot);
+    data[invocation].i16     = subgroupPartitionedXorNV(data[3].i16, ballot);
+
+    data[invocation].u16.x   = subgroupPartitionedAddNV(data[0].u16.x, ballot); // u16: Add, Mul, Min, Max, And, Or, Xor
+    data[invocation].u16.xy  = subgroupPartitionedAddNV(data[1].u16.xy, ballot);
+    data[invocation].u16.xyz = subgroupPartitionedAddNV(data[2].u16.xyz, ballot);
+    data[invocation].u16     = subgroupPartitionedAddNV(data[3].u16, ballot);
+
+    data[invocation].u16.x   = subgroupPartitionedMulNV(data[0].u16.x, ballot);
+    data[invocation].u16.xy  = subgroupPartitionedMulNV(data[1].u16.xy, ballot);
+    data[invocation].u16.xyz = subgroupPartitionedMulNV(data[2].u16.xyz, ballot);
+    data[invocation].u16     = subgroupPartitionedMulNV(data[3].u16, ballot);
+
+    data[invocation].u16.x   = subgroupPartitionedMinNV(data[0].u16.x, ballot);
+    data[invocation].u16.xy  = subgroupPartitionedMinNV(data[1].u16.xy, ballot);
+    data[invocation].u16.xyz = subgroupPartitionedMinNV(data[2].u16.xyz, ballot);
+    data[invocation].u16     = subgroupPartitionedMinNV(data[3].u16, ballot);
+
+    data[invocation].u16.x   = subgroupPartitionedMaxNV(data[0].u16.x, ballot);
+    data[invocation].u16.xy  = subgroupPartitionedMaxNV(data[1].u16.xy, ballot);
+    data[invocation].u16.xyz = subgroupPartitionedMaxNV(data[2].u16.xyz, ballot);
+    data[invocation].u16     = subgroupPartitionedMaxNV(data[3].u16, ballot);
+
+    data[invocation].u16.x   = subgroupPartitionedAndNV(data[0].u16.x, ballot);
+    data[invocation].u16.xy  = subgroupPartitionedAndNV(data[1].u16.xy, ballot);
+    data[invocation].u16.xyz = subgroupPartitionedAndNV(data[2].u16.xyz, ballot);
+    data[invocation].u16     = subgroupPartitionedAndNV(data[3].u16, ballot);
+
+    data[invocation].u16.x   = subgroupPartitionedOrNV(data[0].u16.x, ballot);
+    data[invocation].u16.xy  = subgroupPartitionedOrNV(data[1].u16.xy, ballot);
+    data[invocation].u16.xyz = subgroupPartitionedOrNV(data[2].u16.xyz, ballot);
+    data[invocation].u16     = subgroupPartitionedOrNV(data[3].u16, ballot);
+
+    data[invocation].u16.x   = subgroupPartitionedXorNV(data[0].u16.x, ballot);
+    data[invocation].u16.xy  = subgroupPartitionedXorNV(data[1].u16.xy, ballot);
+    data[invocation].u16.xyz = subgroupPartitionedXorNV(data[2].u16.xyz, ballot);
+    data[invocation].u16     = subgroupPartitionedXorNV(data[3].u16, ballot);
+
+    data[invocation].i64.x   = subgroupPartitionedAddNV(data[0].i64.x, ballot); // i64: Add, Mul, Min, Max, And, Or, Xor
+    data[invocation].i64.xy  = subgroupPartitionedAddNV(data[1].i64.xy, ballot);
+    data[invocation].i64.xyz = subgroupPartitionedAddNV(data[2].i64.xyz, ballot);
+    data[invocation].i64     = subgroupPartitionedAddNV(data[3].i64, ballot);
+
+    data[invocation].i64.x   = subgroupPartitionedMulNV(data[0].i64.x, ballot);
+    data[invocation].i64.xy  = subgroupPartitionedMulNV(data[1].i64.xy, ballot);
+    data[invocation].i64.xyz = subgroupPartitionedMulNV(data[2].i64.xyz, ballot);
+    data[invocation].i64     = subgroupPartitionedMulNV(data[3].i64, ballot);
+
+    data[invocation].i64.x   = subgroupPartitionedMinNV(data[0].i64.x, ballot);
+    data[invocation].i64.xy  = subgroupPartitionedMinNV(data[1].i64.xy, ballot);
+    data[invocation].i64.xyz = subgroupPartitionedMinNV(data[2].i64.xyz, ballot);
+    data[invocation].i64     = subgroupPartitionedMinNV(data[3].i64, ballot);
+
+    data[invocation].i64.x   = subgroupPartitionedMaxNV(data[0].i64.x, ballot);
+    data[invocation].i64.xy  = subgroupPartitionedMaxNV(data[1].i64.xy, ballot);
+    data[invocation].i64.xyz = subgroupPartitionedMaxNV(data[2].i64.xyz, ballot);
+    data[invocation].i64     = subgroupPartitionedMaxNV(data[3].i64, ballot);
+
+    data[invocation].i64.x   = subgroupPartitionedAndNV(data[0].i64.x, ballot);
+    data[invocation].i64.xy  = subgroupPartitionedAndNV(data[1].i64.xy, ballot);
+    data[invocation].i64.xyz = subgroupPartitionedAndNV(data[2].i64.xyz, ballot);
+    data[invocation].i64     = subgroupPartitionedAndNV(data[3].i64, ballot);
+
+    data[invocation].i64.x   = subgroupPartitionedOrNV(data[0].i64.x, ballot);
+    data[invocation].i64.xy  = subgroupPartitionedOrNV(data[1].i64.xy, ballot);
+    data[invocation].i64.xyz = subgroupPartitionedOrNV(data[2].i64.xyz, ballot);
+    data[invocation].i64     = subgroupPartitionedOrNV(data[3].i64, ballot);
+
+    data[invocation].i64.x   = subgroupPartitionedXorNV(data[0].i64.x, ballot);
+    data[invocation].i64.xy  = subgroupPartitionedXorNV(data[1].i64.xy, ballot);
+    data[invocation].i64.xyz = subgroupPartitionedXorNV(data[2].i64.xyz, ballot);
+    data[invocation].i64     = subgroupPartitionedXorNV(data[3].i64, ballot);
+
+    data[invocation].u64.x   = subgroupPartitionedAddNV(data[0].u64.x, ballot); // u64: Add, Mul, Min, Max, And, Or, Xor
+    data[invocation].u64.xy  = subgroupPartitionedAddNV(data[1].u64.xy, ballot);
+    data[invocation].u64.xyz = subgroupPartitionedAddNV(data[2].u64.xyz, ballot);
+    data[invocation].u64     = subgroupPartitionedAddNV(data[3].u64, ballot);
+
+    data[invocation].u64.x   = subgroupPartitionedMulNV(data[0].u64.x, ballot);
+    data[invocation].u64.xy  = subgroupPartitionedMulNV(data[1].u64.xy, ballot);
+    data[invocation].u64.xyz = subgroupPartitionedMulNV(data[2].u64.xyz, ballot);
+    data[invocation].u64     = subgroupPartitionedMulNV(data[3].u64, ballot);
+
+    data[invocation].u64.x   = subgroupPartitionedMinNV(data[0].u64.x, ballot);
+    data[invocation].u64.xy  = subgroupPartitionedMinNV(data[1].u64.xy, ballot);
+    data[invocation].u64.xyz = subgroupPartitionedMinNV(data[2].u64.xyz, ballot);
+    data[invocation].u64     = subgroupPartitionedMinNV(data[3].u64, ballot);
+
+    data[invocation].u64.x   = subgroupPartitionedMaxNV(data[0].u64.x, ballot);
+    data[invocation].u64.xy  = subgroupPartitionedMaxNV(data[1].u64.xy, ballot);
+    data[invocation].u64.xyz = subgroupPartitionedMaxNV(data[2].u64.xyz, ballot);
+    data[invocation].u64     = subgroupPartitionedMaxNV(data[3].u64, ballot);
+
+    data[invocation].u64.x   = subgroupPartitionedAndNV(data[0].u64.x, ballot);
+    data[invocation].u64.xy  = subgroupPartitionedAndNV(data[1].u64.xy, ballot);
+    data[invocation].u64.xyz = subgroupPartitionedAndNV(data[2].u64.xyz, ballot);
+    data[invocation].u64     = subgroupPartitionedAndNV(data[3].u64, ballot);
+
+    data[invocation].u64.x   = subgroupPartitionedOrNV(data[0].u64.x, ballot);
+    data[invocation].u64.xy  = subgroupPartitionedOrNV(data[1].u64.xy, ballot);
+    data[invocation].u64.xyz = subgroupPartitionedOrNV(data[2].u64.xyz, ballot);
+    data[invocation].u64     = subgroupPartitionedOrNV(data[3].u64, ballot);
+
+    data[invocation].u64.x   = subgroupPartitionedXorNV(data[0].u64.x, ballot);
+    data[invocation].u64.xy  = subgroupPartitionedXorNV(data[1].u64.xy, ballot);
+    data[invocation].u64.xyz = subgroupPartitionedXorNV(data[2].u64.xyz, ballot);
+    data[invocation].u64     = subgroupPartitionedXorNV(data[3].u64, ballot);
+
+    data[invocation].f16.x   = subgroupPartitionedAddNV(data[0].f16.x, ballot); // f16: Add, Mul, Min, Max only (no And/Or/Xor calls)
+    data[invocation].f16.xy  = subgroupPartitionedAddNV(data[1].f16.xy, ballot);
+    data[invocation].f16.xyz = subgroupPartitionedAddNV(data[2].f16.xyz, ballot);
+    data[invocation].f16     = subgroupPartitionedAddNV(data[3].f16, ballot);
+
+    data[invocation].f16.x   = subgroupPartitionedMulNV(data[0].f16.x, ballot);
+    data[invocation].f16.xy  = subgroupPartitionedMulNV(data[1].f16.xy, ballot);
+    data[invocation].f16.xyz = subgroupPartitionedMulNV(data[2].f16.xyz, ballot);
+    data[invocation].f16     = subgroupPartitionedMulNV(data[3].f16, ballot);
+
+    data[invocation].f16.x   = subgroupPartitionedMinNV(data[0].f16.x, ballot);
+    data[invocation].f16.xy  = subgroupPartitionedMinNV(data[1].f16.xy, ballot);
+    data[invocation].f16.xyz = subgroupPartitionedMinNV(data[2].f16.xyz, ballot);
+    data[invocation].f16     = subgroupPartitionedMinNV(data[3].f16, ballot);
+
+    data[invocation].f16.x   = subgroupPartitionedMaxNV(data[0].f16.x, ballot);
+    data[invocation].f16.xy  = subgroupPartitionedMaxNV(data[1].f16.xy, ballot);
+    data[invocation].f16.xyz = subgroupPartitionedMaxNV(data[2].f16.xyz, ballot);
+    data[invocation].f16     = subgroupPartitionedMaxNV(data[3].f16, ballot);
+}
diff --git a/Test/spv.subgroupExtendedTypesPartitionedNeg.comp b/Test/spv.subgroupExtendedTypesPartitionedNeg.comp
new file mode 100644
index 0000000..ad94e35
--- /dev/null
+++ b/Test/spv.subgroupExtendedTypesPartitionedNeg.comp
@@ -0,0 +1,291 @@
+#version 450
+// Compute-shader test input: GL_NV_shader_subgroup_partitioned built-ins applied to 8/16/64-bit integer and float16 vectors.
+#extension GL_NV_shader_subgroup_partitioned: enable
+#extension GL_EXT_shader_explicit_arithmetic_types_int8: enable
+#extension GL_EXT_shader_explicit_arithmetic_types_int16: enable
+#extension GL_EXT_shader_explicit_arithmetic_types_int64: enable
+#extension GL_EXT_shader_explicit_arithmetic_types_float16: enable
+// NOTE(review): no GL_EXT_shader_subgroup_extended_types_* extension is enabled above; presumably that is what makes this the negative variant - confirm against the expected output.
+layout (local_size_x = 8) in;
+
+layout(binding = 0) buffer Buffers // buffer block at binding 0, instantiated below as the four-element array data[]
+{
+    i8vec4 i8;   // 4-component signed 8-bit integer vector
+    u8vec4 u8;   // 4-component unsigned 8-bit integer vector
+    i16vec4 i16; // 4-component signed 16-bit integer vector
+    u16vec4 u16; // 4-component unsigned 16-bit integer vector
+    i64vec4 i64; // 4-component signed 64-bit integer vector
+    u64vec4 u64; // 4-component unsigned 64-bit integer vector
+    f16vec4 f16; // 4-component 16-bit floating-point vector
+} data[4];
+
+void main() // entry point: calls subgroupPartitionNV and the subgroupPartitioned*NV operations on every Buffers member
+{
+    uint invocation = (gl_SubgroupInvocationID + gl_SubgroupSize) % 4; // selects which of the four data[] elements receives every result below
+    // Partition phase: subgroupPartitionNV is called on 1- to 4-component values of each member; each result overwrites ballot.
+    uvec4 ballot;
+    ballot = subgroupPartitionNV(data[0].i8.x);
+    ballot = subgroupPartitionNV(data[1].i8.xy);
+    ballot = subgroupPartitionNV(data[2].i8.xyz);
+    ballot = subgroupPartitionNV(data[3].i8);
+
+    ballot = subgroupPartitionNV(data[0].u8.x);
+    ballot = subgroupPartitionNV(data[1].u8.xy);
+    ballot = subgroupPartitionNV(data[2].u8.xyz);
+    ballot = subgroupPartitionNV(data[3].u8);
+
+    ballot = subgroupPartitionNV(data[0].i16.x);
+    ballot = subgroupPartitionNV(data[1].i16.xy);
+    ballot = subgroupPartitionNV(data[2].i16.xyz);
+    ballot = subgroupPartitionNV(data[3].i16);
+
+    ballot = subgroupPartitionNV(data[0].u16.x);
+    ballot = subgroupPartitionNV(data[1].u16.xy);
+    ballot = subgroupPartitionNV(data[2].u16.xyz);
+    ballot = subgroupPartitionNV(data[3].u16);
+
+    ballot = subgroupPartitionNV(data[0].i64.x);
+    ballot = subgroupPartitionNV(data[1].i64.xy);
+    ballot = subgroupPartitionNV(data[2].i64.xyz);
+    ballot = subgroupPartitionNV(data[3].i64);
+
+    ballot = subgroupPartitionNV(data[0].u64.x);
+    ballot = subgroupPartitionNV(data[1].u64.xy);
+    ballot = subgroupPartitionNV(data[2].u64.xyz);
+    ballot = subgroupPartitionNV(data[3].u64);
+
+    ballot = subgroupPartitionNV(data[0].f16.x);
+    ballot = subgroupPartitionNV(data[1].f16.xy);
+    ballot = subgroupPartitionNV(data[2].f16.xyz);
+    ballot = subgroupPartitionNV(data[3].f16);
+    // i8 member: Add, Mul, Min, Max, And, Or, Xor, all using the last ballot assigned above; operands are the .x/.xy/.xyz/full vectors of data[0..3].
+    data[invocation].i8.x   = subgroupPartitionedAddNV(data[0].i8.x, ballot);
+    data[invocation].i8.xy  = subgroupPartitionedAddNV(data[1].i8.xy, ballot);
+    data[invocation].i8.xyz = subgroupPartitionedAddNV(data[2].i8.xyz, ballot);
+    data[invocation].i8     = subgroupPartitionedAddNV(data[3].i8, ballot);
+
+    data[invocation].i8.x   = subgroupPartitionedMulNV(data[0].i8.x, ballot);
+    data[invocation].i8.xy  = subgroupPartitionedMulNV(data[1].i8.xy, ballot);
+    data[invocation].i8.xyz = subgroupPartitionedMulNV(data[2].i8.xyz, ballot);
+    data[invocation].i8     = subgroupPartitionedMulNV(data[3].i8, ballot);
+
+    data[invocation].i8.x   = subgroupPartitionedMinNV(data[0].i8.x, ballot);
+    data[invocation].i8.xy  = subgroupPartitionedMinNV(data[1].i8.xy, ballot);
+    data[invocation].i8.xyz = subgroupPartitionedMinNV(data[2].i8.xyz, ballot);
+    data[invocation].i8     = subgroupPartitionedMinNV(data[3].i8, ballot);
+
+    data[invocation].i8.x   = subgroupPartitionedMaxNV(data[0].i8.x, ballot);
+    data[invocation].i8.xy  = subgroupPartitionedMaxNV(data[1].i8.xy, ballot);
+    data[invocation].i8.xyz = subgroupPartitionedMaxNV(data[2].i8.xyz, ballot);
+    data[invocation].i8     = subgroupPartitionedMaxNV(data[3].i8, ballot);
+
+    data[invocation].i8.x   = subgroupPartitionedAndNV(data[0].i8.x, ballot);
+    data[invocation].i8.xy  = subgroupPartitionedAndNV(data[1].i8.xy, ballot);
+    data[invocation].i8.xyz = subgroupPartitionedAndNV(data[2].i8.xyz, ballot);
+    data[invocation].i8     = subgroupPartitionedAndNV(data[3].i8, ballot);
+
+    data[invocation].i8.x   = subgroupPartitionedOrNV(data[0].i8.x, ballot);
+    data[invocation].i8.xy  = subgroupPartitionedOrNV(data[1].i8.xy, ballot);
+    data[invocation].i8.xyz = subgroupPartitionedOrNV(data[2].i8.xyz, ballot);
+    data[invocation].i8     = subgroupPartitionedOrNV(data[3].i8, ballot);
+
+    data[invocation].i8.x   = subgroupPartitionedXorNV(data[0].i8.x, ballot);
+    data[invocation].i8.xy  = subgroupPartitionedXorNV(data[1].i8.xy, ballot);
+    data[invocation].i8.xyz = subgroupPartitionedXorNV(data[2].i8.xyz, ballot);
+    data[invocation].i8     = subgroupPartitionedXorNV(data[3].i8, ballot);
+    // u8 member: the same seven partitioned operations.
+    data[invocation].u8.x   = subgroupPartitionedAddNV(data[0].u8.x, ballot);
+    data[invocation].u8.xy  = subgroupPartitionedAddNV(data[1].u8.xy, ballot);
+    data[invocation].u8.xyz = subgroupPartitionedAddNV(data[2].u8.xyz, ballot);
+    data[invocation].u8     = subgroupPartitionedAddNV(data[3].u8, ballot);
+
+    data[invocation].u8.x   = subgroupPartitionedMulNV(data[0].u8.x, ballot);
+    data[invocation].u8.xy  = subgroupPartitionedMulNV(data[1].u8.xy, ballot);
+    data[invocation].u8.xyz = subgroupPartitionedMulNV(data[2].u8.xyz, ballot);
+    data[invocation].u8     = subgroupPartitionedMulNV(data[3].u8, ballot);
+
+    data[invocation].u8.x   = subgroupPartitionedMinNV(data[0].u8.x, ballot);
+    data[invocation].u8.xy  = subgroupPartitionedMinNV(data[1].u8.xy, ballot);
+    data[invocation].u8.xyz = subgroupPartitionedMinNV(data[2].u8.xyz, ballot);
+    data[invocation].u8     = subgroupPartitionedMinNV(data[3].u8, ballot);
+
+    data[invocation].u8.x   = subgroupPartitionedMaxNV(data[0].u8.x, ballot);
+    data[invocation].u8.xy  = subgroupPartitionedMaxNV(data[1].u8.xy, ballot);
+    data[invocation].u8.xyz = subgroupPartitionedMaxNV(data[2].u8.xyz, ballot);
+    data[invocation].u8     = subgroupPartitionedMaxNV(data[3].u8, ballot);
+
+    data[invocation].u8.x   = subgroupPartitionedAndNV(data[0].u8.x, ballot);
+    data[invocation].u8.xy  = subgroupPartitionedAndNV(data[1].u8.xy, ballot);
+    data[invocation].u8.xyz = subgroupPartitionedAndNV(data[2].u8.xyz, ballot);
+    data[invocation].u8     = subgroupPartitionedAndNV(data[3].u8, ballot);
+
+    data[invocation].u8.x   = subgroupPartitionedOrNV(data[0].u8.x, ballot);
+    data[invocation].u8.xy  = subgroupPartitionedOrNV(data[1].u8.xy, ballot);
+    data[invocation].u8.xyz = subgroupPartitionedOrNV(data[2].u8.xyz, ballot);
+    data[invocation].u8     = subgroupPartitionedOrNV(data[3].u8, ballot);
+
+    data[invocation].u8.x   = subgroupPartitionedXorNV(data[0].u8.x, ballot);
+    data[invocation].u8.xy  = subgroupPartitionedXorNV(data[1].u8.xy, ballot);
+    data[invocation].u8.xyz = subgroupPartitionedXorNV(data[2].u8.xyz, ballot);
+    data[invocation].u8     = subgroupPartitionedXorNV(data[3].u8, ballot);
+    // i16 member: the same seven partitioned operations.
+    data[invocation].i16.x   = subgroupPartitionedAddNV(data[0].i16.x, ballot);
+    data[invocation].i16.xy  = subgroupPartitionedAddNV(data[1].i16.xy, ballot);
+    data[invocation].i16.xyz = subgroupPartitionedAddNV(data[2].i16.xyz, ballot);
+    data[invocation].i16     = subgroupPartitionedAddNV(data[3].i16, ballot);
+
+    data[invocation].i16.x   = subgroupPartitionedMulNV(data[0].i16.x, ballot);
+    data[invocation].i16.xy  = subgroupPartitionedMulNV(data[1].i16.xy, ballot);
+    data[invocation].i16.xyz = subgroupPartitionedMulNV(data[2].i16.xyz, ballot);
+    data[invocation].i16     = subgroupPartitionedMulNV(data[3].i16, ballot);
+
+    data[invocation].i16.x   = subgroupPartitionedMinNV(data[0].i16.x, ballot);
+    data[invocation].i16.xy  = subgroupPartitionedMinNV(data[1].i16.xy, ballot);
+    data[invocation].i16.xyz = subgroupPartitionedMinNV(data[2].i16.xyz, ballot);
+    data[invocation].i16     = subgroupPartitionedMinNV(data[3].i16, ballot);
+
+    data[invocation].i16.x   = subgroupPartitionedMaxNV(data[0].i16.x, ballot);
+    data[invocation].i16.xy  = subgroupPartitionedMaxNV(data[1].i16.xy, ballot);
+    data[invocation].i16.xyz = subgroupPartitionedMaxNV(data[2].i16.xyz, ballot);
+    data[invocation].i16     = subgroupPartitionedMaxNV(data[3].i16, ballot);
+
+    data[invocation].i16.x   = subgroupPartitionedAndNV(data[0].i16.x, ballot);
+    data[invocation].i16.xy  = subgroupPartitionedAndNV(data[1].i16.xy, ballot);
+    data[invocation].i16.xyz = subgroupPartitionedAndNV(data[2].i16.xyz, ballot);
+    data[invocation].i16     = subgroupPartitionedAndNV(data[3].i16, ballot);
+
+    data[invocation].i16.x   = subgroupPartitionedOrNV(data[0].i16.x, ballot);
+    data[invocation].i16.xy  = subgroupPartitionedOrNV(data[1].i16.xy, ballot);
+    data[invocation].i16.xyz = subgroupPartitionedOrNV(data[2].i16.xyz, ballot);
+    data[invocation].i16     = subgroupPartitionedOrNV(data[3].i16, ballot);
+
+    data[invocation].i16.x   = subgroupPartitionedXorNV(data[0].i16.x, ballot);
+    data[invocation].i16.xy  = subgroupPartitionedXorNV(data[1].i16.xy, ballot);
+    data[invocation].i16.xyz = subgroupPartitionedXorNV(data[2].i16.xyz, ballot);
+    data[invocation].i16     = subgroupPartitionedXorNV(data[3].i16, ballot);
+    // u16 member: the same seven partitioned operations.
+    data[invocation].u16.x   = subgroupPartitionedAddNV(data[0].u16.x, ballot);
+    data[invocation].u16.xy  = subgroupPartitionedAddNV(data[1].u16.xy, ballot);
+    data[invocation].u16.xyz = subgroupPartitionedAddNV(data[2].u16.xyz, ballot);
+    data[invocation].u16     = subgroupPartitionedAddNV(data[3].u16, ballot);
+
+    data[invocation].u16.x   = subgroupPartitionedMulNV(data[0].u16.x, ballot);
+    data[invocation].u16.xy  = subgroupPartitionedMulNV(data[1].u16.xy, ballot);
+    data[invocation].u16.xyz = subgroupPartitionedMulNV(data[2].u16.xyz, ballot);
+    data[invocation].u16     = subgroupPartitionedMulNV(data[3].u16, ballot);
+
+    data[invocation].u16.x   = subgroupPartitionedMinNV(data[0].u16.x, ballot);
+    data[invocation].u16.xy  = subgroupPartitionedMinNV(data[1].u16.xy, ballot);
+    data[invocation].u16.xyz = subgroupPartitionedMinNV(data[2].u16.xyz, ballot);
+    data[invocation].u16     = subgroupPartitionedMinNV(data[3].u16, ballot);
+
+    data[invocation].u16.x   = subgroupPartitionedMaxNV(data[0].u16.x, ballot);
+    data[invocation].u16.xy  = subgroupPartitionedMaxNV(data[1].u16.xy, ballot);
+    data[invocation].u16.xyz = subgroupPartitionedMaxNV(data[2].u16.xyz, ballot);
+    data[invocation].u16     = subgroupPartitionedMaxNV(data[3].u16, ballot);
+
+    data[invocation].u16.x   = subgroupPartitionedAndNV(data[0].u16.x, ballot);
+    data[invocation].u16.xy  = subgroupPartitionedAndNV(data[1].u16.xy, ballot);
+    data[invocation].u16.xyz = subgroupPartitionedAndNV(data[2].u16.xyz, ballot);
+    data[invocation].u16     = subgroupPartitionedAndNV(data[3].u16, ballot);
+
+    data[invocation].u16.x   = subgroupPartitionedOrNV(data[0].u16.x, ballot);
+    data[invocation].u16.xy  = subgroupPartitionedOrNV(data[1].u16.xy, ballot);
+    data[invocation].u16.xyz = subgroupPartitionedOrNV(data[2].u16.xyz, ballot);
+    data[invocation].u16     = subgroupPartitionedOrNV(data[3].u16, ballot);
+
+    data[invocation].u16.x   = subgroupPartitionedXorNV(data[0].u16.x, ballot);
+    data[invocation].u16.xy  = subgroupPartitionedXorNV(data[1].u16.xy, ballot);
+    data[invocation].u16.xyz = subgroupPartitionedXorNV(data[2].u16.xyz, ballot);
+    data[invocation].u16     = subgroupPartitionedXorNV(data[3].u16, ballot);
+    // i64 member: the same seven partitioned operations.
+    data[invocation].i64.x   = subgroupPartitionedAddNV(data[0].i64.x, ballot);
+    data[invocation].i64.xy  = subgroupPartitionedAddNV(data[1].i64.xy, ballot);
+    data[invocation].i64.xyz = subgroupPartitionedAddNV(data[2].i64.xyz, ballot);
+    data[invocation].i64     = subgroupPartitionedAddNV(data[3].i64, ballot);
+
+    data[invocation].i64.x   = subgroupPartitionedMulNV(data[0].i64.x, ballot);
+    data[invocation].i64.xy  = subgroupPartitionedMulNV(data[1].i64.xy, ballot);
+    data[invocation].i64.xyz = subgroupPartitionedMulNV(data[2].i64.xyz, ballot);
+    data[invocation].i64     = subgroupPartitionedMulNV(data[3].i64, ballot);
+
+    data[invocation].i64.x   = subgroupPartitionedMinNV(data[0].i64.x, ballot);
+    data[invocation].i64.xy  = subgroupPartitionedMinNV(data[1].i64.xy, ballot);
+    data[invocation].i64.xyz = subgroupPartitionedMinNV(data[2].i64.xyz, ballot);
+    data[invocation].i64     = subgroupPartitionedMinNV(data[3].i64, ballot);
+
+    data[invocation].i64.x   = subgroupPartitionedMaxNV(data[0].i64.x, ballot);
+    data[invocation].i64.xy  = subgroupPartitionedMaxNV(data[1].i64.xy, ballot);
+    data[invocation].i64.xyz = subgroupPartitionedMaxNV(data[2].i64.xyz, ballot);
+    data[invocation].i64     = subgroupPartitionedMaxNV(data[3].i64, ballot);
+
+    data[invocation].i64.x   = subgroupPartitionedAndNV(data[0].i64.x, ballot);
+    data[invocation].i64.xy  = subgroupPartitionedAndNV(data[1].i64.xy, ballot);
+    data[invocation].i64.xyz = subgroupPartitionedAndNV(data[2].i64.xyz, ballot);
+    data[invocation].i64     = subgroupPartitionedAndNV(data[3].i64, ballot);
+
+    data[invocation].i64.x   = subgroupPartitionedOrNV(data[0].i64.x, ballot);
+    data[invocation].i64.xy  = subgroupPartitionedOrNV(data[1].i64.xy, ballot);
+    data[invocation].i64.xyz = subgroupPartitionedOrNV(data[2].i64.xyz, ballot);
+    data[invocation].i64     = subgroupPartitionedOrNV(data[3].i64, ballot);
+
+    data[invocation].i64.x   = subgroupPartitionedXorNV(data[0].i64.x, ballot);
+    data[invocation].i64.xy  = subgroupPartitionedXorNV(data[1].i64.xy, ballot);
+    data[invocation].i64.xyz = subgroupPartitionedXorNV(data[2].i64.xyz, ballot);
+    data[invocation].i64     = subgroupPartitionedXorNV(data[3].i64, ballot);
+    // u64 member: the same seven partitioned operations.
+    data[invocation].u64.x   = subgroupPartitionedAddNV(data[0].u64.x, ballot);
+    data[invocation].u64.xy  = subgroupPartitionedAddNV(data[1].u64.xy, ballot);
+    data[invocation].u64.xyz = subgroupPartitionedAddNV(data[2].u64.xyz, ballot);
+    data[invocation].u64     = subgroupPartitionedAddNV(data[3].u64, ballot);
+
+    data[invocation].u64.x   = subgroupPartitionedMulNV(data[0].u64.x, ballot);
+    data[invocation].u64.xy  = subgroupPartitionedMulNV(data[1].u64.xy, ballot);
+    data[invocation].u64.xyz = subgroupPartitionedMulNV(data[2].u64.xyz, ballot);
+    data[invocation].u64     = subgroupPartitionedMulNV(data[3].u64, ballot);
+
+    data[invocation].u64.x   = subgroupPartitionedMinNV(data[0].u64.x, ballot);
+    data[invocation].u64.xy  = subgroupPartitionedMinNV(data[1].u64.xy, ballot);
+    data[invocation].u64.xyz = subgroupPartitionedMinNV(data[2].u64.xyz, ballot);
+    data[invocation].u64     = subgroupPartitionedMinNV(data[3].u64, ballot);
+
+    data[invocation].u64.x   = subgroupPartitionedMaxNV(data[0].u64.x, ballot);
+    data[invocation].u64.xy  = subgroupPartitionedMaxNV(data[1].u64.xy, ballot);
+    data[invocation].u64.xyz = subgroupPartitionedMaxNV(data[2].u64.xyz, ballot);
+    data[invocation].u64     = subgroupPartitionedMaxNV(data[3].u64, ballot);
+
+    data[invocation].u64.x   = subgroupPartitionedAndNV(data[0].u64.x, ballot);
+    data[invocation].u64.xy  = subgroupPartitionedAndNV(data[1].u64.xy, ballot);
+    data[invocation].u64.xyz = subgroupPartitionedAndNV(data[2].u64.xyz, ballot);
+    data[invocation].u64     = subgroupPartitionedAndNV(data[3].u64, ballot);
+
+    data[invocation].u64.x   = subgroupPartitionedOrNV(data[0].u64.x, ballot);
+    data[invocation].u64.xy  = subgroupPartitionedOrNV(data[1].u64.xy, ballot);
+    data[invocation].u64.xyz = subgroupPartitionedOrNV(data[2].u64.xyz, ballot);
+    data[invocation].u64     = subgroupPartitionedOrNV(data[3].u64, ballot);
+
+    data[invocation].u64.x   = subgroupPartitionedXorNV(data[0].u64.x, ballot);
+    data[invocation].u64.xy  = subgroupPartitionedXorNV(data[1].u64.xy, ballot);
+    data[invocation].u64.xyz = subgroupPartitionedXorNV(data[2].u64.xyz, ballot);
+    data[invocation].u64     = subgroupPartitionedXorNV(data[3].u64, ballot);
+    // f16 member: only Add, Mul, Min and Max are called (no And/Or/Xor calls for the float type).
+    data[invocation].f16.x   = subgroupPartitionedAddNV(data[0].f16.x, ballot);
+    data[invocation].f16.xy  = subgroupPartitionedAddNV(data[1].f16.xy, ballot);
+    data[invocation].f16.xyz = subgroupPartitionedAddNV(data[2].f16.xyz, ballot);
+    data[invocation].f16     = subgroupPartitionedAddNV(data[3].f16, ballot);
+
+    data[invocation].f16.x   = subgroupPartitionedMulNV(data[0].f16.x, ballot);
+    data[invocation].f16.xy  = subgroupPartitionedMulNV(data[1].f16.xy, ballot);
+    data[invocation].f16.xyz = subgroupPartitionedMulNV(data[2].f16.xyz, ballot);
+    data[invocation].f16     = subgroupPartitionedMulNV(data[3].f16, ballot);
+
+    data[invocation].f16.x   = subgroupPartitionedMinNV(data[0].f16.x, ballot);
+    data[invocation].f16.xy  = subgroupPartitionedMinNV(data[1].f16.xy, ballot);
+    data[invocation].f16.xyz = subgroupPartitionedMinNV(data[2].f16.xyz, ballot);
+    data[invocation].f16     = subgroupPartitionedMinNV(data[3].f16, ballot);
+
+    data[invocation].f16.x   = subgroupPartitionedMaxNV(data[0].f16.x, ballot);
+    data[invocation].f16.xy  = subgroupPartitionedMaxNV(data[1].f16.xy, ballot);
+    data[invocation].f16.xyz = subgroupPartitionedMaxNV(data[2].f16.xyz, ballot);
+    data[invocation].f16     = subgroupPartitionedMaxNV(data[3].f16, ballot);
+}
diff --git a/Test/spv.subgroupExtendedTypesQuad.comp b/Test/spv.subgroupExtendedTypesQuad.comp
new file mode 100644
index 0000000..1ef2ecf
--- /dev/null
+++ b/Test/spv.subgroupExtendedTypesQuad.comp
@@ -0,0 +1,165 @@
+#version 450
+// Compute-shader test input: GL_KHR_shader_subgroup_quad built-ins applied to 8/16/64-bit integer and float16 vectors.
+#extension GL_KHR_shader_subgroup_quad: enable
+#extension GL_EXT_shader_subgroup_extended_types_int8: enable
+#extension GL_EXT_shader_subgroup_extended_types_int16: enable
+#extension GL_EXT_shader_subgroup_extended_types_int64: enable
+#extension GL_EXT_shader_subgroup_extended_types_float16: enable
+// Work-group size: 8 invocations in x (y and z are not specified here).
+layout (local_size_x = 8) in;
+
+layout(binding = 0) buffer Buffers // buffer block at binding 0, instantiated below as the four-element array data[]
+{
+    i8vec4 i8;   // 4-component signed 8-bit integer vector
+    u8vec4 u8;   // 4-component unsigned 8-bit integer vector
+    i16vec4 i16; // 4-component signed 16-bit integer vector
+    u16vec4 u16; // 4-component unsigned 16-bit integer vector
+    i64vec4 i64; // 4-component signed 64-bit integer vector
+    u64vec4 u64; // 4-component unsigned 64-bit integer vector
+    f16vec4 f16; // 4-component 16-bit floating-point vector
+} data[4];
+
+void main() // entry point: calls the four subgroupQuad* built-ins on every Buffers member
+{
+    uint invocation = (gl_SubgroupInvocationID + gl_SubgroupSize) % 4; // selects which of the four data[] elements receives every result below
+    // i8 member: broadcast (second argument fixed at 1), then horizontal, vertical and diagonal swaps; operands are the .x/.xy/.xyz/full vectors of data[0..3].
+    data[invocation].i8.x   = subgroupQuadBroadcast(data[0].i8.x, 1);
+    data[invocation].i8.xy  = subgroupQuadBroadcast(data[1].i8.xy, 1);
+    data[invocation].i8.xyz = subgroupQuadBroadcast(data[2].i8.xyz, 1);
+    data[invocation].i8     = subgroupQuadBroadcast(data[3].i8, 1);
+
+    data[invocation].i8.x   = subgroupQuadSwapHorizontal(data[0].i8.x);
+    data[invocation].i8.xy  = subgroupQuadSwapHorizontal(data[1].i8.xy);
+    data[invocation].i8.xyz = subgroupQuadSwapHorizontal(data[2].i8.xyz);
+    data[invocation].i8     = subgroupQuadSwapHorizontal(data[3].i8);
+
+    data[invocation].i8.x   = subgroupQuadSwapVertical(data[0].i8.x);
+    data[invocation].i8.xy  = subgroupQuadSwapVertical(data[1].i8.xy);
+    data[invocation].i8.xyz = subgroupQuadSwapVertical(data[2].i8.xyz);
+    data[invocation].i8     = subgroupQuadSwapVertical(data[3].i8);
+
+    data[invocation].i8.x   = subgroupQuadSwapDiagonal(data[0].i8.x);
+    data[invocation].i8.xy  = subgroupQuadSwapDiagonal(data[1].i8.xy);
+    data[invocation].i8.xyz = subgroupQuadSwapDiagonal(data[2].i8.xyz);
+    data[invocation].i8     = subgroupQuadSwapDiagonal(data[3].i8);
+    // u8 member: the same four quad operations.
+    data[invocation].u8.x   = subgroupQuadBroadcast(data[0].u8.x, 1);
+    data[invocation].u8.xy  = subgroupQuadBroadcast(data[1].u8.xy, 1);
+    data[invocation].u8.xyz = subgroupQuadBroadcast(data[2].u8.xyz, 1);
+    data[invocation].u8     = subgroupQuadBroadcast(data[3].u8, 1);
+
+    data[invocation].u8.x   = subgroupQuadSwapHorizontal(data[0].u8.x);
+    data[invocation].u8.xy  = subgroupQuadSwapHorizontal(data[1].u8.xy);
+    data[invocation].u8.xyz = subgroupQuadSwapHorizontal(data[2].u8.xyz);
+    data[invocation].u8     = subgroupQuadSwapHorizontal(data[3].u8);
+
+    data[invocation].u8.x   = subgroupQuadSwapVertical(data[0].u8.x);
+    data[invocation].u8.xy  = subgroupQuadSwapVertical(data[1].u8.xy);
+    data[invocation].u8.xyz = subgroupQuadSwapVertical(data[2].u8.xyz);
+    data[invocation].u8     = subgroupQuadSwapVertical(data[3].u8);
+
+    data[invocation].u8.x   = subgroupQuadSwapDiagonal(data[0].u8.x);
+    data[invocation].u8.xy  = subgroupQuadSwapDiagonal(data[1].u8.xy);
+    data[invocation].u8.xyz = subgroupQuadSwapDiagonal(data[2].u8.xyz);
+    data[invocation].u8     = subgroupQuadSwapDiagonal(data[3].u8);
+    // i16 member: the same four quad operations.
+    data[invocation].i16.x   = subgroupQuadBroadcast(data[0].i16.x, 1);
+    data[invocation].i16.xy  = subgroupQuadBroadcast(data[1].i16.xy, 1);
+    data[invocation].i16.xyz = subgroupQuadBroadcast(data[2].i16.xyz, 1);
+    data[invocation].i16     = subgroupQuadBroadcast(data[3].i16, 1);
+
+    data[invocation].i16.x   = subgroupQuadSwapHorizontal(data[0].i16.x);
+    data[invocation].i16.xy  = subgroupQuadSwapHorizontal(data[1].i16.xy);
+    data[invocation].i16.xyz = subgroupQuadSwapHorizontal(data[2].i16.xyz);
+    data[invocation].i16     = subgroupQuadSwapHorizontal(data[3].i16);
+
+    data[invocation].i16.x   = subgroupQuadSwapVertical(data[0].i16.x);
+    data[invocation].i16.xy  = subgroupQuadSwapVertical(data[1].i16.xy);
+    data[invocation].i16.xyz = subgroupQuadSwapVertical(data[2].i16.xyz);
+    data[invocation].i16     = subgroupQuadSwapVertical(data[3].i16);
+
+    data[invocation].i16.x   = subgroupQuadSwapDiagonal(data[0].i16.x);
+    data[invocation].i16.xy  = subgroupQuadSwapDiagonal(data[1].i16.xy);
+    data[invocation].i16.xyz = subgroupQuadSwapDiagonal(data[2].i16.xyz);
+    data[invocation].i16     = subgroupQuadSwapDiagonal(data[3].i16);
+    // u16 member: the same four quad operations.
+    data[invocation].u16.x   = subgroupQuadBroadcast(data[0].u16.x, 1);
+    data[invocation].u16.xy  = subgroupQuadBroadcast(data[1].u16.xy, 1);
+    data[invocation].u16.xyz = subgroupQuadBroadcast(data[2].u16.xyz, 1);
+    data[invocation].u16     = subgroupQuadBroadcast(data[3].u16, 1);
+
+    data[invocation].u16.x   = subgroupQuadSwapHorizontal(data[0].u16.x);
+    data[invocation].u16.xy  = subgroupQuadSwapHorizontal(data[1].u16.xy);
+    data[invocation].u16.xyz = subgroupQuadSwapHorizontal(data[2].u16.xyz);
+    data[invocation].u16     = subgroupQuadSwapHorizontal(data[3].u16);
+
+    data[invocation].u16.x   = subgroupQuadSwapVertical(data[0].u16.x);
+    data[invocation].u16.xy  = subgroupQuadSwapVertical(data[1].u16.xy);
+    data[invocation].u16.xyz = subgroupQuadSwapVertical(data[2].u16.xyz);
+    data[invocation].u16     = subgroupQuadSwapVertical(data[3].u16);
+
+    data[invocation].u16.x   = subgroupQuadSwapDiagonal(data[0].u16.x);
+    data[invocation].u16.xy  = subgroupQuadSwapDiagonal(data[1].u16.xy);
+    data[invocation].u16.xyz = subgroupQuadSwapDiagonal(data[2].u16.xyz);
+    data[invocation].u16     = subgroupQuadSwapDiagonal(data[3].u16);
+    // i64 member: the same four quad operations.
+    data[invocation].i64.x   = subgroupQuadBroadcast(data[0].i64.x, 1);
+    data[invocation].i64.xy  = subgroupQuadBroadcast(data[1].i64.xy, 1);
+    data[invocation].i64.xyz = subgroupQuadBroadcast(data[2].i64.xyz, 1);
+    data[invocation].i64     = subgroupQuadBroadcast(data[3].i64, 1);
+
+    data[invocation].i64.x   = subgroupQuadSwapHorizontal(data[0].i64.x);
+    data[invocation].i64.xy  = subgroupQuadSwapHorizontal(data[1].i64.xy);
+    data[invocation].i64.xyz = subgroupQuadSwapHorizontal(data[2].i64.xyz);
+    data[invocation].i64     = subgroupQuadSwapHorizontal(data[3].i64);
+
+    data[invocation].i64.x   = subgroupQuadSwapVertical(data[0].i64.x);
+    data[invocation].i64.xy  = subgroupQuadSwapVertical(data[1].i64.xy);
+    data[invocation].i64.xyz = subgroupQuadSwapVertical(data[2].i64.xyz);
+    data[invocation].i64     = subgroupQuadSwapVertical(data[3].i64);
+
+    data[invocation].i64.x   = subgroupQuadSwapDiagonal(data[0].i64.x);
+    data[invocation].i64.xy  = subgroupQuadSwapDiagonal(data[1].i64.xy);
+    data[invocation].i64.xyz = subgroupQuadSwapDiagonal(data[2].i64.xyz);
+    data[invocation].i64     = subgroupQuadSwapDiagonal(data[3].i64);
+    // u64 member: the same four quad operations.
+    data[invocation].u64.x   = subgroupQuadBroadcast(data[0].u64.x, 1);
+    data[invocation].u64.xy  = subgroupQuadBroadcast(data[1].u64.xy, 1);
+    data[invocation].u64.xyz = subgroupQuadBroadcast(data[2].u64.xyz, 1);
+    data[invocation].u64     = subgroupQuadBroadcast(data[3].u64, 1);
+
+    data[invocation].u64.x   = subgroupQuadSwapHorizontal(data[0].u64.x);
+    data[invocation].u64.xy  = subgroupQuadSwapHorizontal(data[1].u64.xy);
+    data[invocation].u64.xyz = subgroupQuadSwapHorizontal(data[2].u64.xyz);
+    data[invocation].u64     = subgroupQuadSwapHorizontal(data[3].u64);
+
+    data[invocation].u64.x   = subgroupQuadSwapVertical(data[0].u64.x);
+    data[invocation].u64.xy  = subgroupQuadSwapVertical(data[1].u64.xy);
+    data[invocation].u64.xyz = subgroupQuadSwapVertical(data[2].u64.xyz);
+    data[invocation].u64     = subgroupQuadSwapVertical(data[3].u64);
+
+    data[invocation].u64.x   = subgroupQuadSwapDiagonal(data[0].u64.x);
+    data[invocation].u64.xy  = subgroupQuadSwapDiagonal(data[1].u64.xy);
+    data[invocation].u64.xyz = subgroupQuadSwapDiagonal(data[2].u64.xyz);
+    data[invocation].u64     = subgroupQuadSwapDiagonal(data[3].u64);
+    // f16 member: the same four quad operations.
+    data[invocation].f16.x   = subgroupQuadBroadcast(data[0].f16.x, 1);
+    data[invocation].f16.xy  = subgroupQuadBroadcast(data[1].f16.xy, 1);
+    data[invocation].f16.xyz = subgroupQuadBroadcast(data[2].f16.xyz, 1);
+    data[invocation].f16     = subgroupQuadBroadcast(data[3].f16, 1);
+
+    data[invocation].f16.x   = subgroupQuadSwapHorizontal(data[0].f16.x);
+    data[invocation].f16.xy  = subgroupQuadSwapHorizontal(data[1].f16.xy);
+    data[invocation].f16.xyz = subgroupQuadSwapHorizontal(data[2].f16.xyz);
+    data[invocation].f16     = subgroupQuadSwapHorizontal(data[3].f16);
+
+    data[invocation].f16.x   = subgroupQuadSwapVertical(data[0].f16.x);
+    data[invocation].f16.xy  = subgroupQuadSwapVertical(data[1].f16.xy);
+    data[invocation].f16.xyz = subgroupQuadSwapVertical(data[2].f16.xyz);
+    data[invocation].f16     = subgroupQuadSwapVertical(data[3].f16);
+
+    data[invocation].f16.x   = subgroupQuadSwapDiagonal(data[0].f16.x);
+    data[invocation].f16.xy  = subgroupQuadSwapDiagonal(data[1].f16.xy);
+    data[invocation].f16.xyz = subgroupQuadSwapDiagonal(data[2].f16.xyz);
+    data[invocation].f16     = subgroupQuadSwapDiagonal(data[3].f16);
+}
diff --git a/Test/spv.subgroupExtendedTypesQuadNeg.comp b/Test/spv.subgroupExtendedTypesQuadNeg.comp
new file mode 100644
index 0000000..0724776
--- /dev/null
+++ b/Test/spv.subgroupExtendedTypesQuadNeg.comp
@@ -0,0 +1,165 @@
+#version 450
+// Compute-shader test input: GL_KHR_shader_subgroup_quad built-ins applied to 8/16/64-bit integer and float16 vectors.
+#extension GL_KHR_shader_subgroup_quad: enable
+#extension GL_EXT_shader_explicit_arithmetic_types_int8: enable
+#extension GL_EXT_shader_explicit_arithmetic_types_int16: enable
+#extension GL_EXT_shader_explicit_arithmetic_types_int64: enable
+#extension GL_EXT_shader_explicit_arithmetic_types_float16: enable
+// NOTE(review): no GL_EXT_shader_subgroup_extended_types_* extension is enabled above; presumably that is what makes this the negative variant - confirm against the expected output.
+layout (local_size_x = 8) in;
+
+layout(binding = 0) buffer Buffers // buffer block at binding 0, instantiated below as the four-element array data[]
+{
+    i8vec4 i8;   // 4-component signed 8-bit integer vector
+    u8vec4 u8;   // 4-component unsigned 8-bit integer vector
+    i16vec4 i16; // 4-component signed 16-bit integer vector
+    u16vec4 u16; // 4-component unsigned 16-bit integer vector
+    i64vec4 i64; // 4-component signed 64-bit integer vector
+    u64vec4 u64; // 4-component unsigned 64-bit integer vector
+    f16vec4 f16; // 4-component 16-bit floating-point vector
+} data[4];
+
+void main()
+{
+    uint invocation = (gl_SubgroupInvocationID + gl_SubgroupSize) % 4;
+
+    data[invocation].i8.x   = subgroupQuadBroadcast(data[0].i8.x, 1);
+    data[invocation].i8.xy  = subgroupQuadBroadcast(data[1].i8.xy, 1);
+    data[invocation].i8.xyz = subgroupQuadBroadcast(data[2].i8.xyz, 1);
+    data[invocation].i8     = subgroupQuadBroadcast(data[3].i8, 1);
+
+    data[invocation].i8.x   = subgroupQuadSwapHorizontal(data[0].i8.x);
+    data[invocation].i8.xy  = subgroupQuadSwapHorizontal(data[1].i8.xy);
+    data[invocation].i8.xyz = subgroupQuadSwapHorizontal(data[2].i8.xyz);
+    data[invocation].i8     = subgroupQuadSwapHorizontal(data[3].i8);
+
+    data[invocation].i8.x   = subgroupQuadSwapVertical(data[0].i8.x);
+    data[invocation].i8.xy  = subgroupQuadSwapVertical(data[1].i8.xy);
+    data[invocation].i8.xyz = subgroupQuadSwapVertical(data[2].i8.xyz);
+    data[invocation].i8     = subgroupQuadSwapVertical(data[3].i8);
+
+    data[invocation].i8.x   = subgroupQuadSwapDiagonal(data[0].i8.x);
+    data[invocation].i8.xy  = subgroupQuadSwapDiagonal(data[1].i8.xy);
+    data[invocation].i8.xyz = subgroupQuadSwapDiagonal(data[2].i8.xyz);
+    data[invocation].i8     = subgroupQuadSwapDiagonal(data[3].i8);
+
+    data[invocation].u8.x   = subgroupQuadBroadcast(data[0].u8.x, 1);
+    data[invocation].u8.xy  = subgroupQuadBroadcast(data[1].u8.xy, 1);
+    data[invocation].u8.xyz = subgroupQuadBroadcast(data[2].u8.xyz, 1);
+    data[invocation].u8     = subgroupQuadBroadcast(data[3].u8, 1);
+
+    data[invocation].u8.x   = subgroupQuadSwapHorizontal(data[0].u8.x);
+    data[invocation].u8.xy  = subgroupQuadSwapHorizontal(data[1].u8.xy);
+    data[invocation].u8.xyz = subgroupQuadSwapHorizontal(data[2].u8.xyz);
+    data[invocation].u8     = subgroupQuadSwapHorizontal(data[3].u8);
+
+    data[invocation].u8.x   = subgroupQuadSwapVertical(data[0].u8.x);
+    data[invocation].u8.xy  = subgroupQuadSwapVertical(data[1].u8.xy);
+    data[invocation].u8.xyz = subgroupQuadSwapVertical(data[2].u8.xyz);
+    data[invocation].u8     = subgroupQuadSwapVertical(data[3].u8);
+
+    data[invocation].u8.x   = subgroupQuadSwapDiagonal(data[0].u8.x);
+    data[invocation].u8.xy  = subgroupQuadSwapDiagonal(data[1].u8.xy);
+    data[invocation].u8.xyz = subgroupQuadSwapDiagonal(data[2].u8.xyz);
+    data[invocation].u8     = subgroupQuadSwapDiagonal(data[3].u8);
+
+    data[invocation].i16.x   = subgroupQuadBroadcast(data[0].i16.x, 1);
+    data[invocation].i16.xy  = subgroupQuadBroadcast(data[1].i16.xy, 1);
+    data[invocation].i16.xyz = subgroupQuadBroadcast(data[2].i16.xyz, 1);
+    data[invocation].i16     = subgroupQuadBroadcast(data[3].i16, 1);
+
+    data[invocation].i16.x   = subgroupQuadSwapHorizontal(data[0].i16.x);
+    data[invocation].i16.xy  = subgroupQuadSwapHorizontal(data[1].i16.xy);
+    data[invocation].i16.xyz = subgroupQuadSwapHorizontal(data[2].i16.xyz);
+    data[invocation].i16     = subgroupQuadSwapHorizontal(data[3].i16);
+
+    data[invocation].i16.x   = subgroupQuadSwapVertical(data[0].i16.x);
+    data[invocation].i16.xy  = subgroupQuadSwapVertical(data[1].i16.xy);
+    data[invocation].i16.xyz = subgroupQuadSwapVertical(data[2].i16.xyz);
+    data[invocation].i16     = subgroupQuadSwapVertical(data[3].i16);
+
+    data[invocation].i16.x   = subgroupQuadSwapDiagonal(data[0].i16.x);
+    data[invocation].i16.xy  = subgroupQuadSwapDiagonal(data[1].i16.xy);
+    data[invocation].i16.xyz = subgroupQuadSwapDiagonal(data[2].i16.xyz);
+    data[invocation].i16     = subgroupQuadSwapDiagonal(data[3].i16);
+
+    data[invocation].u16.x   = subgroupQuadBroadcast(data[0].u16.x, 1);
+    data[invocation].u16.xy  = subgroupQuadBroadcast(data[1].u16.xy, 1);
+    data[invocation].u16.xyz = subgroupQuadBroadcast(data[2].u16.xyz, 1);
+    data[invocation].u16     = subgroupQuadBroadcast(data[3].u16, 1);
+
+    data[invocation].u16.x   = subgroupQuadSwapHorizontal(data[0].u16.x);
+    data[invocation].u16.xy  = subgroupQuadSwapHorizontal(data[1].u16.xy);
+    data[invocation].u16.xyz = subgroupQuadSwapHorizontal(data[2].u16.xyz);
+    data[invocation].u16     = subgroupQuadSwapHorizontal(data[3].u16);
+
+    data[invocation].u16.x   = subgroupQuadSwapVertical(data[0].u16.x);
+    data[invocation].u16.xy  = subgroupQuadSwapVertical(data[1].u16.xy);
+    data[invocation].u16.xyz = subgroupQuadSwapVertical(data[2].u16.xyz);
+    data[invocation].u16     = subgroupQuadSwapVertical(data[3].u16);
+
+    data[invocation].u16.x   = subgroupQuadSwapDiagonal(data[0].u16.x);
+    data[invocation].u16.xy  = subgroupQuadSwapDiagonal(data[1].u16.xy);
+    data[invocation].u16.xyz = subgroupQuadSwapDiagonal(data[2].u16.xyz);
+    data[invocation].u16     = subgroupQuadSwapDiagonal(data[3].u16);
+
+    data[invocation].i64.x   = subgroupQuadBroadcast(data[0].i64.x, 1);
+    data[invocation].i64.xy  = subgroupQuadBroadcast(data[1].i64.xy, 1);
+    data[invocation].i64.xyz = subgroupQuadBroadcast(data[2].i64.xyz, 1);
+    data[invocation].i64     = subgroupQuadBroadcast(data[3].i64, 1);
+
+    data[invocation].i64.x   = subgroupQuadSwapHorizontal(data[0].i64.x);
+    data[invocation].i64.xy  = subgroupQuadSwapHorizontal(data[1].i64.xy);
+    data[invocation].i64.xyz = subgroupQuadSwapHorizontal(data[2].i64.xyz);
+    data[invocation].i64     = subgroupQuadSwapHorizontal(data[3].i64);
+
+    data[invocation].i64.x   = subgroupQuadSwapVertical(data[0].i64.x);
+    data[invocation].i64.xy  = subgroupQuadSwapVertical(data[1].i64.xy);
+    data[invocation].i64.xyz = subgroupQuadSwapVertical(data[2].i64.xyz);
+    data[invocation].i64     = subgroupQuadSwapVertical(data[3].i64);
+
+    data[invocation].i64.x   = subgroupQuadSwapDiagonal(data[0].i64.x);
+    data[invocation].i64.xy  = subgroupQuadSwapDiagonal(data[1].i64.xy);
+    data[invocation].i64.xyz = subgroupQuadSwapDiagonal(data[2].i64.xyz);
+    data[invocation].i64     = subgroupQuadSwapDiagonal(data[3].i64);
+
+    data[invocation].u64.x   = subgroupQuadBroadcast(data[0].u64.x, 1);
+    data[invocation].u64.xy  = subgroupQuadBroadcast(data[1].u64.xy, 1);
+    data[invocation].u64.xyz = subgroupQuadBroadcast(data[2].u64.xyz, 1);
+    data[invocation].u64     = subgroupQuadBroadcast(data[3].u64, 1);
+
+    data[invocation].u64.x   = subgroupQuadSwapHorizontal(data[0].u64.x);
+    data[invocation].u64.xy  = subgroupQuadSwapHorizontal(data[1].u64.xy);
+    data[invocation].u64.xyz = subgroupQuadSwapHorizontal(data[2].u64.xyz);
+    data[invocation].u64     = subgroupQuadSwapHorizontal(data[3].u64);
+
+    data[invocation].u64.x   = subgroupQuadSwapVertical(data[0].u64.x);
+    data[invocation].u64.xy  = subgroupQuadSwapVertical(data[1].u64.xy);
+    data[invocation].u64.xyz = subgroupQuadSwapVertical(data[2].u64.xyz);
+    data[invocation].u64     = subgroupQuadSwapVertical(data[3].u64);
+
+    data[invocation].u64.x   = subgroupQuadSwapDiagonal(data[0].u64.x);
+    data[invocation].u64.xy  = subgroupQuadSwapDiagonal(data[1].u64.xy);
+    data[invocation].u64.xyz = subgroupQuadSwapDiagonal(data[2].u64.xyz);
+    data[invocation].u64     = subgroupQuadSwapDiagonal(data[3].u64);
+
+    data[invocation].f16.x   = subgroupQuadBroadcast(data[0].f16.x, 1);
+    data[invocation].f16.xy  = subgroupQuadBroadcast(data[1].f16.xy, 1);
+    data[invocation].f16.xyz = subgroupQuadBroadcast(data[2].f16.xyz, 1);
+    data[invocation].f16     = subgroupQuadBroadcast(data[3].f16, 1);
+
+    data[invocation].f16.x   = subgroupQuadSwapHorizontal(data[0].f16.x);
+    data[invocation].f16.xy  = subgroupQuadSwapHorizontal(data[1].f16.xy);
+    data[invocation].f16.xyz = subgroupQuadSwapHorizontal(data[2].f16.xyz);
+    data[invocation].f16     = subgroupQuadSwapHorizontal(data[3].f16);
+
+    data[invocation].f16.x   = subgroupQuadSwapVertical(data[0].f16.x);
+    data[invocation].f16.xy  = subgroupQuadSwapVertical(data[1].f16.xy);
+    data[invocation].f16.xyz = subgroupQuadSwapVertical(data[2].f16.xyz);
+    data[invocation].f16     = subgroupQuadSwapVertical(data[3].f16);
+
+    data[invocation].f16.x   = subgroupQuadSwapDiagonal(data[0].f16.x);
+    data[invocation].f16.xy  = subgroupQuadSwapDiagonal(data[1].f16.xy);
+    data[invocation].f16.xyz = subgroupQuadSwapDiagonal(data[2].f16.xyz);
+    data[invocation].f16     = subgroupQuadSwapDiagonal(data[3].f16);
+}
diff --git a/Test/spv.subgroupExtendedTypesShuffle.comp b/Test/spv.subgroupExtendedTypesShuffle.comp
new file mode 100644
index 0000000..de733e6
--- /dev/null
+++ b/Test/spv.subgroupExtendedTypesShuffle.comp
@@ -0,0 +1,95 @@
+#version 450
+// Compute-shader test input: subgroupShuffle / subgroupShuffleXor on 8-, 16-, 64-bit integer and 16-bit float vectors.
+#extension GL_KHR_shader_subgroup_shuffle: enable
+#extension GL_EXT_shader_subgroup_extended_types_int8: enable
+#extension GL_EXT_shader_subgroup_extended_types_int16: enable
+#extension GL_EXT_shader_subgroup_extended_types_int64: enable
+#extension GL_EXT_shader_subgroup_extended_types_float16: enable
+
+layout (local_size_x = 8) in;
+// One 4-component member per extended element type; the block is instantiated as an array of 4.
+layout(binding = 0) buffer Buffers
+{
+    i8vec4 i8;
+    u8vec4 u8;
+    i16vec4 i16;
+    u16vec4 u16;
+    i64vec4 i64;
+    u64vec4 u64;
+    f16vec4 f16;
+} data[4];
+
+void main() // every statement below stores one shuffle result into data[invocation]
+{
+    uint invocation = (gl_SubgroupInvocationID + gl_SubgroupSize) % 4; // 0..3; also passed as the second argument of every call
+    // i8: subgroupShuffle on 1-, 2-, 3- and 4-component operands (read from data[0]..data[3])
+    data[invocation].i8.x   = subgroupShuffle(data[0].i8.x,    invocation);
+    data[invocation].i8.xy  = subgroupShuffle(data[1].i8.xy,   invocation);
+    data[invocation].i8.xyz = subgroupShuffle(data[2].i8.xyz,  invocation);
+    data[invocation].i8     = subgroupShuffle(data[3].i8,      invocation);
+    // i8: subgroupShuffleXor, same four operand widths
+    data[invocation].i8.x   = subgroupShuffleXor(data[0].i8.x,    invocation);
+    data[invocation].i8.xy  = subgroupShuffleXor(data[1].i8.xy,   invocation);
+    data[invocation].i8.xyz = subgroupShuffleXor(data[2].i8.xyz,  invocation);
+    data[invocation].i8     = subgroupShuffleXor(data[3].i8,      invocation);
+    // u8: subgroupShuffle
+    data[invocation].u8.x   = subgroupShuffle(data[0].u8.x,    invocation);
+    data[invocation].u8.xy  = subgroupShuffle(data[1].u8.xy,   invocation);
+    data[invocation].u8.xyz = subgroupShuffle(data[2].u8.xyz,  invocation);
+    data[invocation].u8     = subgroupShuffle(data[3].u8,      invocation);
+    // u8: subgroupShuffleXor
+    data[invocation].u8.x   = subgroupShuffleXor(data[0].u8.x,    invocation);
+    data[invocation].u8.xy  = subgroupShuffleXor(data[1].u8.xy,   invocation);
+    data[invocation].u8.xyz = subgroupShuffleXor(data[2].u8.xyz,  invocation);
+    data[invocation].u8     = subgroupShuffleXor(data[3].u8,      invocation);
+    // i16: subgroupShuffle
+    data[invocation].i16.x   = subgroupShuffle(data[0].i16.x,    invocation);
+    data[invocation].i16.xy  = subgroupShuffle(data[1].i16.xy,   invocation);
+    data[invocation].i16.xyz = subgroupShuffle(data[2].i16.xyz,  invocation);
+    data[invocation].i16     = subgroupShuffle(data[3].i16,      invocation);
+    // i16: subgroupShuffleXor
+    data[invocation].i16.x   = subgroupShuffleXor(data[0].i16.x,    invocation);
+    data[invocation].i16.xy  = subgroupShuffleXor(data[1].i16.xy,   invocation);
+    data[invocation].i16.xyz = subgroupShuffleXor(data[2].i16.xyz,  invocation);
+    data[invocation].i16     = subgroupShuffleXor(data[3].i16,      invocation);
+    // u16: subgroupShuffle
+    data[invocation].u16.x   = subgroupShuffle(data[0].u16.x,    invocation);
+    data[invocation].u16.xy  = subgroupShuffle(data[1].u16.xy,   invocation);
+    data[invocation].u16.xyz = subgroupShuffle(data[2].u16.xyz,  invocation);
+    data[invocation].u16     = subgroupShuffle(data[3].u16,      invocation);
+    // u16: subgroupShuffleXor
+    data[invocation].u16.x   = subgroupShuffleXor(data[0].u16.x,    invocation);
+    data[invocation].u16.xy  = subgroupShuffleXor(data[1].u16.xy,   invocation);
+    data[invocation].u16.xyz = subgroupShuffleXor(data[2].u16.xyz,  invocation);
+    data[invocation].u16     = subgroupShuffleXor(data[3].u16,      invocation);
+    // i64: subgroupShuffle
+    data[invocation].i64.x   = subgroupShuffle(data[0].i64.x,    invocation);
+    data[invocation].i64.xy  = subgroupShuffle(data[1].i64.xy,   invocation);
+    data[invocation].i64.xyz = subgroupShuffle(data[2].i64.xyz,  invocation);
+    data[invocation].i64     = subgroupShuffle(data[3].i64,      invocation);
+    // i64: subgroupShuffleXor
+    data[invocation].i64.x   = subgroupShuffleXor(data[0].i64.x,    invocation);
+    data[invocation].i64.xy  = subgroupShuffleXor(data[1].i64.xy,   invocation);
+    data[invocation].i64.xyz = subgroupShuffleXor(data[2].i64.xyz,  invocation);
+    data[invocation].i64     = subgroupShuffleXor(data[3].i64,      invocation);
+    // u64: subgroupShuffle
+    data[invocation].u64.x   = subgroupShuffle(data[0].u64.x,    invocation);
+    data[invocation].u64.xy  = subgroupShuffle(data[1].u64.xy,   invocation);
+    data[invocation].u64.xyz = subgroupShuffle(data[2].u64.xyz,  invocation);
+    data[invocation].u64     = subgroupShuffle(data[3].u64,      invocation);
+    // u64: subgroupShuffleXor
+    data[invocation].u64.x   = subgroupShuffleXor(data[0].u64.x,    invocation);
+    data[invocation].u64.xy  = subgroupShuffleXor(data[1].u64.xy,   invocation);
+    data[invocation].u64.xyz = subgroupShuffleXor(data[2].u64.xyz,  invocation);
+    data[invocation].u64     = subgroupShuffleXor(data[3].u64,      invocation);
+    // f16: subgroupShuffle
+    data[invocation].f16.x   = subgroupShuffle(data[0].f16.x,    invocation);
+    data[invocation].f16.xy  = subgroupShuffle(data[1].f16.xy,   invocation);
+    data[invocation].f16.xyz = subgroupShuffle(data[2].f16.xyz,  invocation);
+    data[invocation].f16     = subgroupShuffle(data[3].f16,      invocation);
+    // f16: subgroupShuffleXor
+    data[invocation].f16.x   = subgroupShuffleXor(data[0].f16.x,    invocation);
+    data[invocation].f16.xy  = subgroupShuffleXor(data[1].f16.xy,   invocation);
+    data[invocation].f16.xyz = subgroupShuffleXor(data[2].f16.xyz,  invocation);
+    data[invocation].f16     = subgroupShuffleXor(data[3].f16,      invocation);
+}
diff --git a/Test/spv.subgroupExtendedTypesShuffleNeg.comp b/Test/spv.subgroupExtendedTypesShuffleNeg.comp
new file mode 100644
index 0000000..af73b54
--- /dev/null
+++ b/Test/spv.subgroupExtendedTypesShuffleNeg.comp
@@ -0,0 +1,95 @@
+#version 450
+// Same shuffle calls as spv.subgroupExtendedTypesShuffle.comp, but only the explicit_arithmetic_types extensions are enabled.
+#extension GL_KHR_shader_subgroup_shuffle: enable
+#extension GL_EXT_shader_explicit_arithmetic_types_int8: enable
+#extension GL_EXT_shader_explicit_arithmetic_types_int16: enable
+#extension GL_EXT_shader_explicit_arithmetic_types_int64: enable
+#extension GL_EXT_shader_explicit_arithmetic_types_float16: enable
+// NOTE(review): no GL_EXT_shader_subgroup_extended_types_* here; presumably a negative test - confirm against its expected output.
+layout (local_size_x = 8) in;
+// One 4-component member per extended element type; the block is instantiated as an array of 4.
+layout(binding = 0) buffer Buffers
+{
+    i8vec4 i8;
+    u8vec4 u8;
+    i16vec4 i16;
+    u16vec4 u16;
+    i64vec4 i64;
+    u64vec4 u64;
+    f16vec4 f16;
+} data[4];
+
+void main() // every statement below stores one shuffle result into data[invocation]
+{
+    uint invocation = (gl_SubgroupInvocationID + gl_SubgroupSize) % 4; // 0..3; also passed as the second argument of every call
+    // i8: subgroupShuffle on 1-, 2-, 3- and 4-component operands (read from data[0]..data[3])
+    data[invocation].i8.x   = subgroupShuffle(data[0].i8.x,    invocation);
+    data[invocation].i8.xy  = subgroupShuffle(data[1].i8.xy,   invocation);
+    data[invocation].i8.xyz = subgroupShuffle(data[2].i8.xyz,  invocation);
+    data[invocation].i8     = subgroupShuffle(data[3].i8,      invocation);
+    // i8: subgroupShuffleXor, same four operand widths
+    data[invocation].i8.x   = subgroupShuffleXor(data[0].i8.x,    invocation);
+    data[invocation].i8.xy  = subgroupShuffleXor(data[1].i8.xy,   invocation);
+    data[invocation].i8.xyz = subgroupShuffleXor(data[2].i8.xyz,  invocation);
+    data[invocation].i8     = subgroupShuffleXor(data[3].i8,      invocation);
+    // u8: subgroupShuffle
+    data[invocation].u8.x   = subgroupShuffle(data[0].u8.x,    invocation);
+    data[invocation].u8.xy  = subgroupShuffle(data[1].u8.xy,   invocation);
+    data[invocation].u8.xyz = subgroupShuffle(data[2].u8.xyz,  invocation);
+    data[invocation].u8     = subgroupShuffle(data[3].u8,      invocation);
+    // u8: subgroupShuffleXor
+    data[invocation].u8.x   = subgroupShuffleXor(data[0].u8.x,    invocation);
+    data[invocation].u8.xy  = subgroupShuffleXor(data[1].u8.xy,   invocation);
+    data[invocation].u8.xyz = subgroupShuffleXor(data[2].u8.xyz,  invocation);
+    data[invocation].u8     = subgroupShuffleXor(data[3].u8,      invocation);
+    // i16: subgroupShuffle
+    data[invocation].i16.x   = subgroupShuffle(data[0].i16.x,    invocation);
+    data[invocation].i16.xy  = subgroupShuffle(data[1].i16.xy,   invocation);
+    data[invocation].i16.xyz = subgroupShuffle(data[2].i16.xyz,  invocation);
+    data[invocation].i16     = subgroupShuffle(data[3].i16,      invocation);
+    // i16: subgroupShuffleXor
+    data[invocation].i16.x   = subgroupShuffleXor(data[0].i16.x,    invocation);
+    data[invocation].i16.xy  = subgroupShuffleXor(data[1].i16.xy,   invocation);
+    data[invocation].i16.xyz = subgroupShuffleXor(data[2].i16.xyz,  invocation);
+    data[invocation].i16     = subgroupShuffleXor(data[3].i16,      invocation);
+    // u16: subgroupShuffle
+    data[invocation].u16.x   = subgroupShuffle(data[0].u16.x,    invocation);
+    data[invocation].u16.xy  = subgroupShuffle(data[1].u16.xy,   invocation);
+    data[invocation].u16.xyz = subgroupShuffle(data[2].u16.xyz,  invocation);
+    data[invocation].u16     = subgroupShuffle(data[3].u16,      invocation);
+    // u16: subgroupShuffleXor
+    data[invocation].u16.x   = subgroupShuffleXor(data[0].u16.x,    invocation);
+    data[invocation].u16.xy  = subgroupShuffleXor(data[1].u16.xy,   invocation);
+    data[invocation].u16.xyz = subgroupShuffleXor(data[2].u16.xyz,  invocation);
+    data[invocation].u16     = subgroupShuffleXor(data[3].u16,      invocation);
+    // i64: subgroupShuffle
+    data[invocation].i64.x   = subgroupShuffle(data[0].i64.x,    invocation);
+    data[invocation].i64.xy  = subgroupShuffle(data[1].i64.xy,   invocation);
+    data[invocation].i64.xyz = subgroupShuffle(data[2].i64.xyz,  invocation);
+    data[invocation].i64     = subgroupShuffle(data[3].i64,      invocation);
+    // i64: subgroupShuffleXor
+    data[invocation].i64.x   = subgroupShuffleXor(data[0].i64.x,    invocation);
+    data[invocation].i64.xy  = subgroupShuffleXor(data[1].i64.xy,   invocation);
+    data[invocation].i64.xyz = subgroupShuffleXor(data[2].i64.xyz,  invocation);
+    data[invocation].i64     = subgroupShuffleXor(data[3].i64,      invocation);
+    // u64: subgroupShuffle
+    data[invocation].u64.x   = subgroupShuffle(data[0].u64.x,    invocation);
+    data[invocation].u64.xy  = subgroupShuffle(data[1].u64.xy,   invocation);
+    data[invocation].u64.xyz = subgroupShuffle(data[2].u64.xyz,  invocation);
+    data[invocation].u64     = subgroupShuffle(data[3].u64,      invocation);
+    // u64: subgroupShuffleXor
+    data[invocation].u64.x   = subgroupShuffleXor(data[0].u64.x,    invocation);
+    data[invocation].u64.xy  = subgroupShuffleXor(data[1].u64.xy,   invocation);
+    data[invocation].u64.xyz = subgroupShuffleXor(data[2].u64.xyz,  invocation);
+    data[invocation].u64     = subgroupShuffleXor(data[3].u64,      invocation);
+    // f16: subgroupShuffle
+    data[invocation].f16.x   = subgroupShuffle(data[0].f16.x,    invocation);
+    data[invocation].f16.xy  = subgroupShuffle(data[1].f16.xy,   invocation);
+    data[invocation].f16.xyz = subgroupShuffle(data[2].f16.xyz,  invocation);
+    data[invocation].f16     = subgroupShuffle(data[3].f16,      invocation);
+    // f16: subgroupShuffleXor
+    data[invocation].f16.x   = subgroupShuffleXor(data[0].f16.x,    invocation);
+    data[invocation].f16.xy  = subgroupShuffleXor(data[1].f16.xy,   invocation);
+    data[invocation].f16.xyz = subgroupShuffleXor(data[2].f16.xyz,  invocation);
+    data[invocation].f16     = subgroupShuffleXor(data[3].f16,      invocation);
+}
diff --git a/Test/spv.subgroupExtendedTypesShuffleRelative.comp b/Test/spv.subgroupExtendedTypesShuffleRelative.comp
new file mode 100644
index 0000000..73f5970
--- /dev/null
+++ b/Test/spv.subgroupExtendedTypesShuffleRelative.comp
@@ -0,0 +1,95 @@
+#version 450
+// Compute-shader test input: subgroupShuffleUp / subgroupShuffleDown on 8-, 16-, 64-bit integer and 16-bit float vectors.
+#extension GL_KHR_shader_subgroup_shuffle_relative: enable
+#extension GL_EXT_shader_subgroup_extended_types_int8: enable
+#extension GL_EXT_shader_subgroup_extended_types_int16: enable
+#extension GL_EXT_shader_subgroup_extended_types_int64: enable
+#extension GL_EXT_shader_subgroup_extended_types_float16: enable
+
+layout (local_size_x = 8) in;
+// One 4-component member per extended element type; the block is instantiated as an array of 4.
+layout(binding = 0) buffer Buffers
+{
+    i8vec4 i8;
+    u8vec4 u8;
+    i16vec4 i16;
+    u16vec4 u16;
+    i64vec4 i64;
+    u64vec4 u64;
+    f16vec4 f16;
+} data[4];
+
+void main() // every statement below stores one relative-shuffle result into data[invocation]
+{
+    uint invocation = (gl_SubgroupInvocationID + gl_SubgroupSize) % 4; // 0..3; also passed as the second argument of every call
+    // i8: subgroupShuffleUp on 1-, 2-, 3- and 4-component operands (read from data[0]..data[3])
+    data[invocation].i8.x   = subgroupShuffleUp(data[0].i8.x,    invocation);
+    data[invocation].i8.xy  = subgroupShuffleUp(data[1].i8.xy,   invocation);
+    data[invocation].i8.xyz = subgroupShuffleUp(data[2].i8.xyz,  invocation);
+    data[invocation].i8     = subgroupShuffleUp(data[3].i8,      invocation);
+    // i8: subgroupShuffleDown, same four operand widths
+    data[invocation].i8.x   = subgroupShuffleDown(data[0].i8.x,    invocation);
+    data[invocation].i8.xy  = subgroupShuffleDown(data[1].i8.xy,   invocation);
+    data[invocation].i8.xyz = subgroupShuffleDown(data[2].i8.xyz,  invocation);
+    data[invocation].i8     = subgroupShuffleDown(data[3].i8,      invocation);
+    // u8: subgroupShuffleUp
+    data[invocation].u8.x   = subgroupShuffleUp(data[0].u8.x,    invocation);
+    data[invocation].u8.xy  = subgroupShuffleUp(data[1].u8.xy,   invocation);
+    data[invocation].u8.xyz = subgroupShuffleUp(data[2].u8.xyz,  invocation);
+    data[invocation].u8     = subgroupShuffleUp(data[3].u8,      invocation);
+    // u8: subgroupShuffleDown
+    data[invocation].u8.x   = subgroupShuffleDown(data[0].u8.x,    invocation);
+    data[invocation].u8.xy  = subgroupShuffleDown(data[1].u8.xy,   invocation);
+    data[invocation].u8.xyz = subgroupShuffleDown(data[2].u8.xyz,  invocation);
+    data[invocation].u8     = subgroupShuffleDown(data[3].u8,      invocation);
+    // i16: subgroupShuffleUp
+    data[invocation].i16.x   = subgroupShuffleUp(data[0].i16.x,    invocation);
+    data[invocation].i16.xy  = subgroupShuffleUp(data[1].i16.xy,   invocation);
+    data[invocation].i16.xyz = subgroupShuffleUp(data[2].i16.xyz,  invocation);
+    data[invocation].i16     = subgroupShuffleUp(data[3].i16,      invocation);
+    // i16: subgroupShuffleDown
+    data[invocation].i16.x   = subgroupShuffleDown(data[0].i16.x,    invocation);
+    data[invocation].i16.xy  = subgroupShuffleDown(data[1].i16.xy,   invocation);
+    data[invocation].i16.xyz = subgroupShuffleDown(data[2].i16.xyz,  invocation);
+    data[invocation].i16     = subgroupShuffleDown(data[3].i16,      invocation);
+    // u16: subgroupShuffleUp
+    data[invocation].u16.x   = subgroupShuffleUp(data[0].u16.x,    invocation);
+    data[invocation].u16.xy  = subgroupShuffleUp(data[1].u16.xy,   invocation);
+    data[invocation].u16.xyz = subgroupShuffleUp(data[2].u16.xyz,  invocation);
+    data[invocation].u16     = subgroupShuffleUp(data[3].u16,      invocation);
+    // u16: subgroupShuffleDown
+    data[invocation].u16.x   = subgroupShuffleDown(data[0].u16.x,    invocation);
+    data[invocation].u16.xy  = subgroupShuffleDown(data[1].u16.xy,   invocation);
+    data[invocation].u16.xyz = subgroupShuffleDown(data[2].u16.xyz,  invocation);
+    data[invocation].u16     = subgroupShuffleDown(data[3].u16,      invocation);
+    // i64: subgroupShuffleUp
+    data[invocation].i64.x   = subgroupShuffleUp(data[0].i64.x,    invocation);
+    data[invocation].i64.xy  = subgroupShuffleUp(data[1].i64.xy,   invocation);
+    data[invocation].i64.xyz = subgroupShuffleUp(data[2].i64.xyz,  invocation);
+    data[invocation].i64     = subgroupShuffleUp(data[3].i64,      invocation);
+    // i64: subgroupShuffleDown
+    data[invocation].i64.x   = subgroupShuffleDown(data[0].i64.x,    invocation);
+    data[invocation].i64.xy  = subgroupShuffleDown(data[1].i64.xy,   invocation);
+    data[invocation].i64.xyz = subgroupShuffleDown(data[2].i64.xyz,  invocation);
+    data[invocation].i64     = subgroupShuffleDown(data[3].i64,      invocation);
+    // u64: subgroupShuffleUp
+    data[invocation].u64.x   = subgroupShuffleUp(data[0].u64.x,    invocation);
+    data[invocation].u64.xy  = subgroupShuffleUp(data[1].u64.xy,   invocation);
+    data[invocation].u64.xyz = subgroupShuffleUp(data[2].u64.xyz,  invocation);
+    data[invocation].u64     = subgroupShuffleUp(data[3].u64,      invocation);
+    // u64: subgroupShuffleDown
+    data[invocation].u64.x   = subgroupShuffleDown(data[0].u64.x,    invocation);
+    data[invocation].u64.xy  = subgroupShuffleDown(data[1].u64.xy,   invocation);
+    data[invocation].u64.xyz = subgroupShuffleDown(data[2].u64.xyz,  invocation);
+    data[invocation].u64     = subgroupShuffleDown(data[3].u64,      invocation);
+    // f16: subgroupShuffleUp
+    data[invocation].f16.x   = subgroupShuffleUp(data[0].f16.x,    invocation);
+    data[invocation].f16.xy  = subgroupShuffleUp(data[1].f16.xy,   invocation);
+    data[invocation].f16.xyz = subgroupShuffleUp(data[2].f16.xyz,  invocation);
+    data[invocation].f16     = subgroupShuffleUp(data[3].f16,      invocation);
+    // f16: subgroupShuffleDown
+    data[invocation].f16.x   = subgroupShuffleDown(data[0].f16.x,    invocation);
+    data[invocation].f16.xy  = subgroupShuffleDown(data[1].f16.xy,   invocation);
+    data[invocation].f16.xyz = subgroupShuffleDown(data[2].f16.xyz,  invocation);
+    data[invocation].f16     = subgroupShuffleDown(data[3].f16,      invocation);
+}
diff --git a/Test/spv.subgroupExtendedTypesShuffleRelativeNeg.comp b/Test/spv.subgroupExtendedTypesShuffleRelativeNeg.comp
new file mode 100644
index 0000000..9857444
--- /dev/null
+++ b/Test/spv.subgroupExtendedTypesShuffleRelativeNeg.comp
@@ -0,0 +1,95 @@
+#version 450
+// Same calls as spv.subgroupExtendedTypesShuffleRelative.comp, but only the explicit_arithmetic_types extensions are enabled.
+#extension GL_KHR_shader_subgroup_shuffle_relative: enable
+#extension GL_EXT_shader_explicit_arithmetic_types_int8: enable
+#extension GL_EXT_shader_explicit_arithmetic_types_int16: enable
+#extension GL_EXT_shader_explicit_arithmetic_types_int64: enable
+#extension GL_EXT_shader_explicit_arithmetic_types_float16: enable
+// NOTE(review): no GL_EXT_shader_subgroup_extended_types_* here; presumably a negative test - confirm against its expected output.
+layout (local_size_x = 8) in;
+// One 4-component member per extended element type; the block is instantiated as an array of 4.
+layout(binding = 0) buffer Buffers
+{
+    i8vec4 i8;
+    u8vec4 u8;
+    i16vec4 i16;
+    u16vec4 u16;
+    i64vec4 i64;
+    u64vec4 u64;
+    f16vec4 f16;
+} data[4];
+
+void main() // every statement below stores one relative-shuffle result into data[invocation]
+{
+    uint invocation = (gl_SubgroupInvocationID + gl_SubgroupSize) % 4; // 0..3; also passed as the second argument of every call
+    // i8: subgroupShuffleUp on 1-, 2-, 3- and 4-component operands (read from data[0]..data[3])
+    data[invocation].i8.x   = subgroupShuffleUp(data[0].i8.x,    invocation);
+    data[invocation].i8.xy  = subgroupShuffleUp(data[1].i8.xy,   invocation);
+    data[invocation].i8.xyz = subgroupShuffleUp(data[2].i8.xyz,  invocation);
+    data[invocation].i8     = subgroupShuffleUp(data[3].i8,      invocation);
+    // i8: subgroupShuffleDown, same four operand widths
+    data[invocation].i8.x   = subgroupShuffleDown(data[0].i8.x,    invocation);
+    data[invocation].i8.xy  = subgroupShuffleDown(data[1].i8.xy,   invocation);
+    data[invocation].i8.xyz = subgroupShuffleDown(data[2].i8.xyz,  invocation);
+    data[invocation].i8     = subgroupShuffleDown(data[3].i8,      invocation);
+    // u8: subgroupShuffleUp
+    data[invocation].u8.x   = subgroupShuffleUp(data[0].u8.x,    invocation);
+    data[invocation].u8.xy  = subgroupShuffleUp(data[1].u8.xy,   invocation);
+    data[invocation].u8.xyz = subgroupShuffleUp(data[2].u8.xyz,  invocation);
+    data[invocation].u8     = subgroupShuffleUp(data[3].u8,      invocation);
+    // u8: subgroupShuffleDown
+    data[invocation].u8.x   = subgroupShuffleDown(data[0].u8.x,    invocation);
+    data[invocation].u8.xy  = subgroupShuffleDown(data[1].u8.xy,   invocation);
+    data[invocation].u8.xyz = subgroupShuffleDown(data[2].u8.xyz,  invocation);
+    data[invocation].u8     = subgroupShuffleDown(data[3].u8,      invocation);
+    // i16: subgroupShuffleUp
+    data[invocation].i16.x   = subgroupShuffleUp(data[0].i16.x,    invocation);
+    data[invocation].i16.xy  = subgroupShuffleUp(data[1].i16.xy,   invocation);
+    data[invocation].i16.xyz = subgroupShuffleUp(data[2].i16.xyz,  invocation);
+    data[invocation].i16     = subgroupShuffleUp(data[3].i16,      invocation);
+    // i16: subgroupShuffleDown
+    data[invocation].i16.x   = subgroupShuffleDown(data[0].i16.x,    invocation);
+    data[invocation].i16.xy  = subgroupShuffleDown(data[1].i16.xy,   invocation);
+    data[invocation].i16.xyz = subgroupShuffleDown(data[2].i16.xyz,  invocation);
+    data[invocation].i16     = subgroupShuffleDown(data[3].i16,      invocation);
+    // u16: subgroupShuffleUp
+    data[invocation].u16.x   = subgroupShuffleUp(data[0].u16.x,    invocation);
+    data[invocation].u16.xy  = subgroupShuffleUp(data[1].u16.xy,   invocation);
+    data[invocation].u16.xyz = subgroupShuffleUp(data[2].u16.xyz,  invocation);
+    data[invocation].u16     = subgroupShuffleUp(data[3].u16,      invocation);
+    // u16: subgroupShuffleDown
+    data[invocation].u16.x   = subgroupShuffleDown(data[0].u16.x,    invocation);
+    data[invocation].u16.xy  = subgroupShuffleDown(data[1].u16.xy,   invocation);
+    data[invocation].u16.xyz = subgroupShuffleDown(data[2].u16.xyz,  invocation);
+    data[invocation].u16     = subgroupShuffleDown(data[3].u16,      invocation);
+    // i64: subgroupShuffleUp
+    data[invocation].i64.x   = subgroupShuffleUp(data[0].i64.x,    invocation);
+    data[invocation].i64.xy  = subgroupShuffleUp(data[1].i64.xy,   invocation);
+    data[invocation].i64.xyz = subgroupShuffleUp(data[2].i64.xyz,  invocation);
+    data[invocation].i64     = subgroupShuffleUp(data[3].i64,      invocation);
+    // i64: subgroupShuffleDown
+    data[invocation].i64.x   = subgroupShuffleDown(data[0].i64.x,    invocation);
+    data[invocation].i64.xy  = subgroupShuffleDown(data[1].i64.xy,   invocation);
+    data[invocation].i64.xyz = subgroupShuffleDown(data[2].i64.xyz,  invocation);
+    data[invocation].i64     = subgroupShuffleDown(data[3].i64,      invocation);
+    // u64: subgroupShuffleUp
+    data[invocation].u64.x   = subgroupShuffleUp(data[0].u64.x,    invocation);
+    data[invocation].u64.xy  = subgroupShuffleUp(data[1].u64.xy,   invocation);
+    data[invocation].u64.xyz = subgroupShuffleUp(data[2].u64.xyz,  invocation);
+    data[invocation].u64     = subgroupShuffleUp(data[3].u64,      invocation);
+    // u64: subgroupShuffleDown
+    data[invocation].u64.x   = subgroupShuffleDown(data[0].u64.x,    invocation);
+    data[invocation].u64.xy  = subgroupShuffleDown(data[1].u64.xy,   invocation);
+    data[invocation].u64.xyz = subgroupShuffleDown(data[2].u64.xyz,  invocation);
+    data[invocation].u64     = subgroupShuffleDown(data[3].u64,      invocation);
+    // f16: subgroupShuffleUp
+    data[invocation].f16.x   = subgroupShuffleUp(data[0].f16.x,    invocation);
+    data[invocation].f16.xy  = subgroupShuffleUp(data[1].f16.xy,   invocation);
+    data[invocation].f16.xyz = subgroupShuffleUp(data[2].f16.xyz,  invocation);
+    data[invocation].f16     = subgroupShuffleUp(data[3].f16,      invocation);
+    // f16: subgroupShuffleDown
+    data[invocation].f16.x   = subgroupShuffleDown(data[0].f16.x,    invocation);
+    data[invocation].f16.xy  = subgroupShuffleDown(data[1].f16.xy,   invocation);
+    data[invocation].f16.xyz = subgroupShuffleDown(data[2].f16.xyz,  invocation);
+    data[invocation].f16     = subgroupShuffleDown(data[3].f16,      invocation);
+}
diff --git a/Test/spv.subgroupExtendedTypesVote.comp b/Test/spv.subgroupExtendedTypesVote.comp
new file mode 100644
index 0000000..960156a
--- /dev/null
+++ b/Test/spv.subgroupExtendedTypesVote.comp
@@ -0,0 +1,66 @@
+#version 450
+// Compute-shader test input: subgroupAllEqual on 8-, 16-, 64-bit integer and 16-bit float vectors.
+#extension GL_KHR_shader_subgroup_vote: enable
+#extension GL_EXT_shader_subgroup_extended_types_int8: enable
+#extension GL_EXT_shader_subgroup_extended_types_int16: enable
+#extension GL_EXT_shader_subgroup_extended_types_int64: enable
+#extension GL_EXT_shader_subgroup_extended_types_float16: enable
+
+layout (local_size_x = 8) in;
+// One 4-component member per extended element type plus an int result slot; instantiated as an array of 4.
+layout(binding = 0) buffer Buffers
+{
+    i8vec4 i8;
+    u8vec4 u8;
+    i16vec4 i16;
+    u16vec4 u16;
+    i64vec4 i64;
+    u64vec4 u64;
+    f16vec4 f16;
+    int r; // tested against 0 and overwritten with int(subgroupAllEqual(...)) in main()
+} data[4];
+
+void main() // every assignment below stores int(subgroupAllEqual(...)) into data[invocation].r
+{
+    uint invocation = (gl_SubgroupInvocationID + gl_SubgroupSize) % 4; // 0..3, selects the block that is tested and written
+    // subgroupAll branch: 8- and 16-bit integer operands, in the order i8, u8, i16, u16
+    if (subgroupAll(data[invocation].r < 0))
+    {
+        data[invocation].r = int(subgroupAllEqual(data[0].i8.x));
+        data[invocation].r = int(subgroupAllEqual(data[1].i8.xy));
+        data[invocation].r = int(subgroupAllEqual(data[2].i8.xyz));
+        data[invocation].r = int(subgroupAllEqual(data[3].i8));
+        // u8
+        data[invocation].r = int(subgroupAllEqual(data[0].u8.x));
+        data[invocation].r = int(subgroupAllEqual(data[1].u8.xy));
+        data[invocation].r = int(subgroupAllEqual(data[2].u8.xyz));
+        data[invocation].r = int(subgroupAllEqual(data[3].u8));
+        // i16
+        data[invocation].r = int(subgroupAllEqual(data[0].i16.x));
+        data[invocation].r = int(subgroupAllEqual(data[1].i16.xy));
+        data[invocation].r = int(subgroupAllEqual(data[2].i16.xyz));
+        data[invocation].r = int(subgroupAllEqual(data[3].i16));
+        // u16
+        data[invocation].r = int(subgroupAllEqual(data[0].u16.x));
+        data[invocation].r = int(subgroupAllEqual(data[1].u16.xy));
+        data[invocation].r = int(subgroupAllEqual(data[2].u16.xyz));
+        data[invocation].r = int(subgroupAllEqual(data[3].u16));
+    }
+    else if (subgroupAny(data[invocation].r < 0)) // subgroupAny branch: i64, u64, then f16 operands
+    {
+        data[invocation].r = int(subgroupAllEqual(data[0].i64.x));
+        data[invocation].r = int(subgroupAllEqual(data[1].i64.xy));
+        data[invocation].r = int(subgroupAllEqual(data[2].i64.xyz));
+        data[invocation].r = int(subgroupAllEqual(data[3].i64));
+        // u64
+        data[invocation].r = int(subgroupAllEqual(data[0].u64.x));
+        data[invocation].r = int(subgroupAllEqual(data[1].u64.xy));
+        data[invocation].r = int(subgroupAllEqual(data[2].u64.xyz));
+        data[invocation].r = int(subgroupAllEqual(data[3].u64));
+        // f16
+        data[invocation].r = int(subgroupAllEqual(data[0].f16.x));
+        data[invocation].r = int(subgroupAllEqual(data[1].f16.xy));
+        data[invocation].r = int(subgroupAllEqual(data[2].f16.xyz));
+        data[invocation].r = int(subgroupAllEqual(data[3].f16));
+    }
+}
diff --git a/Test/spv.subgroupExtendedTypesVoteNeg.comp b/Test/spv.subgroupExtendedTypesVoteNeg.comp
new file mode 100644
index 0000000..be8d1bb
--- /dev/null
+++ b/Test/spv.subgroupExtendedTypesVoteNeg.comp
@@ -0,0 +1,66 @@
+#version 450
+// Same calls as spv.subgroupExtendedTypesVote.comp, but only the explicit_arithmetic_types extensions are enabled.
+#extension GL_KHR_shader_subgroup_vote: enable
+#extension GL_EXT_shader_explicit_arithmetic_types_int8: enable
+#extension GL_EXT_shader_explicit_arithmetic_types_int16: enable
+#extension GL_EXT_shader_explicit_arithmetic_types_int64: enable
+#extension GL_EXT_shader_explicit_arithmetic_types_float16: enable
+// NOTE(review): no GL_EXT_shader_subgroup_extended_types_* here; presumably a negative test - confirm against its expected output.
+layout (local_size_x = 8) in;
+// One 4-component member per extended element type plus an int result slot; instantiated as an array of 4.
+layout(binding = 0) buffer Buffers
+{
+    i8vec4 i8;
+    u8vec4 u8;
+    i16vec4 i16;
+    u16vec4 u16;
+    i64vec4 i64;
+    u64vec4 u64;
+    f16vec4 f16;
+    int r; // tested against 0 and overwritten with int(subgroupAllEqual(...)) in main()
+} data[4];
+
+void main() // every assignment below stores int(subgroupAllEqual(...)) into data[invocation].r
+{
+    uint invocation = (gl_SubgroupInvocationID + gl_SubgroupSize) % 4; // 0..3, selects the block that is tested and written
+    // subgroupAll branch: 8- and 16-bit integer operands, in the order i8, u8, i16, u16
+    if (subgroupAll(data[invocation].r < 0))
+    {
+        data[invocation].r = int(subgroupAllEqual(data[0].i8.x));
+        data[invocation].r = int(subgroupAllEqual(data[1].i8.xy));
+        data[invocation].r = int(subgroupAllEqual(data[2].i8.xyz));
+        data[invocation].r = int(subgroupAllEqual(data[3].i8));
+        // u8
+        data[invocation].r = int(subgroupAllEqual(data[0].u8.x));
+        data[invocation].r = int(subgroupAllEqual(data[1].u8.xy));
+        data[invocation].r = int(subgroupAllEqual(data[2].u8.xyz));
+        data[invocation].r = int(subgroupAllEqual(data[3].u8));
+        // i16
+        data[invocation].r = int(subgroupAllEqual(data[0].i16.x));
+        data[invocation].r = int(subgroupAllEqual(data[1].i16.xy));
+        data[invocation].r = int(subgroupAllEqual(data[2].i16.xyz));
+        data[invocation].r = int(subgroupAllEqual(data[3].i16));
+        // u16
+        data[invocation].r = int(subgroupAllEqual(data[0].u16.x));
+        data[invocation].r = int(subgroupAllEqual(data[1].u16.xy));
+        data[invocation].r = int(subgroupAllEqual(data[2].u16.xyz));
+        data[invocation].r = int(subgroupAllEqual(data[3].u16));
+    }
+    else if (subgroupAny(data[invocation].r < 0)) // subgroupAny branch: i64, u64, then f16 operands
+    {
+        data[invocation].r = int(subgroupAllEqual(data[0].i64.x));
+        data[invocation].r = int(subgroupAllEqual(data[1].i64.xy));
+        data[invocation].r = int(subgroupAllEqual(data[2].i64.xyz));
+        data[invocation].r = int(subgroupAllEqual(data[3].i64));
+        // u64
+        data[invocation].r = int(subgroupAllEqual(data[0].u64.x));
+        data[invocation].r = int(subgroupAllEqual(data[1].u64.xy));
+        data[invocation].r = int(subgroupAllEqual(data[2].u64.xyz));
+        data[invocation].r = int(subgroupAllEqual(data[3].u64));
+        // f16
+        data[invocation].r = int(subgroupAllEqual(data[0].f16.x));
+        data[invocation].r = int(subgroupAllEqual(data[1].f16.xy));
+        data[invocation].r = int(subgroupAllEqual(data[2].f16.xyz));
+        data[invocation].r = int(subgroupAllEqual(data[3].f16));
+    }
+}
diff --git a/Test/spv.volatileAtomic.comp b/Test/spv.volatileAtomic.comp
new file mode 100644
index 0000000..2b7e6c6
--- /dev/null
+++ b/Test/spv.volatileAtomic.comp
@@ -0,0 +1,8 @@
+#version 450 core
+
+layout(set=0, binding=3) volatile buffer D { uint d[]; } d;
+
+void main()
+{
+    atomicExchange(d.d[0], 0);
+}
diff --git a/WORKSPACE b/WORKSPACE
new file mode 100644
index 0000000..3c38e61
--- /dev/null
+++ b/WORKSPACE
@@ -0,0 +1,27 @@
+workspace(name = "org_khronos_glslang")  # name under which this workspace is declared
+load(  # bring the http_archive repository rule into scope
+    "@bazel_tools//tools/build_defs/repo:http.bzl",
+    "http_archive",
+)
+
+http_archive(  # googletest: commit-pinned zip from GitHub, checked against sha256
+    name = "com_google_googletest",
+    sha256 = "ef9e2e12e7bf115ee48b427ae171fc869eeaf1b532c0fcfd982f6a353d2471b4",
+    strip_prefix = "googletest-37ae1fc5e6be26f367d76c078beabd7024fed53a",
+    urls = ["https://github.com/google/googletest/archive/37ae1fc5e6be26f367d76c078beabd7024fed53a.zip"],  # 2018-07-16
+)
+
+http_archive(  # re2: commit-pinned zip from GitHub, checked against sha256
+    name = "com_googlesource_code_re2",
+    sha256 = "b885bb965ab4b6cf8718bbb8154d8f6474cd00331481b6d3e390babb3532263e",
+    strip_prefix = "re2-e860767c86e577b87deadf24cc4567ea83c4f162/",
+    urls = ["https://github.com/google/re2/archive/e860767c86e577b87deadf24cc4567ea83c4f162.zip"],
+)
+
+http_archive(  # effcee: commit-pinned zip from GitHub, checked against sha256
+    name = "com_google_effcee",
+    build_file = "BUILD.effcee.bazel",  # BUILD file applied to the extracted archive
+    sha256 = "b0c21a01995fdf9792510566d78d5e7fe6f83cb4ba986eba691f4926f127cb34",
+    strip_prefix = "effcee-8f0a61dc95e0df18c18e0ac56d83b3fa9d2fe90b/",
+    urls = ["https://github.com/google/effcee/archive/8f0a61dc95e0df18c18e0ac56d83b3fa9d2fe90b.zip"],
+)
diff --git a/glslang/CMakeLists.txt b/glslang/CMakeLists.txt
index 73124c5..42df1d1 100644
--- a/glslang/CMakeLists.txt
+++ b/glslang/CMakeLists.txt
@@ -6,6 +6,10 @@
     message("unknown platform")
 endif(WIN32)
 
+if(ENABLE_GLSLANG_WEB OR EMSCRIPTEN)  # either switch pulls in the OSDependent/Web subdirectory
+    add_subdirectory(OSDependent/Web)
+endif()
+
 set(SOURCES
     MachineIndependent/glslang.m4
     MachineIndependent/glslang.y
@@ -78,7 +82,9 @@
 set_property(TARGET glslang PROPERTY FOLDER glslang)
 set_property(TARGET glslang PROPERTY POSITION_INDEPENDENT_CODE ON)
 target_link_libraries(glslang OGLCompiler OSDependent)
-target_include_directories(glslang PUBLIC ..)
+target_include_directories(glslang PUBLIC
+    $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/..>   # build tree: parent of this directory
+    $<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}>)   # install tree: the configured include dir
 
 if(WIN32 AND BUILD_SHARED_LIBS)
     set_target_properties(glslang PROPERTIES PREFIX "")
@@ -98,14 +104,15 @@
 
 if(ENABLE_GLSLANG_INSTALL)
     if(BUILD_SHARED_LIBS)
-        install(TARGETS glslang
+        install(TARGETS glslang EXPORT glslangTargets
                 ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
                 LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
                 RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR})
     else()
-        install(TARGETS glslang
+        install(TARGETS glslang EXPORT glslangTargets
                 ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR})
     endif()
+    install(EXPORT glslangTargets DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake)  # follow the library dir (lib, lib64, ...) used above
 endif(ENABLE_GLSLANG_INSTALL)
 
 if(ENABLE_GLSLANG_INSTALL)
@@ -114,16 +121,3 @@
         install(FILES ${file} DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/glslang/${dir})
     endforeach()
 endif(ENABLE_GLSLANG_INSTALL)
-
-if(ENABLE_GLSLANG_WEB)
-    add_executable(glslang.js glslang.js.cpp)
-    glslang_set_link_args(glslang.js)
-    target_link_libraries(glslang.js glslang SPIRV)
-    if(EMSCRIPTEN)
-        set_target_properties(glslang.js PROPERTIES
-          OUTPUT_NAME "glslang"
-          SUFFIX ".js"
-          LINK_FLAGS "--bind -s EXPORT_NAME=\"glslangModule\"")
-        em_link_pre_js(glslang.js ${CMAKE_CURRENT_SOURCE_DIR}/glslang.pre.js)
-    endif(EMSCRIPTEN)
-endif(ENABLE_GLSLANG_WEB)
diff --git a/glslang/Include/Types.h b/glslang/Include/Types.h
index a419cb0..ef933b4 100644
--- a/glslang/Include/Types.h
+++ b/glslang/Include/Types.h
@@ -135,6 +135,8 @@
     bool isYuv()         const { return yuv; }
 #endif
     void setCombined(bool c) { combined = c; }
+    void setBasicType(TBasicType t) { type = t; }
+    TBasicType getBasicType()  const { return type; }
     bool isShadow()      const { return shadow; }
     bool isArrayed()     const { return arrayed; }
 
@@ -1195,6 +1197,7 @@
     TVertexOrder order;
     bool pointMode;
     int localSize[3];         // compute shader
+    bool localSizeNotDefault[3];        // compute shader
     int localSizeSpecId[3];   // compute shader specialization id for gl_WorkGroupSize
 #ifndef GLSLANG_WEB
     bool earlyFragmentTests;  // fragment input
@@ -1225,6 +1228,9 @@
         localSize[0] = 1;
         localSize[1] = 1;
         localSize[2] = 1;
+        localSizeNotDefault[0] = false;
+        localSizeNotDefault[1] = false;
+        localSizeNotDefault[2] = false;
         localSizeSpecId[0] = TQualifier::layoutNotSet;
         localSizeSpecId[1] = TQualifier::layoutNotSet;
         localSizeSpecId[2] = TQualifier::layoutNotSet;
@@ -1273,6 +1279,9 @@
                 localSize[i] = src.localSize[i];
         }
         for (int i = 0; i < 3; ++i) {
+            localSizeNotDefault[i] = src.localSizeNotDefault[i] || localSizeNotDefault[i];
+        }
+        for (int i = 0; i < 3; ++i) {
             if (src.localSizeSpecId[i] != TQualifier::layoutNotSet)
                 localSizeSpecId[i] = src.localSizeSpecId[i];
         }
@@ -2186,7 +2195,8 @@
     const TTypeList* getStruct() const { assert(isStruct()); return structure; }
     void setStruct(TTypeList* s) { assert(isStruct()); structure = s; }
     TTypeList* getWritableStruct() const { assert(isStruct()); return structure; }  // This should only be used when known to not be sharing with other threads
-
+    void setBasicType(TBasicType t) { basicType = t; }  // overwrite the stored basic type; taken by value
+    
     int computeNumComponents() const
     {
         int components = 0;
diff --git a/glslang/Include/intermediate.h b/glslang/Include/intermediate.h
index 3a7405a..f966899 100644
--- a/glslang/Include/intermediate.h
+++ b/glslang/Include/intermediate.h
@@ -275,6 +275,10 @@
     EOpConvUint64ToPtr,
     EOpConvPtrToUint64,
 
+    // uvec2 <-> pointer
+    EOpConvUvec2ToPtr,
+    EOpConvPtrToUvec2,
+
     //
     // binary operations
     //
@@ -1185,6 +1189,7 @@
     virtual void traverse(TIntermTraverser*);
     TOperator getFlowOp() const { return flowOp; }
     TIntermTyped* getExpression() const { return expression; }
+    void setExpression(TIntermTyped* pExpression) { expression = pExpression; }
 protected:
     TOperator flowOp;
     TIntermTyped* expression;
diff --git a/glslang/MachineIndependent/Constant.cpp b/glslang/MachineIndependent/Constant.cpp
old mode 100755
new mode 100644
diff --git a/glslang/MachineIndependent/Initialize.cpp b/glslang/MachineIndependent/Initialize.cpp
index 8ea58cc..2344d36 100644
--- a/glslang/MachineIndependent/Initialize.cpp
+++ b/glslang/MachineIndependent/Initialize.cpp
@@ -152,12 +152,12 @@
                                                   { EDesktopProfile, 0, 130, 0, nullptr },
                                                   { EBadProfile } };
     const Versioning* Es300Desktop130 = &Es300Desktop130Version[0];
-    
+
     const Versioning Es310Desktop430Version[] = { { EEsProfile,      0, 310, 0, nullptr },
                                                   { EDesktopProfile, 0, 430, 0, nullptr },
                                                   { EBadProfile } };
     const Versioning* Es310Desktop430 = &Es310Desktop430Version[0];
-    
+
     const Versioning Es310Desktop450Version[] = { { EEsProfile,      0, 310, 0, nullptr },
                                                   { EDesktopProfile, 0, 450, 0, nullptr },
                                                   { EBadProfile } };
@@ -357,7 +357,7 @@
 }
 
 // See if the tabled versioning information allows the current version.
-bool ValidVersion(const BuiltInFunction& function, int version, EProfile profile, const SpvVersion& spvVersion)
+bool ValidVersion(const BuiltInFunction& function, int version, EProfile profile, const SpvVersion& /* spvVersion */)
 {
 #ifdef GLSLANG_WEB
     // all entries in table are valid
@@ -417,8 +417,7 @@
 }
 
 // Relate all tables of built-ins to the AST operators.
-void TBuiltIns::relateTabledBuiltins(int version, EProfile profile, const SpvVersion& spvVersion, EShLanguage stage,
-    TSymbolTable& symbolTable)
+void TBuiltIns::relateTabledBuiltins(int /* version */, EProfile /* profile */, const SpvVersion& /* spvVersion */, EShLanguage /* stage */, TSymbolTable& symbolTable)
 {
     RelateTabledBuiltins(BaseFunctions, symbolTable);
     RelateTabledBuiltins(DerivativeFunctions, symbolTable);
@@ -996,25 +995,25 @@
             "bvec3 notEqual(u64vec3, u64vec3);"
             "bvec4 notEqual(u64vec4, u64vec4);"
 
-            "int   findLSB(int64_t);"
-            "ivec2 findLSB(i64vec2);"
-            "ivec3 findLSB(i64vec3);"
-            "ivec4 findLSB(i64vec4);"
+            "int64_t findLSB(int64_t);"
+            "i64vec2 findLSB(i64vec2);"
+            "i64vec3 findLSB(i64vec3);"
+            "i64vec4 findLSB(i64vec4);"
 
-            "int   findLSB(uint64_t);"
-            "ivec2 findLSB(u64vec2);"
-            "ivec3 findLSB(u64vec3);"
-            "ivec4 findLSB(u64vec4);"
+            "int64_t findLSB(uint64_t);"
+            "i64vec2 findLSB(u64vec2);"
+            "i64vec3 findLSB(u64vec3);"
+            "i64vec4 findLSB(u64vec4);"
 
-            "int   findMSB(int64_t);"
-            "ivec2 findMSB(i64vec2);"
-            "ivec3 findMSB(i64vec3);"
-            "ivec4 findMSB(i64vec4);"
+            "int64_t findMSB(int64_t);"
+            "i64vec2 findMSB(i64vec2);"
+            "i64vec3 findMSB(i64vec3);"
+            "i64vec4 findMSB(i64vec4);"
 
-            "int   findMSB(uint64_t);"
-            "ivec2 findMSB(u64vec2);"
-            "ivec3 findMSB(u64vec3);"
-            "ivec4 findMSB(u64vec4);"
+            "int64_t findMSB(uint64_t);"
+            "i64vec2 findMSB(u64vec2);"
+            "i64vec3 findMSB(u64vec3);"
+            "i64vec4 findMSB(u64vec4);"
 
             "\n"
         );
@@ -1744,58 +1743,6 @@
 
             "bool   subgroupAll(bool);\n"
             "bool   subgroupAny(bool);\n"
-
-            "bool   subgroupAllEqual(float);\n"
-            "bool   subgroupAllEqual(vec2);\n"
-            "bool   subgroupAllEqual(vec3);\n"
-            "bool   subgroupAllEqual(vec4);\n"
-            "bool   subgroupAllEqual(int);\n"
-            "bool   subgroupAllEqual(ivec2);\n"
-            "bool   subgroupAllEqual(ivec3);\n"
-            "bool   subgroupAllEqual(ivec4);\n"
-            "bool   subgroupAllEqual(uint);\n"
-            "bool   subgroupAllEqual(uvec2);\n"
-            "bool   subgroupAllEqual(uvec3);\n"
-            "bool   subgroupAllEqual(uvec4);\n"
-            "bool   subgroupAllEqual(bool);\n"
-            "bool   subgroupAllEqual(bvec2);\n"
-            "bool   subgroupAllEqual(bvec3);\n"
-            "bool   subgroupAllEqual(bvec4);\n"
-
-            "float  subgroupBroadcast(float, uint);\n"
-            "vec2   subgroupBroadcast(vec2, uint);\n"
-            "vec3   subgroupBroadcast(vec3, uint);\n"
-            "vec4   subgroupBroadcast(vec4, uint);\n"
-            "int    subgroupBroadcast(int, uint);\n"
-            "ivec2  subgroupBroadcast(ivec2, uint);\n"
-            "ivec3  subgroupBroadcast(ivec3, uint);\n"
-            "ivec4  subgroupBroadcast(ivec4, uint);\n"
-            "uint   subgroupBroadcast(uint, uint);\n"
-            "uvec2  subgroupBroadcast(uvec2, uint);\n"
-            "uvec3  subgroupBroadcast(uvec3, uint);\n"
-            "uvec4  subgroupBroadcast(uvec4, uint);\n"
-            "bool   subgroupBroadcast(bool, uint);\n"
-            "bvec2  subgroupBroadcast(bvec2, uint);\n"
-            "bvec3  subgroupBroadcast(bvec3, uint);\n"
-            "bvec4  subgroupBroadcast(bvec4, uint);\n"
-
-            "float  subgroupBroadcastFirst(float);\n"
-            "vec2   subgroupBroadcastFirst(vec2);\n"
-            "vec3   subgroupBroadcastFirst(vec3);\n"
-            "vec4   subgroupBroadcastFirst(vec4);\n"
-            "int    subgroupBroadcastFirst(int);\n"
-            "ivec2  subgroupBroadcastFirst(ivec2);\n"
-            "ivec3  subgroupBroadcastFirst(ivec3);\n"
-            "ivec4  subgroupBroadcastFirst(ivec4);\n"
-            "uint   subgroupBroadcastFirst(uint);\n"
-            "uvec2  subgroupBroadcastFirst(uvec2);\n"
-            "uvec3  subgroupBroadcastFirst(uvec3);\n"
-            "uvec4  subgroupBroadcastFirst(uvec4);\n"
-            "bool   subgroupBroadcastFirst(bool);\n"
-            "bvec2  subgroupBroadcastFirst(bvec2);\n"
-            "bvec3  subgroupBroadcastFirst(bvec3);\n"
-            "bvec4  subgroupBroadcastFirst(bvec4);\n"
-
             "uvec4  subgroupBallot(bool);\n"
             "bool   subgroupInverseBallot(uvec4);\n"
             "bool   subgroupBallotBitExtract(uvec4, uint);\n"
@@ -1804,1002 +1751,130 @@
             "uint   subgroupBallotExclusiveBitCount(uvec4);\n"
             "uint   subgroupBallotFindLSB(uvec4);\n"
             "uint   subgroupBallotFindMSB(uvec4);\n"
+            );
 
-            "float  subgroupShuffle(float, uint);\n"
-            "vec2   subgroupShuffle(vec2, uint);\n"
-            "vec3   subgroupShuffle(vec3, uint);\n"
-            "vec4   subgroupShuffle(vec4, uint);\n"
-            "int    subgroupShuffle(int, uint);\n"
-            "ivec2  subgroupShuffle(ivec2, uint);\n"
-            "ivec3  subgroupShuffle(ivec3, uint);\n"
-            "ivec4  subgroupShuffle(ivec4, uint);\n"
-            "uint   subgroupShuffle(uint, uint);\n"
-            "uvec2  subgroupShuffle(uvec2, uint);\n"
-            "uvec3  subgroupShuffle(uvec3, uint);\n"
-            "uvec4  subgroupShuffle(uvec4, uint);\n"
-            "bool   subgroupShuffle(bool, uint);\n"
-            "bvec2  subgroupShuffle(bvec2, uint);\n"
-            "bvec3  subgroupShuffle(bvec3, uint);\n"
-            "bvec4  subgroupShuffle(bvec4, uint);\n"
+        // Generate all flavors of subgroup ops.
+        static const char *subgroupOps[] = 
+        {
+            "bool   subgroupAllEqual(%s);\n",
+            "%s     subgroupBroadcast(%s, uint);\n",
+            "%s     subgroupBroadcastFirst(%s);\n",
+            "%s     subgroupShuffle(%s, uint);\n",
+            "%s     subgroupShuffleXor(%s, uint);\n",
+            "%s     subgroupShuffleUp(%s, uint delta);\n",
+            "%s     subgroupShuffleDown(%s, uint delta);\n",
+            "%s     subgroupAdd(%s);\n",
+            "%s     subgroupMul(%s);\n",
+            "%s     subgroupMin(%s);\n",
+            "%s     subgroupMax(%s);\n",
+            "%s     subgroupAnd(%s);\n",
+            "%s     subgroupOr(%s);\n",
+            "%s     subgroupXor(%s);\n",
+            "%s     subgroupInclusiveAdd(%s);\n",
+            "%s     subgroupInclusiveMul(%s);\n",
+            "%s     subgroupInclusiveMin(%s);\n",
+            "%s     subgroupInclusiveMax(%s);\n",
+            "%s     subgroupInclusiveAnd(%s);\n",
+            "%s     subgroupInclusiveOr(%s);\n",
+            "%s     subgroupInclusiveXor(%s);\n",
+            "%s     subgroupExclusiveAdd(%s);\n",
+            "%s     subgroupExclusiveMul(%s);\n",
+            "%s     subgroupExclusiveMin(%s);\n",
+            "%s     subgroupExclusiveMax(%s);\n",
+            "%s     subgroupExclusiveAnd(%s);\n",
+            "%s     subgroupExclusiveOr(%s);\n",
+            "%s     subgroupExclusiveXor(%s);\n",
+            "%s     subgroupClusteredAdd(%s, uint);\n",
+            "%s     subgroupClusteredMul(%s, uint);\n",
+            "%s     subgroupClusteredMin(%s, uint);\n",
+            "%s     subgroupClusteredMax(%s, uint);\n",
+            "%s     subgroupClusteredAnd(%s, uint);\n",
+            "%s     subgroupClusteredOr(%s, uint);\n",
+            "%s     subgroupClusteredXor(%s, uint);\n",
+            "%s     subgroupQuadBroadcast(%s, uint);\n",
+            "%s     subgroupQuadSwapHorizontal(%s);\n",
+            "%s     subgroupQuadSwapVertical(%s);\n",
+            "%s     subgroupQuadSwapDiagonal(%s);\n",
+            "uvec4  subgroupPartitionNV(%s);\n",
+            "%s     subgroupPartitionedAddNV(%s, uvec4 ballot);\n",
+            "%s     subgroupPartitionedMulNV(%s, uvec4 ballot);\n",
+            "%s     subgroupPartitionedMinNV(%s, uvec4 ballot);\n",
+            "%s     subgroupPartitionedMaxNV(%s, uvec4 ballot);\n",
+            "%s     subgroupPartitionedAndNV(%s, uvec4 ballot);\n",
+            "%s     subgroupPartitionedOrNV(%s, uvec4 ballot);\n",
+            "%s     subgroupPartitionedXorNV(%s, uvec4 ballot);\n",
+            "%s     subgroupPartitionedInclusiveAddNV(%s, uvec4 ballot);\n",
+            "%s     subgroupPartitionedInclusiveMulNV(%s, uvec4 ballot);\n",
+            "%s     subgroupPartitionedInclusiveMinNV(%s, uvec4 ballot);\n",
+            "%s     subgroupPartitionedInclusiveMaxNV(%s, uvec4 ballot);\n",
+            "%s     subgroupPartitionedInclusiveAndNV(%s, uvec4 ballot);\n",
+            "%s     subgroupPartitionedInclusiveOrNV(%s, uvec4 ballot);\n",
+            "%s     subgroupPartitionedInclusiveXorNV(%s, uvec4 ballot);\n",
+            "%s     subgroupPartitionedExclusiveAddNV(%s, uvec4 ballot);\n",
+            "%s     subgroupPartitionedExclusiveMulNV(%s, uvec4 ballot);\n",
+            "%s     subgroupPartitionedExclusiveMinNV(%s, uvec4 ballot);\n",
+            "%s     subgroupPartitionedExclusiveMaxNV(%s, uvec4 ballot);\n",
+            "%s     subgroupPartitionedExclusiveAndNV(%s, uvec4 ballot);\n",
+            "%s     subgroupPartitionedExclusiveOrNV(%s, uvec4 ballot);\n",
+            "%s     subgroupPartitionedExclusiveXorNV(%s, uvec4 ballot);\n",
+        };
 
-            "float  subgroupShuffleXor(float, uint);\n"
-            "vec2   subgroupShuffleXor(vec2, uint);\n"
-            "vec3   subgroupShuffleXor(vec3, uint);\n"
-            "vec4   subgroupShuffleXor(vec4, uint);\n"
-            "int    subgroupShuffleXor(int, uint);\n"
-            "ivec2  subgroupShuffleXor(ivec2, uint);\n"
-            "ivec3  subgroupShuffleXor(ivec3, uint);\n"
-            "ivec4  subgroupShuffleXor(ivec4, uint);\n"
-            "uint   subgroupShuffleXor(uint, uint);\n"
-            "uvec2  subgroupShuffleXor(uvec2, uint);\n"
-            "uvec3  subgroupShuffleXor(uvec3, uint);\n"
-            "uvec4  subgroupShuffleXor(uvec4, uint);\n"
-            "bool   subgroupShuffleXor(bool, uint);\n"
-            "bvec2  subgroupShuffleXor(bvec2, uint);\n"
-            "bvec3  subgroupShuffleXor(bvec3, uint);\n"
-            "bvec4  subgroupShuffleXor(bvec4, uint);\n"
+        static const char *floatTypes[] = { 
+            "float", "vec2", "vec3", "vec4", 
+            "float16_t", "f16vec2", "f16vec3", "f16vec4", 
+        };
+        static const char *doubleTypes[] = { 
+            "double", "dvec2", "dvec3", "dvec4", 
+        };
+        static const char *intTypes[] = { 
+            "int8_t", "i8vec2", "i8vec3", "i8vec4", 
+            "int16_t", "i16vec2", "i16vec3", "i16vec4", 
+            "int", "ivec2", "ivec3", "ivec4", 
+            "int64_t", "i64vec2", "i64vec3", "i64vec4", 
+            "uint8_t", "u8vec2", "u8vec3", "u8vec4", 
+            "uint16_t", "u16vec2", "u16vec3", "u16vec4", 
+            "uint", "uvec2", "uvec3", "uvec4", 
+            "uint64_t", "u64vec2", "u64vec3", "u64vec4", 
+        };
+        static const char *boolTypes[] = { 
+            "bool", "bvec2", "bvec3", "bvec4", 
+        };
 
-            "float  subgroupShuffleUp(float, uint delta);\n"
-            "vec2   subgroupShuffleUp(vec2, uint delta);\n"
-            "vec3   subgroupShuffleUp(vec3, uint delta);\n"
-            "vec4   subgroupShuffleUp(vec4, uint delta);\n"
-            "int    subgroupShuffleUp(int, uint delta);\n"
-            "ivec2  subgroupShuffleUp(ivec2, uint delta);\n"
-            "ivec3  subgroupShuffleUp(ivec3, uint delta);\n"
-            "ivec4  subgroupShuffleUp(ivec4, uint delta);\n"
-            "uint   subgroupShuffleUp(uint, uint delta);\n"
-            "uvec2  subgroupShuffleUp(uvec2, uint delta);\n"
-            "uvec3  subgroupShuffleUp(uvec3, uint delta);\n"
-            "uvec4  subgroupShuffleUp(uvec4, uint delta);\n"
-            "bool   subgroupShuffleUp(bool, uint delta);\n"
-            "bvec2  subgroupShuffleUp(bvec2, uint delta);\n"
-            "bvec3  subgroupShuffleUp(bvec3, uint delta);\n"
-            "bvec4  subgroupShuffleUp(bvec4, uint delta);\n"
+        for (size_t i = 0; i < sizeof(subgroupOps)/sizeof(subgroupOps[0]); ++i) {
+            const char *op = subgroupOps[i];
 
-            "float  subgroupShuffleDown(float, uint delta);\n"
-            "vec2   subgroupShuffleDown(vec2, uint delta);\n"
-            "vec3   subgroupShuffleDown(vec3, uint delta);\n"
-            "vec4   subgroupShuffleDown(vec4, uint delta);\n"
-            "int    subgroupShuffleDown(int, uint delta);\n"
-            "ivec2  subgroupShuffleDown(ivec2, uint delta);\n"
-            "ivec3  subgroupShuffleDown(ivec3, uint delta);\n"
-            "ivec4  subgroupShuffleDown(ivec4, uint delta);\n"
-            "uint   subgroupShuffleDown(uint, uint delta);\n"
-            "uvec2  subgroupShuffleDown(uvec2, uint delta);\n"
-            "uvec3  subgroupShuffleDown(uvec3, uint delta);\n"
-            "uvec4  subgroupShuffleDown(uvec4, uint delta);\n"
-            "bool   subgroupShuffleDown(bool, uint delta);\n"
-            "bvec2  subgroupShuffleDown(bvec2, uint delta);\n"
-            "bvec3  subgroupShuffleDown(bvec3, uint delta);\n"
-            "bvec4  subgroupShuffleDown(bvec4, uint delta);\n"
+            // Logical operations don't support float
+            bool logicalOp = strstr(op, "Or") || strstr(op, "And") ||
+                             (strstr(op, "Xor") && !strstr(op, "ShuffleXor"));
+            // Math operations don't support bool
+            bool mathOp = strstr(op, "Add") || strstr(op, "Mul") || strstr(op, "Min") || strstr(op, "Max");
 
-            "float  subgroupAdd(float);\n"
-            "vec2   subgroupAdd(vec2);\n"
-            "vec3   subgroupAdd(vec3);\n"
-            "vec4   subgroupAdd(vec4);\n"
-            "int    subgroupAdd(int);\n"
-            "ivec2  subgroupAdd(ivec2);\n"
-            "ivec3  subgroupAdd(ivec3);\n"
-            "ivec4  subgroupAdd(ivec4);\n"
-            "uint   subgroupAdd(uint);\n"
-            "uvec2  subgroupAdd(uvec2);\n"
-            "uvec3  subgroupAdd(uvec3);\n"
-            "uvec4  subgroupAdd(uvec4);\n"
+            const int bufSize = 256;
+            char buf[bufSize];
 
-            "float  subgroupMul(float);\n"
-            "vec2   subgroupMul(vec2);\n"
-            "vec3   subgroupMul(vec3);\n"
-            "vec4   subgroupMul(vec4);\n"
-            "int    subgroupMul(int);\n"
-            "ivec2  subgroupMul(ivec2);\n"
-            "ivec3  subgroupMul(ivec3);\n"
-            "ivec4  subgroupMul(ivec4);\n"
-            "uint   subgroupMul(uint);\n"
-            "uvec2  subgroupMul(uvec2);\n"
-            "uvec3  subgroupMul(uvec3);\n"
-            "uvec4  subgroupMul(uvec4);\n"
-
-            "float  subgroupMin(float);\n"
-            "vec2   subgroupMin(vec2);\n"
-            "vec3   subgroupMin(vec3);\n"
-            "vec4   subgroupMin(vec4);\n"
-            "int    subgroupMin(int);\n"
-            "ivec2  subgroupMin(ivec2);\n"
-            "ivec3  subgroupMin(ivec3);\n"
-            "ivec4  subgroupMin(ivec4);\n"
-            "uint   subgroupMin(uint);\n"
-            "uvec2  subgroupMin(uvec2);\n"
-            "uvec3  subgroupMin(uvec3);\n"
-            "uvec4  subgroupMin(uvec4);\n"
-
-            "float  subgroupMax(float);\n"
-            "vec2   subgroupMax(vec2);\n"
-            "vec3   subgroupMax(vec3);\n"
-            "vec4   subgroupMax(vec4);\n"
-            "int    subgroupMax(int);\n"
-            "ivec2  subgroupMax(ivec2);\n"
-            "ivec3  subgroupMax(ivec3);\n"
-            "ivec4  subgroupMax(ivec4);\n"
-            "uint   subgroupMax(uint);\n"
-            "uvec2  subgroupMax(uvec2);\n"
-            "uvec3  subgroupMax(uvec3);\n"
-            "uvec4  subgroupMax(uvec4);\n"
-
-            "int    subgroupAnd(int);\n"
-            "ivec2  subgroupAnd(ivec2);\n"
-            "ivec3  subgroupAnd(ivec3);\n"
-            "ivec4  subgroupAnd(ivec4);\n"
-            "uint   subgroupAnd(uint);\n"
-            "uvec2  subgroupAnd(uvec2);\n"
-            "uvec3  subgroupAnd(uvec3);\n"
-            "uvec4  subgroupAnd(uvec4);\n"
-            "bool   subgroupAnd(bool);\n"
-            "bvec2  subgroupAnd(bvec2);\n"
-            "bvec3  subgroupAnd(bvec3);\n"
-            "bvec4  subgroupAnd(bvec4);\n"
-
-            "int    subgroupOr(int);\n"
-            "ivec2  subgroupOr(ivec2);\n"
-            "ivec3  subgroupOr(ivec3);\n"
-            "ivec4  subgroupOr(ivec4);\n"
-            "uint   subgroupOr(uint);\n"
-            "uvec2  subgroupOr(uvec2);\n"
-            "uvec3  subgroupOr(uvec3);\n"
-            "uvec4  subgroupOr(uvec4);\n"
-            "bool   subgroupOr(bool);\n"
-            "bvec2  subgroupOr(bvec2);\n"
-            "bvec3  subgroupOr(bvec3);\n"
-            "bvec4  subgroupOr(bvec4);\n"
-
-            "int    subgroupXor(int);\n"
-            "ivec2  subgroupXor(ivec2);\n"
-            "ivec3  subgroupXor(ivec3);\n"
-            "ivec4  subgroupXor(ivec4);\n"
-            "uint   subgroupXor(uint);\n"
-            "uvec2  subgroupXor(uvec2);\n"
-            "uvec3  subgroupXor(uvec3);\n"
-            "uvec4  subgroupXor(uvec4);\n"
-            "bool   subgroupXor(bool);\n"
-            "bvec2  subgroupXor(bvec2);\n"
-            "bvec3  subgroupXor(bvec3);\n"
-            "bvec4  subgroupXor(bvec4);\n"
-
-            "float  subgroupInclusiveAdd(float);\n"
-            "vec2   subgroupInclusiveAdd(vec2);\n"
-            "vec3   subgroupInclusiveAdd(vec3);\n"
-            "vec4   subgroupInclusiveAdd(vec4);\n"
-            "int    subgroupInclusiveAdd(int);\n"
-            "ivec2  subgroupInclusiveAdd(ivec2);\n"
-            "ivec3  subgroupInclusiveAdd(ivec3);\n"
-            "ivec4  subgroupInclusiveAdd(ivec4);\n"
-            "uint   subgroupInclusiveAdd(uint);\n"
-            "uvec2  subgroupInclusiveAdd(uvec2);\n"
-            "uvec3  subgroupInclusiveAdd(uvec3);\n"
-            "uvec4  subgroupInclusiveAdd(uvec4);\n"
-
-            "float  subgroupInclusiveMul(float);\n"
-            "vec2   subgroupInclusiveMul(vec2);\n"
-            "vec3   subgroupInclusiveMul(vec3);\n"
-            "vec4   subgroupInclusiveMul(vec4);\n"
-            "int    subgroupInclusiveMul(int);\n"
-            "ivec2  subgroupInclusiveMul(ivec2);\n"
-            "ivec3  subgroupInclusiveMul(ivec3);\n"
-            "ivec4  subgroupInclusiveMul(ivec4);\n"
-            "uint   subgroupInclusiveMul(uint);\n"
-            "uvec2  subgroupInclusiveMul(uvec2);\n"
-            "uvec3  subgroupInclusiveMul(uvec3);\n"
-            "uvec4  subgroupInclusiveMul(uvec4);\n"
-
-            "float  subgroupInclusiveMin(float);\n"
-            "vec2   subgroupInclusiveMin(vec2);\n"
-            "vec3   subgroupInclusiveMin(vec3);\n"
-            "vec4   subgroupInclusiveMin(vec4);\n"
-            "int    subgroupInclusiveMin(int);\n"
-            "ivec2  subgroupInclusiveMin(ivec2);\n"
-            "ivec3  subgroupInclusiveMin(ivec3);\n"
-            "ivec4  subgroupInclusiveMin(ivec4);\n"
-            "uint   subgroupInclusiveMin(uint);\n"
-            "uvec2  subgroupInclusiveMin(uvec2);\n"
-            "uvec3  subgroupInclusiveMin(uvec3);\n"
-            "uvec4  subgroupInclusiveMin(uvec4);\n"
-
-            "float  subgroupInclusiveMax(float);\n"
-            "vec2   subgroupInclusiveMax(vec2);\n"
-            "vec3   subgroupInclusiveMax(vec3);\n"
-            "vec4   subgroupInclusiveMax(vec4);\n"
-            "int    subgroupInclusiveMax(int);\n"
-            "ivec2  subgroupInclusiveMax(ivec2);\n"
-            "ivec3  subgroupInclusiveMax(ivec3);\n"
-            "ivec4  subgroupInclusiveMax(ivec4);\n"
-            "uint   subgroupInclusiveMax(uint);\n"
-            "uvec2  subgroupInclusiveMax(uvec2);\n"
-            "uvec3  subgroupInclusiveMax(uvec3);\n"
-            "uvec4  subgroupInclusiveMax(uvec4);\n"
-
-            "int    subgroupInclusiveAnd(int);\n"
-            "ivec2  subgroupInclusiveAnd(ivec2);\n"
-            "ivec3  subgroupInclusiveAnd(ivec3);\n"
-            "ivec4  subgroupInclusiveAnd(ivec4);\n"
-            "uint   subgroupInclusiveAnd(uint);\n"
-            "uvec2  subgroupInclusiveAnd(uvec2);\n"
-            "uvec3  subgroupInclusiveAnd(uvec3);\n"
-            "uvec4  subgroupInclusiveAnd(uvec4);\n"
-            "bool   subgroupInclusiveAnd(bool);\n"
-            "bvec2  subgroupInclusiveAnd(bvec2);\n"
-            "bvec3  subgroupInclusiveAnd(bvec3);\n"
-            "bvec4  subgroupInclusiveAnd(bvec4);\n"
-
-            "int    subgroupInclusiveOr(int);\n"
-            "ivec2  subgroupInclusiveOr(ivec2);\n"
-            "ivec3  subgroupInclusiveOr(ivec3);\n"
-            "ivec4  subgroupInclusiveOr(ivec4);\n"
-            "uint   subgroupInclusiveOr(uint);\n"
-            "uvec2  subgroupInclusiveOr(uvec2);\n"
-            "uvec3  subgroupInclusiveOr(uvec3);\n"
-            "uvec4  subgroupInclusiveOr(uvec4);\n"
-            "bool   subgroupInclusiveOr(bool);\n"
-            "bvec2  subgroupInclusiveOr(bvec2);\n"
-            "bvec3  subgroupInclusiveOr(bvec3);\n"
-            "bvec4  subgroupInclusiveOr(bvec4);\n"
-
-            "int    subgroupInclusiveXor(int);\n"
-            "ivec2  subgroupInclusiveXor(ivec2);\n"
-            "ivec3  subgroupInclusiveXor(ivec3);\n"
-            "ivec4  subgroupInclusiveXor(ivec4);\n"
-            "uint   subgroupInclusiveXor(uint);\n"
-            "uvec2  subgroupInclusiveXor(uvec2);\n"
-            "uvec3  subgroupInclusiveXor(uvec3);\n"
-            "uvec4  subgroupInclusiveXor(uvec4);\n"
-            "bool   subgroupInclusiveXor(bool);\n"
-            "bvec2  subgroupInclusiveXor(bvec2);\n"
-            "bvec3  subgroupInclusiveXor(bvec3);\n"
-            "bvec4  subgroupInclusiveXor(bvec4);\n"
-
-            "float  subgroupExclusiveAdd(float);\n"
-            "vec2   subgroupExclusiveAdd(vec2);\n"
-            "vec3   subgroupExclusiveAdd(vec3);\n"
-            "vec4   subgroupExclusiveAdd(vec4);\n"
-            "int    subgroupExclusiveAdd(int);\n"
-            "ivec2  subgroupExclusiveAdd(ivec2);\n"
-            "ivec3  subgroupExclusiveAdd(ivec3);\n"
-            "ivec4  subgroupExclusiveAdd(ivec4);\n"
-            "uint   subgroupExclusiveAdd(uint);\n"
-            "uvec2  subgroupExclusiveAdd(uvec2);\n"
-            "uvec3  subgroupExclusiveAdd(uvec3);\n"
-            "uvec4  subgroupExclusiveAdd(uvec4);\n"
-
-            "float  subgroupExclusiveMul(float);\n"
-            "vec2   subgroupExclusiveMul(vec2);\n"
-            "vec3   subgroupExclusiveMul(vec3);\n"
-            "vec4   subgroupExclusiveMul(vec4);\n"
-            "int    subgroupExclusiveMul(int);\n"
-            "ivec2  subgroupExclusiveMul(ivec2);\n"
-            "ivec3  subgroupExclusiveMul(ivec3);\n"
-            "ivec4  subgroupExclusiveMul(ivec4);\n"
-            "uint   subgroupExclusiveMul(uint);\n"
-            "uvec2  subgroupExclusiveMul(uvec2);\n"
-            "uvec3  subgroupExclusiveMul(uvec3);\n"
-            "uvec4  subgroupExclusiveMul(uvec4);\n"
-
-            "float  subgroupExclusiveMin(float);\n"
-            "vec2   subgroupExclusiveMin(vec2);\n"
-            "vec3   subgroupExclusiveMin(vec3);\n"
-            "vec4   subgroupExclusiveMin(vec4);\n"
-            "int    subgroupExclusiveMin(int);\n"
-            "ivec2  subgroupExclusiveMin(ivec2);\n"
-            "ivec3  subgroupExclusiveMin(ivec3);\n"
-            "ivec4  subgroupExclusiveMin(ivec4);\n"
-            "uint   subgroupExclusiveMin(uint);\n"
-            "uvec2  subgroupExclusiveMin(uvec2);\n"
-            "uvec3  subgroupExclusiveMin(uvec3);\n"
-            "uvec4  subgroupExclusiveMin(uvec4);\n"
-
-            "float  subgroupExclusiveMax(float);\n"
-            "vec2   subgroupExclusiveMax(vec2);\n"
-            "vec3   subgroupExclusiveMax(vec3);\n"
-            "vec4   subgroupExclusiveMax(vec4);\n"
-            "int    subgroupExclusiveMax(int);\n"
-            "ivec2  subgroupExclusiveMax(ivec2);\n"
-            "ivec3  subgroupExclusiveMax(ivec3);\n"
-            "ivec4  subgroupExclusiveMax(ivec4);\n"
-            "uint   subgroupExclusiveMax(uint);\n"
-            "uvec2  subgroupExclusiveMax(uvec2);\n"
-            "uvec3  subgroupExclusiveMax(uvec3);\n"
-            "uvec4  subgroupExclusiveMax(uvec4);\n"
-
-            "int    subgroupExclusiveAnd(int);\n"
-            "ivec2  subgroupExclusiveAnd(ivec2);\n"
-            "ivec3  subgroupExclusiveAnd(ivec3);\n"
-            "ivec4  subgroupExclusiveAnd(ivec4);\n"
-            "uint   subgroupExclusiveAnd(uint);\n"
-            "uvec2  subgroupExclusiveAnd(uvec2);\n"
-            "uvec3  subgroupExclusiveAnd(uvec3);\n"
-            "uvec4  subgroupExclusiveAnd(uvec4);\n"
-            "bool   subgroupExclusiveAnd(bool);\n"
-            "bvec2  subgroupExclusiveAnd(bvec2);\n"
-            "bvec3  subgroupExclusiveAnd(bvec3);\n"
-            "bvec4  subgroupExclusiveAnd(bvec4);\n"
-
-            "int    subgroupExclusiveOr(int);\n"
-            "ivec2  subgroupExclusiveOr(ivec2);\n"
-            "ivec3  subgroupExclusiveOr(ivec3);\n"
-            "ivec4  subgroupExclusiveOr(ivec4);\n"
-            "uint   subgroupExclusiveOr(uint);\n"
-            "uvec2  subgroupExclusiveOr(uvec2);\n"
-            "uvec3  subgroupExclusiveOr(uvec3);\n"
-            "uvec4  subgroupExclusiveOr(uvec4);\n"
-            "bool   subgroupExclusiveOr(bool);\n"
-            "bvec2  subgroupExclusiveOr(bvec2);\n"
-            "bvec3  subgroupExclusiveOr(bvec3);\n"
-            "bvec4  subgroupExclusiveOr(bvec4);\n"
-
-            "int    subgroupExclusiveXor(int);\n"
-            "ivec2  subgroupExclusiveXor(ivec2);\n"
-            "ivec3  subgroupExclusiveXor(ivec3);\n"
-            "ivec4  subgroupExclusiveXor(ivec4);\n"
-            "uint   subgroupExclusiveXor(uint);\n"
-            "uvec2  subgroupExclusiveXor(uvec2);\n"
-            "uvec3  subgroupExclusiveXor(uvec3);\n"
-            "uvec4  subgroupExclusiveXor(uvec4);\n"
-            "bool   subgroupExclusiveXor(bool);\n"
-            "bvec2  subgroupExclusiveXor(bvec2);\n"
-            "bvec3  subgroupExclusiveXor(bvec3);\n"
-            "bvec4  subgroupExclusiveXor(bvec4);\n"
-
-            "float  subgroupClusteredAdd(float, uint);\n"
-            "vec2   subgroupClusteredAdd(vec2, uint);\n"
-            "vec3   subgroupClusteredAdd(vec3, uint);\n"
-            "vec4   subgroupClusteredAdd(vec4, uint);\n"
-            "int    subgroupClusteredAdd(int, uint);\n"
-            "ivec2  subgroupClusteredAdd(ivec2, uint);\n"
-            "ivec3  subgroupClusteredAdd(ivec3, uint);\n"
-            "ivec4  subgroupClusteredAdd(ivec4, uint);\n"
-            "uint   subgroupClusteredAdd(uint, uint);\n"
-            "uvec2  subgroupClusteredAdd(uvec2, uint);\n"
-            "uvec3  subgroupClusteredAdd(uvec3, uint);\n"
-            "uvec4  subgroupClusteredAdd(uvec4, uint);\n"
-
-            "float  subgroupClusteredMul(float, uint);\n"
-            "vec2   subgroupClusteredMul(vec2, uint);\n"
-            "vec3   subgroupClusteredMul(vec3, uint);\n"
-            "vec4   subgroupClusteredMul(vec4, uint);\n"
-            "int    subgroupClusteredMul(int, uint);\n"
-            "ivec2  subgroupClusteredMul(ivec2, uint);\n"
-            "ivec3  subgroupClusteredMul(ivec3, uint);\n"
-            "ivec4  subgroupClusteredMul(ivec4, uint);\n"
-            "uint   subgroupClusteredMul(uint, uint);\n"
-            "uvec2  subgroupClusteredMul(uvec2, uint);\n"
-            "uvec3  subgroupClusteredMul(uvec3, uint);\n"
-            "uvec4  subgroupClusteredMul(uvec4, uint);\n"
-
-            "float  subgroupClusteredMin(float, uint);\n"
-            "vec2   subgroupClusteredMin(vec2, uint);\n"
-            "vec3   subgroupClusteredMin(vec3, uint);\n"
-            "vec4   subgroupClusteredMin(vec4, uint);\n"
-            "int    subgroupClusteredMin(int, uint);\n"
-            "ivec2  subgroupClusteredMin(ivec2, uint);\n"
-            "ivec3  subgroupClusteredMin(ivec3, uint);\n"
-            "ivec4  subgroupClusteredMin(ivec4, uint);\n"
-            "uint   subgroupClusteredMin(uint, uint);\n"
-            "uvec2  subgroupClusteredMin(uvec2, uint);\n"
-            "uvec3  subgroupClusteredMin(uvec3, uint);\n"
-            "uvec4  subgroupClusteredMin(uvec4, uint);\n"
-
-            "float  subgroupClusteredMax(float, uint);\n"
-            "vec2   subgroupClusteredMax(vec2, uint);\n"
-            "vec3   subgroupClusteredMax(vec3, uint);\n"
-            "vec4   subgroupClusteredMax(vec4, uint);\n"
-            "int    subgroupClusteredMax(int, uint);\n"
-            "ivec2  subgroupClusteredMax(ivec2, uint);\n"
-            "ivec3  subgroupClusteredMax(ivec3, uint);\n"
-            "ivec4  subgroupClusteredMax(ivec4, uint);\n"
-            "uint   subgroupClusteredMax(uint, uint);\n"
-            "uvec2  subgroupClusteredMax(uvec2, uint);\n"
-            "uvec3  subgroupClusteredMax(uvec3, uint);\n"
-            "uvec4  subgroupClusteredMax(uvec4, uint);\n"
-
-            "int    subgroupClusteredAnd(int, uint);\n"
-            "ivec2  subgroupClusteredAnd(ivec2, uint);\n"
-            "ivec3  subgroupClusteredAnd(ivec3, uint);\n"
-            "ivec4  subgroupClusteredAnd(ivec4, uint);\n"
-            "uint   subgroupClusteredAnd(uint, uint);\n"
-            "uvec2  subgroupClusteredAnd(uvec2, uint);\n"
-            "uvec3  subgroupClusteredAnd(uvec3, uint);\n"
-            "uvec4  subgroupClusteredAnd(uvec4, uint);\n"
-            "bool   subgroupClusteredAnd(bool, uint);\n"
-            "bvec2  subgroupClusteredAnd(bvec2, uint);\n"
-            "bvec3  subgroupClusteredAnd(bvec3, uint);\n"
-            "bvec4  subgroupClusteredAnd(bvec4, uint);\n"
-
-            "int    subgroupClusteredOr(int, uint);\n"
-            "ivec2  subgroupClusteredOr(ivec2, uint);\n"
-            "ivec3  subgroupClusteredOr(ivec3, uint);\n"
-            "ivec4  subgroupClusteredOr(ivec4, uint);\n"
-            "uint   subgroupClusteredOr(uint, uint);\n"
-            "uvec2  subgroupClusteredOr(uvec2, uint);\n"
-            "uvec3  subgroupClusteredOr(uvec3, uint);\n"
-            "uvec4  subgroupClusteredOr(uvec4, uint);\n"
-            "bool   subgroupClusteredOr(bool, uint);\n"
-            "bvec2  subgroupClusteredOr(bvec2, uint);\n"
-            "bvec3  subgroupClusteredOr(bvec3, uint);\n"
-            "bvec4  subgroupClusteredOr(bvec4, uint);\n"
-
-            "int    subgroupClusteredXor(int, uint);\n"
-            "ivec2  subgroupClusteredXor(ivec2, uint);\n"
-            "ivec3  subgroupClusteredXor(ivec3, uint);\n"
-            "ivec4  subgroupClusteredXor(ivec4, uint);\n"
-            "uint   subgroupClusteredXor(uint, uint);\n"
-            "uvec2  subgroupClusteredXor(uvec2, uint);\n"
-            "uvec3  subgroupClusteredXor(uvec3, uint);\n"
-            "uvec4  subgroupClusteredXor(uvec4, uint);\n"
-            "bool   subgroupClusteredXor(bool, uint);\n"
-            "bvec2  subgroupClusteredXor(bvec2, uint);\n"
-            "bvec3  subgroupClusteredXor(bvec3, uint);\n"
-            "bvec4  subgroupClusteredXor(bvec4, uint);\n"
-
-            "float  subgroupQuadBroadcast(float, uint);\n"
-            "vec2   subgroupQuadBroadcast(vec2, uint);\n"
-            "vec3   subgroupQuadBroadcast(vec3, uint);\n"
-            "vec4   subgroupQuadBroadcast(vec4, uint);\n"
-            "int    subgroupQuadBroadcast(int, uint);\n"
-            "ivec2  subgroupQuadBroadcast(ivec2, uint);\n"
-            "ivec3  subgroupQuadBroadcast(ivec3, uint);\n"
-            "ivec4  subgroupQuadBroadcast(ivec4, uint);\n"
-            "uint   subgroupQuadBroadcast(uint, uint);\n"
-            "uvec2  subgroupQuadBroadcast(uvec2, uint);\n"
-            "uvec3  subgroupQuadBroadcast(uvec3, uint);\n"
-            "uvec4  subgroupQuadBroadcast(uvec4, uint);\n"
-            "bool   subgroupQuadBroadcast(bool, uint);\n"
-            "bvec2  subgroupQuadBroadcast(bvec2, uint);\n"
-            "bvec3  subgroupQuadBroadcast(bvec3, uint);\n"
-            "bvec4  subgroupQuadBroadcast(bvec4, uint);\n"
-
-            "float  subgroupQuadSwapHorizontal(float);\n"
-            "vec2   subgroupQuadSwapHorizontal(vec2);\n"
-            "vec3   subgroupQuadSwapHorizontal(vec3);\n"
-            "vec4   subgroupQuadSwapHorizontal(vec4);\n"
-            "int    subgroupQuadSwapHorizontal(int);\n"
-            "ivec2  subgroupQuadSwapHorizontal(ivec2);\n"
-            "ivec3  subgroupQuadSwapHorizontal(ivec3);\n"
-            "ivec4  subgroupQuadSwapHorizontal(ivec4);\n"
-            "uint   subgroupQuadSwapHorizontal(uint);\n"
-            "uvec2  subgroupQuadSwapHorizontal(uvec2);\n"
-            "uvec3  subgroupQuadSwapHorizontal(uvec3);\n"
-            "uvec4  subgroupQuadSwapHorizontal(uvec4);\n"
-            "bool   subgroupQuadSwapHorizontal(bool);\n"
-            "bvec2  subgroupQuadSwapHorizontal(bvec2);\n"
-            "bvec3  subgroupQuadSwapHorizontal(bvec3);\n"
-            "bvec4  subgroupQuadSwapHorizontal(bvec4);\n"
-
-            "float  subgroupQuadSwapVertical(float);\n"
-            "vec2   subgroupQuadSwapVertical(vec2);\n"
-            "vec3   subgroupQuadSwapVertical(vec3);\n"
-            "vec4   subgroupQuadSwapVertical(vec4);\n"
-            "int    subgroupQuadSwapVertical(int);\n"
-            "ivec2  subgroupQuadSwapVertical(ivec2);\n"
-            "ivec3  subgroupQuadSwapVertical(ivec3);\n"
-            "ivec4  subgroupQuadSwapVertical(ivec4);\n"
-            "uint   subgroupQuadSwapVertical(uint);\n"
-            "uvec2  subgroupQuadSwapVertical(uvec2);\n"
-            "uvec3  subgroupQuadSwapVertical(uvec3);\n"
-            "uvec4  subgroupQuadSwapVertical(uvec4);\n"
-            "bool   subgroupQuadSwapVertical(bool);\n"
-            "bvec2  subgroupQuadSwapVertical(bvec2);\n"
-            "bvec3  subgroupQuadSwapVertical(bvec3);\n"
-            "bvec4  subgroupQuadSwapVertical(bvec4);\n"
-
-            "float  subgroupQuadSwapDiagonal(float);\n"
-            "vec2   subgroupQuadSwapDiagonal(vec2);\n"
-            "vec3   subgroupQuadSwapDiagonal(vec3);\n"
-            "vec4   subgroupQuadSwapDiagonal(vec4);\n"
-            "int    subgroupQuadSwapDiagonal(int);\n"
-            "ivec2  subgroupQuadSwapDiagonal(ivec2);\n"
-            "ivec3  subgroupQuadSwapDiagonal(ivec3);\n"
-            "ivec4  subgroupQuadSwapDiagonal(ivec4);\n"
-            "uint   subgroupQuadSwapDiagonal(uint);\n"
-            "uvec2  subgroupQuadSwapDiagonal(uvec2);\n"
-            "uvec3  subgroupQuadSwapDiagonal(uvec3);\n"
-            "uvec4  subgroupQuadSwapDiagonal(uvec4);\n"
-            "bool   subgroupQuadSwapDiagonal(bool);\n"
-            "bvec2  subgroupQuadSwapDiagonal(bvec2);\n"
-            "bvec3  subgroupQuadSwapDiagonal(bvec3);\n"
-            "bvec4  subgroupQuadSwapDiagonal(bvec4);\n"
-
-            "uvec4  subgroupPartitionNV(float);\n"
-            "uvec4  subgroupPartitionNV(vec2);\n"
-            "uvec4  subgroupPartitionNV(vec3);\n"
-            "uvec4  subgroupPartitionNV(vec4);\n"
-            "uvec4  subgroupPartitionNV(int);\n"
-            "uvec4  subgroupPartitionNV(ivec2);\n"
-            "uvec4  subgroupPartitionNV(ivec3);\n"
-            "uvec4  subgroupPartitionNV(ivec4);\n"
-            "uvec4  subgroupPartitionNV(uint);\n"
-            "uvec4  subgroupPartitionNV(uvec2);\n"
-            "uvec4  subgroupPartitionNV(uvec3);\n"
-            "uvec4  subgroupPartitionNV(uvec4);\n"
-            "uvec4  subgroupPartitionNV(bool);\n"
-            "uvec4  subgroupPartitionNV(bvec2);\n"
-            "uvec4  subgroupPartitionNV(bvec3);\n"
-            "uvec4  subgroupPartitionNV(bvec4);\n"
-
-            "float  subgroupPartitionedAddNV(float, uvec4 ballot);\n"
-            "vec2   subgroupPartitionedAddNV(vec2, uvec4 ballot);\n"
-            "vec3   subgroupPartitionedAddNV(vec3, uvec4 ballot);\n"
-            "vec4   subgroupPartitionedAddNV(vec4, uvec4 ballot);\n"
-            "int    subgroupPartitionedAddNV(int, uvec4 ballot);\n"
-            "ivec2  subgroupPartitionedAddNV(ivec2, uvec4 ballot);\n"
-            "ivec3  subgroupPartitionedAddNV(ivec3, uvec4 ballot);\n"
-            "ivec4  subgroupPartitionedAddNV(ivec4, uvec4 ballot);\n"
-            "uint   subgroupPartitionedAddNV(uint, uvec4 ballot);\n"
-            "uvec2  subgroupPartitionedAddNV(uvec2, uvec4 ballot);\n"
-            "uvec3  subgroupPartitionedAddNV(uvec3, uvec4 ballot);\n"
-            "uvec4  subgroupPartitionedAddNV(uvec4, uvec4 ballot);\n"
-
-            "float  subgroupPartitionedMulNV(float, uvec4 ballot);\n"
-            "vec2   subgroupPartitionedMulNV(vec2, uvec4 ballot);\n"
-            "vec3   subgroupPartitionedMulNV(vec3, uvec4 ballot);\n"
-            "vec4   subgroupPartitionedMulNV(vec4, uvec4 ballot);\n"
-            "int    subgroupPartitionedMulNV(int, uvec4 ballot);\n"
-            "ivec2  subgroupPartitionedMulNV(ivec2, uvec4 ballot);\n"
-            "ivec3  subgroupPartitionedMulNV(ivec3, uvec4 ballot);\n"
-            "ivec4  subgroupPartitionedMulNV(ivec4, uvec4 ballot);\n"
-            "uint   subgroupPartitionedMulNV(uint, uvec4 ballot);\n"
-            "uvec2  subgroupPartitionedMulNV(uvec2, uvec4 ballot);\n"
-            "uvec3  subgroupPartitionedMulNV(uvec3, uvec4 ballot);\n"
-            "uvec4  subgroupPartitionedMulNV(uvec4, uvec4 ballot);\n"
-
-            "float  subgroupPartitionedMinNV(float, uvec4 ballot);\n"
-            "vec2   subgroupPartitionedMinNV(vec2, uvec4 ballot);\n"
-            "vec3   subgroupPartitionedMinNV(vec3, uvec4 ballot);\n"
-            "vec4   subgroupPartitionedMinNV(vec4, uvec4 ballot);\n"
-            "int    subgroupPartitionedMinNV(int, uvec4 ballot);\n"
-            "ivec2  subgroupPartitionedMinNV(ivec2, uvec4 ballot);\n"
-            "ivec3  subgroupPartitionedMinNV(ivec3, uvec4 ballot);\n"
-            "ivec4  subgroupPartitionedMinNV(ivec4, uvec4 ballot);\n"
-            "uint   subgroupPartitionedMinNV(uint, uvec4 ballot);\n"
-            "uvec2  subgroupPartitionedMinNV(uvec2, uvec4 ballot);\n"
-            "uvec3  subgroupPartitionedMinNV(uvec3, uvec4 ballot);\n"
-            "uvec4  subgroupPartitionedMinNV(uvec4, uvec4 ballot);\n"
-
-            "float  subgroupPartitionedMaxNV(float, uvec4 ballot);\n"
-            "vec2   subgroupPartitionedMaxNV(vec2, uvec4 ballot);\n"
-            "vec3   subgroupPartitionedMaxNV(vec3, uvec4 ballot);\n"
-            "vec4   subgroupPartitionedMaxNV(vec4, uvec4 ballot);\n"
-            "int    subgroupPartitionedMaxNV(int, uvec4 ballot);\n"
-            "ivec2  subgroupPartitionedMaxNV(ivec2, uvec4 ballot);\n"
-            "ivec3  subgroupPartitionedMaxNV(ivec3, uvec4 ballot);\n"
-            "ivec4  subgroupPartitionedMaxNV(ivec4, uvec4 ballot);\n"
-            "uint   subgroupPartitionedMaxNV(uint, uvec4 ballot);\n"
-            "uvec2  subgroupPartitionedMaxNV(uvec2, uvec4 ballot);\n"
-            "uvec3  subgroupPartitionedMaxNV(uvec3, uvec4 ballot);\n"
-            "uvec4  subgroupPartitionedMaxNV(uvec4, uvec4 ballot);\n"
-
-            "int    subgroupPartitionedAndNV(int, uvec4 ballot);\n"
-            "ivec2  subgroupPartitionedAndNV(ivec2, uvec4 ballot);\n"
-            "ivec3  subgroupPartitionedAndNV(ivec3, uvec4 ballot);\n"
-            "ivec4  subgroupPartitionedAndNV(ivec4, uvec4 ballot);\n"
-            "uint   subgroupPartitionedAndNV(uint, uvec4 ballot);\n"
-            "uvec2  subgroupPartitionedAndNV(uvec2, uvec4 ballot);\n"
-            "uvec3  subgroupPartitionedAndNV(uvec3, uvec4 ballot);\n"
-            "uvec4  subgroupPartitionedAndNV(uvec4, uvec4 ballot);\n"
-            "bool   subgroupPartitionedAndNV(bool, uvec4 ballot);\n"
-            "bvec2  subgroupPartitionedAndNV(bvec2, uvec4 ballot);\n"
-            "bvec3  subgroupPartitionedAndNV(bvec3, uvec4 ballot);\n"
-            "bvec4  subgroupPartitionedAndNV(bvec4, uvec4 ballot);\n"
-
-            "int    subgroupPartitionedOrNV(int, uvec4 ballot);\n"
-            "ivec2  subgroupPartitionedOrNV(ivec2, uvec4 ballot);\n"
-            "ivec3  subgroupPartitionedOrNV(ivec3, uvec4 ballot);\n"
-            "ivec4  subgroupPartitionedOrNV(ivec4, uvec4 ballot);\n"
-            "uint   subgroupPartitionedOrNV(uint, uvec4 ballot);\n"
-            "uvec2  subgroupPartitionedOrNV(uvec2, uvec4 ballot);\n"
-            "uvec3  subgroupPartitionedOrNV(uvec3, uvec4 ballot);\n"
-            "uvec4  subgroupPartitionedOrNV(uvec4, uvec4 ballot);\n"
-            "bool   subgroupPartitionedOrNV(bool, uvec4 ballot);\n"
-            "bvec2  subgroupPartitionedOrNV(bvec2, uvec4 ballot);\n"
-            "bvec3  subgroupPartitionedOrNV(bvec3, uvec4 ballot);\n"
-            "bvec4  subgroupPartitionedOrNV(bvec4, uvec4 ballot);\n"
-
-            "int    subgroupPartitionedXorNV(int, uvec4 ballot);\n"
-            "ivec2  subgroupPartitionedXorNV(ivec2, uvec4 ballot);\n"
-            "ivec3  subgroupPartitionedXorNV(ivec3, uvec4 ballot);\n"
-            "ivec4  subgroupPartitionedXorNV(ivec4, uvec4 ballot);\n"
-            "uint   subgroupPartitionedXorNV(uint, uvec4 ballot);\n"
-            "uvec2  subgroupPartitionedXorNV(uvec2, uvec4 ballot);\n"
-            "uvec3  subgroupPartitionedXorNV(uvec3, uvec4 ballot);\n"
-            "uvec4  subgroupPartitionedXorNV(uvec4, uvec4 ballot);\n"
-            "bool   subgroupPartitionedXorNV(bool, uvec4 ballot);\n"
-            "bvec2  subgroupPartitionedXorNV(bvec2, uvec4 ballot);\n"
-            "bvec3  subgroupPartitionedXorNV(bvec3, uvec4 ballot);\n"
-            "bvec4  subgroupPartitionedXorNV(bvec4, uvec4 ballot);\n"
-
-            "float  subgroupPartitionedInclusiveAddNV(float, uvec4 ballot);\n"
-            "vec2   subgroupPartitionedInclusiveAddNV(vec2, uvec4 ballot);\n"
-            "vec3   subgroupPartitionedInclusiveAddNV(vec3, uvec4 ballot);\n"
-            "vec4   subgroupPartitionedInclusiveAddNV(vec4, uvec4 ballot);\n"
-            "int    subgroupPartitionedInclusiveAddNV(int, uvec4 ballot);\n"
-            "ivec2  subgroupPartitionedInclusiveAddNV(ivec2, uvec4 ballot);\n"
-            "ivec3  subgroupPartitionedInclusiveAddNV(ivec3, uvec4 ballot);\n"
-            "ivec4  subgroupPartitionedInclusiveAddNV(ivec4, uvec4 ballot);\n"
-            "uint   subgroupPartitionedInclusiveAddNV(uint, uvec4 ballot);\n"
-            "uvec2  subgroupPartitionedInclusiveAddNV(uvec2, uvec4 ballot);\n"
-            "uvec3  subgroupPartitionedInclusiveAddNV(uvec3, uvec4 ballot);\n"
-            "uvec4  subgroupPartitionedInclusiveAddNV(uvec4, uvec4 ballot);\n"
-
-            "float  subgroupPartitionedInclusiveMulNV(float, uvec4 ballot);\n"
-            "vec2   subgroupPartitionedInclusiveMulNV(vec2, uvec4 ballot);\n"
-            "vec3   subgroupPartitionedInclusiveMulNV(vec3, uvec4 ballot);\n"
-            "vec4   subgroupPartitionedInclusiveMulNV(vec4, uvec4 ballot);\n"
-            "int    subgroupPartitionedInclusiveMulNV(int, uvec4 ballot);\n"
-            "ivec2  subgroupPartitionedInclusiveMulNV(ivec2, uvec4 ballot);\n"
-            "ivec3  subgroupPartitionedInclusiveMulNV(ivec3, uvec4 ballot);\n"
-            "ivec4  subgroupPartitionedInclusiveMulNV(ivec4, uvec4 ballot);\n"
-            "uint   subgroupPartitionedInclusiveMulNV(uint, uvec4 ballot);\n"
-            "uvec2  subgroupPartitionedInclusiveMulNV(uvec2, uvec4 ballot);\n"
-            "uvec3  subgroupPartitionedInclusiveMulNV(uvec3, uvec4 ballot);\n"
-            "uvec4  subgroupPartitionedInclusiveMulNV(uvec4, uvec4 ballot);\n"
-
-            "float  subgroupPartitionedInclusiveMinNV(float, uvec4 ballot);\n"
-            "vec2   subgroupPartitionedInclusiveMinNV(vec2, uvec4 ballot);\n"
-            "vec3   subgroupPartitionedInclusiveMinNV(vec3, uvec4 ballot);\n"
-            "vec4   subgroupPartitionedInclusiveMinNV(vec4, uvec4 ballot);\n"
-            "int    subgroupPartitionedInclusiveMinNV(int, uvec4 ballot);\n"
-            "ivec2  subgroupPartitionedInclusiveMinNV(ivec2, uvec4 ballot);\n"
-            "ivec3  subgroupPartitionedInclusiveMinNV(ivec3, uvec4 ballot);\n"
-            "ivec4  subgroupPartitionedInclusiveMinNV(ivec4, uvec4 ballot);\n"
-            "uint   subgroupPartitionedInclusiveMinNV(uint, uvec4 ballot);\n"
-            "uvec2  subgroupPartitionedInclusiveMinNV(uvec2, uvec4 ballot);\n"
-            "uvec3  subgroupPartitionedInclusiveMinNV(uvec3, uvec4 ballot);\n"
-            "uvec4  subgroupPartitionedInclusiveMinNV(uvec4, uvec4 ballot);\n"
-
-            "float  subgroupPartitionedInclusiveMaxNV(float, uvec4 ballot);\n"
-            "vec2   subgroupPartitionedInclusiveMaxNV(vec2, uvec4 ballot);\n"
-            "vec3   subgroupPartitionedInclusiveMaxNV(vec3, uvec4 ballot);\n"
-            "vec4   subgroupPartitionedInclusiveMaxNV(vec4, uvec4 ballot);\n"
-            "int    subgroupPartitionedInclusiveMaxNV(int, uvec4 ballot);\n"
-            "ivec2  subgroupPartitionedInclusiveMaxNV(ivec2, uvec4 ballot);\n"
-            "ivec3  subgroupPartitionedInclusiveMaxNV(ivec3, uvec4 ballot);\n"
-            "ivec4  subgroupPartitionedInclusiveMaxNV(ivec4, uvec4 ballot);\n"
-            "uint   subgroupPartitionedInclusiveMaxNV(uint, uvec4 ballot);\n"
-            "uvec2  subgroupPartitionedInclusiveMaxNV(uvec2, uvec4 ballot);\n"
-            "uvec3  subgroupPartitionedInclusiveMaxNV(uvec3, uvec4 ballot);\n"
-            "uvec4  subgroupPartitionedInclusiveMaxNV(uvec4, uvec4 ballot);\n"
-
-            "int    subgroupPartitionedInclusiveAndNV(int, uvec4 ballot);\n"
-            "ivec2  subgroupPartitionedInclusiveAndNV(ivec2, uvec4 ballot);\n"
-            "ivec3  subgroupPartitionedInclusiveAndNV(ivec3, uvec4 ballot);\n"
-            "ivec4  subgroupPartitionedInclusiveAndNV(ivec4, uvec4 ballot);\n"
-            "uint   subgroupPartitionedInclusiveAndNV(uint, uvec4 ballot);\n"
-            "uvec2  subgroupPartitionedInclusiveAndNV(uvec2, uvec4 ballot);\n"
-            "uvec3  subgroupPartitionedInclusiveAndNV(uvec3, uvec4 ballot);\n"
-            "uvec4  subgroupPartitionedInclusiveAndNV(uvec4, uvec4 ballot);\n"
-            "bool   subgroupPartitionedInclusiveAndNV(bool, uvec4 ballot);\n"
-            "bvec2  subgroupPartitionedInclusiveAndNV(bvec2, uvec4 ballot);\n"
-            "bvec3  subgroupPartitionedInclusiveAndNV(bvec3, uvec4 ballot);\n"
-            "bvec4  subgroupPartitionedInclusiveAndNV(bvec4, uvec4 ballot);\n"
-
-            "int    subgroupPartitionedInclusiveOrNV(int, uvec4 ballot);\n"
-            "ivec2  subgroupPartitionedInclusiveOrNV(ivec2, uvec4 ballot);\n"
-            "ivec3  subgroupPartitionedInclusiveOrNV(ivec3, uvec4 ballot);\n"
-            "ivec4  subgroupPartitionedInclusiveOrNV(ivec4, uvec4 ballot);\n"
-            "uint   subgroupPartitionedInclusiveOrNV(uint, uvec4 ballot);\n"
-            "uvec2  subgroupPartitionedInclusiveOrNV(uvec2, uvec4 ballot);\n"
-            "uvec3  subgroupPartitionedInclusiveOrNV(uvec3, uvec4 ballot);\n"
-            "uvec4  subgroupPartitionedInclusiveOrNV(uvec4, uvec4 ballot);\n"
-            "bool   subgroupPartitionedInclusiveOrNV(bool, uvec4 ballot);\n"
-            "bvec2  subgroupPartitionedInclusiveOrNV(bvec2, uvec4 ballot);\n"
-            "bvec3  subgroupPartitionedInclusiveOrNV(bvec3, uvec4 ballot);\n"
-            "bvec4  subgroupPartitionedInclusiveOrNV(bvec4, uvec4 ballot);\n"
-
-            "int    subgroupPartitionedInclusiveXorNV(int, uvec4 ballot);\n"
-            "ivec2  subgroupPartitionedInclusiveXorNV(ivec2, uvec4 ballot);\n"
-            "ivec3  subgroupPartitionedInclusiveXorNV(ivec3, uvec4 ballot);\n"
-            "ivec4  subgroupPartitionedInclusiveXorNV(ivec4, uvec4 ballot);\n"
-            "uint   subgroupPartitionedInclusiveXorNV(uint, uvec4 ballot);\n"
-            "uvec2  subgroupPartitionedInclusiveXorNV(uvec2, uvec4 ballot);\n"
-            "uvec3  subgroupPartitionedInclusiveXorNV(uvec3, uvec4 ballot);\n"
-            "uvec4  subgroupPartitionedInclusiveXorNV(uvec4, uvec4 ballot);\n"
-            "bool   subgroupPartitionedInclusiveXorNV(bool, uvec4 ballot);\n"
-            "bvec2  subgroupPartitionedInclusiveXorNV(bvec2, uvec4 ballot);\n"
-            "bvec3  subgroupPartitionedInclusiveXorNV(bvec3, uvec4 ballot);\n"
-            "bvec4  subgroupPartitionedInclusiveXorNV(bvec4, uvec4 ballot);\n"
-
-            "float  subgroupPartitionedExclusiveAddNV(float, uvec4 ballot);\n"
-            "vec2   subgroupPartitionedExclusiveAddNV(vec2, uvec4 ballot);\n"
-            "vec3   subgroupPartitionedExclusiveAddNV(vec3, uvec4 ballot);\n"
-            "vec4   subgroupPartitionedExclusiveAddNV(vec4, uvec4 ballot);\n"
-            "int    subgroupPartitionedExclusiveAddNV(int, uvec4 ballot);\n"
-            "ivec2  subgroupPartitionedExclusiveAddNV(ivec2, uvec4 ballot);\n"
-            "ivec3  subgroupPartitionedExclusiveAddNV(ivec3, uvec4 ballot);\n"
-            "ivec4  subgroupPartitionedExclusiveAddNV(ivec4, uvec4 ballot);\n"
-            "uint   subgroupPartitionedExclusiveAddNV(uint, uvec4 ballot);\n"
-            "uvec2  subgroupPartitionedExclusiveAddNV(uvec2, uvec4 ballot);\n"
-            "uvec3  subgroupPartitionedExclusiveAddNV(uvec3, uvec4 ballot);\n"
-            "uvec4  subgroupPartitionedExclusiveAddNV(uvec4, uvec4 ballot);\n"
-
-            "float  subgroupPartitionedExclusiveMulNV(float, uvec4 ballot);\n"
-            "vec2   subgroupPartitionedExclusiveMulNV(vec2, uvec4 ballot);\n"
-            "vec3   subgroupPartitionedExclusiveMulNV(vec3, uvec4 ballot);\n"
-            "vec4   subgroupPartitionedExclusiveMulNV(vec4, uvec4 ballot);\n"
-            "int    subgroupPartitionedExclusiveMulNV(int, uvec4 ballot);\n"
-            "ivec2  subgroupPartitionedExclusiveMulNV(ivec2, uvec4 ballot);\n"
-            "ivec3  subgroupPartitionedExclusiveMulNV(ivec3, uvec4 ballot);\n"
-            "ivec4  subgroupPartitionedExclusiveMulNV(ivec4, uvec4 ballot);\n"
-            "uint   subgroupPartitionedExclusiveMulNV(uint, uvec4 ballot);\n"
-            "uvec2  subgroupPartitionedExclusiveMulNV(uvec2, uvec4 ballot);\n"
-            "uvec3  subgroupPartitionedExclusiveMulNV(uvec3, uvec4 ballot);\n"
-            "uvec4  subgroupPartitionedExclusiveMulNV(uvec4, uvec4 ballot);\n"
-
-            "float  subgroupPartitionedExclusiveMinNV(float, uvec4 ballot);\n"
-            "vec2   subgroupPartitionedExclusiveMinNV(vec2, uvec4 ballot);\n"
-            "vec3   subgroupPartitionedExclusiveMinNV(vec3, uvec4 ballot);\n"
-            "vec4   subgroupPartitionedExclusiveMinNV(vec4, uvec4 ballot);\n"
-            "int    subgroupPartitionedExclusiveMinNV(int, uvec4 ballot);\n"
-            "ivec2  subgroupPartitionedExclusiveMinNV(ivec2, uvec4 ballot);\n"
-            "ivec3  subgroupPartitionedExclusiveMinNV(ivec3, uvec4 ballot);\n"
-            "ivec4  subgroupPartitionedExclusiveMinNV(ivec4, uvec4 ballot);\n"
-            "uint   subgroupPartitionedExclusiveMinNV(uint, uvec4 ballot);\n"
-            "uvec2  subgroupPartitionedExclusiveMinNV(uvec2, uvec4 ballot);\n"
-            "uvec3  subgroupPartitionedExclusiveMinNV(uvec3, uvec4 ballot);\n"
-            "uvec4  subgroupPartitionedExclusiveMinNV(uvec4, uvec4 ballot);\n"
-
-            "float  subgroupPartitionedExclusiveMaxNV(float, uvec4 ballot);\n"
-            "vec2   subgroupPartitionedExclusiveMaxNV(vec2, uvec4 ballot);\n"
-            "vec3   subgroupPartitionedExclusiveMaxNV(vec3, uvec4 ballot);\n"
-            "vec4   subgroupPartitionedExclusiveMaxNV(vec4, uvec4 ballot);\n"
-            "int    subgroupPartitionedExclusiveMaxNV(int, uvec4 ballot);\n"
-            "ivec2  subgroupPartitionedExclusiveMaxNV(ivec2, uvec4 ballot);\n"
-            "ivec3  subgroupPartitionedExclusiveMaxNV(ivec3, uvec4 ballot);\n"
-            "ivec4  subgroupPartitionedExclusiveMaxNV(ivec4, uvec4 ballot);\n"
-            "uint   subgroupPartitionedExclusiveMaxNV(uint, uvec4 ballot);\n"
-            "uvec2  subgroupPartitionedExclusiveMaxNV(uvec2, uvec4 ballot);\n"
-            "uvec3  subgroupPartitionedExclusiveMaxNV(uvec3, uvec4 ballot);\n"
-            "uvec4  subgroupPartitionedExclusiveMaxNV(uvec4, uvec4 ballot);\n"
-
-            "int    subgroupPartitionedExclusiveAndNV(int, uvec4 ballot);\n"
-            "ivec2  subgroupPartitionedExclusiveAndNV(ivec2, uvec4 ballot);\n"
-            "ivec3  subgroupPartitionedExclusiveAndNV(ivec3, uvec4 ballot);\n"
-            "ivec4  subgroupPartitionedExclusiveAndNV(ivec4, uvec4 ballot);\n"
-            "uint   subgroupPartitionedExclusiveAndNV(uint, uvec4 ballot);\n"
-            "uvec2  subgroupPartitionedExclusiveAndNV(uvec2, uvec4 ballot);\n"
-            "uvec3  subgroupPartitionedExclusiveAndNV(uvec3, uvec4 ballot);\n"
-            "uvec4  subgroupPartitionedExclusiveAndNV(uvec4, uvec4 ballot);\n"
-            "bool   subgroupPartitionedExclusiveAndNV(bool, uvec4 ballot);\n"
-            "bvec2  subgroupPartitionedExclusiveAndNV(bvec2, uvec4 ballot);\n"
-            "bvec3  subgroupPartitionedExclusiveAndNV(bvec3, uvec4 ballot);\n"
-            "bvec4  subgroupPartitionedExclusiveAndNV(bvec4, uvec4 ballot);\n"
-
-            "int    subgroupPartitionedExclusiveOrNV(int, uvec4 ballot);\n"
-            "ivec2  subgroupPartitionedExclusiveOrNV(ivec2, uvec4 ballot);\n"
-            "ivec3  subgroupPartitionedExclusiveOrNV(ivec3, uvec4 ballot);\n"
-            "ivec4  subgroupPartitionedExclusiveOrNV(ivec4, uvec4 ballot);\n"
-            "uint   subgroupPartitionedExclusiveOrNV(uint, uvec4 ballot);\n"
-            "uvec2  subgroupPartitionedExclusiveOrNV(uvec2, uvec4 ballot);\n"
-            "uvec3  subgroupPartitionedExclusiveOrNV(uvec3, uvec4 ballot);\n"
-            "uvec4  subgroupPartitionedExclusiveOrNV(uvec4, uvec4 ballot);\n"
-            "bool   subgroupPartitionedExclusiveOrNV(bool, uvec4 ballot);\n"
-            "bvec2  subgroupPartitionedExclusiveOrNV(bvec2, uvec4 ballot);\n"
-            "bvec3  subgroupPartitionedExclusiveOrNV(bvec3, uvec4 ballot);\n"
-            "bvec4  subgroupPartitionedExclusiveOrNV(bvec4, uvec4 ballot);\n"
-
-            "int    subgroupPartitionedExclusiveXorNV(int, uvec4 ballot);\n"
-            "ivec2  subgroupPartitionedExclusiveXorNV(ivec2, uvec4 ballot);\n"
-            "ivec3  subgroupPartitionedExclusiveXorNV(ivec3, uvec4 ballot);\n"
-            "ivec4  subgroupPartitionedExclusiveXorNV(ivec4, uvec4 ballot);\n"
-            "uint   subgroupPartitionedExclusiveXorNV(uint, uvec4 ballot);\n"
-            "uvec2  subgroupPartitionedExclusiveXorNV(uvec2, uvec4 ballot);\n"
-            "uvec3  subgroupPartitionedExclusiveXorNV(uvec3, uvec4 ballot);\n"
-            "uvec4  subgroupPartitionedExclusiveXorNV(uvec4, uvec4 ballot);\n"
-            "bool   subgroupPartitionedExclusiveXorNV(bool, uvec4 ballot);\n"
-            "bvec2  subgroupPartitionedExclusiveXorNV(bvec2, uvec4 ballot);\n"
-            "bvec3  subgroupPartitionedExclusiveXorNV(bvec3, uvec4 ballot);\n"
-            "bvec4  subgroupPartitionedExclusiveXorNV(bvec4, uvec4 ballot);\n"
-            "\n");
-
-        if (profile != EEsProfile && version >= 400) {
-            commonBuiltins.append(
-                "bool   subgroupAllEqual(double);\n"
-                "bool   subgroupAllEqual(dvec2);\n"
-                "bool   subgroupAllEqual(dvec3);\n"
-                "bool   subgroupAllEqual(dvec4);\n"
-
-                "double subgroupBroadcast(double, uint);\n"
-                "dvec2  subgroupBroadcast(dvec2, uint);\n"
-                "dvec3  subgroupBroadcast(dvec3, uint);\n"
-                "dvec4  subgroupBroadcast(dvec4, uint);\n"
-
-                "double subgroupBroadcastFirst(double);\n"
-                "dvec2  subgroupBroadcastFirst(dvec2);\n"
-                "dvec3  subgroupBroadcastFirst(dvec3);\n"
-                "dvec4  subgroupBroadcastFirst(dvec4);\n"
-
-                "double subgroupShuffle(double, uint);\n"
-                "dvec2  subgroupShuffle(dvec2, uint);\n"
-                "dvec3  subgroupShuffle(dvec3, uint);\n"
-                "dvec4  subgroupShuffle(dvec4, uint);\n"
-
-                "double subgroupShuffleXor(double, uint);\n"
-                "dvec2  subgroupShuffleXor(dvec2, uint);\n"
-                "dvec3  subgroupShuffleXor(dvec3, uint);\n"
-                "dvec4  subgroupShuffleXor(dvec4, uint);\n"
-
-                "double subgroupShuffleUp(double, uint delta);\n"
-                "dvec2  subgroupShuffleUp(dvec2, uint delta);\n"
-                "dvec3  subgroupShuffleUp(dvec3, uint delta);\n"
-                "dvec4  subgroupShuffleUp(dvec4, uint delta);\n"
-
-                "double subgroupShuffleDown(double, uint delta);\n"
-                "dvec2  subgroupShuffleDown(dvec2, uint delta);\n"
-                "dvec3  subgroupShuffleDown(dvec3, uint delta);\n"
-                "dvec4  subgroupShuffleDown(dvec4, uint delta);\n"
-
-                "double subgroupAdd(double);\n"
-                "dvec2  subgroupAdd(dvec2);\n"
-                "dvec3  subgroupAdd(dvec3);\n"
-                "dvec4  subgroupAdd(dvec4);\n"
-
-                "double subgroupMul(double);\n"
-                "dvec2  subgroupMul(dvec2);\n"
-                "dvec3  subgroupMul(dvec3);\n"
-                "dvec4  subgroupMul(dvec4);\n"
-
-                "double subgroupMin(double);\n"
-                "dvec2  subgroupMin(dvec2);\n"
-                "dvec3  subgroupMin(dvec3);\n"
-                "dvec4  subgroupMin(dvec4);\n"
-
-                "double subgroupMax(double);\n"
-                "dvec2  subgroupMax(dvec2);\n"
-                "dvec3  subgroupMax(dvec3);\n"
-                "dvec4  subgroupMax(dvec4);\n"
-
-                "double subgroupInclusiveAdd(double);\n"
-                "dvec2  subgroupInclusiveAdd(dvec2);\n"
-                "dvec3  subgroupInclusiveAdd(dvec3);\n"
-                "dvec4  subgroupInclusiveAdd(dvec4);\n"
-
-                "double subgroupInclusiveMul(double);\n"
-                "dvec2  subgroupInclusiveMul(dvec2);\n"
-                "dvec3  subgroupInclusiveMul(dvec3);\n"
-                "dvec4  subgroupInclusiveMul(dvec4);\n"
-
-                "double subgroupInclusiveMin(double);\n"
-                "dvec2  subgroupInclusiveMin(dvec2);\n"
-                "dvec3  subgroupInclusiveMin(dvec3);\n"
-                "dvec4  subgroupInclusiveMin(dvec4);\n"
-
-                "double subgroupInclusiveMax(double);\n"
-                "dvec2  subgroupInclusiveMax(dvec2);\n"
-                "dvec3  subgroupInclusiveMax(dvec3);\n"
-                "dvec4  subgroupInclusiveMax(dvec4);\n"
-
-                "double subgroupExclusiveAdd(double);\n"
-                "dvec2  subgroupExclusiveAdd(dvec2);\n"
-                "dvec3  subgroupExclusiveAdd(dvec3);\n"
-                "dvec4  subgroupExclusiveAdd(dvec4);\n"
-
-                "double subgroupExclusiveMul(double);\n"
-                "dvec2  subgroupExclusiveMul(dvec2);\n"
-                "dvec3  subgroupExclusiveMul(dvec3);\n"
-                "dvec4  subgroupExclusiveMul(dvec4);\n"
-
-                "double subgroupExclusiveMin(double);\n"
-                "dvec2  subgroupExclusiveMin(dvec2);\n"
-                "dvec3  subgroupExclusiveMin(dvec3);\n"
-                "dvec4  subgroupExclusiveMin(dvec4);\n"
-
-                "double subgroupExclusiveMax(double);\n"
-                "dvec2  subgroupExclusiveMax(dvec2);\n"
-                "dvec3  subgroupExclusiveMax(dvec3);\n"
-                "dvec4  subgroupExclusiveMax(dvec4);\n"
-
-                "double subgroupClusteredAdd(double, uint);\n"
-                "dvec2  subgroupClusteredAdd(dvec2, uint);\n"
-                "dvec3  subgroupClusteredAdd(dvec3, uint);\n"
-                "dvec4  subgroupClusteredAdd(dvec4, uint);\n"
-
-                "double subgroupClusteredMul(double, uint);\n"
-                "dvec2  subgroupClusteredMul(dvec2, uint);\n"
-                "dvec3  subgroupClusteredMul(dvec3, uint);\n"
-                "dvec4  subgroupClusteredMul(dvec4, uint);\n"
-
-                "double subgroupClusteredMin(double, uint);\n"
-                "dvec2  subgroupClusteredMin(dvec2, uint);\n"
-                "dvec3  subgroupClusteredMin(dvec3, uint);\n"
-                "dvec4  subgroupClusteredMin(dvec4, uint);\n"
-
-                "double subgroupClusteredMax(double, uint);\n"
-                "dvec2  subgroupClusteredMax(dvec2, uint);\n"
-                "dvec3  subgroupClusteredMax(dvec3, uint);\n"
-                "dvec4  subgroupClusteredMax(dvec4, uint);\n"
-
-                "double subgroupQuadBroadcast(double, uint);\n"
-                "dvec2  subgroupQuadBroadcast(dvec2, uint);\n"
-                "dvec3  subgroupQuadBroadcast(dvec3, uint);\n"
-                "dvec4  subgroupQuadBroadcast(dvec4, uint);\n"
-
-                "double subgroupQuadSwapHorizontal(double);\n"
-                "dvec2  subgroupQuadSwapHorizontal(dvec2);\n"
-                "dvec3  subgroupQuadSwapHorizontal(dvec3);\n"
-                "dvec4  subgroupQuadSwapHorizontal(dvec4);\n"
-
-                "double subgroupQuadSwapVertical(double);\n"
-                "dvec2  subgroupQuadSwapVertical(dvec2);\n"
-                "dvec3  subgroupQuadSwapVertical(dvec3);\n"
-                "dvec4  subgroupQuadSwapVertical(dvec4);\n"
-
-                "double subgroupQuadSwapDiagonal(double);\n"
-                "dvec2  subgroupQuadSwapDiagonal(dvec2);\n"
-                "dvec3  subgroupQuadSwapDiagonal(dvec3);\n"
-                "dvec4  subgroupQuadSwapDiagonal(dvec4);\n"
-
-                "uvec4  subgroupPartitionNV(double);\n"
-                "uvec4  subgroupPartitionNV(dvec2);\n"
-                "uvec4  subgroupPartitionNV(dvec3);\n"
-                "uvec4  subgroupPartitionNV(dvec4);\n"
-
-                "double subgroupPartitionedAddNV(double, uvec4 ballot);\n"
-                "dvec2  subgroupPartitionedAddNV(dvec2, uvec4 ballot);\n"
-                "dvec3  subgroupPartitionedAddNV(dvec3, uvec4 ballot);\n"
-                "dvec4  subgroupPartitionedAddNV(dvec4, uvec4 ballot);\n"
-
-                "double subgroupPartitionedMulNV(double, uvec4 ballot);\n"
-                "dvec2  subgroupPartitionedMulNV(dvec2, uvec4 ballot);\n"
-                "dvec3  subgroupPartitionedMulNV(dvec3, uvec4 ballot);\n"
-                "dvec4  subgroupPartitionedMulNV(dvec4, uvec4 ballot);\n"
-
-                "double subgroupPartitionedMinNV(double, uvec4 ballot);\n"
-                "dvec2  subgroupPartitionedMinNV(dvec2, uvec4 ballot);\n"
-                "dvec3  subgroupPartitionedMinNV(dvec3, uvec4 ballot);\n"
-                "dvec4  subgroupPartitionedMinNV(dvec4, uvec4 ballot);\n"
-
-                "double subgroupPartitionedMaxNV(double, uvec4 ballot);\n"
-                "dvec2  subgroupPartitionedMaxNV(dvec2, uvec4 ballot);\n"
-                "dvec3  subgroupPartitionedMaxNV(dvec3, uvec4 ballot);\n"
-                "dvec4  subgroupPartitionedMaxNV(dvec4, uvec4 ballot);\n"
-
-                "double subgroupPartitionedInclusiveAddNV(double, uvec4 ballot);\n"
-                "dvec2  subgroupPartitionedInclusiveAddNV(dvec2, uvec4 ballot);\n"
-                "dvec3  subgroupPartitionedInclusiveAddNV(dvec3, uvec4 ballot);\n"
-                "dvec4  subgroupPartitionedInclusiveAddNV(dvec4, uvec4 ballot);\n"
-
-                "double subgroupPartitionedInclusiveMulNV(double, uvec4 ballot);\n"
-                "dvec2  subgroupPartitionedInclusiveMulNV(dvec2, uvec4 ballot);\n"
-                "dvec3  subgroupPartitionedInclusiveMulNV(dvec3, uvec4 ballot);\n"
-                "dvec4  subgroupPartitionedInclusiveMulNV(dvec4, uvec4 ballot);\n"
-
-                "double subgroupPartitionedInclusiveMinNV(double, uvec4 ballot);\n"
-                "dvec2  subgroupPartitionedInclusiveMinNV(dvec2, uvec4 ballot);\n"
-                "dvec3  subgroupPartitionedInclusiveMinNV(dvec3, uvec4 ballot);\n"
-                "dvec4  subgroupPartitionedInclusiveMinNV(dvec4, uvec4 ballot);\n"
-
-                "double subgroupPartitionedInclusiveMaxNV(double, uvec4 ballot);\n"
-                "dvec2  subgroupPartitionedInclusiveMaxNV(dvec2, uvec4 ballot);\n"
-                "dvec3  subgroupPartitionedInclusiveMaxNV(dvec3, uvec4 ballot);\n"
-                "dvec4  subgroupPartitionedInclusiveMaxNV(dvec4, uvec4 ballot);\n"
-
-                "double subgroupPartitionedExclusiveAddNV(double, uvec4 ballot);\n"
-                "dvec2  subgroupPartitionedExclusiveAddNV(dvec2, uvec4 ballot);\n"
-                "dvec3  subgroupPartitionedExclusiveAddNV(dvec3, uvec4 ballot);\n"
-                "dvec4  subgroupPartitionedExclusiveAddNV(dvec4, uvec4 ballot);\n"
-
-                "double subgroupPartitionedExclusiveMulNV(double, uvec4 ballot);\n"
-                "dvec2  subgroupPartitionedExclusiveMulNV(dvec2, uvec4 ballot);\n"
-                "dvec3  subgroupPartitionedExclusiveMulNV(dvec3, uvec4 ballot);\n"
-                "dvec4  subgroupPartitionedExclusiveMulNV(dvec4, uvec4 ballot);\n"
-
-                "double subgroupPartitionedExclusiveMinNV(double, uvec4 ballot);\n"
-                "dvec2  subgroupPartitionedExclusiveMinNV(dvec2, uvec4 ballot);\n"
-                "dvec3  subgroupPartitionedExclusiveMinNV(dvec3, uvec4 ballot);\n"
-                "dvec4  subgroupPartitionedExclusiveMinNV(dvec4, uvec4 ballot);\n"
-
-                "double subgroupPartitionedExclusiveMaxNV(double, uvec4 ballot);\n"
-                "dvec2  subgroupPartitionedExclusiveMaxNV(dvec2, uvec4 ballot);\n"
-                "dvec3  subgroupPartitionedExclusiveMaxNV(dvec3, uvec4 ballot);\n"
-                "dvec4  subgroupPartitionedExclusiveMaxNV(dvec4, uvec4 ballot);\n"
-
-                "\n");
+            if (!logicalOp) {
+                for (size_t j = 0; j < sizeof(floatTypes)/sizeof(floatTypes[0]); ++j) {
+                    snprintf(buf, bufSize, op, floatTypes[j], floatTypes[j]);
+                    commonBuiltins.append(buf);
+                }
+                if (profile != EEsProfile && version >= 400) {
+                    for (size_t j = 0; j < sizeof(doubleTypes)/sizeof(doubleTypes[0]); ++j) {
+                        snprintf(buf, bufSize, op, doubleTypes[j], doubleTypes[j]);
+                        commonBuiltins.append(buf);
+                    }
+                }
             }
+            if (!mathOp) {
+                for (size_t j = 0; j < sizeof(boolTypes)/sizeof(boolTypes[0]); ++j) {
+                    snprintf(buf, bufSize, op, boolTypes[j], boolTypes[j]);
+                    commonBuiltins.append(buf);
+                }
+            }
+            for (size_t j = 0; j < sizeof(intTypes)/sizeof(intTypes[0]); ++j) {
+                snprintf(buf, bufSize, op, intTypes[j], intTypes[j]);
+                commonBuiltins.append(buf);
+            }
+        }
 
         stageBuiltins[EShLangCompute].append(
             "void subgroupMemoryBarrierShared();"
@@ -3721,7 +2796,7 @@
             "\n");
     }
 
-    if ((profile != EEsProfile && version >= 450) || 
+    if ((profile != EEsProfile && version >= 450) ||
         (profile == EEsProfile && version >= 320)) {
         commonBuiltins.append(
             "struct gl_TextureFootprint2DNV {"
@@ -5047,7 +4122,7 @@
     if ((profile != EEsProfile && version >= 450) || (profile == EEsProfile && version >= 320)) {
         stageBuiltins[EShLangMeshNV].append(
             "void writePackedPrimitiveIndices4x8NV(uint, uint);"
-            "\n");   
+            "\n");
     }
 #endif
 
@@ -5986,7 +5061,7 @@
 
     // GL_ARB_shader_ballot
     if (profile != EEsProfile && version >= 450) {
-        const char* ballotDecls = 
+        const char* ballotDecls =
             "uniform uint gl_SubGroupSizeARB;"
             "in uint     gl_SubGroupInvocationARB;"
             "in uint64_t gl_SubGroupEqMaskARB;"
@@ -5995,7 +5070,7 @@
             "in uint64_t gl_SubGroupLeMaskARB;"
             "in uint64_t gl_SubGroupLtMaskARB;"
             "\n";
-        const char* fragmentBallotDecls = 
+        const char* fragmentBallotDecls =
             "uniform uint gl_SubGroupSizeARB;"
             "flat in uint     gl_SubGroupInvocationARB;"
             "flat in uint64_t gl_SubGroupEqMaskARB;"
@@ -6287,7 +5362,7 @@
                             continue;
 
                         // Loop over the bTypes
-                        for (int bType = 0; bType < sizeof(bTypes)/sizeof(TBasicType); ++bType) {
+                        for (size_t bType = 0; bType < sizeof(bTypes)/sizeof(TBasicType); ++bType) {
 #ifndef GLSLANG_WEB
                             if (bTypes[bType] == EbtFloat16 && (profile == EEsProfile || version < 450))
                                 continue;
diff --git a/glslang/MachineIndependent/Intermediate.cpp b/glslang/MachineIndependent/Intermediate.cpp
index 93d41f7..429e1e8 100644
--- a/glslang/MachineIndependent/Intermediate.cpp
+++ b/glslang/MachineIndependent/Intermediate.cpp
@@ -562,6 +562,237 @@
     return true;
 }
 
+bool TIntermediate::buildConvertOp(TBasicType dst, TBasicType src, TOperator& newOp) const
+{   // Selects the operator that converts basic type 'src' to basic type 'dst' and stores it in 'newOp'.
+    switch (dst) { // outer switch: destination type; each nested switch: source type
+#ifndef GLSLANG_WEB // the double destination is compiled only when GLSLANG_WEB is not defined
+    case EbtDouble:
+        switch (src) {
+        case EbtUint:    newOp = EOpConvUintToDouble;    break;
+        case EbtBool:    newOp = EOpConvBoolToDouble;    break;
+        case EbtFloat:   newOp = EOpConvFloatToDouble;   break;
+        case EbtInt:     newOp = EOpConvIntToDouble;     break;
+        case EbtInt8:    newOp = EOpConvInt8ToDouble;    break;
+        case EbtUint8:   newOp = EOpConvUint8ToDouble;   break;
+        case EbtInt16:   newOp = EOpConvInt16ToDouble;   break;
+        case EbtUint16:  newOp = EOpConvUint16ToDouble;  break;
+        case EbtFloat16: newOp = EOpConvFloat16ToDouble; break;
+        case EbtInt64:   newOp = EOpConvInt64ToDouble;   break;
+        case EbtUint64:  newOp = EOpConvUint64ToDouble;  break;
+        default: // any other source (including src == dst) has no operator: fail without assigning newOp
+            return false;
+        }
+        break;
+#endif // !GLSLANG_WEB
+    case EbtFloat:
+        switch (src) {
+        case EbtInt:     newOp = EOpConvIntToFloat;     break;
+        case EbtUint:    newOp = EOpConvUintToFloat;    break;
+        case EbtBool:    newOp = EOpConvBoolToFloat;    break;
+#ifndef GLSLANG_WEB // the sources below are recognized only when GLSLANG_WEB is not defined
+        case EbtDouble:  newOp = EOpConvDoubleToFloat;  break;
+        case EbtInt8:    newOp = EOpConvInt8ToFloat;    break;
+        case EbtUint8:   newOp = EOpConvUint8ToFloat;   break;
+        case EbtInt16:   newOp = EOpConvInt16ToFloat;   break;
+        case EbtUint16:  newOp = EOpConvUint16ToFloat;  break;
+        case EbtFloat16: newOp = EOpConvFloat16ToFloat; break;
+        case EbtInt64:   newOp = EOpConvInt64ToFloat;   break;
+        case EbtUint64:  newOp = EOpConvUint64ToFloat;  break;
+#endif // !GLSLANG_WEB
+        default:
+            return false;
+        }
+        break;
+#ifndef GLSLANG_WEB // the float16 destination is compiled only when GLSLANG_WEB is not defined
+    case EbtFloat16:
+        switch (src) {
+        case EbtInt8:   newOp = EOpConvInt8ToFloat16;   break;
+        case EbtUint8:  newOp = EOpConvUint8ToFloat16;  break;
+        case EbtInt16:  newOp = EOpConvInt16ToFloat16;  break;
+        case EbtUint16: newOp = EOpConvUint16ToFloat16; break;
+        case EbtInt:    newOp = EOpConvIntToFloat16;    break;
+        case EbtUint:   newOp = EOpConvUintToFloat16;   break;
+        case EbtBool:   newOp = EOpConvBoolToFloat16;   break;
+        case EbtFloat:  newOp = EOpConvFloatToFloat16;  break;
+        case EbtDouble: newOp = EOpConvDoubleToFloat16; break;
+        case EbtInt64:  newOp = EOpConvInt64ToFloat16;  break;
+        case EbtUint64: newOp = EOpConvUint64ToFloat16; break;
+        default:
+            return false;
+        }
+        break;
+#endif // !GLSLANG_WEB
+    case EbtBool:
+        switch (src) {
+        case EbtInt:     newOp = EOpConvIntToBool;     break;
+        case EbtUint:    newOp = EOpConvUintToBool;    break;
+        case EbtFloat:   newOp = EOpConvFloatToBool;   break;
+#ifndef GLSLANG_WEB // the sources below are recognized only when GLSLANG_WEB is not defined
+        case EbtDouble:  newOp = EOpConvDoubleToBool;  break;
+        case EbtInt8:    newOp = EOpConvInt8ToBool;    break;
+        case EbtUint8:   newOp = EOpConvUint8ToBool;   break;
+        case EbtInt16:   newOp = EOpConvInt16ToBool;   break;
+        case EbtUint16:  newOp = EOpConvUint16ToBool;  break;
+        case EbtFloat16: newOp = EOpConvFloat16ToBool; break;
+        case EbtInt64:   newOp = EOpConvInt64ToBool;   break;
+        case EbtUint64:  newOp = EOpConvUint64ToBool;  break;
+#endif // !GLSLANG_WEB
+        default:
+            return false;
+        }
+        break;
+#ifndef GLSLANG_WEB // the Int8/Uint8/Int16/Uint16 destinations are compiled only when GLSLANG_WEB is not defined
+    case EbtInt8:
+        switch (src) {
+        case EbtUint8:   newOp = EOpConvUint8ToInt8;   break;
+        case EbtInt16:   newOp = EOpConvInt16ToInt8;   break;
+        case EbtUint16:  newOp = EOpConvUint16ToInt8;  break;
+        case EbtInt:     newOp = EOpConvIntToInt8;     break;
+        case EbtUint:    newOp = EOpConvUintToInt8;    break;
+        case EbtInt64:   newOp = EOpConvInt64ToInt8;   break;
+        case EbtUint64:  newOp = EOpConvUint64ToInt8;  break;
+        case EbtBool:    newOp = EOpConvBoolToInt8;    break;
+        case EbtFloat:   newOp = EOpConvFloatToInt8;   break;
+        case EbtDouble:  newOp = EOpConvDoubleToInt8;  break;
+        case EbtFloat16: newOp = EOpConvFloat16ToInt8; break;
+        default:
+            return false;
+        }
+        break;
+    case EbtUint8:
+        switch (src) {
+        case EbtInt8:    newOp = EOpConvInt8ToUint8;    break;
+        case EbtInt16:   newOp = EOpConvInt16ToUint8;   break;
+        case EbtUint16:  newOp = EOpConvUint16ToUint8;  break;
+        case EbtInt:     newOp = EOpConvIntToUint8;     break;
+        case EbtUint:    newOp = EOpConvUintToUint8;    break;
+        case EbtInt64:   newOp = EOpConvInt64ToUint8;   break;
+        case EbtUint64:  newOp = EOpConvUint64ToUint8;  break;
+        case EbtBool:    newOp = EOpConvBoolToUint8;    break;
+        case EbtFloat:   newOp = EOpConvFloatToUint8;   break;
+        case EbtDouble:  newOp = EOpConvDoubleToUint8;  break;
+        case EbtFloat16: newOp = EOpConvFloat16ToUint8; break;
+        default:
+            return false;
+        }
+        break;
+
+    case EbtInt16:
+        switch (src) {
+        case EbtUint8:   newOp = EOpConvUint8ToInt16;   break;
+        case EbtInt8:    newOp = EOpConvInt8ToInt16;    break;
+        case EbtUint16:  newOp = EOpConvUint16ToInt16;  break;
+        case EbtInt:     newOp = EOpConvIntToInt16;     break;
+        case EbtUint:    newOp = EOpConvUintToInt16;    break;
+        case EbtInt64:   newOp = EOpConvInt64ToInt16;   break;
+        case EbtUint64:  newOp = EOpConvUint64ToInt16;  break;
+        case EbtBool:    newOp = EOpConvBoolToInt16;    break;
+        case EbtFloat:   newOp = EOpConvFloatToInt16;   break;
+        case EbtDouble:  newOp = EOpConvDoubleToInt16;  break;
+        case EbtFloat16: newOp = EOpConvFloat16ToInt16; break;
+        default:
+            return false;
+        }
+        break;
+    case EbtUint16:
+        switch (src) {
+        case EbtInt8:    newOp = EOpConvInt8ToUint16;    break;
+        case EbtUint8:   newOp = EOpConvUint8ToUint16;   break;
+        case EbtInt16:   newOp = EOpConvInt16ToUint16;   break;
+        case EbtInt:     newOp = EOpConvIntToUint16;     break;
+        case EbtUint:    newOp = EOpConvUintToUint16;    break;
+        case EbtInt64:   newOp = EOpConvInt64ToUint16;   break;
+        case EbtUint64:  newOp = EOpConvUint64ToUint16;  break;
+        case EbtBool:    newOp = EOpConvBoolToUint16;    break;
+        case EbtFloat:   newOp = EOpConvFloatToUint16;   break;
+        case EbtDouble:  newOp = EOpConvDoubleToUint16;  break;
+        case EbtFloat16: newOp = EOpConvFloat16ToUint16; break;
+        default:
+            return false;
+        }
+        break;
+#endif // !GLSLANG_WEB
+
+    case EbtInt:
+        switch (src) {
+        case EbtUint:    newOp = EOpConvUintToInt;    break;
+        case EbtBool:    newOp = EOpConvBoolToInt;    break;
+        case EbtFloat:   newOp = EOpConvFloatToInt;   break;
+#ifndef GLSLANG_WEB // the sources below are recognized only when GLSLANG_WEB is not defined
+        case EbtInt8:    newOp = EOpConvInt8ToInt;    break;
+        case EbtUint8:   newOp = EOpConvUint8ToInt;   break;
+        case EbtInt16:   newOp = EOpConvInt16ToInt;   break;
+        case EbtUint16:  newOp = EOpConvUint16ToInt;  break;
+        case EbtDouble:  newOp = EOpConvDoubleToInt;  break;
+        case EbtFloat16: newOp = EOpConvFloat16ToInt; break;
+        case EbtInt64:   newOp = EOpConvInt64ToInt;   break;
+        case EbtUint64:  newOp = EOpConvUint64ToInt;  break;
+#endif // !GLSLANG_WEB
+        default:
+            return false;
+        }
+        break;
+    case EbtUint:
+        switch (src) {
+        case EbtInt:     newOp = EOpConvIntToUint;     break;
+        case EbtBool:    newOp = EOpConvBoolToUint;    break;
+        case EbtFloat:   newOp = EOpConvFloatToUint;   break;
+#ifndef GLSLANG_WEB // the sources below are recognized only when GLSLANG_WEB is not defined
+        case EbtInt8:    newOp = EOpConvInt8ToUint;    break;
+        case EbtUint8:   newOp = EOpConvUint8ToUint;   break;
+        case EbtInt16:   newOp = EOpConvInt16ToUint;   break;
+        case EbtUint16:  newOp = EOpConvUint16ToUint;  break;
+        case EbtDouble:  newOp = EOpConvDoubleToUint;  break;
+        case EbtFloat16: newOp = EOpConvFloat16ToUint; break;
+        case EbtInt64:   newOp = EOpConvInt64ToUint;   break;
+        case EbtUint64:  newOp = EOpConvUint64ToUint;  break;
+#endif // !GLSLANG_WEB
+        default:
+            return false;
+        }
+        break;
+#ifndef GLSLANG_WEB // the Int64/Uint64 destinations are compiled only when GLSLANG_WEB is not defined
+    case EbtInt64:
+        switch (src) {
+        case EbtInt8:    newOp = EOpConvInt8ToInt64;    break;
+        case EbtUint8:   newOp = EOpConvUint8ToInt64;   break;
+        case EbtInt16:   newOp = EOpConvInt16ToInt64;   break;
+        case EbtUint16:  newOp = EOpConvUint16ToInt64;  break;
+        case EbtInt:     newOp = EOpConvIntToInt64;     break;
+        case EbtUint:    newOp = EOpConvUintToInt64;    break;
+        case EbtBool:    newOp = EOpConvBoolToInt64;    break;
+        case EbtFloat:   newOp = EOpConvFloatToInt64;   break;
+        case EbtDouble:  newOp = EOpConvDoubleToInt64;  break;
+        case EbtFloat16: newOp = EOpConvFloat16ToInt64; break;
+        case EbtUint64:  newOp = EOpConvUint64ToInt64;  break;
+        default:
+            return false;
+        }
+        break;
+    case EbtUint64:
+        switch (src) {
+        case EbtInt8:    newOp = EOpConvInt8ToUint64;    break;
+        case EbtUint8:   newOp = EOpConvUint8ToUint64;   break;
+        case EbtInt16:   newOp = EOpConvInt16ToUint64;   break;
+        case EbtUint16:  newOp = EOpConvUint16ToUint64;  break;
+        case EbtInt:     newOp = EOpConvIntToUint64;     break;
+        case EbtUint:    newOp = EOpConvUintToUint64;    break;
+        case EbtBool:    newOp = EOpConvBoolToUint64;    break;
+        case EbtFloat:   newOp = EOpConvFloatToUint64;   break;
+        case EbtDouble:  newOp = EOpConvDoubleToUint64;  break;
+        case EbtFloat16: newOp = EOpConvFloat16ToUint64; break;
+        case EbtInt64:   newOp = EOpConvInt64ToUint64;   break;
+        default:
+            return false;
+        }
+        break;
+#endif // !GLSLANG_WEB
+    default: // destination type not handled by any case above
+        return false;
+    }
+    return true; // reached only after a nested case assigned newOp
+}
+
 // This is 'mechanism' here, it does any conversion told.
 // It is about basic type, not about shape.
 // The policy comes from the shader or the calling code.
@@ -608,230 +839,7 @@
             return nullptr;
     }
 
-    switch (convertTo) {
-#ifndef GLSLANG_WEB
-    case EbtDouble:
-        switch (node->getBasicType()) {
-        case EbtUint:    newOp = EOpConvUintToDouble;    break;
-        case EbtBool:    newOp = EOpConvBoolToDouble;    break;
-        case EbtFloat:   newOp = EOpConvFloatToDouble;   break;
-        case EbtInt:     newOp = EOpConvIntToDouble;     break;
-        case EbtInt8:    newOp = EOpConvInt8ToDouble;    break;
-        case EbtUint8:   newOp = EOpConvUint8ToDouble;   break;
-        case EbtInt16:   newOp = EOpConvInt16ToDouble;   break;
-        case EbtUint16:  newOp = EOpConvUint16ToDouble;  break;
-        case EbtFloat16: newOp = EOpConvFloat16ToDouble; break;
-        case EbtInt64:   newOp = EOpConvInt64ToDouble;   break;
-        case EbtUint64:  newOp = EOpConvUint64ToDouble;  break;
-        default:
-            return nullptr;
-        }
-        break;
-#endif
-    case EbtFloat:
-        switch (node->getBasicType()) {
-        case EbtInt:     newOp = EOpConvIntToFloat;     break;
-        case EbtUint:    newOp = EOpConvUintToFloat;    break;
-        case EbtBool:    newOp = EOpConvBoolToFloat;    break;
-#ifndef GLSLANG_WEB
-        case EbtDouble:  newOp = EOpConvDoubleToFloat;  break;
-        case EbtInt8:    newOp = EOpConvInt8ToFloat;    break;
-        case EbtUint8:   newOp = EOpConvUint8ToFloat;   break;
-        case EbtInt16:   newOp = EOpConvInt16ToFloat;   break;
-        case EbtUint16:  newOp = EOpConvUint16ToFloat;  break;
-        case EbtFloat16: newOp = EOpConvFloat16ToFloat; break;
-        case EbtInt64:   newOp = EOpConvInt64ToFloat;   break;
-        case EbtUint64:  newOp = EOpConvUint64ToFloat;  break;
-#endif
-        default:
-            return nullptr;
-        }
-        break;
-#ifndef GLSLANG_WEB
-    case EbtFloat16:
-        switch (node->getBasicType()) {
-        case EbtInt8:   newOp = EOpConvInt8ToFloat16;   break;
-        case EbtUint8:  newOp = EOpConvUint8ToFloat16;  break;
-        case EbtInt16:  newOp = EOpConvInt16ToFloat16;  break;
-        case EbtUint16: newOp = EOpConvUint16ToFloat16; break;
-        case EbtInt:    newOp = EOpConvIntToFloat16;    break;
-        case EbtUint:   newOp = EOpConvUintToFloat16;   break;
-        case EbtBool:   newOp = EOpConvBoolToFloat16;   break;
-        case EbtFloat:  newOp = EOpConvFloatToFloat16;  break;
-        case EbtDouble: newOp = EOpConvDoubleToFloat16; break;
-        case EbtInt64:  newOp = EOpConvInt64ToFloat16;  break;
-        case EbtUint64: newOp = EOpConvUint64ToFloat16; break;
-        default:
-            return nullptr;
-        }
-        break;
-#endif
-    case EbtBool:
-        switch (node->getBasicType()) {
-        case EbtInt:     newOp = EOpConvIntToBool;     break;
-        case EbtUint:    newOp = EOpConvUintToBool;    break;
-        case EbtFloat:   newOp = EOpConvFloatToBool;   break;
-#ifndef GLSLANG_WEB
-        case EbtDouble:  newOp = EOpConvDoubleToBool;  break;
-        case EbtInt8:    newOp = EOpConvInt8ToBool;    break;
-        case EbtUint8:   newOp = EOpConvUint8ToBool;   break;
-        case EbtInt16:   newOp = EOpConvInt16ToBool;   break;
-        case EbtUint16:  newOp = EOpConvUint16ToBool;  break;
-        case EbtFloat16: newOp = EOpConvFloat16ToBool; break;
-        case EbtInt64:   newOp = EOpConvInt64ToBool;   break;
-        case EbtUint64:  newOp = EOpConvUint64ToBool;  break;
-#endif
-        default:
-            return nullptr;
-        }
-        break;
-#ifndef GLSLANG_WEB
-    case EbtInt8:
-        switch (node->getBasicType()) {
-        case EbtUint8:   newOp = EOpConvUint8ToInt8;   break;
-        case EbtInt16:   newOp = EOpConvInt16ToInt8;   break;
-        case EbtUint16:  newOp = EOpConvUint16ToInt8;  break;
-        case EbtInt:     newOp = EOpConvIntToInt8;     break;
-        case EbtUint:    newOp = EOpConvUintToInt8;    break;
-        case EbtInt64:   newOp = EOpConvInt64ToInt8;   break;
-        case EbtUint64:  newOp = EOpConvUint64ToInt8;  break;
-        case EbtBool:    newOp = EOpConvBoolToInt8;    break;
-        case EbtFloat:   newOp = EOpConvFloatToInt8;   break;
-        case EbtDouble:  newOp = EOpConvDoubleToInt8;  break;
-        case EbtFloat16: newOp = EOpConvFloat16ToInt8; break;
-        default:
-            return nullptr;
-        }
-        break;
-    case EbtUint8:
-        switch (node->getBasicType()) {
-        case EbtInt8:    newOp = EOpConvInt8ToUint8;    break;
-        case EbtInt16:   newOp = EOpConvInt16ToUint8;   break;
-        case EbtUint16:  newOp = EOpConvUint16ToUint8;  break;
-        case EbtInt:     newOp = EOpConvIntToUint8;     break;
-        case EbtUint:    newOp = EOpConvUintToUint8;    break;
-        case EbtInt64:   newOp = EOpConvInt64ToUint8;   break;
-        case EbtUint64:  newOp = EOpConvUint64ToUint8;  break;
-        case EbtBool:    newOp = EOpConvBoolToUint8;    break;
-        case EbtFloat:   newOp = EOpConvFloatToUint8;   break;
-        case EbtDouble:  newOp = EOpConvDoubleToUint8;  break;
-        case EbtFloat16: newOp = EOpConvFloat16ToUint8; break;
-        default:
-            return nullptr;
-        }
-        break;
-
-    case EbtInt16:
-        switch (node->getBasicType()) {
-        case EbtUint8:   newOp = EOpConvUint8ToInt16;   break;
-        case EbtInt8:    newOp = EOpConvInt8ToInt16;    break;
-        case EbtUint16:  newOp = EOpConvUint16ToInt16;  break;
-        case EbtInt:     newOp = EOpConvIntToInt16;     break;
-        case EbtUint:    newOp = EOpConvUintToInt16;    break;
-        case EbtInt64:   newOp = EOpConvInt64ToInt16;   break;
-        case EbtUint64:  newOp = EOpConvUint64ToInt16;  break;
-        case EbtBool:    newOp = EOpConvBoolToInt16;    break;
-        case EbtFloat:   newOp = EOpConvFloatToInt16;   break;
-        case EbtDouble:  newOp = EOpConvDoubleToInt16;  break;
-        case EbtFloat16: newOp = EOpConvFloat16ToInt16; break;
-        default:
-            return nullptr;
-        }
-        break;
-    case EbtUint16:
-        switch (node->getBasicType()) {
-        case EbtInt8:    newOp = EOpConvInt8ToUint16;    break;
-        case EbtUint8:   newOp = EOpConvUint8ToUint16;   break;
-        case EbtInt16:   newOp = EOpConvInt16ToUint16;   break;
-        case EbtInt:     newOp = EOpConvIntToUint16;     break;
-        case EbtUint:    newOp = EOpConvUintToUint16;    break;
-        case EbtInt64:   newOp = EOpConvInt64ToUint16;   break;
-        case EbtUint64:  newOp = EOpConvUint64ToUint16;  break;
-        case EbtBool:    newOp = EOpConvBoolToUint16;    break;
-        case EbtFloat:   newOp = EOpConvFloatToUint16;   break;
-        case EbtDouble:  newOp = EOpConvDoubleToUint16;  break;
-        case EbtFloat16: newOp = EOpConvFloat16ToUint16; break;
-        default:
-            return nullptr;
-        }
-        break;
-#endif
-
-    case EbtInt:
-        switch (node->getBasicType()) {
-        case EbtUint:    newOp = EOpConvUintToInt;    break;
-        case EbtBool:    newOp = EOpConvBoolToInt;    break;
-        case EbtFloat:   newOp = EOpConvFloatToInt;   break;
-#ifndef GLSLANG_WEB
-        case EbtInt8:    newOp = EOpConvInt8ToInt;    break;
-        case EbtUint8:   newOp = EOpConvUint8ToInt;   break;
-        case EbtInt16:   newOp = EOpConvInt16ToInt;   break;
-        case EbtUint16:  newOp = EOpConvUint16ToInt;  break;
-        case EbtDouble:  newOp = EOpConvDoubleToInt;  break;
-        case EbtFloat16: newOp = EOpConvFloat16ToInt; break;
-        case EbtInt64:   newOp = EOpConvInt64ToInt;   break;
-        case EbtUint64:  newOp = EOpConvUint64ToInt;  break;
-#endif
-        default:
-            return nullptr;
-        }
-        break;
-    case EbtUint:
-        switch (node->getBasicType()) {
-        case EbtInt:     newOp = EOpConvIntToUint;     break;
-        case EbtBool:    newOp = EOpConvBoolToUint;    break;
-        case EbtFloat:   newOp = EOpConvFloatToUint;   break;
-#ifndef GLSLANG_WEB
-        case EbtInt8:    newOp = EOpConvInt8ToUint;    break;
-        case EbtUint8:   newOp = EOpConvUint8ToUint;   break;
-        case EbtInt16:   newOp = EOpConvInt16ToUint;   break;
-        case EbtUint16:  newOp = EOpConvUint16ToUint;  break;
-        case EbtDouble:  newOp = EOpConvDoubleToUint;  break;
-        case EbtFloat16: newOp = EOpConvFloat16ToUint; break;
-        case EbtInt64:   newOp = EOpConvInt64ToUint;   break;
-        case EbtUint64:  newOp = EOpConvUint64ToUint;  break;
-#endif
-        default:
-            return nullptr;
-        }
-        break;
-#ifndef GLSLANG_WEB
-    case EbtInt64:
-        switch (node->getBasicType()) {
-        case EbtInt8:    newOp = EOpConvInt8ToInt64;    break;
-        case EbtUint8:   newOp = EOpConvUint8ToInt64;   break;
-        case EbtInt16:   newOp = EOpConvInt16ToInt64;   break;
-        case EbtUint16:  newOp = EOpConvUint16ToInt64;  break;
-        case EbtInt:     newOp = EOpConvIntToInt64;     break;
-        case EbtUint:    newOp = EOpConvUintToInt64;    break;
-        case EbtBool:    newOp = EOpConvBoolToInt64;    break;
-        case EbtFloat:   newOp = EOpConvFloatToInt64;   break;
-        case EbtDouble:  newOp = EOpConvDoubleToInt64;  break;
-        case EbtFloat16: newOp = EOpConvFloat16ToInt64; break;
-        case EbtUint64:  newOp = EOpConvUint64ToInt64;  break;
-        default:
-            return nullptr;
-        }
-        break;
-    case EbtUint64:
-        switch (node->getBasicType()) {
-        case EbtInt8:    newOp = EOpConvInt8ToUint64;    break;
-        case EbtUint8:   newOp = EOpConvUint8ToUint64;   break;
-        case EbtInt16:   newOp = EOpConvInt16ToUint64;   break;
-        case EbtUint16:  newOp = EOpConvUint16ToUint64;  break;
-        case EbtInt:     newOp = EOpConvIntToUint64;     break;
-        case EbtUint:    newOp = EOpConvUintToUint64;    break;
-        case EbtBool:    newOp = EOpConvBoolToUint64;    break;
-        case EbtFloat:   newOp = EOpConvFloatToUint64;   break;
-        case EbtDouble:  newOp = EOpConvDoubleToUint64;  break;
-        case EbtFloat16: newOp = EOpConvFloat16ToUint64; break;
-        case EbtInt64:   newOp = EOpConvInt64ToUint64;   break;
-        default:
-            return nullptr;
-        }
-        break;
-#endif
-    default:
+    if (!buildConvertOp(convertTo, node->getBasicType(), newOp)) {
         return nullptr;
     }
 
diff --git a/glslang/MachineIndependent/ParseHelper.cpp b/glslang/MachineIndependent/ParseHelper.cpp
index 08fa610..b381be9 100644
--- a/glslang/MachineIndependent/ParseHelper.cpp
+++ b/glslang/MachineIndependent/ParseHelper.cpp
@@ -2155,9 +2155,12 @@
         break;
 
     case EOpSubgroupBroadcast:
-        // <id> must be an integral constant expression.
-        if ((*argp)[1]->getAsConstantUnion() == nullptr)
-            error(loc, "argument must be compile-time constant", "id", "");
+    case EOpSubgroupQuadBroadcast:
+        if (spvVersion.spv < EShTargetSpv_1_5) {
+            // <id> must be an integral constant expression.
+            if ((*argp)[1]->getAsConstantUnion() == nullptr)
+                error(loc, "argument must be compile-time constant", "id", "");
+        }
         break;
 
     case EOpBarrier:
@@ -2202,6 +2205,30 @@
         // these require SPIR-V 1.3
         if (spvVersion.spv > 0 && spvVersion.spv < EShTargetSpv_1_3)
             error(loc, "requires SPIR-V 1.3", "subgroup op", "");
+
+        // Check that if extended types are being used that the correct extensions are enabled.
+        if (arg0 != nullptr) {
+            const TType& type = arg0->getType();
+            switch (type.getBasicType()) {
+            default:
+                break;
+            case EbtInt8:
+            case EbtUint8:
+                requireExtensions(loc, 1, &E_GL_EXT_shader_subgroup_extended_types_int8, type.getCompleteString().c_str());
+                break;
+            case EbtInt16:
+            case EbtUint16:
+                requireExtensions(loc, 1, &E_GL_EXT_shader_subgroup_extended_types_int16, type.getCompleteString().c_str());
+                break;
+            case EbtInt64:
+            case EbtUint64:
+                requireExtensions(loc, 1, &E_GL_EXT_shader_subgroup_extended_types_int64, type.getCompleteString().c_str());
+                break;
+            case EbtFloat16:
+                requireExtensions(loc, 1, &E_GL_EXT_shader_subgroup_extended_types_float16, type.getCompleteString().c_str());
+                break;
+            }
+        }
     }
 }
 
@@ -2748,10 +2775,25 @@
 //
 bool TParseContext::constructorError(const TSourceLoc& loc, TIntermNode* node, TFunction& function, TOperator op, TType& type)
 {
-    type.shallowCopy(function.getType());
+    // See if the constructor does not establish the main type, only requalifies
+    // it, in which case the type comes from the argument instead of from the
+    // constructor function.
+    switch (op) {
+    case EOpConstructNonuniform:
+        if (node != nullptr && node->getAsTyped() != nullptr) {
+            type.shallowCopy(node->getAsTyped()->getType());
+            type.getQualifier().makeTemporary();
+            type.getQualifier().nonUniform = true;
+        }
+        break;
+    default:
+        type.shallowCopy(function.getType());
+        break;
+    }
 
+    // See if it's a matrix
     bool constructingMatrix = false;
-    switch(op) {
+    switch (op) {
 #ifndef GLSLANG_WEB
     case EOpConstructTextureSampler:
         return constructorTextureSamplerError(loc, function);
@@ -2845,6 +2887,8 @@
             }
         }
     }
+    if (op == EOpConstructNonuniform)
+        constType = false;
 
 #ifndef GLSLANG_WEB
     switch (op) {
@@ -5106,7 +5150,8 @@
         return;
     } else if (id == "location") {
         profileRequires(loc, EEsProfile, 300, nullptr, "location");
-        const char* exts[2] = { E_GL_ARB_separate_shader_objects, E_GL_ARB_explicit_attrib_location };
+        const char* exts[2] = { E_GL_ARB_separate_shader_objects, E_GL_ARB_explicit_attrib_location }; 
+        // GL_ARB_explicit_uniform_location itself requires 330 or GL_ARB_explicit_attrib_location, so it does not need to be added here.
         profileRequires(loc, ~EEsProfile, 330, 2, exts, "location");
         if ((unsigned int)value >= TQualifier::layoutLocationEnd)
             error(loc, "location is too large", id.c_str(), "");
@@ -5360,14 +5405,17 @@
             }
             if (id == "local_size_x") {
                 publicType.shaderQualifiers.localSize[0] = value;
+                publicType.shaderQualifiers.localSizeNotDefault[0] = true;
                 return;
             }
             if (id == "local_size_y") {
                 publicType.shaderQualifiers.localSize[1] = value;
+                publicType.shaderQualifiers.localSizeNotDefault[1] = true;
                 return;
             }
             if (id == "local_size_z") {
                 publicType.shaderQualifiers.localSize[2] = value;
+                publicType.shaderQualifiers.localSizeNotDefault[2] = true;
                 return;
             }
             if (spvVersion.spv != 0) {
@@ -5863,8 +5911,9 @@
         case EvqBuffer:
         {
             const char* feature = "location qualifier on uniform or buffer";
-            requireProfile(loc, EEsProfile | ECoreProfile | ECompatibilityProfile, feature);
-            profileRequires(loc, ECoreProfile | ECompatibilityProfile, 430, nullptr, feature);
+            requireProfile(loc, EEsProfile | ECoreProfile | ECompatibilityProfile | ENoProfile, feature);
+            profileRequires(loc, ~EEsProfile, 330, E_GL_ARB_explicit_attrib_location, feature);
+            profileRequires(loc, ~EEsProfile, 430, E_GL_ARB_explicit_uniform_location, feature);
             profileRequires(loc, EEsProfile, 310, nullptr, feature);
             break;
         }
@@ -6912,6 +6961,12 @@
         break;
 
     case EOpConstructUVec2:
+        if (node->getType().getBasicType() == EbtReference) {
+            requireExtensions(loc, 1, &E_GL_EXT_buffer_reference_uvec2, "reference conversion to uvec2");
+            TIntermTyped* newNode = intermediate.addBuiltInFunctionCall(node->getLoc(), EOpConvPtrToUvec2, true, node,
+                type);
+            return newNode;
+        }
     case EOpConstructUVec3:
     case EOpConstructUVec4:
     case EOpConstructUint:
@@ -7058,8 +7113,7 @@
 
     case EOpConstructNonuniform:
         // Make a nonuniform copy of node
-        newNode = intermediate.addBuiltInFunctionCall(node->getLoc(), EOpCopyObject, true, node, node->getType());
-        newNode->getWritableType().getQualifier().nonUniform = true;
+        newNode = intermediate.addBuiltInFunctionCall(node->getLoc(), EOpCopyObject, true, node, type);
         return newNode;
 
     case EOpConstructReference:
@@ -7069,7 +7123,15 @@
             return newNode;
         // construct reference from uint64
         } else if (node->getType().isScalar() && node->getType().getBasicType() == EbtUint64) {
-            TIntermTyped* newNode = intermediate.addBuiltInFunctionCall(node->getLoc(), EOpConvUint64ToPtr, true, node, type);
+            TIntermTyped* newNode = intermediate.addBuiltInFunctionCall(node->getLoc(), EOpConvUint64ToPtr, true, node,
+                type);
+            return newNode;
+        // construct reference from uvec2
+        } else if (node->getType().isVector() && node->getType().getBasicType() == EbtUint &&
+                   node->getVectorSize() == 2) {
+            requireExtensions(loc, 1, &E_GL_EXT_buffer_reference_uvec2, "uvec2 conversion to reference");
+            TIntermTyped* newNode = intermediate.addBuiltInFunctionCall(node->getLoc(), EOpConvUvec2ToPtr, true, node,
+                type);
             return newNode;
         } else {
             return nullptr;
@@ -7945,7 +8007,7 @@
             error(loc, "can only apply to 'in'", "point_mode", "");
     }
     for (int i = 0; i < 3; ++i) {
-        if (publicType.shaderQualifiers.localSize[i] > 1) {
+        if (publicType.shaderQualifiers.localSizeNotDefault[i]) {
             if (publicType.qualifier.storage == EvqVaryingIn) {
                 if (! intermediate.setLocalSize(i, publicType.shaderQualifiers.localSize[i]))
                     error(loc, "cannot change previously set size", "local_size", "");
diff --git a/glslang/MachineIndependent/ParseHelper.h b/glslang/MachineIndependent/ParseHelper.h
index 5cee05e..39363f1 100644
--- a/glslang/MachineIndependent/ParseHelper.h
+++ b/glslang/MachineIndependent/ParseHelper.h
@@ -283,7 +283,7 @@
                   const TString* entryPoint = nullptr);
     virtual ~TParseContext();
 
-    bool obeyPrecisionQualifiers() const { return precisionManager.respectingPrecisionQualifiers(); };
+    bool obeyPrecisionQualifiers() const { return precisionManager.respectingPrecisionQualifiers(); }
     void setPrecisionDefaults();
 
     void setLimits(const TBuiltInResource&) override;
diff --git a/glslang/MachineIndependent/ShaderLang.cpp b/glslang/MachineIndependent/ShaderLang.cpp
old mode 100755
new mode 100644
index f63305e..9b3cdc6
--- a/glslang/MachineIndependent/ShaderLang.cpp
+++ b/glslang/MachineIndependent/ShaderLang.cpp
@@ -2048,6 +2048,9 @@
 
 unsigned TProgram::getLocalSize(int dim) const                        { return reflection->getLocalSize(dim); }
 int TProgram::getReflectionIndex(const char* name) const              { return reflection->getIndex(name); }
+int TProgram::getReflectionPipeIOIndex(const char* name, const bool inOrOut) const
+                                                                      { return reflection->getPipeIOIndex(name, inOrOut); }
+
 int TProgram::getNumUniformVariables() const                          { return reflection->getNumUniforms(); }
 const TObjectReflection& TProgram::getUniform(int index) const        { return reflection->getUniform(index); }
 int TProgram::getNumUniformBlocks() const                             { return reflection->getNumUniformBlocks(); }
diff --git a/glslang/MachineIndependent/SymbolTable.cpp b/glslang/MachineIndependent/SymbolTable.cpp
old mode 100755
new mode 100644
diff --git a/glslang/MachineIndependent/SymbolTable.h b/glslang/MachineIndependent/SymbolTable.h
old mode 100755
new mode 100644
diff --git a/glslang/MachineIndependent/Versions.cpp b/glslang/MachineIndependent/Versions.cpp
index eb1314a..bccf6a5 100644
--- a/glslang/MachineIndependent/Versions.cpp
+++ b/glslang/MachineIndependent/Versions.cpp
@@ -174,6 +174,7 @@
     extensionBehavior[E_GL_ARB_texture_cube_map_array]       = EBhDisable;
     extensionBehavior[E_GL_ARB_shader_texture_lod]           = EBhDisable;
     extensionBehavior[E_GL_ARB_explicit_attrib_location]     = EBhDisable;
+    extensionBehavior[E_GL_ARB_explicit_uniform_location]    = EBhDisable;
     extensionBehavior[E_GL_ARB_shader_image_load_store]      = EBhDisable;
     extensionBehavior[E_GL_ARB_shader_atomic_counters]       = EBhDisable;
     extensionBehavior[E_GL_ARB_shader_draw_parameters]       = EBhDisable;
@@ -214,6 +215,7 @@
     extensionBehavior[E_GL_EXT_fragment_invocation_density]             = EBhDisable;
     extensionBehavior[E_GL_EXT_buffer_reference]                        = EBhDisable;
     extensionBehavior[E_GL_EXT_buffer_reference2]                       = EBhDisable;
+    extensionBehavior[E_GL_EXT_buffer_reference_uvec2]                  = EBhDisable;
     extensionBehavior[E_GL_EXT_demote_to_helper_invocation]             = EBhDisable;
 
     extensionBehavior[E_GL_EXT_shader_16bit_storage]                    = EBhDisable;
@@ -300,6 +302,12 @@
     extensionBehavior[E_GL_EXT_shader_explicit_arithmetic_types_float16] = EBhDisable;
     extensionBehavior[E_GL_EXT_shader_explicit_arithmetic_types_float32] = EBhDisable;
     extensionBehavior[E_GL_EXT_shader_explicit_arithmetic_types_float64] = EBhDisable;
+
+    // subgroup extended types
+    extensionBehavior[E_GL_EXT_shader_subgroup_extended_types_int8]    = EBhDisable;
+    extensionBehavior[E_GL_EXT_shader_subgroup_extended_types_int16]   = EBhDisable;
+    extensionBehavior[E_GL_EXT_shader_subgroup_extended_types_int64]   = EBhDisable;
+    extensionBehavior[E_GL_EXT_shader_subgroup_extended_types_float16] = EBhDisable;
 }
 #endif // GLSLANG_WEB
 
@@ -371,6 +379,7 @@
             "#define GL_ARB_texture_cube_map_array 1\n"
             "#define GL_ARB_shader_texture_lod 1\n"
             "#define GL_ARB_explicit_attrib_location 1\n"
+            "#define GL_ARB_explicit_uniform_location 1\n"
             "#define GL_ARB_shader_image_load_store 1\n"
             "#define GL_ARB_shader_atomic_counters 1\n"
             "#define GL_ARB_shader_draw_parameters 1\n"
@@ -398,6 +407,7 @@
             "#define GL_EXT_fragment_invocation_density 1\n"
             "#define GL_EXT_buffer_reference 1\n"
             "#define GL_EXT_buffer_reference2 1\n"
+            "#define GL_EXT_buffer_reference_uvec2 1\n"
             "#define GL_EXT_demote_to_helper_invocation 1\n"
 
             // GL_KHR_shader_subgroup
@@ -447,6 +457,11 @@
             "#define GL_EXT_shader_explicit_arithmetic_types_float16 1\n"
             "#define GL_EXT_shader_explicit_arithmetic_types_float32 1\n"
             "#define GL_EXT_shader_explicit_arithmetic_types_float64 1\n"
+
+            "#define GL_EXT_shader_subgroup_extended_types_int8 1\n"
+            "#define GL_EXT_shader_subgroup_extended_types_int16 1\n"
+            "#define GL_EXT_shader_subgroup_extended_types_int64 1\n"
+            "#define GL_EXT_shader_subgroup_extended_types_float16 1\n"
             ;
 
         if (version >= 150) {
@@ -822,10 +837,20 @@
         updateExtensionBehavior(line, "GL_KHR_shader_subgroup_basic", behaviorString);
     else if (strcmp(extension, "GL_NV_shader_subgroup_partitioned") == 0)
         updateExtensionBehavior(line, "GL_KHR_shader_subgroup_basic", behaviorString);
-    else if (strcmp(extension, "GL_EXT_buffer_reference2") == 0)
+    else if (strcmp(extension, "GL_EXT_buffer_reference2") == 0 ||
+             strcmp(extension, "GL_EXT_buffer_reference_uvec2") == 0)
         updateExtensionBehavior(line, "GL_EXT_buffer_reference", behaviorString);
     else if (strcmp(extension, "GL_NV_integer_cooperative_matrix") == 0)
         updateExtensionBehavior(line, "GL_NV_cooperative_matrix", behaviorString);
+    // subgroup extended types to explicit types
+    else if (strcmp(extension, "GL_EXT_shader_subgroup_extended_types_int8") == 0)
+        updateExtensionBehavior(line, "GL_EXT_shader_explicit_arithmetic_types_int8", behaviorString);
+    else if (strcmp(extension, "GL_EXT_shader_subgroup_extended_types_int16") == 0)
+        updateExtensionBehavior(line, "GL_EXT_shader_explicit_arithmetic_types_int16", behaviorString);
+    else if (strcmp(extension, "GL_EXT_shader_subgroup_extended_types_int64") == 0)
+        updateExtensionBehavior(line, "GL_EXT_shader_explicit_arithmetic_types_int64", behaviorString);
+    else if (strcmp(extension, "GL_EXT_shader_subgroup_extended_types_float16") == 0)
+        updateExtensionBehavior(line, "GL_EXT_shader_explicit_arithmetic_types_float16", behaviorString);
 }
 
 void TParseVersions::updateExtensionBehavior(const char* extension, TExtensionBehavior behavior)
diff --git a/glslang/MachineIndependent/Versions.h b/glslang/MachineIndependent/Versions.h
index 9535c89..84fafd7 100644
--- a/glslang/MachineIndependent/Versions.h
+++ b/glslang/MachineIndependent/Versions.h
@@ -126,6 +126,7 @@
 const char* const E_GL_ARB_texture_cube_map_array       = "GL_ARB_texture_cube_map_array";
 const char* const E_GL_ARB_shader_texture_lod           = "GL_ARB_shader_texture_lod";
 const char* const E_GL_ARB_explicit_attrib_location     = "GL_ARB_explicit_attrib_location";
+const char* const E_GL_ARB_explicit_uniform_location    = "GL_ARB_explicit_uniform_location";
 const char* const E_GL_ARB_shader_image_load_store      = "GL_ARB_shader_image_load_store";
 const char* const E_GL_ARB_shader_atomic_counters       = "GL_ARB_shader_atomic_counters";
 const char* const E_GL_ARB_shader_draw_parameters       = "GL_ARB_shader_draw_parameters";
@@ -174,6 +175,7 @@
 const char* const E_GL_EXT_fragment_invocation_density      = "GL_EXT_fragment_invocation_density";
 const char* const E_GL_EXT_buffer_reference                 = "GL_EXT_buffer_reference";
 const char* const E_GL_EXT_buffer_reference2                = "GL_EXT_buffer_reference2";
+const char* const E_GL_EXT_buffer_reference_uvec2           = "GL_EXT_buffer_reference_uvec2";
 const char* const E_GL_EXT_demote_to_helper_invocation      = "GL_EXT_demote_to_helper_invocation";
 const char* const E_GL_EXT_shader_realtime_clock            = "GL_EXT_shader_realtime_clock";
 
@@ -257,7 +259,7 @@
 const char* const E_GL_OES_texture_buffer                       = "GL_OES_texture_buffer";
 const char* const E_GL_OES_texture_cube_map_array               = "GL_OES_texture_cube_map_array";
 
-// KHX
+// EXT
 const char* const E_GL_EXT_shader_explicit_arithmetic_types          = "GL_EXT_shader_explicit_arithmetic_types";
 const char* const E_GL_EXT_shader_explicit_arithmetic_types_int8     = "GL_EXT_shader_explicit_arithmetic_types_int8";
 const char* const E_GL_EXT_shader_explicit_arithmetic_types_int16    = "GL_EXT_shader_explicit_arithmetic_types_int16";
@@ -267,6 +269,11 @@
 const char* const E_GL_EXT_shader_explicit_arithmetic_types_float32  = "GL_EXT_shader_explicit_arithmetic_types_float32";
 const char* const E_GL_EXT_shader_explicit_arithmetic_types_float64  = "GL_EXT_shader_explicit_arithmetic_types_float64";
 
+const char* const E_GL_EXT_shader_subgroup_extended_types_int8    = "GL_EXT_shader_subgroup_extended_types_int8";
+const char* const E_GL_EXT_shader_subgroup_extended_types_int16   = "GL_EXT_shader_subgroup_extended_types_int16";
+const char* const E_GL_EXT_shader_subgroup_extended_types_int64   = "GL_EXT_shader_subgroup_extended_types_int64";
+const char* const E_GL_EXT_shader_subgroup_extended_types_float16 = "GL_EXT_shader_subgroup_extended_types_float16";
+
 // Arrays of extensions for the above AEP duplications
 
 const char* const AEP_geometry_shader[] = { E_GL_EXT_geometry_shader, E_GL_OES_geometry_shader };
diff --git a/glslang/MachineIndependent/attribute.h b/glslang/MachineIndependent/attribute.h
index 844ce45..38a943d 100644
--- a/glslang/MachineIndependent/attribute.h
+++ b/glslang/MachineIndependent/attribute.h
@@ -76,7 +76,49 @@
         EatMaxIterations,
         EatIterationMultiple,
         EatPeelCount,
-        EatPartialCount
+        EatPartialCount,
+        EatFormatRgba32f,
+        EatFormatRgba16f,
+        EatFormatR32f,
+        EatFormatRgba8,
+        EatFormatRgba8Snorm,
+        EatFormatRg32f,
+        EatFormatRg16f,
+        EatFormatR11fG11fB10f,
+        EatFormatR16f,
+        EatFormatRgba16,
+        EatFormatRgb10A2,
+        EatFormatRg16,
+        EatFormatRg8,
+        EatFormatR16,
+        EatFormatR8,
+        EatFormatRgba16Snorm,
+        EatFormatRg16Snorm,
+        EatFormatRg8Snorm,
+        EatFormatR16Snorm,
+        EatFormatR8Snorm,
+        EatFormatRgba32i,
+        EatFormatRgba16i,
+        EatFormatRgba8i,
+        EatFormatR32i,
+        EatFormatRg32i,
+        EatFormatRg16i,
+        EatFormatRg8i,
+        EatFormatR16i,
+        EatFormatR8i,
+        EatFormatRgba32ui,
+        EatFormatRgba16ui,
+        EatFormatRgba8ui,
+        EatFormatR32ui,
+        EatFormatRgb10a2ui,
+        EatFormatRg32ui,
+        EatFormatRg16ui,
+        EatFormatRg8ui,
+        EatFormatR16ui,
+        EatFormatR8ui,
+        EatFormatUnknown,
+        EatNonWritable,
+        EatNonReadable
     };
 
     class TIntermAggregate;
diff --git a/glslang/MachineIndependent/iomapper.h b/glslang/MachineIndependent/iomapper.h
index 01afc5a..684e88d 100644
--- a/glslang/MachineIndependent/iomapper.h
+++ b/glslang/MachineIndependent/iomapper.h
@@ -114,7 +114,7 @@
     bool doAutoLocationMapping() const;
     TSlotSet::iterator findSlot(int set, int slot);
     bool checkEmpty(int set, int slot);
-    bool validateInOut(EShLanguage /*stage*/, TVarEntryInfo& /*ent*/) override { return true; };
+    bool validateInOut(EShLanguage /*stage*/, TVarEntryInfo& /*ent*/) override { return true; }
     int reserveSlot(int set, int slot, int size = 1);
     int getFreeSlot(int set, int base, int size = 1);
     int resolveSet(EShLanguage /*stage*/, TVarEntryInfo& ent) override;
@@ -125,7 +125,7 @@
     void addStage(EShLanguage stage) override {
         if (stage < EShLangCount)
             stageMask[stage] = true;
-    };
+    }
     uint32_t computeTypeLocationSize(const TType& type, EShLanguage stage);
 
     TSlotSetMap slots;
@@ -191,7 +191,7 @@
     typedef std::map<TString, int> TVarSlotMap;  // <resourceName, location/binding>
     typedef std::map<int, TVarSlotMap> TSlotMap; // <resourceKey, TVarSlotMap>
     TDefaultGlslIoResolver(const TIntermediate& intermediate);
-    bool validateBinding(EShLanguage /*stage*/, TVarEntryInfo& /*ent*/) override { return true; };
+    bool validateBinding(EShLanguage /*stage*/, TVarEntryInfo& /*ent*/) override { return true; }
     TResourceType getResourceType(const glslang::TType& type) override;
     int resolveInOutLocation(EShLanguage stage, TVarEntryInfo& ent) override;
     int resolveUniformLocation(EShLanguage /*stage*/, TVarEntryInfo& ent) override;
@@ -209,7 +209,7 @@
     int buildStorageKey(EShLanguage stage, TStorageQualifier type) {
         assert(static_cast<uint32_t>(stage) <= 0x0000ffff && static_cast<uint32_t>(type) <= 0x0000ffff);
         return (stage << 16) | type;
-    };
+    }
 
 protected:
     // Use for mark pre stage, to get more interface symbol information.
@@ -242,7 +242,7 @@
         const_cast<TString&>(first) = _Right.first;
         second = _Right.second;
         return (*this);
-    };
+    }
 };
 typedef std::vector<TVarLivePair> TVarLiveVector;
 
@@ -253,7 +253,7 @@
     virtual ~TIoMapper() {}
     // grow the reflection stage by stage
     bool virtual addStage(EShLanguage, TIntermediate&, TInfoSink&, TIoMapResolver*);
-    bool virtual doMap(TIoMapResolver*, TInfoSink&) { return true; };
+    bool virtual doMap(TIoMapResolver*, TInfoSink&) { return true; }
 };
 
 // I/O mapper for OpenGL
diff --git a/glslang/MachineIndependent/localintermediate.h b/glslang/MachineIndependent/localintermediate.h
index 77ed7c3..13b0f6c 100644
--- a/glslang/MachineIndependent/localintermediate.h
+++ b/glslang/MachineIndependent/localintermediate.h
@@ -271,6 +271,9 @@
         localSize[0] = 1;
         localSize[1] = 1;
         localSize[2] = 1;
+        localSizeNotDefault[0] = false;
+        localSizeNotDefault[1] = false;
+        localSizeNotDefault[2] = false;
         localSizeSpecId[0] = TQualifier::layoutNotSet;
         localSizeSpecId[1] = TQualifier::layoutNotSet;
         localSizeSpecId[2] = TQualifier::layoutNotSet;
@@ -308,6 +311,12 @@
         case EShTargetSpv_1_3:
             processes.addProcess("target-env spirv1.3");
             break;
+        case EShTargetSpv_1_4:
+            processes.addProcess("target-env spirv1.4");
+            break;
+        case EShTargetSpv_1_5:
+            processes.addProcess("target-env spirv1.5");
+            break;
         default:
             processes.addProcess("target-env spirvUnknown");
             break;
@@ -648,8 +657,9 @@
 
     bool setLocalSize(int dim, int size)
     {
-        if (localSize[dim] > 1)
+        if (localSizeNotDefault[dim])
             return size == localSize[dim];
+        localSizeNotDefault[dim] = true;
         localSize[dim] = size;
         return true;
     }
@@ -776,6 +786,9 @@
     void merge(TInfoSink&, TIntermediate&);
     void finalCheck(TInfoSink&, bool keepUncalled);
 
+    bool buildConvertOp(TBasicType dst, TBasicType src, TOperator& convertOp) const;
+    TIntermTyped* createConversion(TBasicType convertTo, TIntermTyped* node) const;
+
     void addIoAccessed(const TString& name) { ioAccessed.insert(name); }
     bool inIoAccessed(const TString& name) const { return ioAccessed.find(name) != ioAccessed.end(); }
 
@@ -866,7 +879,6 @@
     bool specConstantPropagates(const TIntermTyped&, const TIntermTyped&);
     void performTextureUpgradeAndSamplerRemovalTransformation(TIntermNode* root);
     bool isConversionAllowed(TOperator op, TIntermTyped* node) const;
-    TIntermTyped* createConversion(TBasicType convertTo, TIntermTyped* node) const;
     std::tuple<TBasicType, TBasicType> getConversionDestinatonType(TBasicType type0, TBasicType type1, TOperator op) const;
 
     // JohnK: I think this function should go away.
@@ -921,6 +933,7 @@
     TInterlockOrdering interlockOrdering;
     bool pointMode;
     int localSize[3];
+    bool localSizeNotDefault[3];
     int localSizeSpecId[3];
     bool earlyFragmentTests;
     bool postDepthCoverage;
diff --git a/glslang/MachineIndependent/parseVersions.h b/glslang/MachineIndependent/parseVersions.h
old mode 100755
new mode 100644
diff --git a/glslang/MachineIndependent/reflection.cpp b/glslang/MachineIndependent/reflection.cpp
index f2be2ff..9f1089d 100644
--- a/glslang/MachineIndependent/reflection.cpp
+++ b/glslang/MachineIndependent/reflection.cpp
@@ -112,6 +112,10 @@
             TReflection::TMapIndexToReflection &ioItems =
                 input ? reflection.indexToPipeInput : reflection.indexToPipeOutput;
 
+
+            TReflection::TNameToIndex &ioMapper =
+                input ? reflection.pipeInNameToIndex : reflection.pipeOutNameToIndex;
+
             if (reflection.options & EShReflectionUnwrapIOBlocks) {
                 bool anonymous = IsAnonymous(name);
 
@@ -129,12 +133,13 @@
                     blowUpIOAggregate(input, baseName, type);
                 }
             } else {
-                TReflection::TNameToIndex::const_iterator it = reflection.nameToIndex.find(name.c_str());
-                if (it == reflection.nameToIndex.end()) {
-                    reflection.nameToIndex[name.c_str()] = (int)ioItems.size();
+                TReflection::TNameToIndex::const_iterator it = ioMapper.find(name.c_str());
+                if (it == ioMapper.end()) {
+                    // separate pipe i/o params from uniforms and blocks
+                    // in is only for input in first stage as out is only for last stage. check traverse in call stack.
+                    ioMapper[name.c_str()] = static_cast<int>(ioItems.size());
                     ioItems.push_back(
                         TObjectReflection(name.c_str(), type, 0, mapToGlType(type), mapToGlArraySize(type), 0));
-
                     EShLanguageMask& stages = ioItems.back().stages;
                     stages = static_cast<EShLanguageMask>(stages | 1 << intermediate.getStage());
                 } else {
diff --git a/glslang/MachineIndependent/reflection.h b/glslang/MachineIndependent/reflection.h
index e3561a9..efdc893 100644
--- a/glslang/MachineIndependent/reflection.h
+++ b/glslang/MachineIndependent/reflection.h
@@ -152,6 +152,20 @@
     // see getIndex(const char*)
     int getIndex(const TString& name) const { return getIndex(name.c_str()); }
 
+
+    // for mapping any name to its index (only pipe input/output names)
+    int getPipeIOIndex(const char* name, const bool inOrOut) const
+    {
+        // inOrOut selects the lookup table: true -> pipe inputs, false -> pipe outputs.
+        const TNameToIndex& table = inOrOut ? pipeInNameToIndex : pipeOutNameToIndex;
+        const TNameToIndex::const_iterator found = table.find(name);
+        // -1 reports that the name is absent from the selected table.
+        return found == table.end() ? -1 : found->second;
+    }
+
+    // see getPipeIOIndex(const char*, const bool)
+    int getPipeIOIndex(const TString& name, const bool inOrOut) const { return getPipeIOIndex(name.c_str(), inOrOut); }
+
     // Thread local size
     unsigned getLocalSize(int dim) const { return dim <= 2 ? localSize[dim] : 0; }
 
@@ -189,6 +203,8 @@
 
     TObjectReflection badReflection; // return for queries of -1 or generally out of range; has expected descriptions with in it for this
     TNameToIndex nameToIndex;        // maps names to indexes; can hold all types of data: uniform/buffer and which function names have been processed
+    TNameToIndex pipeInNameToIndex;  // maps pipe in names to indexes, this is a fix to separate pipe I/O from uniforms and buffers.
+    TNameToIndex pipeOutNameToIndex; // maps pipe out names to indexes, this is a fix to separate pipe I/O from uniforms and buffers.
     TMapIndexToReflection indexToUniform;
     TMapIndexToReflection indexToUniformBlock;
     TMapIndexToReflection indexToBufferVariable;
diff --git a/glslang/OSDependent/Unix/CMakeLists.txt b/glslang/OSDependent/Unix/CMakeLists.txt
index e652f45..91fb45a 100644
--- a/glslang/OSDependent/Unix/CMakeLists.txt
+++ b/glslang/OSDependent/Unix/CMakeLists.txt
@@ -20,6 +20,7 @@
 endif()
 
 if(ENABLE_GLSLANG_INSTALL)
-    install(TARGETS OSDependent
+    install(TARGETS OSDependent EXPORT OSDependentTargets
             ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR})
+    install(EXPORT OSDependentTargets DESTINATION lib/cmake)
 endif(ENABLE_GLSLANG_INSTALL)
diff --git a/glslang/OSDependent/Web/CMakeLists.txt b/glslang/OSDependent/Web/CMakeLists.txt
new file mode 100644
index 0000000..e8238c3
--- /dev/null
+++ b/glslang/OSDependent/Web/CMakeLists.txt
@@ -0,0 +1,24 @@
+# Defines the glslang.js executable; the Emscripten-specific setup is gated on EMSCRIPTEN.
+add_executable(glslang.js "glslang.js.cpp")
+glslang_set_link_args(glslang.js)
+target_link_libraries(glslang.js glslang SPIRV)
+if(EMSCRIPTEN)
+    set_target_properties(glslang.js PROPERTIES
+        OUTPUT_NAME "glslang"
+        SUFFIX ".js")
+    em_link_pre_js(glslang.js "${CMAKE_CURRENT_SOURCE_DIR}/glslang.pre.js")
+
+    target_link_options(glslang.js PRIVATE
+        "SHELL:--bind -s MODULARIZE=1")
+    # Select the Emscripten ENVIRONMENT link setting.
+    if(ENABLE_EMSCRIPTEN_ENVIRONMENT_NODE)
+        target_link_options(glslang.js PRIVATE
+            "SHELL:-s ENVIRONMENT=node -s BINARYEN_ASYNC_COMPILATION=0")
+    else()
+        target_link_options(glslang.js PRIVATE
+            "SHELL:-s ENVIRONMENT=web,worker")
+        # Non-node builds append glslang.after.js to glslang.js in the current binary dir.
+        add_custom_command(TARGET glslang.js POST_BUILD
+            COMMAND cat ${CMAKE_CURRENT_SOURCE_DIR}/glslang.after.js >> ${CMAKE_CURRENT_BINARY_DIR}/glslang.js)
+    endif()
+endif()
diff --git a/glslang/OSDependent/Web/glslang.after.js b/glslang/OSDependent/Web/glslang.after.js
new file mode 100644
index 0000000..c2cfc35
--- /dev/null
+++ b/glslang/OSDependent/Web/glslang.after.js
@@ -0,0 +1,26 @@
+// Default export: a function returning one shared Promise for the initialized module.
+export default (() => {
+    const initialize = () => {
+        return new Promise(resolve => {
+            Module({
+                locateFile() {
+                    const i = import.meta.url.lastIndexOf('/');
+                    return import.meta.url.substring(0, i) + '/glslang.wasm';
+                },
+                onRuntimeInitialized() {
+                    resolve({
+                        compileGLSLZeroCopy: this.compileGLSLZeroCopy,
+                        compileGLSL: this.compileGLSL,
+                    });
+                },
+            });
+        });
+    };
+
+    // Cache the promise so repeated calls reuse the first initialization.
+    let instance;
+    return () => {
+        if (!instance) instance = initialize();
+        return instance;
+    };
+})();
diff --git a/glslang/glslang.js.cpp b/glslang/OSDependent/Web/glslang.js.cpp
similarity index 97%
rename from glslang/glslang.js.cpp
rename to glslang/OSDependent/Web/glslang.js.cpp
index 45b3d3f..6cb93fe 100644
--- a/glslang/glslang.js.cpp
+++ b/glslang/OSDependent/Web/glslang.js.cpp
@@ -35,17 +35,16 @@
 
 #include <cstdio>
 #include <cstdint>
+#include <memory>
 
 #ifdef __EMSCRIPTEN__
 #include <emscripten.h>
-#endif  // __EMSCRIPTEN__
-#include <memory>
+#endif
 
-#include "../SPIRV/GlslangToSpv.h"
-#include "../SPIRV/doc.h"
-#include "./../glslang/Public/ShaderLang.h"
+#include "../../../SPIRV/GlslangToSpv.h"
+#include "../../../glslang/Public/ShaderLang.h"
 
-#ifndef EMSCRIPTEN_KEEPALIVE
+#ifndef __EMSCRIPTEN__
 #define EMSCRIPTEN_KEEPALIVE
 #endif
 
diff --git a/glslang/glslang.pre.js b/glslang/OSDependent/Web/glslang.pre.js
similarity index 100%
rename from glslang/glslang.pre.js
rename to glslang/OSDependent/Web/glslang.pre.js
diff --git a/glslang/OSDependent/Windows/CMakeLists.txt b/glslang/OSDependent/Windows/CMakeLists.txt
index f257418..f6bd4e9 100644
--- a/glslang/OSDependent/Windows/CMakeLists.txt
+++ b/glslang/OSDependent/Windows/CMakeLists.txt
@@ -15,6 +15,7 @@
 endif(WIN32)
 
 if(ENABLE_GLSLANG_INSTALL)
-    install(TARGETS OSDependent
+    install(TARGETS OSDependent EXPORT OSDependentTargets
             ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR})
+    install(EXPORT OSDependentTargets DESTINATION lib/cmake)
 endif(ENABLE_GLSLANG_INSTALL)
diff --git a/glslang/Public/ShaderLang.h b/glslang/Public/ShaderLang.h
index a3103c6..4cc6c2f 100755
--- a/glslang/Public/ShaderLang.h
+++ b/glslang/Public/ShaderLang.h
@@ -156,6 +156,7 @@
     EShTargetSpv_1_2 = (1 << 16) | (2 << 8),          // SPIR-V 1.2
     EShTargetSpv_1_3 = (1 << 16) | (3 << 8),          // SPIR-V 1.3
     EShTargetSpv_1_4 = (1 << 16) | (4 << 8),          // SPIR-V 1.4
+    EShTargetSpv_1_5 = (1 << 16) | (5 << 8),          // SPIR-V 1.5
 } EShTargetLanguageVersion;
 
 struct TInputLanguage {
@@ -486,6 +487,8 @@
         environment.target.version = version;
     }
 
+    void getStrings(const char* const* &s, int& n) { s = strings; n = numStrings; }  // out-params: s receives the `strings` member, n receives `numStrings`
+
 #ifdef ENABLE_HLSL
     void setEnvTargetHlslFunctionality1() { environment.target.hlslFunctionality1 = true; }
     bool getEnvTargetHlslFunctionality1() const { return environment.target.hlslFunctionality1; }
@@ -772,7 +775,7 @@
     TProgram();
     virtual ~TProgram();
     void addShader(TShader* shader) { stages[shader->stage].push_back(shader); }
-
+    std::list<TShader*>& getShaders(EShLanguage stage) { return stages[stage]; }  // non-const reference to the per-stage list that addShader() appends to
     // Link Validation interface
     bool link(EShMessages);
     const char* getInfoLog();
@@ -788,6 +791,7 @@
     bool buildReflection(int opts = EShReflectionDefault);
     unsigned getLocalSize(int dim) const;                  // return dim'th local size
     int getReflectionIndex(const char *name) const;
+    int getReflectionPipeIOIndex(const char* name, const bool inOrOut) const;
     int getNumUniformVariables() const;
     const TObjectReflection& getUniform(int index) const;
     int getNumUniformBlocks() const;
@@ -817,6 +821,9 @@
     // can be used for glGetUniformIndices()
     int getUniformIndex(const char *name) const        { return getReflectionIndex(name); }
 
+    int getPipeIOIndex(const char *name, const bool inOrOut) const
+                                                       { return getReflectionPipeIOIndex(name, inOrOut); }
+
     // can be used for "name" part of glGetActiveUniform()
     const char *getUniformName(int index) const        { return getUniform(index).name.c_str(); }
 
diff --git a/glslang/updateGrammar b/glslang/updateGrammar
index 3779458..9384db9 100755
--- a/glslang/updateGrammar
+++ b/glslang/updateGrammar
@@ -1,4 +1,4 @@
-#!/usr/bin/bash
+#!/bin/bash
 
 if [ "$1" = 'web' ]
 then
diff --git a/gtests/CMakeLists.txt b/gtests/CMakeLists.txt
index f678cb6..77d217f 100644
--- a/gtests/CMakeLists.txt
+++ b/gtests/CMakeLists.txt
@@ -31,8 +31,9 @@
         set_property(TARGET glslangtests PROPERTY FOLDER tests)
         glslang_set_link_args(glslangtests)
         if(ENABLE_GLSLANG_INSTALL)
-            install(TARGETS glslangtests
+            install(TARGETS glslangtests EXPORT glslangtestsTargets
                     RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR})
+            install(EXPORT glslangtestsTargets DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake) # follow the configured libdir instead of a literal "lib"
         endif(ENABLE_GLSLANG_INSTALL)
 
         set(GLSLANG_TEST_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/../Test")
diff --git a/gtests/Hlsl.FromFile.cpp b/gtests/Hlsl.FromFile.cpp
old mode 100644
new mode 100755
index 7738c80..59c687d
--- a/gtests/Hlsl.FromFile.cpp
+++ b/gtests/Hlsl.FromFile.cpp
@@ -230,6 +230,7 @@
         {"hlsl.hull.void.tesc", "main"},
         {"hlsl.hull.ctrlpt-1.tesc", "main"},
         {"hlsl.hull.ctrlpt-2.tesc", "main"},
+        {"hlsl.format.rwtexture.frag", "main"},
         {"hlsl.groupid.comp", "main"},
         {"hlsl.identifier.sample.frag", "main"},
         {"hlsl.if.frag", "PixelShaderFunction"},
diff --git a/gtests/Spv.FromFile.cpp b/gtests/Spv.FromFile.cpp
index d12d39a..9620341 100644
--- a/gtests/Spv.FromFile.cpp
+++ b/gtests/Spv.FromFile.cpp
@@ -284,6 +284,7 @@
         "spv.bufferhandle7.frag",
         "spv.bufferhandle8.frag",
         "spv.bufferhandle9.frag",
+        "spv.bufferhandleUvec2.frag",
         "spv.bufferhandle_Error.frag",
         "spv.builtInXFB.vert",
         "spv.conditionalDemote.frag",
@@ -403,6 +404,7 @@
         "spv.storageBuffer.vert",
         "spv.precise.tese",
         "spv.precise.tesc",
+        "spv.volatileAtomic.comp",
         "spv.vulkan100.subgroupArithmetic.comp",
         "spv.vulkan100.subgroupPartitioned.comp",
         "spv.xfb.vert",
@@ -459,6 +461,22 @@
         "spv.subgroupShuffleRelative.comp",
         "spv.subgroupQuad.comp",
         "spv.subgroupVote.comp",
+        "spv.subgroupExtendedTypesArithmetic.comp",
+        "spv.subgroupExtendedTypesArithmeticNeg.comp",
+        "spv.subgroupExtendedTypesBallot.comp",
+        "spv.subgroupExtendedTypesBallotNeg.comp",
+        "spv.subgroupExtendedTypesClustered.comp",
+        "spv.subgroupExtendedTypesClusteredNeg.comp",
+        "spv.subgroupExtendedTypesPartitioned.comp",
+        "spv.subgroupExtendedTypesPartitionedNeg.comp",
+        "spv.subgroupExtendedTypesShuffle.comp",
+        "spv.subgroupExtendedTypesShuffleNeg.comp",
+        "spv.subgroupExtendedTypesShuffleRelative.comp",
+        "spv.subgroupExtendedTypesShuffleRelativeNeg.comp",
+        "spv.subgroupExtendedTypesQuad.comp",
+        "spv.subgroupExtendedTypesQuadNeg.comp",
+        "spv.subgroupExtendedTypesVote.comp",
+        "spv.subgroupExtendedTypesVoteNeg.comp",
         "spv.vulkan110.storageBuffer.vert",
     })),
     FileNameAsCustomTestSuffix
diff --git a/hlsl/CMakeLists.txt b/hlsl/CMakeLists.txt
index 7436dde..44f9d6a 100644
--- a/hlsl/CMakeLists.txt
+++ b/hlsl/CMakeLists.txt
@@ -33,12 +33,13 @@
 
 if(ENABLE_GLSLANG_INSTALL)
     if(BUILD_SHARED_LIBS)
-        install(TARGETS HLSL
+        install(TARGETS HLSL EXPORT HLSLTargets
                 ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
                 LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
                 RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR})
     else()
-        install(TARGETS HLSL
+        install(TARGETS HLSL EXPORT HLSLTargets
                 ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR})
     endif()
+    install(EXPORT HLSLTargets DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake) # shared by both branches; follows the configured libdir
 endif(ENABLE_GLSLANG_INSTALL)
diff --git a/hlsl/hlslAttributes.cpp b/hlsl/hlslAttributes.cpp
index 261cec3..0cc0d3f 100644
--- a/hlsl/hlslAttributes.cpp
+++ b/hlsl/hlslAttributes.cpp
@@ -58,6 +58,49 @@
                 return EatConstantId;
             else if (name == "push_constant")
                 return EatPushConstant;
+        } else if (nameSpace == "spv") {
+            if (name == "format_rgba32f")       return EatFormatRgba32f;
+            if (name == "format_rgba16f")       return EatFormatRgba16f;
+            if (name == "format_r32f")          return EatFormatR32f;
+            if (name == "format_rgba8")         return EatFormatRgba8;
+            if (name == "format_rgba8snorm")    return EatFormatRgba8Snorm;
+            if (name == "format_rg32f")         return EatFormatRg32f;
+            if (name == "format_rg16f")         return EatFormatRg16f;
+            if (name == "format_r11fg11fb10f")  return EatFormatR11fG11fB10f;
+            if (name == "format_r16f")          return EatFormatR16f;
+            if (name == "format_rgba16")        return EatFormatRgba16;
+            if (name == "format_rgb10a2")       return EatFormatRgb10A2;
+            if (name == "format_rg16")          return EatFormatRg16;
+            if (name == "format_rg8")           return EatFormatRg8;
+            if (name == "format_r16")           return EatFormatR16;
+            if (name == "format_r8")            return EatFormatR8;
+            if (name == "format_rgba16snorm")   return EatFormatRgba16Snorm;
+            if (name == "format_rg16snorm")     return EatFormatRg16Snorm;
+            if (name == "format_rg8snorm")      return EatFormatRg8Snorm;
+            if (name == "format_r16snorm")      return EatFormatR16Snorm;
+            if (name == "format_r8snorm")       return EatFormatR8Snorm;
+            if (name == "format_rgba32i")       return EatFormatRgba32i;
+            if (name == "format_rgba16i")       return EatFormatRgba16i;
+            if (name == "format_rgba8i")        return EatFormatRgba8i;
+            if (name == "format_r32i")          return EatFormatR32i;
+            if (name == "format_rg32i")         return EatFormatRg32i;
+            if (name == "format_rg16i")         return EatFormatRg16i;
+            if (name == "format_rg8i")          return EatFormatRg8i;
+            if (name == "format_r16i")          return EatFormatR16i;
+            if (name == "format_r8i")           return EatFormatR8i;
+            if (name == "format_rgba32ui")      return EatFormatRgba32ui;
+            if (name == "format_rgba16ui")      return EatFormatRgba16ui;
+            if (name == "format_rgba8ui")       return EatFormatRgba8ui;
+            if (name == "format_r32ui")         return EatFormatR32ui;
+            if (name == "format_rgb10a2ui")     return EatFormatRgb10a2ui;
+            if (name == "format_rg32ui")        return EatFormatRg32ui;
+            if (name == "format_rg16ui")        return EatFormatRg16ui;
+            if (name == "format_rg8ui")         return EatFormatRg8ui;
+            if (name == "format_r16ui")         return EatFormatR16ui;
+            if (name == "format_r8ui")          return EatFormatR8ui;
+
+            if (name == "nonwritable")    return EatNonWritable;
+            if (name == "nonreadable")    return EatNonReadable;
         } else if (nameSpace.size() > 0)
             return EatNone;
 
diff --git a/hlsl/hlslGrammar.cpp b/hlsl/hlslGrammar.cpp
old mode 100755
new mode 100644
index 45cf5d5..8ab1a90
--- a/hlsl/hlslGrammar.cpp
+++ b/hlsl/hlslGrammar.cpp
@@ -3221,6 +3221,11 @@
             return false;
         }
 
+        if (arguments == nullptr) {
+            expected("one or more arguments");
+            return false;
+        }
+
         // hook it up
         node = parseContext.handleFunctionCall(arguments->getLoc(), constructorFunction, arguments);
 
diff --git a/hlsl/hlslGrammar.h b/hlsl/hlslGrammar.h
old mode 100755
new mode 100644
diff --git a/hlsl/hlslParseHelper.cpp b/hlsl/hlslParseHelper.cpp
old mode 100644
new mode 100755
index 1549e3a..be665ac
--- a/hlsl/hlslParseHelper.cpp
+++ b/hlsl/hlslParseHelper.cpp
@@ -1950,6 +1950,52 @@
                 setSpecConstantId(loc, type.getQualifier(), value);
             }
             break;
+
+        // image formats
+        case EatFormatRgba32f:      type.getQualifier().layoutFormat = ElfRgba32f;      break;
+        case EatFormatRgba16f:      type.getQualifier().layoutFormat = ElfRgba16f;      break;
+        case EatFormatR32f:         type.getQualifier().layoutFormat = ElfR32f;         break;
+        case EatFormatRgba8:        type.getQualifier().layoutFormat = ElfRgba8;        break;
+        case EatFormatRgba8Snorm:   type.getQualifier().layoutFormat = ElfRgba8Snorm;   break;
+        case EatFormatRg32f:        type.getQualifier().layoutFormat = ElfRg32f;        break;
+        case EatFormatRg16f:        type.getQualifier().layoutFormat = ElfRg16f;        break;
+        case EatFormatR11fG11fB10f: type.getQualifier().layoutFormat = ElfR11fG11fB10f; break;
+        case EatFormatR16f:         type.getQualifier().layoutFormat = ElfR16f;         break;
+        case EatFormatRgba16:       type.getQualifier().layoutFormat = ElfRgba16;       break;
+        case EatFormatRgb10A2:      type.getQualifier().layoutFormat = ElfRgb10A2;      break;
+        case EatFormatRg16:         type.getQualifier().layoutFormat = ElfRg16;         break;
+        case EatFormatRg8:          type.getQualifier().layoutFormat = ElfRg8;          break;
+        case EatFormatR16:          type.getQualifier().layoutFormat = ElfR16;          break;
+        case EatFormatR8:           type.getQualifier().layoutFormat = ElfR8;           break;
+        case EatFormatRgba16Snorm:  type.getQualifier().layoutFormat = ElfRgba16Snorm;  break;
+        case EatFormatRg16Snorm:    type.getQualifier().layoutFormat = ElfRg16Snorm;    break;
+        case EatFormatRg8Snorm:     type.getQualifier().layoutFormat = ElfRg8Snorm;     break;
+        case EatFormatR16Snorm:     type.getQualifier().layoutFormat = ElfR16Snorm;     break;
+        case EatFormatR8Snorm:      type.getQualifier().layoutFormat = ElfR8Snorm;      break;
+        case EatFormatRgba32i:      type.getQualifier().layoutFormat = ElfRgba32i;      break;
+        case EatFormatRgba16i:      type.getQualifier().layoutFormat = ElfRgba16i;      break;
+        case EatFormatRgba8i:       type.getQualifier().layoutFormat = ElfRgba8i;       break;
+        case EatFormatR32i:         type.getQualifier().layoutFormat = ElfR32i;         break;
+        case EatFormatRg32i:        type.getQualifier().layoutFormat = ElfRg32i;        break;
+        case EatFormatRg16i:        type.getQualifier().layoutFormat = ElfRg16i;        break;
+        case EatFormatRg8i:         type.getQualifier().layoutFormat = ElfRg8i;         break;
+        case EatFormatR16i:         type.getQualifier().layoutFormat = ElfR16i;         break;
+        case EatFormatR8i:          type.getQualifier().layoutFormat = ElfR8i;          break;
+        case EatFormatRgba32ui:     type.getQualifier().layoutFormat = ElfRgba32ui;     break;
+        case EatFormatRgba16ui:     type.getQualifier().layoutFormat = ElfRgba16ui;     break;
+        case EatFormatRgba8ui:      type.getQualifier().layoutFormat = ElfRgba8ui;      break;
+        case EatFormatR32ui:        type.getQualifier().layoutFormat = ElfR32ui;        break;
+        case EatFormatRgb10a2ui:    type.getQualifier().layoutFormat = ElfRgb10a2ui;    break;
+        case EatFormatRg32ui:       type.getQualifier().layoutFormat = ElfRg32ui;       break;
+        case EatFormatRg16ui:       type.getQualifier().layoutFormat = ElfRg16ui;       break;
+        case EatFormatRg8ui:        type.getQualifier().layoutFormat = ElfRg8ui;        break;
+        case EatFormatR16ui:        type.getQualifier().layoutFormat = ElfR16ui;        break;
+        case EatFormatR8ui:         type.getQualifier().layoutFormat = ElfR8ui;         break;
+        case EatFormatUnknown:      type.getQualifier().layoutFormat = ElfNone;         break;
+
+        case EatNonWritable:  type.getQualifier().readonly = true;   break;
+        case EatNonReadable:  type.getQualifier().writeonly = true;  break;
+
         default:
             if (! allowEntry)
                 warn(loc, "attribute does not apply to a type", "", "");
diff --git a/hlsl/hlslParseHelper.h b/hlsl/hlslParseHelper.h
old mode 100755
new mode 100644
diff --git a/known_good.json b/known_good.json
index 9946335..3d8994a 100644
--- a/known_good.json
+++ b/known_good.json
@@ -5,14 +5,14 @@
       "site" : "github",
       "subrepo" : "KhronosGroup/SPIRV-Tools",
       "subdir" : "External/spirv-tools",
-      "commit" : "9b3cc3e05337358d0bd9fec1b7a51e3cbf55312b"
+      "commit" : "c3ca04741945c332ddbeb7d7125dbae2809b7195"
     },
     {
       "name" : "spirv-tools/external/spirv-headers",
       "site" : "github",
       "subrepo" : "KhronosGroup/SPIRV-Headers",
       "subdir" : "External/spirv-tools/external/spirv-headers",
-      "commit" : "38cafab379e5d16137cb97a485b9385191039b92"
+      "commit" : "b252a50953ac4375cb1864e94f4b0234db9d215d"
     }
   ]
 }
diff --git a/kokoro/linux-clang-release-bazel/build.sh b/kokoro/linux-clang-release-bazel/build.sh
new file mode 100644
index 0000000..89c23fe
--- /dev/null
+++ b/kokoro/linux-clang-release-bazel/build.sh
@@ -0,0 +1,40 @@
+#!/bin/bash
+# Copyright (c) 2019 Google LLC.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# Linux Build Script: downloads Bazel 0.29.1, then builds and tests every target.
+
+# Fail on any error.
+set -e
+# Display commands being run.
+set -x
+
+export CC=clang      # exported so child processes (bazel) actually see the compiler choice
+export CXX=clang++
+SRC="$PWD/github/glslang"
+cd "$SRC"
+
+# Bazel limitation: No 'External' directory is allowed!!
+mv External third_party
+
+gsutil cp gs://bazel/0.29.1/release/bazel-0.29.1-linux-x86_64 .
+chmod +x bazel-0.29.1-linux-x86_64
+
+echo "$(date): Build everything..."
+./bazel-0.29.1-linux-x86_64 build :all
+echo "$(date): Build completed."
+
+echo "$(date): Starting bazel test..."
+./bazel-0.29.1-linux-x86_64 test :all
+echo "$(date): Bazel test completed."
diff --git a/kokoro/linux-clang-release-bazel/continuous.cfg b/kokoro/linux-clang-release-bazel/continuous.cfg
new file mode 100644
index 0000000..054f4f6
--- /dev/null
+++ b/kokoro/linux-clang-release-bazel/continuous.cfg
@@ -0,0 +1,16 @@
+# Copyright (c) 2019 Google LLC.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Continuous build configuration.
+build_file: "glslang/kokoro/linux-clang-release-bazel/build.sh"
diff --git a/kokoro/linux-clang-release-bazel/presubmit.cfg b/kokoro/linux-clang-release-bazel/presubmit.cfg
new file mode 100644
index 0000000..572898c
--- /dev/null
+++ b/kokoro/linux-clang-release-bazel/presubmit.cfg
@@ -0,0 +1,16 @@
+# Copyright (c) 2019 Google LLC.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Presubmit build configuration.
+build_file: "glslang/kokoro/linux-clang-release-bazel/build.sh"
diff --git a/kokoro/macos-clang-release-bazel/build.sh b/kokoro/macos-clang-release-bazel/build.sh
new file mode 100644
index 0000000..a899c13
--- /dev/null
+++ b/kokoro/macos-clang-release-bazel/build.sh
@@ -0,0 +1,40 @@
+#!/bin/bash
+# Copyright (c) 2019 Google LLC.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# macOS Build Script: downloads Bazel 0.29.1, then builds and tests every target.
+
+# Fail on any error.
+set -e
+# Display commands being run.
+set -x
+
+export CC=clang      # exported so child processes (bazel) actually see the compiler choice
+export CXX=clang++
+SRC="$PWD/github/glslang"
+cd "$SRC"
+
+mv External third_party  # Bazel limitation: no 'External' directory is allowed
+
+# Get bazel 0.29.1.
+gsutil cp gs://bazel/0.29.1/release/bazel-0.29.1-darwin-x86_64 .
+chmod +x bazel-0.29.1-darwin-x86_64
+
+echo "$(date): Build everything..."
+./bazel-0.29.1-darwin-x86_64 build :all
+echo "$(date): Build completed."
+
+echo "$(date): Starting bazel test..."
+./bazel-0.29.1-darwin-x86_64 test :all
+echo "$(date): Bazel test completed."
diff --git a/kokoro/macos-clang-release-bazel/continuous.cfg b/kokoro/macos-clang-release-bazel/continuous.cfg
new file mode 100644
index 0000000..5822fd2
--- /dev/null
+++ b/kokoro/macos-clang-release-bazel/continuous.cfg
@@ -0,0 +1,16 @@
+# Copyright (c) 2019 Google LLC.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Continuous build configuration.
+build_file: "glslang/kokoro/macos-clang-release-bazel/build.sh"
diff --git a/kokoro/macos-clang-release-bazel/presubmit.cfg b/kokoro/macos-clang-release-bazel/presubmit.cfg
new file mode 100644
index 0000000..12cf646
--- /dev/null
+++ b/kokoro/macos-clang-release-bazel/presubmit.cfg
@@ -0,0 +1,16 @@
+# Copyright (c) 2019 Google LLC.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Presubmit build configuration.
+build_file: "glslang/kokoro/macos-clang-release-bazel/build.sh"
diff --git a/kokoro/windows-msvc-2015-release-bazel/build.bat b/kokoro/windows-msvc-2015-release-bazel/build.bat
new file mode 100644
index 0000000..969d74d
--- /dev/null
+++ b/kokoro/windows-msvc-2015-release-bazel/build.bat
@@ -0,0 +1,55 @@
+:: Copyright (c) 2019 Google LLC.
+::
+:: Licensed under the Apache License, Version 2.0 (the "License");
+:: you may not use this file except in compliance with the License.
+:: You may obtain a copy of the License at
+::
+::     http://www.apache.org/licenses/LICENSE-2.0
+::
+:: Unless required by applicable law or agreed to in writing, software
+:: distributed under the License is distributed on an "AS IS" BASIS,
+:: WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+:: See the License for the specific language governing permissions and
+:: limitations under the License.
+::
+:: Windows Build Script: downloads Bazel 0.29.1, then runs "build :all" and "test :all".
+
+@echo on
+
+set SRC=%cd%\github\glslang
+
+:: Put python 3.6 first on PATH. NOTE(review): BAZEL_PYTHON below points at python2 - confirm which is intended.
+set PATH=C:\python36;%PATH%
+cd %SRC%
+
+mv External third_party
+
+:: Download and unpack Bazel 0.29.1 (presumably provides the bazel.exe invoked below).
+wget -q https://github.com/bazelbuild/bazel/releases/download/0.29.1/bazel-0.29.1-windows-x86_64.zip
+unzip -q bazel-0.29.1-windows-x86_64.zip
+
+:: Set up the MSVC 14.0 x64 environment and the BAZEL_* tool locations.
+call "C:\Program Files (x86)\Microsoft Visual Studio 14.0\VC\vcvarsall.bat" x64
+set BAZEL_VS=C:\Program Files (x86)\Microsoft Visual Studio 14.0
+set BAZEL_VC=C:\Program Files (x86)\Microsoft Visual Studio 14.0\VC
+set BAZEL_SH=c:\tools\msys64\usr\bin\bash.exe
+set BAZEL_PYTHON=c:\tools\python2\python.exe
+
+:: #########################################
+:: Start building. A non-zero exit code from bazel aborts the script with that code.
+:: #########################################
+echo "Build everything... %DATE% %TIME%"
+bazel.exe build :all
+if %ERRORLEVEL% NEQ 0 exit /b %ERRORLEVEL%
+echo "Build Completed %DATE% %TIME%"
+
+:: ##############
+:: Run the tests. A non-zero exit code from bazel aborts the script with that code.
+:: ##############
+echo "Running Tests... %DATE% %TIME%"
+bazel.exe test :all
+if %ERRORLEVEL% NEQ 0 exit /b %ERRORLEVEL%
+echo "Tests Completed %DATE% %TIME%"
+
+exit /b 0
+
diff --git a/kokoro/windows-msvc-2015-release-bazel/continuous.cfg b/kokoro/windows-msvc-2015-release-bazel/continuous.cfg
new file mode 100644
index 0000000..4b06889
--- /dev/null
+++ b/kokoro/windows-msvc-2015-release-bazel/continuous.cfg
@@ -0,0 +1,16 @@
+# Copyright (c) 2019 Google LLC.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Continuous build configuration.
+build_file: "glslang/kokoro/windows-msvc-2015-release-bazel/build.bat"
diff --git a/kokoro/windows-msvc-2015-release-bazel/presubmit.cfg b/kokoro/windows-msvc-2015-release-bazel/presubmit.cfg
new file mode 100644
index 0000000..08bcefb
--- /dev/null
+++ b/kokoro/windows-msvc-2015-release-bazel/presubmit.cfg
@@ -0,0 +1,16 @@
+# Copyright (c) 2019 Google LLC.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Presubmit build configuration.
+build_file: "glslang/kokoro/windows-msvc-2015-release-bazel/build.bat"
