src/intel/compiler/elk/elk_eu_opcodes.h - platform/external/mesa3d - Git at Google

 /*
  * Copyright © 2024 Intel Corporation
  * SPDX-License-Identifier: MIT
  */

 #pragma once

 #ifdef __cplusplus
 extern "C" {
 #endif

 /**
  * Opcode enumeration: the hardware instruction opcodes come first, followed
  * by the compiler backend opcodes that get translated into other
  * instructions.
  *
  * Apart from ELK_FS_OPCODE_FB_WRITE, no enumerator has an explicit value;
  * values are assigned by position, so inserting, removing or reordering
  * entries renumbers every enumerator that follows.
  */
 enum elk_opcode {
    /* These are the actual hardware instructions. */
    ELK_OPCODE_ILLEGAL,
    ELK_OPCODE_MOV,
    ELK_OPCODE_SEL,
    ELK_OPCODE_MOVI, /**< G45+ */
    ELK_OPCODE_NOT,
    ELK_OPCODE_AND,
    ELK_OPCODE_OR,
    ELK_OPCODE_XOR,
    ELK_OPCODE_SHR,
    ELK_OPCODE_SHL,
    ELK_OPCODE_DIM, /**< Gfx7.5 only */
    ELK_OPCODE_SMOV, /**< Gfx8+ */
    ELK_OPCODE_ASR,
    ELK_OPCODE_CMP,
    ELK_OPCODE_CMPN,
    ELK_OPCODE_CSEL, /**< Gfx8+ */
    ELK_OPCODE_F32TO16, /**< Gfx7 only */
    ELK_OPCODE_F16TO32, /**< Gfx7 only */
    ELK_OPCODE_BFREV, /**< Gfx7+ */
    ELK_OPCODE_BFE, /**< Gfx7+ */
    ELK_OPCODE_BFI1, /**< Gfx7+ */
    ELK_OPCODE_BFI2, /**< Gfx7+ */
    ELK_OPCODE_JMPI,
    ELK_OPCODE_BRD, /**< Gfx7+ */
    ELK_OPCODE_IF,
    ELK_OPCODE_IFF, /**< Pre-Gfx6 */
    ELK_OPCODE_BRC, /**< Gfx7+ */
    ELK_OPCODE_ELSE,
    ELK_OPCODE_ENDIF,
    ELK_OPCODE_DO, /**< Pre-Gfx6 */
    ELK_OPCODE_CASE, /**< Gfx6 only */
    ELK_OPCODE_WHILE,
    ELK_OPCODE_BREAK,
    ELK_OPCODE_CONTINUE,
    ELK_OPCODE_HALT,
    ELK_OPCODE_CALLA, /**< Gfx7.5+ */
    ELK_OPCODE_MSAVE, /**< Pre-Gfx6 */
    ELK_OPCODE_CALL, /**< Gfx6+ */
    ELK_OPCODE_MREST, /**< Pre-Gfx6 */
    ELK_OPCODE_RET, /**< Gfx6+ */
    ELK_OPCODE_PUSH, /**< Pre-Gfx6 */
    ELK_OPCODE_FORK, /**< Gfx6 only */
    ELK_OPCODE_GOTO, /**< Gfx8+ */
    ELK_OPCODE_POP, /**< Pre-Gfx6 */
    ELK_OPCODE_WAIT,
    ELK_OPCODE_SEND,
    ELK_OPCODE_SENDC,
    ELK_OPCODE_MATH, /**< Gfx6+ */
    ELK_OPCODE_ADD,
    ELK_OPCODE_MUL,
    ELK_OPCODE_AVG,
    ELK_OPCODE_FRC,
    ELK_OPCODE_RNDU,
    ELK_OPCODE_RNDD,
    ELK_OPCODE_RNDE,
    ELK_OPCODE_RNDZ,
    ELK_OPCODE_MAC,
    ELK_OPCODE_MACH,
    ELK_OPCODE_LZD,
    ELK_OPCODE_FBH, /**< Gfx7+ */
    ELK_OPCODE_FBL, /**< Gfx7+ */
    ELK_OPCODE_CBIT, /**< Gfx7+ */
    ELK_OPCODE_ADDC, /**< Gfx7+ */
    ELK_OPCODE_SUBB, /**< Gfx7+ */
    ELK_OPCODE_SAD2,
    ELK_OPCODE_SADA2,
    ELK_OPCODE_DP4,
    ELK_OPCODE_DPH,
    ELK_OPCODE_DP3,
    ELK_OPCODE_DP2,
    ELK_OPCODE_LINE,
    ELK_OPCODE_PLN, /**< G45+ */
    ELK_OPCODE_MAD, /**< Gfx6+ */
    ELK_OPCODE_LRP, /**< Gfx6+ */
    ELK_OPCODE_MADM, /**< Gfx8+ */
    ELK_OPCODE_NENOP, /**< G45 only */
    ELK_OPCODE_NOP,

    /* Count of the hardware opcodes listed above.  The first backend opcode
     * below is explicitly given this same value, so the two ranges are
     * contiguous.
     */
    NUM_ELK_OPCODES,

    /* These are compiler backend opcodes that get translated into other
     * instructions.
     */
    ELK_FS_OPCODE_FB_WRITE = NUM_ELK_OPCODES,

    /**
     * Same as ELK_FS_OPCODE_FB_WRITE but expects its arguments separately as
     * individual sources instead of as a single payload blob. The
     * position/ordering of the arguments are defined by the enum
     * fb_write_logical_srcs.
     */
    ELK_FS_OPCODE_FB_WRITE_LOGICAL,

    ELK_FS_OPCODE_REP_FB_WRITE,

    ELK_SHADER_OPCODE_RCP,
    ELK_SHADER_OPCODE_RSQ,
    ELK_SHADER_OPCODE_SQRT,
    ELK_SHADER_OPCODE_EXP2,
    ELK_SHADER_OPCODE_LOG2,
    ELK_SHADER_OPCODE_POW,
    ELK_SHADER_OPCODE_INT_QUOTIENT,
    ELK_SHADER_OPCODE_INT_REMAINDER,
    ELK_SHADER_OPCODE_SIN,
    ELK_SHADER_OPCODE_COS,

    /**
     * A generic "send" opcode.  The first source is the descriptor and
     * the second source is the message payload.
     */
    ELK_SHADER_OPCODE_SEND,

    /**
     * An "undefined" write which does nothing but indicates to liveness that
     * we don't care about any values in the register which predate this
     * instruction.  Used to prevent partial writes from causing issues with
     * live ranges.
     */
    ELK_SHADER_OPCODE_UNDEF,

    /**
     * Texture sampling opcodes.
     *
     * LOGICAL opcodes are eventually translated to the matching non-LOGICAL
     * opcode but instead of taking a single payload blob they expect their
     * arguments separately as individual sources. The position/ordering of the
     * arguments are defined by the enum tex_logical_srcs.
     */
    ELK_SHADER_OPCODE_TEX,
    ELK_SHADER_OPCODE_TEX_LOGICAL,
    ELK_SHADER_OPCODE_TXD,
    ELK_SHADER_OPCODE_TXD_LOGICAL,
    ELK_SHADER_OPCODE_TXF,
    ELK_SHADER_OPCODE_TXF_LOGICAL,
    ELK_SHADER_OPCODE_TXF_LZ,
    ELK_SHADER_OPCODE_TXL,
    ELK_SHADER_OPCODE_TXL_LOGICAL,
    ELK_SHADER_OPCODE_TXL_LZ,
    ELK_SHADER_OPCODE_TXS,
    ELK_SHADER_OPCODE_TXS_LOGICAL,
    ELK_FS_OPCODE_TXB,
    ELK_FS_OPCODE_TXB_LOGICAL,
    ELK_SHADER_OPCODE_TXF_CMS,
    ELK_SHADER_OPCODE_TXF_CMS_LOGICAL,
    ELK_SHADER_OPCODE_TXF_CMS_W,
    ELK_SHADER_OPCODE_TXF_CMS_W_LOGICAL,
    ELK_SHADER_OPCODE_TXF_CMS_W_GFX12_LOGICAL,
    ELK_SHADER_OPCODE_TXF_UMS,
    ELK_SHADER_OPCODE_TXF_UMS_LOGICAL,
    ELK_SHADER_OPCODE_TXF_MCS,
    ELK_SHADER_OPCODE_TXF_MCS_LOGICAL,
    ELK_SHADER_OPCODE_LOD,
    ELK_SHADER_OPCODE_LOD_LOGICAL,
    ELK_SHADER_OPCODE_TG4,
    ELK_SHADER_OPCODE_TG4_LOGICAL,
    ELK_SHADER_OPCODE_TG4_OFFSET,
    ELK_SHADER_OPCODE_TG4_OFFSET_LOGICAL,
    ELK_SHADER_OPCODE_SAMPLEINFO,
    ELK_SHADER_OPCODE_SAMPLEINFO_LOGICAL,

    ELK_SHADER_OPCODE_IMAGE_SIZE_LOGICAL,

    /**
     * Combines multiple sources of size 1 into a larger virtual GRF.
     * For example, parameters for a send-from-GRF message.  Or, updating
     * channels of a size 4 VGRF used to store vec4s such as texturing results.
     *
     * This will be lowered into MOVs from each source to consecutive offsets
     * of the destination VGRF.
     *
     * src[0] may be BAD_FILE.  If so, the lowering pass skips emitting the MOV,
     * but still reserves the first channel of the destination VGRF.  This can be
     * used to reserve space for, say, a message header set up by the generators.
     */
    ELK_SHADER_OPCODE_LOAD_PAYLOAD,

    /**
     * Packs a number of sources into a single value. Unlike LOAD_PAYLOAD, this
     * acts intra-channel, obtaining the final value for each channel by
     * combining the sources values for the same channel, the first source
     * occupying the lowest bits and the last source occupying the highest
     * bits.
     */
    ELK_FS_OPCODE_PACK,

    /**
     * Typed and untyped surface access opcodes.
     *
     * LOGICAL opcodes are eventually translated to the matching non-LOGICAL
     * opcode but instead of taking a single payload blob they expect their
     * arguments separately as individual sources:
     *
     * Source 0: [required] Surface coordinates.
     * Source 1: [optional] Operation source.
     * Source 2: [required] Surface index.
     * Source 3: [required] Number of coordinate components (as UD immediate).
     * Source 4: [required] Opcode-specific control immediate, same as source 2
     *                      of the matching non-LOGICAL opcode.
     */
    ELK_VEC4_OPCODE_UNTYPED_ATOMIC,
    ELK_SHADER_OPCODE_UNTYPED_ATOMIC_LOGICAL,
    ELK_VEC4_OPCODE_UNTYPED_SURFACE_READ,
    ELK_SHADER_OPCODE_UNTYPED_SURFACE_READ_LOGICAL,
    ELK_VEC4_OPCODE_UNTYPED_SURFACE_WRITE,
    ELK_SHADER_OPCODE_UNTYPED_SURFACE_WRITE_LOGICAL,

    ELK_SHADER_OPCODE_UNALIGNED_OWORD_BLOCK_READ_LOGICAL,
    ELK_SHADER_OPCODE_OWORD_BLOCK_WRITE_LOGICAL,

    /**
     * Untyped A64 surface access opcodes.
     *
     * Source 0: 64-bit address
     * Source 1: Operational source
     * Source 2: [required] Opcode-specific control immediate, same as source 2
     *                      of the matching non-LOGICAL opcode.
     */
    ELK_SHADER_OPCODE_A64_UNTYPED_READ_LOGICAL,
    ELK_SHADER_OPCODE_A64_UNTYPED_WRITE_LOGICAL,
    ELK_SHADER_OPCODE_A64_BYTE_SCATTERED_READ_LOGICAL,
    ELK_SHADER_OPCODE_A64_BYTE_SCATTERED_WRITE_LOGICAL,
    ELK_SHADER_OPCODE_A64_OWORD_BLOCK_READ_LOGICAL,
    ELK_SHADER_OPCODE_A64_UNALIGNED_OWORD_BLOCK_READ_LOGICAL,
    ELK_SHADER_OPCODE_A64_OWORD_BLOCK_WRITE_LOGICAL,
    ELK_SHADER_OPCODE_A64_UNTYPED_ATOMIC_LOGICAL,

    ELK_SHADER_OPCODE_TYPED_ATOMIC_LOGICAL,
    ELK_SHADER_OPCODE_TYPED_SURFACE_READ_LOGICAL,
    ELK_SHADER_OPCODE_TYPED_SURFACE_WRITE_LOGICAL,

    ELK_SHADER_OPCODE_RND_MODE,
    ELK_SHADER_OPCODE_FLOAT_CONTROL_MODE,

    /**
     * Byte scattered write/read opcodes.
     *
     * LOGICAL opcodes are eventually translated to the matching non-LOGICAL
     * opcode, but instead of taking a single payload blob they expect their
     * arguments separately as individual sources, like untyped write/read.
     */
    ELK_SHADER_OPCODE_BYTE_SCATTERED_READ_LOGICAL,
    ELK_SHADER_OPCODE_BYTE_SCATTERED_WRITE_LOGICAL,
    ELK_SHADER_OPCODE_DWORD_SCATTERED_READ_LOGICAL,
    ELK_SHADER_OPCODE_DWORD_SCATTERED_WRITE_LOGICAL,

    /**
     * Memory fence messages.
     *
     * Source 0: Must be register g0, used as header.
     * Source 1: Immediate bool to indicate whether control is returned to the
     *           thread only after the fence has been honored.
     * Source 2: Immediate byte indicating which memory to fence.  Zero means
     *           global memory; GFX7_BTI_SLM means SLM (for Gfx11+ only).
     *
     * Vec4 backend only uses Source 0.
     */
    ELK_SHADER_OPCODE_MEMORY_FENCE,

    /**
     * Scheduling-only fence.
     *
     * Sources can be used to force a stall until the registers in those are
     * available.  This might generate MOVs or SYNC_NOPs (Gfx12+).
     */
    ELK_FS_OPCODE_SCHEDULING_FENCE,

    ELK_SHADER_OPCODE_GFX4_SCRATCH_READ,
    ELK_SHADER_OPCODE_GFX4_SCRATCH_WRITE,
    ELK_SHADER_OPCODE_GFX7_SCRATCH_READ,

    ELK_SHADER_OPCODE_SCRATCH_HEADER,

    /**
     * Gfx8+ SIMD8 URB messages.
     */
    ELK_SHADER_OPCODE_URB_READ_LOGICAL,
    ELK_SHADER_OPCODE_URB_WRITE_LOGICAL,

    /**
     * Return the index of the first enabled live channel and assign it to
     * the first component of the destination.  Frequently used as input
     * for the BROADCAST pseudo-opcode.
     */
    ELK_SHADER_OPCODE_FIND_LIVE_CHANNEL,

    /**
     * Return the index of the last enabled live channel and assign it to
     * the first component of the destination.
     */
    ELK_SHADER_OPCODE_FIND_LAST_LIVE_CHANNEL,

    /**
     * Return the current execution mask in the specified flag subregister.
     * Can be CSE'ed more easily than a plain MOV from the ce0 ARF register.
     */
    ELK_FS_OPCODE_LOAD_LIVE_CHANNELS,

    /**
     * Pick the channel from its first source register given by the index
     * specified as second source.  Useful for variable indexing of surfaces.
     *
     * Note that because the result of this instruction is by definition
     * uniform and it can always be splatted to multiple channels using a
     * scalar regioning mode, only the first channel of the destination region
     * is guaranteed to be updated, which implies that BROADCAST instructions
     * should usually be marked force_writemask_all.
     */
    ELK_SHADER_OPCODE_BROADCAST,

    /* Pick the channel from its first source register given by the index
     * specified as second source.
     *
     * This is similar to the BROADCAST instruction except that it takes a
     * dynamic index and potentially puts a different value in each output
     * channel.
     */
    ELK_SHADER_OPCODE_SHUFFLE,

    /* Select between src0 and src1 based on channel enables.
     *
     * This instruction copies src0 into the enabled channels of the
     * destination and copies src1 into the disabled channels.
     */
    ELK_SHADER_OPCODE_SEL_EXEC,

    /* This turns into an align16 mov from src0 to dst with a swizzle
     * provided as an immediate in src1.
     */
    ELK_SHADER_OPCODE_QUAD_SWIZZLE,

    /* Take every Nth element in src0 and broadcast it to the group of N
     * channels in which it lives in the destination.  The offset within the
     * cluster is given by src1 and the cluster size is given by src2.
     */
    ELK_SHADER_OPCODE_CLUSTER_BROADCAST,

    ELK_SHADER_OPCODE_GET_BUFFER_SIZE,

    ELK_SHADER_OPCODE_INTERLOCK,

    /** Target for a HALT
     *
     * All HALT instructions in a shader must target the same jump point and
     * that point is denoted by a HALT_TARGET instruction.
     */
    ELK_SHADER_OPCODE_HALT_TARGET,

    ELK_VEC4_OPCODE_MOV_BYTES,
    ELK_VEC4_OPCODE_PACK_BYTES,
    ELK_VEC4_OPCODE_UNPACK_UNIFORM,
    ELK_VEC4_OPCODE_DOUBLE_TO_F32,
    ELK_VEC4_OPCODE_DOUBLE_TO_D32,
    ELK_VEC4_OPCODE_DOUBLE_TO_U32,
    ELK_VEC4_OPCODE_TO_DOUBLE,
    ELK_VEC4_OPCODE_PICK_LOW_32BIT,
    ELK_VEC4_OPCODE_PICK_HIGH_32BIT,
    ELK_VEC4_OPCODE_SET_LOW_32BIT,
    ELK_VEC4_OPCODE_SET_HIGH_32BIT,
    ELK_VEC4_OPCODE_MOV_FOR_SCRATCH,
    ELK_VEC4_OPCODE_ZERO_OOB_PUSH_REGS,

    ELK_FS_OPCODE_DDX_COARSE,
    ELK_FS_OPCODE_DDX_FINE,
    /**
     * Compute dFdy(), dFdyCoarse(), or dFdyFine().
     */
    ELK_FS_OPCODE_DDY_COARSE,
    ELK_FS_OPCODE_DDY_FINE,
    ELK_FS_OPCODE_LINTERP,
    ELK_FS_OPCODE_PIXEL_X,
    ELK_FS_OPCODE_PIXEL_Y,
    ELK_FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD,
    ELK_FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GFX4,
    ELK_FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_LOGICAL,
    ELK_FS_OPCODE_SET_SAMPLE_ID,
    ELK_FS_OPCODE_PACK_HALF_2x16_SPLIT,
    ELK_FS_OPCODE_INTERPOLATE_AT_SAMPLE,
    ELK_FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET,
    ELK_FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET,

    ELK_VEC4_VS_OPCODE_URB_WRITE,
    ELK_VS_OPCODE_PULL_CONSTANT_LOAD,
    ELK_VS_OPCODE_PULL_CONSTANT_LOAD_GFX7,

    ELK_VS_OPCODE_UNPACK_FLAGS_SIMD4X2,

    /**
     * Write geometry shader output data to the URB.
     *
     * Unlike ELK_VEC4_VS_OPCODE_URB_WRITE, this opcode doesn't do an implied move from
     * R0 to the first MRF.  This allows the geometry shader to override the
     * "Slot {0,1} Offset" fields in the message header.
     */
    ELK_VEC4_GS_OPCODE_URB_WRITE,

    /**
     * Write geometry shader output data to the URB and request a new URB
     * handle (gfx6).
     *
     * This opcode doesn't do an implied move from R0 to the first MRF.
     */
    ELK_VEC4_GS_OPCODE_URB_WRITE_ALLOCATE,

    /**
     * Terminate the geometry shader thread by doing an empty URB write.
     *
     * This opcode doesn't do an implied move from R0 to the first MRF.  This
     * allows the geometry shader to override the "GS Number of Output Vertices
     * for Slot {0,1}" fields in the message header.
     */
    ELK_GS_OPCODE_THREAD_END,

    /**
     * Set the "Slot {0,1} Offset" fields of a URB_WRITE message header.
     *
     * - dst is the MRF containing the message header.
     *
     * - src0.x indicates which portion of the URB should be written to (e.g. a
     *   vertex number)
     *
     * - src1 is an immediate multiplier which will be applied to src0
     *   (e.g. the size of a single vertex in the URB).
     *
     * Note: the hardware will apply this offset *in addition to* the offset in
     * vec4_instruction::offset.
     */
    ELK_GS_OPCODE_SET_WRITE_OFFSET,

    /**
     * Set the "GS Number of Output Vertices for Slot {0,1}" fields of a
     * URB_WRITE message header.
     *
     * - dst is the MRF containing the message header.
     *
     * - src0.x is the vertex count.  The upper 16 bits will be ignored.
     */
    ELK_GS_OPCODE_SET_VERTEX_COUNT,

    /**
     * Set DWORD 2 of dst to the value in src.
     */
    ELK_GS_OPCODE_SET_DWORD_2,

    /**
     * Prepare the dst register for storage in the "Channel Mask" fields of a
     * URB_WRITE message header.
     *
     * DWORD 4 of dst is shifted left by 4 bits, so that later,
     * ELK_GS_OPCODE_SET_CHANNEL_MASKS can OR DWORDs 0 and 4 together to form the
     * final channel mask.
     *
     * Note: since ELK_GS_OPCODE_SET_CHANNEL_MASKS ORs DWORDs 0 and 4 together to
     * form the final channel mask, DWORDs 0 and 4 of the dst register must not
     * have any extraneous bits set prior to execution of this opcode (that is,
     * they should be in the range 0x0 to 0xf).
     */
    ELK_GS_OPCODE_PREPARE_CHANNEL_MASKS,

    /**
     * Set the "Channel Mask" fields of a URB_WRITE message header.
     *
     * - dst is the MRF containing the message header.
     *
     * - src.x is the channel mask, as prepared by
     *   ELK_GS_OPCODE_PREPARE_CHANNEL_MASKS.  DWORDs 0 and 4 are OR'ed together to
     *   form the final channel mask.
     */
    ELK_GS_OPCODE_SET_CHANNEL_MASKS,

    /**
     * Get the "Instance ID" fields from the payload.
     *
     * - dst is the GRF for gl_InvocationID.
     */
    ELK_GS_OPCODE_GET_INSTANCE_ID,

    /**
     * Send a FF_SYNC message to allocate initial URB handles (gfx6).
     *
     * - dst will be used as the writeback register for the FF_SYNC operation.
     *
     * - src0 is the number of primitives written.
     *
     * - src1 is the value to hold in M0.0: number of SO vertices to write
     *   and number of SO primitives needed. Its value will be overwritten
     *   with the SVBI values if transform feedback is enabled.
     *
     * Note: This opcode uses an implicit MRF register for the ff_sync message
     * header, so the caller is expected to set inst->base_mrf and initialize
     * that MRF register to r0. This opcode will also write to this MRF register
     * to include the allocated URB handle so it can then be reused directly as
     * the header in the URB write operation we are allocating the handle for.
     */
    ELK_GS_OPCODE_FF_SYNC,

    /**
     * Move r0.1 (which holds PrimitiveID information in gfx6) to a separate
     * register.
     *
     * - dst is the GRF where PrimitiveID information will be moved.
     */
    ELK_GS_OPCODE_SET_PRIMITIVE_ID,

    /**
     * Write transform feedback data to the SVB by sending a SVB WRITE message.
     * Used in gfx6.
     *
     * - dst is the MRF register containing the message header.
     *
     * - src0 is the register where the vertex data is going to be copied from.
     *
     * - src1 is the destination register when write commit occurs.
     */
    ELK_GS_OPCODE_SVB_WRITE,

    /**
     * Set destination index in the SVB write message payload (M0.5). Used
     * in gfx6 for transform feedback.
     *
     * - dst is the header to save the destination indices for SVB WRITE.
     * - src is the register that holds the destination indices value.
     */
    ELK_GS_OPCODE_SVB_SET_DST_INDEX,

    /**
     * Prepare Mx.0 subregister for being used in the FF_SYNC message header.
     * Used in gfx6 for transform feedback.
     *
     * - dst will hold the register with the final Mx.0 value.
     *
     * - src0 has the number of vertices emitted in SO (NumSOVertsToWrite)
     *
     * - src1 has the number of needed primitives for SO (NumSOPrimsNeeded)
     *
     * - src2 is the value to hold in M0: number of SO vertices to write
     *   and number of SO primitives needed.
     */
    ELK_GS_OPCODE_FF_SYNC_SET_PRIMITIVES,

    /**
     * Terminate the compute shader.
     */
    ELK_CS_OPCODE_CS_TERMINATE,

    /**
     * GLSL barrier()
     */
    ELK_SHADER_OPCODE_BARRIER,

    /**
     * Calculate the high 32-bits of a 32x32 multiply.
     */
    ELK_SHADER_OPCODE_MULH,

    /** Signed subtraction with saturation. */
    ELK_SHADER_OPCODE_ISUB_SAT,

    /** Unsigned subtraction with saturation. */
    ELK_SHADER_OPCODE_USUB_SAT,

    /**
     * A MOV that uses VxH indirect addressing.
     *
     * Source 0: A register to start from (HW_REG).
     * Source 1: An indirect offset (in bytes, UD GRF).
     * Source 2: The length of the region that could be accessed (in bytes,
     *           UD immediate).
     */
    ELK_SHADER_OPCODE_MOV_INDIRECT,

    /** Fills out a relocatable immediate */
    ELK_SHADER_OPCODE_MOV_RELOC_IMM,

    ELK_VEC4_OPCODE_URB_READ,
    ELK_TCS_OPCODE_GET_INSTANCE_ID,
    ELK_VEC4_TCS_OPCODE_URB_WRITE,
    ELK_VEC4_TCS_OPCODE_SET_INPUT_URB_OFFSETS,
    ELK_VEC4_TCS_OPCODE_SET_OUTPUT_URB_OFFSETS,
    ELK_TCS_OPCODE_GET_PRIMITIVE_ID,
    ELK_TCS_OPCODE_CREATE_BARRIER_HEADER,
    ELK_TCS_OPCODE_SRC0_010_IS_ZERO,
    ELK_TCS_OPCODE_RELEASE_INPUT,
    ELK_TCS_OPCODE_THREAD_END,

    ELK_TES_OPCODE_GET_PRIMITIVE_ID,
    ELK_TES_OPCODE_CREATE_INPUT_READ_HEADER,
    ELK_TES_OPCODE_ADD_INDIRECT_URB_OFFSET,

    ELK_SHADER_OPCODE_READ_SR_REG,
 };


 #ifdef __cplusplus
 }
 #endif
	/*
	* Copyright © 2024 Intel Corporation
	* SPDX-License-Identifier: MIT
	*/

	#pragma once

	#ifdef __cplusplus
	extern "C" {
	#endif

	enum elk_opcode {
	/* These are the actual hardware instructions. */
	ELK_OPCODE_ILLEGAL,
	ELK_OPCODE_MOV,
	ELK_OPCODE_SEL,
	ELK_OPCODE_MOVI, /*< G45+ /
	ELK_OPCODE_NOT,
	ELK_OPCODE_AND,
	ELK_OPCODE_OR,
	ELK_OPCODE_XOR,
	ELK_OPCODE_SHR,
	ELK_OPCODE_SHL,
	ELK_OPCODE_DIM, /*< Gfx7.5 only /
	ELK_OPCODE_SMOV, /*< Gfx8+ /
	ELK_OPCODE_ASR,
	ELK_OPCODE_CMP,
	ELK_OPCODE_CMPN,
	ELK_OPCODE_CSEL, /*< Gfx8+ /
	ELK_OPCODE_F32TO16, /*< Gfx7 only /
	ELK_OPCODE_F16TO32, /*< Gfx7 only /
	ELK_OPCODE_BFREV, /*< Gfx7+ /
	ELK_OPCODE_BFE, /*< Gfx7+ /
	ELK_OPCODE_BFI1, /*< Gfx7+ /
	ELK_OPCODE_BFI2, /*< Gfx7+ /
	ELK_OPCODE_JMPI,
	ELK_OPCODE_BRD, /*< Gfx7+ /
	ELK_OPCODE_IF,
	ELK_OPCODE_IFF, /*< Pre-Gfx6 /
	ELK_OPCODE_BRC, /*< Gfx7+ /
	ELK_OPCODE_ELSE,
	ELK_OPCODE_ENDIF,
	ELK_OPCODE_DO, /*< Pre-Gfx6 /
	ELK_OPCODE_CASE, /*< Gfx6 only /
	ELK_OPCODE_WHILE,
	ELK_OPCODE_BREAK,
	ELK_OPCODE_CONTINUE,
	ELK_OPCODE_HALT,
	ELK_OPCODE_CALLA, /*< Gfx7.5+ /
	ELK_OPCODE_MSAVE, /*< Pre-Gfx6 /
	ELK_OPCODE_CALL, /*< Gfx6+ /
	ELK_OPCODE_MREST, /*< Pre-Gfx6 /
	ELK_OPCODE_RET, /*< Gfx6+ /
	ELK_OPCODE_PUSH, /*< Pre-Gfx6 /
	ELK_OPCODE_FORK, /*< Gfx6 only /
	ELK_OPCODE_GOTO, /*< Gfx8+ /
	ELK_OPCODE_POP, /*< Pre-Gfx6 /
	ELK_OPCODE_WAIT,
	ELK_OPCODE_SEND,
	ELK_OPCODE_SENDC,
	ELK_OPCODE_MATH, /*< Gfx6+ /
	ELK_OPCODE_ADD,
	ELK_OPCODE_MUL,
	ELK_OPCODE_AVG,
	ELK_OPCODE_FRC,
	ELK_OPCODE_RNDU,
	ELK_OPCODE_RNDD,
	ELK_OPCODE_RNDE,
	ELK_OPCODE_RNDZ,
	ELK_OPCODE_MAC,
	ELK_OPCODE_MACH,
	ELK_OPCODE_LZD,
	ELK_OPCODE_FBH, /*< Gfx7+ /
	ELK_OPCODE_FBL, /*< Gfx7+ /
	ELK_OPCODE_CBIT, /*< Gfx7+ /
	ELK_OPCODE_ADDC, /*< Gfx7+ /
	ELK_OPCODE_SUBB, /*< Gfx7+ /
	ELK_OPCODE_SAD2,
	ELK_OPCODE_SADA2,
	ELK_OPCODE_DP4,
	ELK_OPCODE_DPH,
	ELK_OPCODE_DP3,
	ELK_OPCODE_DP2,
	ELK_OPCODE_LINE,
	ELK_OPCODE_PLN, /*< G45+ /
	ELK_OPCODE_MAD, /*< Gfx6+ /
	ELK_OPCODE_LRP, /*< Gfx6+ /
	ELK_OPCODE_MADM, /*< Gfx8+ /
	ELK_OPCODE_NENOP, /*< G45 only /
	ELK_OPCODE_NOP,

	NUM_ELK_OPCODES,

	/* These are compiler backend opcodes that get translated into other
	* instructions.
	*/
	ELK_FS_OPCODE_FB_WRITE = NUM_ELK_OPCODES,

	/**
	* Same as ELK_FS_OPCODE_FB_WRITE but expects its arguments separately as
	* individual sources instead of as a single payload blob. The
	* position/ordering of the arguments are defined by the enum
	* fb_write_logical_srcs.
	*/
	ELK_FS_OPCODE_FB_WRITE_LOGICAL,

	ELK_FS_OPCODE_REP_FB_WRITE,

	ELK_SHADER_OPCODE_RCP,
	ELK_SHADER_OPCODE_RSQ,
	ELK_SHADER_OPCODE_SQRT,
	ELK_SHADER_OPCODE_EXP2,
	ELK_SHADER_OPCODE_LOG2,
	ELK_SHADER_OPCODE_POW,
	ELK_SHADER_OPCODE_INT_QUOTIENT,
	ELK_SHADER_OPCODE_INT_REMAINDER,
	ELK_SHADER_OPCODE_SIN,
	ELK_SHADER_OPCODE_COS,

	/**
	* A generic "send" opcode. The first source is the descriptor and
	* the second source is the message payload.
	*/
	ELK_SHADER_OPCODE_SEND,

	/**
	* An "undefined" write which does nothing but indicates to liveness that
	* we don't care about any values in the register which predate this
	* instruction. Used to prevent partial writes from causing issues with
	* live ranges.
	*/
	ELK_SHADER_OPCODE_UNDEF,

	/**
	* Texture sampling opcodes.
	*
	* LOGICAL opcodes are eventually translated to the matching non-LOGICAL
	* opcode but instead of taking a single payload blob they expect their
	* arguments separately as individual sources. The position/ordering of the
	* arguments are defined by the enum tex_logical_srcs.
	*/
	ELK_SHADER_OPCODE_TEX,
	ELK_SHADER_OPCODE_TEX_LOGICAL,
	ELK_SHADER_OPCODE_TXD,
	ELK_SHADER_OPCODE_TXD_LOGICAL,
	ELK_SHADER_OPCODE_TXF,
	ELK_SHADER_OPCODE_TXF_LOGICAL,
	ELK_SHADER_OPCODE_TXF_LZ,
	ELK_SHADER_OPCODE_TXL,
	ELK_SHADER_OPCODE_TXL_LOGICAL,
	ELK_SHADER_OPCODE_TXL_LZ,
	ELK_SHADER_OPCODE_TXS,
	ELK_SHADER_OPCODE_TXS_LOGICAL,
	ELK_FS_OPCODE_TXB,
	ELK_FS_OPCODE_TXB_LOGICAL,
	ELK_SHADER_OPCODE_TXF_CMS,
	ELK_SHADER_OPCODE_TXF_CMS_LOGICAL,
	ELK_SHADER_OPCODE_TXF_CMS_W,
	ELK_SHADER_OPCODE_TXF_CMS_W_LOGICAL,
	ELK_SHADER_OPCODE_TXF_CMS_W_GFX12_LOGICAL,
	ELK_SHADER_OPCODE_TXF_UMS,
	ELK_SHADER_OPCODE_TXF_UMS_LOGICAL,
	ELK_SHADER_OPCODE_TXF_MCS,
	ELK_SHADER_OPCODE_TXF_MCS_LOGICAL,
	ELK_SHADER_OPCODE_LOD,
	ELK_SHADER_OPCODE_LOD_LOGICAL,
	ELK_SHADER_OPCODE_TG4,
	ELK_SHADER_OPCODE_TG4_LOGICAL,
	ELK_SHADER_OPCODE_TG4_OFFSET,
	ELK_SHADER_OPCODE_TG4_OFFSET_LOGICAL,
	ELK_SHADER_OPCODE_SAMPLEINFO,
	ELK_SHADER_OPCODE_SAMPLEINFO_LOGICAL,

	ELK_SHADER_OPCODE_IMAGE_SIZE_LOGICAL,

	/**
	* Combines multiple sources of size 1 into a larger virtual GRF.
	* For example, parameters for a send-from-GRF message. Or, updating
	* channels of a size 4 VGRF used to store vec4s such as texturing results.
	*
	* This will be lowered into MOVs from each source to consecutive offsets
	* of the destination VGRF.
	*
	* src[0] may be BAD_FILE. If so, the lowering pass skips emitting the MOV,
	* but still reserves the first channel of the destination VGRF. This can be
	* used to reserve space for, say, a message header set up by the generators.
	*/
	ELK_SHADER_OPCODE_LOAD_PAYLOAD,

	/**
	* Packs a number of sources into a single value. Unlike LOAD_PAYLOAD, this
	* acts intra-channel, obtaining the final value for each channel by
	* combining the sources values for the same channel, the first source
	* occupying the lowest bits and the last source occupying the highest
	* bits.
	*/
	ELK_FS_OPCODE_PACK,

	/**
	* Typed and untyped surface access opcodes.
	*
	* LOGICAL opcodes are eventually translated to the matching non-LOGICAL
	* opcode but instead of taking a single payload blob they expect their
	* arguments separately as individual sources:
	*
	* Source 0: [required] Surface coordinates.
	* Source 1: [optional] Operation source.
	* Source 2: [required] Surface index.
	* Source 3: [required] Number of coordinate components (as UD immediate).
	* Source 4: [required] Opcode-specific control immediate, same as source 2
	* of the matching non-LOGICAL opcode.
	*/
	ELK_VEC4_OPCODE_UNTYPED_ATOMIC,
	ELK_SHADER_OPCODE_UNTYPED_ATOMIC_LOGICAL,
	ELK_VEC4_OPCODE_UNTYPED_SURFACE_READ,
	ELK_SHADER_OPCODE_UNTYPED_SURFACE_READ_LOGICAL,
	ELK_VEC4_OPCODE_UNTYPED_SURFACE_WRITE,
	ELK_SHADER_OPCODE_UNTYPED_SURFACE_WRITE_LOGICAL,

	ELK_SHADER_OPCODE_UNALIGNED_OWORD_BLOCK_READ_LOGICAL,
	ELK_SHADER_OPCODE_OWORD_BLOCK_WRITE_LOGICAL,

	/**
	* Untyped A64 surface access opcodes.
	*
	* Source 0: 64-bit address
	* Source 1: Operational source
	* Source 2: [required] Opcode-specific control immediate, same as source 2
	* of the matching non-LOGICAL opcode.
	*/
	ELK_SHADER_OPCODE_A64_UNTYPED_READ_LOGICAL,
	ELK_SHADER_OPCODE_A64_UNTYPED_WRITE_LOGICAL,
	ELK_SHADER_OPCODE_A64_BYTE_SCATTERED_READ_LOGICAL,
	ELK_SHADER_OPCODE_A64_BYTE_SCATTERED_WRITE_LOGICAL,
	ELK_SHADER_OPCODE_A64_OWORD_BLOCK_READ_LOGICAL,
	ELK_SHADER_OPCODE_A64_UNALIGNED_OWORD_BLOCK_READ_LOGICAL,
	ELK_SHADER_OPCODE_A64_OWORD_BLOCK_WRITE_LOGICAL,
	ELK_SHADER_OPCODE_A64_UNTYPED_ATOMIC_LOGICAL,

	ELK_SHADER_OPCODE_TYPED_ATOMIC_LOGICAL,
	ELK_SHADER_OPCODE_TYPED_SURFACE_READ_LOGICAL,
	ELK_SHADER_OPCODE_TYPED_SURFACE_WRITE_LOGICAL,

	ELK_SHADER_OPCODE_RND_MODE,
	ELK_SHADER_OPCODE_FLOAT_CONTROL_MODE,

	/**
	* Byte scattered write/read opcodes.
	*
	* LOGICAL opcodes are eventually translated to the matching non-LOGICAL
	* opcode, but instead of taking a single payload blog they expect their
	* arguments separately as individual sources, like untyped write/read.
	*/
	ELK_SHADER_OPCODE_BYTE_SCATTERED_READ_LOGICAL,
	ELK_SHADER_OPCODE_BYTE_SCATTERED_WRITE_LOGICAL,
	ELK_SHADER_OPCODE_DWORD_SCATTERED_READ_LOGICAL,
	ELK_SHADER_OPCODE_DWORD_SCATTERED_WRITE_LOGICAL,

	/**
	* Memory fence messages.
	*
	* Source 0: Must be register g0, used as header.
	* Source 1: Immediate bool to indicate whether control is returned to the
	* thread only after the fence has been honored.
	* Source 2: Immediate byte indicating which memory to fence. Zero means
	* global memory; GFX7_BTI_SLM means SLM (for Gfx11+ only).
	*
	* Vec4 backend only uses Source 0.
	*/
	ELK_SHADER_OPCODE_MEMORY_FENCE,

	/**
	* Scheduling-only fence.
	*
	* Sources can be used to force a stall until the registers in those are
	* available. This might generate MOVs or SYNC_NOPs (Gfx12+).
	*/
	ELK_FS_OPCODE_SCHEDULING_FENCE,

	ELK_SHADER_OPCODE_GFX4_SCRATCH_READ,
	ELK_SHADER_OPCODE_GFX4_SCRATCH_WRITE,
	ELK_SHADER_OPCODE_GFX7_SCRATCH_READ,

	ELK_SHADER_OPCODE_SCRATCH_HEADER,

	/**
	* Gfx8+ SIMD8 URB messages.
	*/
	ELK_SHADER_OPCODE_URB_READ_LOGICAL,
	ELK_SHADER_OPCODE_URB_WRITE_LOGICAL,

	/**
	* Return the index of the first enabled live channel and assign it to
	* to the first component of the destination. Frequently used as input
	* for the BROADCAST pseudo-opcode.
	*/
	ELK_SHADER_OPCODE_FIND_LIVE_CHANNEL,

	/**
	* Return the index of the last enabled live channel and assign it to
	* the first component of the destination.
	*/
	ELK_SHADER_OPCODE_FIND_LAST_LIVE_CHANNEL,

	/**
	* Return the current execution mask in the specified flag subregister.
	* Can be CSE'ed more easily than a plain MOV from the ce0 ARF register.
	*/
	ELK_FS_OPCODE_LOAD_LIVE_CHANNELS,

	/**
	* Pick the channel from its first source register given by the index
	* specified as second source. Useful for variable indexing of surfaces.
	*
	* Note that because the result of this instruction is by definition
	* uniform and it can always be splatted to multiple channels using a
	* scalar regioning mode, only the first channel of the destination region
	* is guaranteed to be updated, which implies that BROADCAST instructions
	* should usually be marked force_writemask_all.
	*/
	ELK_SHADER_OPCODE_BROADCAST,

	/* Pick the channel from its first source register given by the index
	* specified as second source.
	*
	* This is similar to the BROADCAST instruction except that it takes a
	* dynamic index and potentially puts a different value in each output
	* channel.
	*/
	ELK_SHADER_OPCODE_SHUFFLE,

	/* Select between src0 and src1 based on channel enables.
	*
	* This instruction copies src0 into the enabled channels of the
	* destination and copies src1 into the disabled channels.
	*/
	ELK_SHADER_OPCODE_SEL_EXEC,

	/* This turns into an align16 mov from src0 to dst with a swizzle
	* provided as an immediate in src1.
	*/
	ELK_SHADER_OPCODE_QUAD_SWIZZLE,

	/* Take every Nth element in src0 and broadcast it to the group of N
	* channels in which it lives in the destination. The offset within the
	* cluster is given by src1 and the cluster size is given by src2.
	*/
	ELK_SHADER_OPCODE_CLUSTER_BROADCAST,

	ELK_SHADER_OPCODE_GET_BUFFER_SIZE,

	ELK_SHADER_OPCODE_INTERLOCK,

	/** Target for a HALT
	*
	* All HALT instructions in a shader must target the same jump point and
	* that point is denoted by a HALT_TARGET instruction.
	*/
	ELK_SHADER_OPCODE_HALT_TARGET,

	ELK_VEC4_OPCODE_MOV_BYTES,
	ELK_VEC4_OPCODE_PACK_BYTES,
	ELK_VEC4_OPCODE_UNPACK_UNIFORM,
	ELK_VEC4_OPCODE_DOUBLE_TO_F32,
	ELK_VEC4_OPCODE_DOUBLE_TO_D32,
	ELK_VEC4_OPCODE_DOUBLE_TO_U32,
	ELK_VEC4_OPCODE_TO_DOUBLE,
	ELK_VEC4_OPCODE_PICK_LOW_32BIT,
	ELK_VEC4_OPCODE_PICK_HIGH_32BIT,
	ELK_VEC4_OPCODE_SET_LOW_32BIT,
	ELK_VEC4_OPCODE_SET_HIGH_32BIT,
	ELK_VEC4_OPCODE_MOV_FOR_SCRATCH,
	ELK_VEC4_OPCODE_ZERO_OOB_PUSH_REGS,

	ELK_FS_OPCODE_DDX_COARSE,
	ELK_FS_OPCODE_DDX_FINE,
	/**
	* Compute dFdy(), dFdyCoarse(), or dFdyFine().
	*/
	ELK_FS_OPCODE_DDY_COARSE,
	ELK_FS_OPCODE_DDY_FINE,
	ELK_FS_OPCODE_LINTERP,
	ELK_FS_OPCODE_PIXEL_X,
	ELK_FS_OPCODE_PIXEL_Y,
	ELK_FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD,
	ELK_FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GFX4,
	ELK_FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_LOGICAL,
	ELK_FS_OPCODE_SET_SAMPLE_ID,
	ELK_FS_OPCODE_PACK_HALF_2x16_SPLIT,
	ELK_FS_OPCODE_INTERPOLATE_AT_SAMPLE,
	ELK_FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET,
	ELK_FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET,

	ELK_VEC4_VS_OPCODE_URB_WRITE,
	ELK_VS_OPCODE_PULL_CONSTANT_LOAD,
	ELK_VS_OPCODE_PULL_CONSTANT_LOAD_GFX7,

	ELK_VS_OPCODE_UNPACK_FLAGS_SIMD4X2,

	/**
	* Write geometry shader output data to the URB.
	*
	* Unlike ELK_VEC4_VS_OPCODE_URB_WRITE, this opcode doesn't do an implied move from
	* R0 to the first MRF. This allows the geometry shader to override the
	* "Slot {0,1} Offset" fields in the message header.
	*/
	ELK_VEC4_GS_OPCODE_URB_WRITE,

	/**
	* Write geometry shader output data to the URB and request a new URB
	* handle (gfx6).
	*
	* This opcode doesn't do an implied move from R0 to the first MRF.
	*/
	ELK_VEC4_GS_OPCODE_URB_WRITE_ALLOCATE,

	/**
	* Terminate the geometry shader thread by doing an empty URB write.
	*
	* This opcode doesn't do an implied move from R0 to the first MRF. This
	* allows the geometry shader to override the "GS Number of Output Vertices
	* for Slot {0,1}" fields in the message header.
	*/
	ELK_GS_OPCODE_THREAD_END,

	/**
	* Set the "Slot {0,1} Offset" fields of a URB_WRITE message header.
	*
	* - dst is the MRF containing the message header.
	*
	* - src0.x indicates which portion of the URB should be written to (e.g. a
	* vertex number)
	*
	* - src1 is an immediate multiplier which will be applied to src0
	* (e.g. the size of a single vertex in the URB).
	*
	* Note: the hardware will apply this offset in addition to the offset in
	* vec4_instruction::offset.
	*/
	ELK_GS_OPCODE_SET_WRITE_OFFSET,

	/**
	* Set the "GS Number of Output Vertices for Slot {0,1}" fields of a
	* URB_WRITE message header.
	*
	* - dst is the MRF containing the message header.
	*
	* - src0.x is the vertex count. The upper 16 bits will be ignored.
	*/
	ELK_GS_OPCODE_SET_VERTEX_COUNT,

	/**
	* Set DWORD 2 of dst to the value in src.
	*/
	ELK_GS_OPCODE_SET_DWORD_2,

	/**
	* Prepare the dst register for storage in the "Channel Mask" fields of a
	* URB_WRITE message header.
	*
	* DWORD 4 of dst is shifted left by 4 bits, so that later,
	* ELK_GS_OPCODE_SET_CHANNEL_MASKS can OR DWORDs 0 and 4 together to form the
	* final channel mask.
	*
	* Note: since ELK_GS_OPCODE_SET_CHANNEL_MASKS ORs DWORDs 0 and 4 together to
	* form the final channel mask, DWORDs 0 and 4 of the dst register must not
	* have any extraneous bits set prior to execution of this opcode (that is,
	* they should be in the range 0x0 to 0xf).
	*/
	ELK_GS_OPCODE_PREPARE_CHANNEL_MASKS,

	/**
	* Set the "Channel Mask" fields of a URB_WRITE message header.
	*
	* - dst is the MRF containing the message header.
	*
	* - src.x is the channel mask, as prepared by
	* ELK_GS_OPCODE_PREPARE_CHANNEL_MASKS. DWORDs 0 and 4 are OR'ed together to
	* form the final channel mask.
	*/
	ELK_GS_OPCODE_SET_CHANNEL_MASKS,

	/**
	* Get the "Instance ID" fields from the payload.
	*
	* - dst is the GRF for gl_InvocationID.
	*/
	ELK_GS_OPCODE_GET_INSTANCE_ID,

	/**
	* Send an FF_SYNC message to allocate initial URB handles (gfx6).
	*
	* - dst will be used as the writeback register for the FF_SYNC operation.
	*
	* - src0 is the number of primitives written.
	*
	* - src1 is the value to hold in M0.0: number of SO vertices to write
	* and number of SO primitives needed. Its value will be overwritten
	* with the SVBI values if transform feedback is enabled.
	*
	* Note: This opcode uses an implicit MRF register for the ff_sync message
	* header, so the caller is expected to set inst->base_mrf and initialize
	* that MRF register to r0. This opcode will also write to this MRF register
	* to include the allocated URB handle so it can then be reused directly as
	* the header in the URB write operation we are allocating the handle for.
	*/
	ELK_GS_OPCODE_FF_SYNC,

	/**
	* Move r0.1 (which holds PrimitiveID information in gfx6) to a separate
	* register.
	*
	* - dst is the GRF where PrimitiveID information will be moved.
	*/
	ELK_GS_OPCODE_SET_PRIMITIVE_ID,

	/**
	* Write transform feedback data to the SVB by sending an SVB WRITE message.
	* Used in gfx6.
	*
	* - dst is the MRF register containing the message header.
	*
	* - src0 is the register where the vertex data is going to be copied from.
	*
	* - src1 is the destination register when write commit occurs.
	*/
	ELK_GS_OPCODE_SVB_WRITE,

	/**
	* Set destination index in the SVB write message payload (M0.5). Used
	* in gfx6 for transform feedback.
	*
	* - dst is the header to save the destination indices for SVB WRITE.
	* - src is the register that holds the destination indices value.
	*/
	ELK_GS_OPCODE_SVB_SET_DST_INDEX,

	/**
	* Prepare the Mx.0 subregister for use in the FF_SYNC message header.
	* Used in gfx6 for transform feedback.
	*
	* - dst will hold the register with the final Mx.0 value.
	*
	* - src0 has the number of vertices emitted in SO (NumSOVertsToWrite)
	*
	* - src1 has the number of needed primitives for SO (NumSOPrimsNeeded)
	*
	* - src2 is the value to hold in M0: number of SO vertices to write
	* and number of SO primitives needed.
	*/
	ELK_GS_OPCODE_FF_SYNC_SET_PRIMITIVES,

	/**
	* Terminate the compute shader.
	*/
	ELK_CS_OPCODE_CS_TERMINATE,

	/**
	* GLSL barrier()
	*/
	ELK_SHADER_OPCODE_BARRIER,

	/**
	* Calculate the high 32 bits of a 32x32 multiply.
	*/
	ELK_SHADER_OPCODE_MULH,

	/** Signed subtraction with saturation. */
	ELK_SHADER_OPCODE_ISUB_SAT,

	/** Unsigned subtraction with saturation. */
	ELK_SHADER_OPCODE_USUB_SAT,

	/**
	* A MOV that uses VxH indirect addressing.
	*
	* Source 0: A register to start from (HW_REG).
	* Source 1: An indirect offset (in bytes, UD GRF).
	* Source 2: The length of the region that could be accessed (in bytes,
	* UD immediate).
	*/
	ELK_SHADER_OPCODE_MOV_INDIRECT,

	/** Fills out a relocatable immediate */
	ELK_SHADER_OPCODE_MOV_RELOC_IMM,

	ELK_VEC4_OPCODE_URB_READ,
	ELK_TCS_OPCODE_GET_INSTANCE_ID,
	ELK_VEC4_TCS_OPCODE_URB_WRITE,
	ELK_VEC4_TCS_OPCODE_SET_INPUT_URB_OFFSETS,
	ELK_VEC4_TCS_OPCODE_SET_OUTPUT_URB_OFFSETS,
	ELK_TCS_OPCODE_GET_PRIMITIVE_ID,
	ELK_TCS_OPCODE_CREATE_BARRIER_HEADER,
	ELK_TCS_OPCODE_SRC0_010_IS_ZERO,
	ELK_TCS_OPCODE_RELEASE_INPUT,
	ELK_TCS_OPCODE_THREAD_END,

	ELK_TES_OPCODE_GET_PRIMITIVE_ID,
	ELK_TES_OPCODE_CREATE_INPUT_READ_HEADER,
	ELK_TES_OPCODE_ADD_INDIRECT_URB_OFFSET,

	ELK_SHADER_OPCODE_READ_SR_REG,
	};


	#ifdef __cplusplus
	}
	#endif