aco: use VOP2 version of v_cvt_pkrtz_f16_f32 on GFX6, GFX7 and GFX10

Totals from 767 (0.56% of 136546) affected shaders (NAVI):
CodeSize: 2862208 -> 2850036 (-0.43%)
Instrs: 561572 -> 561574 (+0.00%)
Cycles: 6455420 -> 6455428 (+0.00%)

Reviewed-by: Rhys Perry <pendingchaos02@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6777>
diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp
index fb989eb..a7745d4 100644
--- a/src/amd/compiler/aco_instruction_selection.cpp
+++ b/src/amd/compiler/aco_instruction_selection.cpp
@@ -2141,7 +2141,10 @@
       Temp src = get_alu_src(ctx, instr->src[0]);
       if (instr->src[0].src.ssa->bit_size == 64)
          src = bld.vop1(aco_opcode::v_cvt_f32_f64, bld.def(v1), src);
-      bld.vop3(aco_opcode::v_cvt_pkrtz_f16_f32, Definition(dst), src, Operand(0u));
+      if (ctx->block->fp_mode.round16_64 == fp_round_tz)
+         bld.vop1(aco_opcode::v_cvt_f16_f32, Definition(dst), src);
+      else
+         bld.vop3(aco_opcode::v_cvt_pkrtz_f16_f32_e64, Definition(dst), src, Operand(0u));
       break;
    }
    case nir_op_f2f32: {
@@ -2615,7 +2618,10 @@
             /* upper bits zero on GFX6-GFX9 */
             bld.vop1(aco_opcode::v_cvt_f16_f32, Definition(dst), get_alu_src(ctx, instr->src[0]));
          } else if (!ctx->block->fp_mode.care_about_round16_64 || ctx->block->fp_mode.round16_64 == fp_round_tz) {
-            emit_vop3a_instruction(ctx, instr, aco_opcode::v_cvt_pkrtz_f16_f32, dst);
+            if (ctx->program->chip_class == GFX8 || ctx->program->chip_class == GFX9)
+               emit_vop3a_instruction(ctx, instr, aco_opcode::v_cvt_pkrtz_f16_f32_e64, dst);
+            else
+               emit_vop2_instruction(ctx, instr, aco_opcode::v_cvt_pkrtz_f16_f32, dst, false);
          } else {
             Temp src0 = bld.vop1(aco_opcode::v_cvt_f16_f32, bld.def(v2b), get_alu_src(ctx, instr->src[0]));
             Temp src1 = bld.vop1(aco_opcode::v_cvt_f16_f32, bld.def(v2b), get_alu_src(ctx, instr->src[1]));
@@ -10343,7 +10349,7 @@
 
    case V_028714_SPI_SHADER_FP16_ABGR:
       enabled_channels = 0x5;
-      compr_op = aco_opcode::v_cvt_pkrtz_f16_f32;
+      compr_op = aco_opcode::v_cvt_pkrtz_f16_f32_e64;
       if (is_16bit) {
          if (ctx->options->chip_class >= GFX9) {
             /* Pack the FP16 values together instead of converting them to
diff --git a/src/amd/compiler/aco_opcodes.py b/src/amd/compiler/aco_opcodes.py
index 22daeff..97be2ad 100644
--- a/src/amd/compiler/aco_opcodes.py
+++ b/src/amd/compiler/aco_opcodes.py
@@ -682,6 +682,7 @@
    (  -1,   -1,   -1,   -1, 0x2b, "v_fmac_f32", True),
    (  -1,   -1,   -1,   -1, 0x2c, "v_fmamk_f32", True),
    (  -1,   -1,   -1,   -1, 0x2d, "v_fmaak_f32", True),
+   (0x2f, 0x2f,   -1,   -1, 0x2f, "v_cvt_pkrtz_f16_f32", True),
    (  -1,   -1, 0x1f, 0x1f, 0x32, "v_add_f16", True),
    (  -1,   -1, 0x20, 0x20, 0x33, "v_sub_f16", True),
    (  -1,   -1, 0x21, 0x21, 0x34, "v_subrev_f16", True),
@@ -1051,7 +1052,7 @@
    (0x11e, 0x11e, 0x293, 0x293, 0x363, "v_bfm_b32", False, False),
    (0x12d, 0x12d, 0x294, 0x294, 0x368, "v_cvt_pknorm_i16_f32", True, False),
    (0x12e, 0x12e, 0x295, 0x295, 0x369, "v_cvt_pknorm_u16_f32", True, False),
-   (0x12f, 0x12f, 0x296, 0x296, 0x12f, "v_cvt_pkrtz_f16_f32", True, False), # GFX6_7_10 is VOP2 with opcode 0x02f
+   (0x12f, 0x12f, 0x296, 0x296, 0x12f, "v_cvt_pkrtz_f16_f32_e64", True, False), # GFX6_7_10 is VOP2 with opcode 0x02f
    (0x130, 0x130, 0x297, 0x297, 0x36a, "v_cvt_pk_u16_u32", False, False),
    (0x131, 0x131, 0x298, 0x298, 0x36b, "v_cvt_pk_i16_i32", False, False),
    (   -1,    -1,    -1, 0x299, 0x312, "v_cvt_pknorm_i16_f16", True, False),