[LLVM][TableGen] Parameterize NumToSkip in DecoderEmitter (#136456) - Add command line option `num-to-skip-size` to parameterize the size of `NumToSkip` bytes in the decoder table. Default value will be 2, and targets that need larger size can use 3. - Keep all existing targets, except AArch64, to use size 2, and change AArch64 to use size 3 since it run into the "disassembler decoding table too large" error with size 2. - Additional fixes on top of earlier revert: mark `decodeNumToSkip` as static (not necessary anymore as the generated code is now in anonymous namespace, but doing it for consistency) and incorporate Bazel build changes from https://github.com/llvm/llvm-project/pull/136212 - Following is a rough reduction in size for the decoder tables by switching to size 2. ``` Target Old Size New Size % Reduction ================================================ AArch64 153254 153254 0.00 AMDGPU 471566 412805 12.46 ARC 5724 5061 11.58 ARM 84936 73831 13.07 AVR 1497 1306 12.76 BPF 2172 1927 11.28 CSKY 10064 8692 13.63 Hexagon 47967 41965 12.51 Lanai 1108 982 11.37 LoongArch 24446 21621 11.56 MSP430 4200 3716 11.52 Mips 36330 31415 13.53 PPC 31897 28098 11.91 RISCV 37979 32790 13.66 Sparc 8331 7252 12.95 SystemZ 36722 32248 12.18 VE 48296 42873 11.23 XCore 2590 2316 10.58 Xtensa 3827 3316 13.35 ```

commit: e1bb7f6ddec37567230d3e46719aee5bcd268d5a [log] [tgz]
author: Rahul Joshi <rjoshi@nvidia.com> Mon Apr 21 08:15:08 2025 -0700
committer: GitHub <noreply@github.com> Mon Apr 21 08:15:08 2025 -0700
tree: 91fa11acfd1a9a0e871705ce2feb07e695085981
parent: cfc2b0d094f705aa0995eff0dc5f1faf1440a769 [diff]
diff --git a/llvm/lib/Target/AArch64/CMakeLists.txt b/llvm/lib/Target/AArch64/CMakeLists.txt
index 2300e47..583003f 100644
--- a/llvm/lib/Target/AArch64/CMakeLists.txt
+++ b/llvm/lib/Target/AArch64/CMakeLists.txt

@@ -7,7 +7,8 @@
 tablegen(LLVM AArch64GenAsmWriter1.inc -gen-asm-writer -asmwriternum=1)
 tablegen(LLVM AArch64GenCallingConv.inc -gen-callingconv)
 tablegen(LLVM AArch64GenDAGISel.inc -gen-dag-isel)
-tablegen(LLVM AArch64GenDisassemblerTables.inc -gen-disassembler)
+tablegen(LLVM AArch64GenDisassemblerTables.inc -gen-disassembler
+              --large-decoder-table)
 tablegen(LLVM AArch64GenFastISel.inc -gen-fast-isel)
 tablegen(LLVM AArch64GenGlobalISel.inc -gen-global-isel)
 tablegen(LLVM AArch64GenO0PreLegalizeGICombiner.inc -gen-global-isel-combiner

diff --git a/llvm/test/TableGen/VarLenDecoder.td b/llvm/test/TableGen/VarLenDecoder.td
index 5cf0bf8..0ca5c6c 100644
--- a/llvm/test/TableGen/VarLenDecoder.td
+++ b/llvm/test/TableGen/VarLenDecoder.td

@@ -1,4 +1,5 @@
-// RUN: llvm-tblgen -gen-disassembler -I %p/../../include %s | FileCheck %s
+// RUN: llvm-tblgen -gen-disassembler -I %p/../../include %s | FileCheck %s --check-prefixes=CHECK,CHECK-SMALL
+// RUN: llvm-tblgen -gen-disassembler --large-decoder-table -I %p/../../include %s | FileCheck %s --check-prefixes=CHECK,CHECK-LARGE
 
 include "llvm/Target/Target.td"
 
@@ -46,12 +47,19 @@
   );
 }
 
-// CHECK:      MCD::OPC_ExtractField, 3, 5,  // Inst{7-3} ...
-// CHECK-NEXT: MCD::OPC_FilterValue, 8, 4, 0, 0, // Skip to: 12
-// CHECK-NEXT: MCD::OPC_Decode, {{[0-9]+}}, {{[0-9]+}}, 0, // Opcode: FOO16
-// CHECK-NEXT: MCD::OPC_FilterValue, 9, 4, 0, 0, // Skip to: 21
-// CHECK-NEXT: MCD::OPC_Decode, {{[0-9]+}}, {{[0-9]+}}, 1, // Opcode: FOO32
-// CHECK-NEXT: MCD::OPC_Fail,
+// CHECK-SMALL:      MCD::OPC_ExtractField, 3, 5,  // Inst{7-3} ...
+// CHECK-SMALL-NEXT: MCD::OPC_FilterValue, 8, 4, 0, // Skip to: 11
+// CHECK-SMALL-NEXT: MCD::OPC_Decode, {{[0-9]+}}, {{[0-9]+}}, 0, // Opcode: FOO16
+// CHECK-SMALL-NEXT: MCD::OPC_FilterValue, 9, 4, 0, // Skip to: 19
+// CHECK-SMALL-NEXT: MCD::OPC_Decode, {{[0-9]+}}, {{[0-9]+}}, 1, // Opcode: FOO32
+// CHECK-SMALL-NEXT: MCD::OPC_Fail,
+
+// CHECK-LARGE:      /* 0 */       MCD::OPC_ExtractField, 3, 5,  // Inst{7-3} ...
+// CHECK-LARGE-NEXT: /* 3 */       MCD::OPC_FilterValue, 8, 4, 0, 0, // Skip to: 12
+// CHECK-LARGE-NEXT: /* 8 */       MCD::OPC_Decode, 178, 2, 0, // Opcode: FOO16
+// CHECK-LARGE-NEXT: /* 12 */      MCD::OPC_FilterValue, 9, 4, 0, 0, // Skip to: 21
+// CHECK-LARGE-NEXT: /* 17 */      MCD::OPC_Decode, 179, 2, 1, // Opcode: FOO32
+// CHECK-LARGE-NEXT: /* 21 */      MCD::OPC_Fail,
 
 // Instruction length table
 // CHECK: 27,

diff --git a/llvm/test/TableGen/trydecode-emission.td b/llvm/test/TableGen/trydecode-emission.td
index 20d2446..ebd7169 100644
--- a/llvm/test/TableGen/trydecode-emission.td
+++ b/llvm/test/TableGen/trydecode-emission.td

@@ -1,4 +1,5 @@
 // RUN: llvm-tblgen -gen-disassembler -I %p/../../include %s | FileCheck %s
+// RUN: llvm-tblgen -gen-disassembler --large-decoder-table -I %p/../../include %s | FileCheck %s --check-prefix=CHECK-LARGE
 
 // Check that if decoding of an instruction fails and the instruction does not
 // have a complete decoder method that can determine if the bitpattern is valid
@@ -34,10 +35,28 @@
 }
 
 // CHECK:      /* 0 */       MCD::OPC_ExtractField, 4, 4,  // Inst{7-4} ...
-// CHECK-NEXT: /* 3 */       MCD::OPC_FilterValue, 0, 18, 0, 0, // Skip to: 26
-// CHECK-NEXT: /* 8 */       MCD::OPC_CheckField, 2, 2, 0, 7, 0, 0, // Skip to: 22
-// CHECK-NEXT: /* 15 */      MCD::OPC_TryDecode, {{[0-9]+}}, {{[0-9]+}}, 0, 0, 0, 0, // Opcode: InstB, skip to: 22
-// CHECK-NEXT: /* 22 */      MCD::OPC_Decode, {{[0-9]+}}, {{[0-9]+}}, 1, // Opcode: InstA
-// CHECK-NEXT: /* 26 */      MCD::OPC_Fail,
+// CHECK-NEXT: /* 3 */       MCD::OPC_FilterValue, 0, 16, 0, // Skip to: 23
+// CHECK-NEXT: /* 7 */       MCD::OPC_CheckField, 2, 2, 0, 6, 0, // Skip to: 19
+// CHECK-NEXT: /* 13 */      MCD::OPC_TryDecode, {{[0-9]+}}, {{[0-9]+}}, 0, 0, 0, // Opcode: InstB, skip to: 19
+// CHECK-NEXT: /* 19 */      MCD::OPC_Decode, {{[0-9]+}}, {{[0-9]+}}, 1, // Opcode: InstA
+// CHECK-NEXT: /* 23 */      MCD::OPC_Fail,
 
 // CHECK: if (!Check(S, DecodeInstB(MI, insn, Address, Decoder))) { DecodeComplete = false; return MCDisassembler::Fail; }
+
+// CHECK:       unsigned NumToSkip = *Ptr++;
+// CHECK-NEXT:  NumToSkip |= (*Ptr++) << 8;
+// CHECK-NEXT:  return NumToSkip;
+
+// CHECK-LARGE:      /* 0 */       MCD::OPC_ExtractField, 4, 4,  // Inst{7-4} ...
+// CHECK-LARGE-NEXT: /* 3 */       MCD::OPC_FilterValue, 0, 18, 0, 0, // Skip to: 26
+// CHECK-LARGE-NEXT: /* 8 */       MCD::OPC_CheckField, 2, 2, 0, 7, 0, 0, // Skip to: 22
+// CHECK-LARGE-NEXT: /* 15 */      MCD::OPC_TryDecode, 179, 2, 0, 0, 0, 0, // Opcode: InstB, skip to: 22
+// CHECK-LARGE-NEXT: /* 22 */      MCD::OPC_Decode, 178, 2, 1, // Opcode: InstA
+// CHECK-LARGE-NEXT: /* 26 */      MCD::OPC_Fail,
+
+// CHECK-LARGE: if (!Check(S, DecodeInstB(MI, insn, Address, Decoder))) { DecodeComplete = false; return MCDisassembler::Fail; }
+
+// CHECK-LARGE:       unsigned NumToSkip = *Ptr++;
+// CHECK-LARGE-NEXT:  NumToSkip |= (*Ptr++) << 8;
+// CHECK-LARGE-NEXT:  NumToSkip |= (*Ptr++) << 16;
+// CHECK-LARGE-NEXT:  return NumToSkip;

diff --git a/llvm/test/TableGen/trydecode-emission2.td b/llvm/test/TableGen/trydecode-emission2.td
index 0584034..f455c63 100644
--- a/llvm/test/TableGen/trydecode-emission2.td
+++ b/llvm/test/TableGen/trydecode-emission2.td

@@ -1,4 +1,5 @@
 // RUN: llvm-tblgen -gen-disassembler -I %p/../../include %s | FileCheck %s
+// RUN: llvm-tblgen -gen-disassembler --large-decoder-table -I %p/../../include %s | FileCheck %s --check-prefix=CHECK-LARGE
 
 include "llvm/Target/Target.td"
 
@@ -31,14 +32,27 @@
 }
 
 // CHECK:      /* 0 */       MCD::OPC_ExtractField, 2, 1,  // Inst{2} ...
-// CHECK-NEXT: /* 3 */       MCD::OPC_FilterValue, 0, 36, 0, 0, // Skip to: 44
-// CHECK-NEXT: /* 8 */       MCD::OPC_ExtractField, 5, 3,  // Inst{7-5} ...
-// CHECK-NEXT: /* 11 */      MCD::OPC_FilterValue, 0, 28, 0, 0, // Skip to: 44
-// CHECK-NEXT: /* 16 */      MCD::OPC_CheckField, 0, 2, 3, 7, 0, 0, // Skip to: 30
-// CHECK-NEXT: /* 23 */      MCD::OPC_TryDecode, {{[0-9]+}}, {{[0-9]+}}, 0, 0, 0, 0, // Opcode: InstB, skip to: 30
-// CHECK-NEXT: /* 30 */      MCD::OPC_CheckField, 3, 2, 0, 7, 0, 0, // Skip to: 44
-// CHECK-NEXT: /* 37 */      MCD::OPC_TryDecode, {{[0-9]+}}, {{[0-9]+}}, 1, 0, 0, 0, // Opcode: InstA, skip to: 44
-// CHECK-NEXT: /* 44 */      MCD::OPC_Fail,
+// CHECK-NEXT: /* 3 */       MCD::OPC_FilterValue, 0, 31, 0, // Skip to: 38
+// CHECK-NEXT: /* 7 */       MCD::OPC_ExtractField, 5, 3,  // Inst{7-5} ...
+// CHECK-NEXT: /* 10 */      MCD::OPC_FilterValue, 0, 24, 0, // Skip to: 38
+// CHECK-NEXT: /* 14 */      MCD::OPC_CheckField, 0, 2, 3, 6, 0, // Skip to: 26
+// CHECK-NEXT: /* 20 */      MCD::OPC_TryDecode, {{[0-9]+}}, {{[0-9]+}}, 0, 0, 0, // Opcode: InstB, skip to: 26
+// CHECK-NEXT: /* 26 */      MCD::OPC_CheckField, 3, 2, 0, 6, 0, // Skip to: 38
+// CHECK-NEXT: /* 32 */      MCD::OPC_TryDecode, {{[0-9]+}}, {{[0-9]+}}, 1, 0, 0, // Opcode: InstA, skip to: 38
+// CHECK-NEXT: /* 38 */      MCD::OPC_Fail,
 
 // CHECK: if (!Check(S, DecodeInstB(MI, insn, Address, Decoder))) { DecodeComplete = false; return MCDisassembler::Fail; }
 // CHECK: if (!Check(S, DecodeInstA(MI, insn, Address, Decoder))) { DecodeComplete = false; return MCDisassembler::Fail; }
+
+// CHECK-LARGE:      /* 0 */       MCD::OPC_ExtractField, 2, 1,  // Inst{2} ...
+// CHECK-LARGE-NEXT: /* 3 */       MCD::OPC_FilterValue, 0, 36, 0, 0, // Skip to: 44
+// CHECK-LARGE-NEXT: /* 8 */       MCD::OPC_ExtractField, 5, 3,  // Inst{7-5} ...
+// CHECK-LARGE-NEXT: /* 11 */      MCD::OPC_FilterValue, 0, 28, 0, 0, // Skip to: 44
+// CHECK-LARGE-NEXT: /* 16 */      MCD::OPC_CheckField, 0, 2, 3, 7, 0, 0, // Skip to: 30
+// CHECK-LARGE-NEXT: /* 23 */      MCD::OPC_TryDecode, 179, 2, 0, 0, 0, 0, // Opcode: InstB, skip to: 30
+// CHECK-LARGE-NEXT: /* 30 */      MCD::OPC_CheckField, 3, 2, 0, 7, 0, 0, // Skip to: 44
+// CHECK-LARGE-NEXT: /* 37 */      MCD::OPC_TryDecode, 178, 2, 1, 0, 0, 0, // Opcode: InstA, skip to: 44
+// CHECK-LARGE-NEXT: /* 44 */      MCD::OPC_Fail,
+
+// CHECK-LARGE: if (!Check(S, DecodeInstB(MI, insn, Address, Decoder))) { DecodeComplete = false; return MCDisassembler::Fail; }
+// CHECK-LARGE: if (!Check(S, DecodeInstA(MI, insn, Address, Decoder))) { DecodeComplete = false; return MCDisassembler::Fail; }

diff --git a/llvm/test/TableGen/trydecode-emission3.td b/llvm/test/TableGen/trydecode-emission3.td
index 4c5be7e..acd61a1 100644
--- a/llvm/test/TableGen/trydecode-emission3.td
+++ b/llvm/test/TableGen/trydecode-emission3.td

@@ -1,4 +1,5 @@
-// RUN: llvm-tblgen -gen-disassembler -I %p/../../include %s | FileCheck %s
+// RUN: llvm-tblgen -gen-disassembler  -I %p/../../include %s | FileCheck %s
+// RUN: llvm-tblgen -gen-disassembler --large-decoder-table -I %p/../../include %s | FileCheck %s --check-prefix=CHECK-LARGE
 
 include "llvm/Target/Target.td"
 
@@ -35,10 +36,19 @@
 }
 
 // CHECK:      /* 0 */       MCD::OPC_ExtractField, 4, 4,  // Inst{7-4} ...
-// CHECK-NEXT: /* 3 */       MCD::OPC_FilterValue, 0, 18, 0, 0, // Skip to: 26
-// CHECK-NEXT: /* 8 */       MCD::OPC_CheckField, 2, 2, 0, 7, 0, 0, // Skip to: 22
-// CHECK-NEXT: /* 15 */      MCD::OPC_TryDecode, {{[0-9]+}}, {{[0-9]+}}, 0, 0, 0, 0, // Opcode: InstB, skip to: 22
-// CHECK-NEXT: /* 22 */      MCD::OPC_Decode, {{[0-9]+}}, {{[0-9]+}}, 1, // Opcode: InstA
-// CHECK-NEXT: /* 26 */      MCD::OPC_Fail,
+// CHECK-NEXT: /* 3 */       MCD::OPC_FilterValue, 0, 16, 0, // Skip to: 23
+// CHECK-NEXT: /* 7 */       MCD::OPC_CheckField, 2, 2, 0, 6, 0, // Skip to: 19
+// CHECK-NEXT: /* 13 */      MCD::OPC_TryDecode, 179, 2, 0, 0, 0, // Opcode: InstB, skip to: 19
+// CHECK-NEXT: /* 19 */      MCD::OPC_Decode, 178, 2, 1, // Opcode: InstA
+// CHECK-NEXT: /* 23 */      MCD::OPC_Fail,
 
 // CHECK: if (!Check(S, DecodeInstBOp(MI, tmp, Address, Decoder))) { DecodeComplete = false; return MCDisassembler::Fail; }
+
+// CHECK-LARGE:      /* 0 */       MCD::OPC_ExtractField, 4, 4,  // Inst{7-4} ...
+// CHECK-LARGE-NEXT: /* 3 */       MCD::OPC_FilterValue, 0, 18, 0, 0, // Skip to: 26
+// CHECK-LARGE-NEXT: /* 8 */       MCD::OPC_CheckField, 2, 2, 0, 7, 0, 0, // Skip to: 22
+// CHECK-LARGE-NEXT: /* 15 */      MCD::OPC_TryDecode, {{[0-9]+}}, {{[0-9]+}}, 0, 0, 0, 0, // Opcode: InstB, skip to: 22
+// CHECK-LARGE-NEXT: /* 22 */      MCD::OPC_Decode, {{[0-9]+}}, {{[0-9]+}}, 1, // Opcode: InstA
+// CHECK-LARGE-NEXT: /* 26 */      MCD::OPC_Fail,
+
+// CHECK-LARGE: if (!Check(S, DecodeInstBOp(MI, tmp, Address, Decoder))) { DecodeComplete = false; return MCDisassembler::Fail; }

diff --git a/llvm/test/TableGen/trydecode-emission4.td b/llvm/test/TableGen/trydecode-emission4.td
index 1e51ba5..57eec0f 100644
--- a/llvm/test/TableGen/trydecode-emission4.td
+++ b/llvm/test/TableGen/trydecode-emission4.td

@@ -1,4 +1,5 @@
-// RUN: llvm-tblgen -gen-disassembler -I %p/../../include %s | FileCheck %s
+// RUN: llvm-tblgen -gen-disassembler  -I %p/../../include %s | FileCheck %s
+// RUN: llvm-tblgen -gen-disassembler --large-decoder-table -I %p/../../include %s | FileCheck %s --check-prefix=CHECK-LARGE
 
 // Test for OPC_ExtractField/OPC_CheckField with start bit > 255.
 // These large start values may arise for architectures with long instruction
@@ -32,13 +33,22 @@
   let hasCompleteDecoder = 0;
 }
 
-
 // CHECK:      /* 0 */       MCD::OPC_ExtractField, 250, 3, 4,  // Inst{509-506} ...
-// CHECK-NEXT: /* 4 */       MCD::OPC_FilterValue, 0, 19, 0, 0, // Skip to: 28
-// CHECK-NEXT: /* 9 */       MCD::OPC_CheckField, 248, 3, 2, 0, 7, 0, 0, // Skip to: 24
-// CHECK-NEXT: /* 17 */      MCD::OPC_TryDecode, {{[0-9]+}}, {{[0-9]+}}, 0, 0, 0, 0, // Opcode: InstB, skip to: 24
-// CHECK-NEXT: /* 24 */      MCD::OPC_Decode, {{[0-9]+}}, {{[0-9]+}}, 1, // Opcode: InstA
-// CHECK-NEXT: /* 28 */      MCD::OPC_Fail,
+// CHECK-NEXT: /* 4 */       MCD::OPC_FilterValue, 0, 17, 0, // Skip to: 25
+// CHECK-NEXT: /* 8 */       MCD::OPC_CheckField, 248, 3, 2, 0, 6, 0, // Skip to: 21
+// CHECK-NEXT: /* 15 */      MCD::OPC_TryDecode, 179, 2, 0, 0, 0, // Opcode: InstB, skip to: 21
+// CHECK-NEXT: /* 21 */      MCD::OPC_Decode, 178, 2, 1, // Opcode: InstA
+// CHECK-NEXT: /* 25 */      MCD::OPC_Fail,
 
 // CHECK: if (!Check(S, DecodeInstB(MI, insn, Address, Decoder))) { DecodeComplete = false; return MCDisassembler::Fail; }
 
+
+// CHECK-LARGE:      /* 0 */       MCD::OPC_ExtractField, 250, 3, 4,  // Inst{509-506} ...
+// CHECK-LARGE-NEXT: /* 4 */       MCD::OPC_FilterValue, 0, 19, 0, 0, // Skip to: 28
+// CHECK-LARGE-NEXT: /* 9 */       MCD::OPC_CheckField, 248, 3, 2, 0, 7, 0, 0, // Skip to: 24
+// CHECK-LARGE-NEXT: /* 17 */      MCD::OPC_TryDecode, {{[0-9]+}}, {{[0-9]+}}, 0, 0, 0, 0, // Opcode: InstB, skip to: 24
+// CHECK-LARGE-NEXT: /* 24 */      MCD::OPC_Decode, {{[0-9]+}}, {{[0-9]+}}, 1, // Opcode: InstA
+// CHECK-LARGE-NEXT: /* 28 */      MCD::OPC_Fail,
+
+// CHECK-LARGE: if (!Check(S, DecodeInstB(MI, insn, Address, Decoder))) { DecodeComplete = false; return MCDisassembler::Fail; }
+

diff --git a/llvm/utils/TableGen/DecoderEmitter.cpp b/llvm/utils/TableGen/DecoderEmitter.cpp
index 7d63126..ba36033 100644
--- a/llvm/utils/TableGen/DecoderEmitter.cpp
+++ b/llvm/utils/TableGen/DecoderEmitter.cpp

@@ -32,8 +32,10 @@
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/FormatVariadic.h"
 #include "llvm/Support/FormattedStream.h"
 #include "llvm/Support/LEB128.h"
+#include "llvm/Support/MathExtras.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/TableGen/Error.h"
 #include "llvm/TableGen/Record.h"
@@ -76,6 +78,12 @@
             "significantly reducing Table Duplications")),
     cl::init(SUPPRESSION_DISABLE), cl::cat(DisassemblerEmitterCat));
 
+static cl::opt<bool> LargeTable(
+    "large-decoder-table",
+    cl::desc("Use large decoder table format. This uses 24 bits for offset\n"
+             "in the table instead of the default 16 bits."),
+    cl::init(false), cl::cat(DisassemblerEmitterCat));
+
 STATISTIC(NumEncodings, "Number of encodings considered");
 STATISTIC(NumEncodingsLackingDisasm,
           "Number of encodings without disassembler info");
@@ -85,6 +93,8 @@
 
 namespace {
 
+unsigned getNumToSkipInBytes() { return LargeTable ? 3 : 2; }
+
 struct EncodingField {
   unsigned Base, Width, Offset;
   EncodingField(unsigned B, unsigned W, unsigned O)
@@ -130,10 +140,29 @@
   // in the table for patching.
   size_t insertNumToSkip() {
     size_t Size = size();
-    insert(end(), 3, 0);
+    insert(end(), getNumToSkipInBytes(), 0);
     return Size;
   }
+
+  void patchNumToSkip(size_t FixupIdx, uint32_t DestIdx) {
+    // Calculate the distance from the byte following the fixup entry byte
+    // to the destination. The Target is calculated from after the
+    // `getNumToSkipInBytes()`-byte NumToSkip entry itself, so subtract
+    // `getNumToSkipInBytes()` from the displacement here to account for that.
+    assert(DestIdx >= FixupIdx + getNumToSkipInBytes() &&
+           "Expecting a forward jump in the decoding table");
+    uint32_t Delta = DestIdx - FixupIdx - getNumToSkipInBytes();
+    if (!isUIntN(8 * getNumToSkipInBytes(), Delta))
+      PrintFatalError(
+          "disassembler decoding table too large, try --large-decoder-table");
+
+    (*this)[FixupIdx] = static_cast<uint8_t>(Delta);
+    (*this)[FixupIdx + 1] = static_cast<uint8_t>(Delta >> 8);
+    if (getNumToSkipInBytes() == 3)
+      (*this)[FixupIdx + 2] = static_cast<uint8_t>(Delta >> 16);
+  }
 };
+
 struct DecoderTableInfo {
   DecoderTable Table;
   FixupScopeList FixupStack;
@@ -690,19 +719,8 @@
                                uint32_t DestIdx) {
   // Any NumToSkip fixups in the current scope can resolve to the
   // current location.
-  for (uint32_t FixupIdx : reverse(Fixups)) {
-    // Calculate the distance from the byte following the fixup entry byte
-    // to the destination. The Target is calculated from after the 24-bit
-    // NumToSkip entry itself, so subtract three from the displacement here
-    // to account for that.
-    uint32_t Delta = DestIdx - FixupIdx - 3;
-    // Our NumToSkip entries are 24-bits. Make sure our table isn't too
-    // big.
-    assert(isUInt<24>(Delta));
-    Table[FixupIdx] = (uint8_t)Delta;
-    Table[FixupIdx + 1] = (uint8_t)(Delta >> 8);
-    Table[FixupIdx + 2] = (uint8_t)(Delta >> 16);
-  }
+  for (uint32_t FixupIdx : Fixups)
+    Table.patchNumToSkip(FixupIdx, DestIdx);
 }
 
 // Emit table entries to decode instructions given a segment or segments
@@ -759,15 +777,9 @@
     Delegate->emitTableEntries(TableInfo);
 
     // Now that we've emitted the body of the handler, update the NumToSkip
-    // of the filter itself to be able to skip forward when false. Subtract
-    // three as to account for the width of the NumToSkip field itself.
-    if (PrevFilter) {
-      uint32_t NumToSkip = Table.size() - PrevFilter - 3;
-      assert(isUInt<24>(NumToSkip) && "disassembler decoding table too large!");
-      Table[PrevFilter] = (uint8_t)NumToSkip;
-      Table[PrevFilter + 1] = (uint8_t)(NumToSkip >> 8);
-      Table[PrevFilter + 2] = (uint8_t)(NumToSkip >> 16);
-    }
+    // of the filter itself to be able to skip forward when false.
+    if (PrevFilter)
+      Table.patchNumToSkip(PrevFilter, Table.size());
   }
 
   // If there is no fallthrough, then the final filter should get fixed
@@ -814,7 +826,8 @@
     OS << (unsigned)*I++ << ", ";
   };
 
-  // Emit 24-bit numtoskip value to OS, returning the NumToSkip value.
+  // Emit `getNumToSkipInBytes()`-byte numtoskip value to OS, returning the
+  // NumToSkip value.
   auto emitNumToSkip = [](DecoderTable::const_iterator &I,
                           formatted_raw_ostream &OS) {
     uint8_t Byte = *I++;
@@ -823,9 +836,11 @@
     Byte = *I++;
     OS << (unsigned)Byte << ", ";
     NumToSkip |= Byte << 8;
-    Byte = *I++;
-    OS << (unsigned)(Byte) << ", ";
-    NumToSkip |= Byte << 16;
+    if (getNumToSkipInBytes() == 3) {
+      Byte = *I++;
+      OS << (unsigned)(Byte) << ", ";
+      NumToSkip |= Byte << 16;
+    }
     return NumToSkip;
   };
 
@@ -866,7 +881,6 @@
       // The filter value is ULEB128 encoded.
       emitULEB128(I, OS);
 
-      // 24-bit numtoskip value.
       uint32_t NumToSkip = emitNumToSkip(I, OS);
       OS << "// Skip to: " << ((I - Table.begin()) + NumToSkip) << "\n";
       break;
@@ -881,7 +895,6 @@
       // ULEB128 encoded field value.
       emitULEB128(I, OS);
 
-      // 24-bit numtoskip value.
       uint32_t NumToSkip = emitNumToSkip(I, OS);
       OS << "// Skip to: " << ((I - Table.begin()) + NumToSkip) << "\n";
       break;
@@ -890,7 +903,6 @@
       OS << Indent << "MCD::OPC_CheckPredicate, ";
       emitULEB128(I, OS);
 
-      // 24-bit numtoskip value.
       uint32_t NumToSkip = emitNumToSkip(I, OS);
       OS << "// Skip to: " << ((I - Table.begin()) + NumToSkip) << "\n";
       break;
@@ -920,7 +932,6 @@
 
       // Fallthrough for OPC_TryDecode.
 
-      // 24-bit numtoskip value.
       uint32_t NumToSkip = emitNumToSkip(I, OS);
 
       OS << "// Opcode: " << NumberedEncodings[EncodingID]
@@ -1392,9 +1403,8 @@
     TableInfo.Table.push_back(NumBits);
     TableInfo.Table.insertULEB128(Ilnd.FieldVal);
 
-    // The fixup is always 24-bits, so go ahead and allocate the space
-    // in the table so all our relative position calculations work OK even
-    // before we fully resolve the real value here.
+    // Allocate space in the table for fixup so all our relative position
+    // calculations work OK even before we fully resolve the real value here.
 
     // Push location for NumToSkip backpatching.
     TableInfo.FixupStack.back().push_back(TableInfo.Table.insertNumToSkip());
@@ -2139,6 +2149,15 @@
 static void emitDecodeInstruction(formatted_raw_ostream &OS,
                                   bool IsVarLenInst) {
   OS << R"(
+static unsigned decodeNumToSkip(const uint8_t *&Ptr) {
+  unsigned NumToSkip = *Ptr++;
+  NumToSkip |= (*Ptr++) << 8;
+)";
+  if (getNumToSkipInBytes() == 3)
+    OS << "  NumToSkip |= (*Ptr++) << 16;\n";
+  OS << R"(  return NumToSkip;
+}
+
 template <typename InsnType>
 static DecodeStatus decodeInstruction(const uint8_t DecodeTable[], MCInst &MI,
                                       InsnType insn, uint64_t Address,
@@ -2176,10 +2195,7 @@
       // Decode the field value.
       uint64_t Val = decodeULEB128AndIncUnsafe(Ptr);
       bool Failed = Val != CurFieldValue;
-      // NumToSkip is a plain 24-bit integer.
-      unsigned NumToSkip = *Ptr++;
-      NumToSkip |= (*Ptr++) << 8;
-      NumToSkip |= (*Ptr++) << 16;
+      unsigned NumToSkip = decodeNumToSkip(Ptr);
 
       // Perform the filter operation.
       if (Failed)
@@ -2203,10 +2219,7 @@
       uint64_t ExpectedValue = decodeULEB128(++Ptr, &PtrLen);
       Ptr += PtrLen;
       bool Failed = ExpectedValue != FieldValue;
-      // NumToSkip is a plain 24-bit integer.
-      unsigned NumToSkip = *Ptr++;
-      NumToSkip |= (*Ptr++) << 8;
-      NumToSkip |= (*Ptr++) << 16;
+      unsigned NumToSkip = decodeNumToSkip(Ptr);
 
       // If the actual and expected values don't match, skip.
       if (Failed)
@@ -2221,10 +2234,7 @@
     case MCD::OPC_CheckPredicate: {
       // Decode the Predicate Index value.
       unsigned PIdx = decodeULEB128AndIncUnsafe(Ptr);
-      // NumToSkip is a plain 24-bit integer.
-      unsigned NumToSkip = *Ptr++;
-      NumToSkip |= (*Ptr++) << 8;
-      NumToSkip |= (*Ptr++) << 16;
+      unsigned NumToSkip = decodeNumToSkip(Ptr);
       // Check the predicate.
       bool Failed = !checkDecoderPredicate(PIdx, Bits);
       if (Failed)
@@ -2259,10 +2269,7 @@
       // Decode the Opcode value.
       unsigned Opc = decodeULEB128AndIncUnsafe(Ptr);
       unsigned DecodeIdx = decodeULEB128AndIncUnsafe(Ptr);
-      // NumToSkip is a plain 24-bit integer.
-      unsigned NumToSkip = *Ptr++;
-      NumToSkip |= (*Ptr++) << 8;
-      NumToSkip |= (*Ptr++) << 16;
+      unsigned NumToSkip = decodeNumToSkip(Ptr);
 
       // Perform the decode operation.
       MCInst TmpMI;

diff --git a/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel b/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel
index d2cb341..978f9e6 100644
--- a/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel

@@ -2149,7 +2149,7 @@
                 "lib/Target/AArch64/AArch64GenSubtargetInfo.inc",
             ),
             (
-                ["-gen-disassembler"],
+                ["-gen-disassembler", "--large-decoder-table"],
                 "lib/Target/AArch64/AArch64GenDisassemblerTables.inc",
             ),
             (
commit	e1bb7f6ddec37567230d3e46719aee5bcd268d5a	[log] [tgz]
author	Rahul Joshi <rjoshi@nvidia.com>	Mon Apr 21 08:15:08 2025 -0700
committer	GitHub <noreply@github.com>	Mon Apr 21 08:15:08 2025 -0700
tree	91fa11acfd1a9a0e871705ce2feb07e695085981
parent	cfc2b0d094f705aa0995eff0dc5f1faf1440a769 [diff]