diff --git a/llvm/include/llvm/TableGen/Record.h b/llvm/include/llvm/TableGen/Record.h
index 5849344bcb0b5..306988cd0fc53 100644
--- a/llvm/include/llvm/TableGen/Record.h
+++ b/llvm/include/llvm/TableGen/Record.h
@@ -803,6 +803,8 @@ class ListInit final : public TypedInit,
   size_t size() const { return NumElements; }
   bool empty() const { return NumElements == 0; }
 
+  std::vector<int64_t> getAsListOfInts() const;
+
   const Init *getBit(unsigned Bit) const override {
     llvm_unreachable("Illegal bit reference off list");
   }
diff --git a/llvm/include/llvm/Target/Target.td b/llvm/include/llvm/Target/Target.td
index ce9a2b2751968..643441bec4268 100644
--- a/llvm/include/llvm/Target/Target.td
+++ b/llvm/include/llvm/Target/Target.td
@@ -1137,6 +1137,15 @@ class OptionalDefOperand<ValueType ty, dag OpTypes, dag defaultops>
   let MIOperandInfo = OpTypes;
 }
 
+// InstrDecoderOption - This class is used to provide some options to the
+// TableGen DecoderEmitter backend.
+class InstrDecoderOption<string ty, list<int> bws> {
+  string CPPType = ty;       // C++ type for generating non-templated code.
+  list<int> Bitwidths = bws; // List of bitwidths supported by the above type.
+
+  assert !not(!empty(CPPType)), "CPP type cannot be empty";
+  assert !not(!empty(Bitwidths)), "Bitwidths cannot be empty";
+}
 
 // InstrInfo - This class should only be instantiated once to provide parameters
 // which are global to the target machine.
@@ -1158,6 +1167,17 @@ class InstrInfo {
   //
   // This option is a temporary migration help. It will go away.
   bit guessInstructionProperties = true;
+
+  // This is a list of instruction decoder options for this target. When non
+  // empty, it should list all the C++ types and associated bitwidths that this
+  // target intends to use to call the TableGen generated `decodeInstruction`
+  // function. If this list is empty, the decoder emitter will generate
+  // templated code. However, if a target intends to call this function with
+  // more than one `InsnType`, it may be beneficial to provide these decoder
+  // options to generate non-templated form of `decodeInstruction` and
+  // associated helper functions and avoid some code duplication in the
+  // `decodeToMCInst` function.
+  list<InstrDecoderOption> DecoderOptions = [];
 }
 
 // Standard Pseudo Instructions.
diff --git a/llvm/lib/TableGen/Record.cpp b/llvm/lib/TableGen/Record.cpp
index 7f2ed77a74099..69f6fa6edb49b 100644
--- a/llvm/lib/TableGen/Record.cpp
+++ b/llvm/lib/TableGen/Record.cpp
@@ -804,6 +804,26 @@ std::string ListInit::getAsString() const {
   return Result + "]";
 }
 
+/// Returns the elements of this list as int64_t values. Issues a fatal error
+/// if the list's element type is not `int`, or if an element has not been
+/// folded to an integer constant.
+std::vector<int64_t> ListInit::getAsListOfInts() const {
+  if (!isa<IntRecTy>(getElementType()))
+    PrintFatalError("List does not contain integer values");
+  std::vector<int64_t> Ints;
+  Ints.reserve(size());
+  for (const Init *I : getElements()) {
+    // An `int`-typed element may still be unset or unresolved; diagnose it
+    // rather than tripping the assertion inside cast<>.
+    const auto *II = dyn_cast<IntInit>(I);
+    if (!II)
+      PrintFatalError("List element is not an integer constant: " +
+                      I->getAsString());
+    Ints.push_back(II->getValue());
+  }
+  return Ints;
+}
+
 const Init *OpInit::getBit(unsigned Bit) const {
   if (getType() == BitRecTy::get(getRecordKeeper()))
     return this;
@@ -3119,32 +3128,36 @@ int64_t Record::getValueAsInt(StringRef FieldName) const {
 std::vector<int64_t>
 Record::getValueAsListOfInts(StringRef FieldName) const {
   const ListInit *List = getValueAsListInit(FieldName);
-  std::vector<int64_t> Ints;
-  for (const Init *I : List->getElements()) {
-    if (const auto *II = dyn_cast<IntInit>(I))
-      Ints.push_back(II->getValue());
-    else
-      PrintFatalError(getLoc(),
-                      Twine("Record `") + getName() + "', field `" + FieldName +
-                          "' exists but does not have a list of ints value: " +
-                          I->getAsString());
-  }
-  return Ints;
+  if (!isa<IntRecTy>(List->getElementType()))
+    PrintFatalError(getLoc(),
+                    Twine("Record `") + getName() + "', field `" + FieldName +
+                        "' exists but does not have a list of ints value: " +
+                        List->getAsString());
+  return List->getAsListOfInts();
 }
 
 std::vector<StringRef>
 Record::getValueAsListOfStrings(StringRef FieldName) const {
   const ListInit *List = getValueAsListInit(FieldName);
+  // Diagnose a list of the wrong element type as a whole, before looking at
+  // individual elements.
+  if (!isa<StringRecTy>(List->getElementType()))
+    PrintFatalError(getLoc(),
+                    Twine("Record `") + getName() + "', field `" + FieldName +
+                        "' exists but does not have a list of strings value: " +
+                        List->getAsString());
+
   std::vector<StringRef> Strings;
+  Strings.reserve(List->size());
   for (const Init *I : List->getElements()) {
     if (const auto *SI = dyn_cast<StringInit>(I))
       Strings.push_back(SI->getValue());
     else
       PrintFatalError(getLoc(),
                       Twine("Record `") + getName() + "', field `" + FieldName +
                           "' exists but does not have a list of strings value: " +
                           I->getAsString());
   }
   return Strings;
 }
 
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
index 1a1c32fba9d18..0ce66f7b88cb1 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -1973,6 +1973,13 @@ def FeatureISAVersion12_Generic: FeatureSet<
 
 def AMDGPUInstrInfo : InstrInfo {
   let guessInstructionProperties = 1;
+
+  // Opt in to non-templated code for the instruction decoder.
+  let DecoderOptions = [
+    InstrDecoderOption<"uint32_t", [32]>,
+    InstrDecoderOption<"uint64_t", [64]>,
+    InstrDecoderOption<"DecoderUInt128", [96, 128]>,
+  ];
 }
 
 def AMDGPUAsmParser : AsmParser {
diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
index 59c72fcbff18a..cd582de176c25 100644
--- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
+++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
@@ -591,7 +591,7 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
 
     // Try to decode DPP and SDWA first to solve conflict with VOP1 and VOP2
     // encodings
-    if (isGFX11Plus() && Bytes.size() >= 12 ) {
+    if (isGFX11Plus() && Bytes.size() >= 12) {
       DecoderUInt128 DecW = eat12Bytes(Bytes);
 
       if (isGFX11() &&
diff --git a/llvm/lib/Target/M68k/Disassembler/M68kDisassembler.cpp b/llvm/lib/Target/M68k/Disassembler/M68kDisassembler.cpp
index 4ec18fe6bf544..ee453cfc0924e 100644
--- a/llvm/lib/Target/M68k/Disassembler/M68kDisassembler.cpp
+++ b/llvm/lib/Target/M68k/Disassembler/M68kDisassembler.cpp
@@ -111,13 +111,13 @@ static DecodeStatus DecodeFPCSCRegisterClass(MCInst &Inst, uint64_t RegNo,
 }
 #define DecodeFPICRegisterClass DecodeFPCSCRegisterClass
 
-static DecodeStatus DecodeCCRCRegisterClass(MCInst &Inst, APInt &Insn,
+static DecodeStatus DecodeCCRCRegisterClass(MCInst &Inst, const APInt &Insn,
                                             uint64_t Address,
                                             const void *Decoder) {
   llvm_unreachable("unimplemented");
 }
 
-static DecodeStatus DecodeSRCRegisterClass(MCInst &Inst, APInt &Insn,
+static DecodeStatus DecodeSRCRegisterClass(MCInst &Inst, const APInt &Insn,
                                            uint64_t Address,
                                            const void *Decoder) {
   llvm_unreachable("unimplemented");
diff --git a/llvm/test/TableGen/DecoderEmitterFnTable.td b/llvm/test/TableGen/DecoderEmitterFnTable.td
index 7bed18c19a513..837ec3bdc5c84 100644
--- a/llvm/test/TableGen/DecoderEmitterFnTable.td
+++ b/llvm/test/TableGen/DecoderEmitterFnTable.td
@@ -71,11 +71,11 @@ def Inst3 : TestInstruction {
   let AsmString = "Inst3";
 }
 
-// CHECK-LABEL: DecodeStatus decodeFn0(DecodeStatus S, InsnType insn, MCInst &MI, uint64_t Address, const MCDisassembler *Decoder, bool &DecodeComplete)
-// CHECK-LABEL: DecodeStatus decodeFn1(DecodeStatus S, InsnType insn, MCInst &MI, uint64_t Address, const MCDisassembler *Decoder, bool &DecodeComplete)
-// CHECK-LABEL: DecodeStatus decodeFn2(DecodeStatus S, InsnType insn, MCInst &MI, uint64_t Address, const MCDisassembler *Decoder, bool &DecodeComplete)
-// CHECK-LABEL: DecodeStatus decodeFn3(DecodeStatus S, InsnType insn, MCInst &MI, uint64_t Address, const MCDisassembler *Decoder, bool &DecodeComplete)
-// CHECK-LABEL: decodeToMCInst(unsigned Idx, DecodeStatus S, InsnType insn, MCInst &MI, uint64_t Address, const MCDisassembler *Decoder, bool &DecodeComplete)
+// CHECK-LABEL: DecodeStatus decodeFn0(DecodeStatus S, const InsnType &insn, MCInst &MI, uint64_t Address, const MCDisassembler *Decoder, bool &DecodeComplete)
+// CHECK-LABEL: DecodeStatus decodeFn1(DecodeStatus S, const InsnType &insn, MCInst &MI, uint64_t Address, const MCDisassembler *Decoder, bool &DecodeComplete)
+// CHECK-LABEL: DecodeStatus decodeFn2(DecodeStatus S, const InsnType &insn, MCInst &MI, uint64_t Address, const MCDisassembler *Decoder, bool &DecodeComplete)
+// CHECK-LABEL: DecodeStatus decodeFn3(DecodeStatus S, const InsnType &insn, MCInst &MI, uint64_t Address, const MCDisassembler *Decoder, bool &DecodeComplete)
+// CHECK-LABEL: decodeToMCInst(unsigned Idx, DecodeStatus S, const InsnType &insn, MCInst &MI, uint64_t Address, const MCDisassembler *Decoder, bool &DecodeComplete)
 // CHECK: static constexpr DecodeFnTy decodeFnTable[]
 // CHECK-NEXT: decodeFn0,
 // CHECK-NEXT: decodeFn1,
diff --git a/llvm/test/TableGen/VarLenDecoder.td b/llvm/test/TableGen/VarLenDecoder.td
index 06ff62294a196..d438e64264cbd 100644
--- a/llvm/test/TableGen/VarLenDecoder.td
+++ b/llvm/test/TableGen/VarLenDecoder.td
@@ -47,6 +47,12 @@ def FOO32 : MyVarInst<MemOp32> {
   );
 }
 
+// Instruction length table
+// CHECK-LABEL: InstrLenTable
+// CHECK: 27,
+// CHECK-NEXT: 43,
+// CHECK-NEXT: };
+
 // CHECK-SMALL:      /* 0 */       MCD::OPC_ExtractField, 3, 5,  // Inst{7-3} ...
 // CHECK-SMALL-NEXT: /* 3 */       MCD::OPC_FilterValue, 8, 4, 0, // Skip to: 11
 // CHECK-SMALL-NEXT: /* 7 */       MCD::OPC_Decode, {{[0-9]+}}, {{[0-9]+}}, 0, // Opcode: FOO16
@@ -61,11 +67,7 @@ def FOO32 : MyVarInst<MemOp32> {
 // CHECK-LARGE-NEXT: /* 14 */      MCD::OPC_Decode, {{[0-9]+}}, {{[0-9]+}}, 1, // Opcode: FOO32
 // CHECK-LARGE-NEXT: /* 18 */      MCD::OPC_Fail,
 
-// Instruction length table
-// CHECK: 27,
-// CHECK-NEXT: 43,
-// CHECK-NEXT: };
-
+// CHECK-LABEL: decodeToMCInst
 // CHECK:      case 0:
 // CHECK-NEXT: tmp = fieldFromInstruction(insn, 8, 3);
 // CHECK-NEXT: if (!Check(S, DecodeRegClassRegisterClass(MI, tmp, Address, Decoder))) { return MCDisassembler::Fail; }
@@ -85,6 +87,7 @@ def FOO32 : MyVarInst<MemOp32> {
 // CHECK-NEXT: MI.addOperand(MCOperand::createImm(tmp));
 // CHECK-NEXT: return S;
 
+// CHECK-LABEL: decodeInstruction
 // CHECK-LABEL: case MCD::OPC_ExtractField: {
 // CHECK: makeUp(insn, Start + Len);
 
diff --git a/llvm/test/TableGen/trydecode-emission.td b/llvm/test/TableGen/trydecode-emission.td
index c3178dd71cf4b..8e427c8a7107b 100644
--- a/llvm/test/TableGen/trydecode-emission.td
+++ b/llvm/test/TableGen/trydecode-emission.td
@@ -34,6 +34,17 @@ def InstB : TestInstruction {
   let hasCompleteDecoder = 0;
 }
 
+// CHECK-LABEL: decodeNumToSkip
+// CHECK-NEXT:  unsigned NumToSkip = *Ptr++;
+// CHECK-NEXT:  NumToSkip |= (*Ptr++) << 8;
+// CHECK-NEXT:  return NumToSkip;
+
+// CHECK-LARGE-LABEL: decodeNumToSkip
+// CHECK-LARGE-NEXT:  unsigned NumToSkip = *Ptr++;
+// CHECK-LARGE-NEXT:  NumToSkip |= (*Ptr++) << 8;
+// CHECK-LARGE-NEXT:  NumToSkip |= (*Ptr++) << 16;
+// CHECK-LARGE-NEXT:  return NumToSkip;
+
 // CHECK:      /* 0 */       MCD::OPC_ExtractField, 4, 4,  // Inst{7-4} ...
 // CHECK-NEXT: /* 3 */       MCD::OPC_FilterValueOrFail, 0,
 // CHECK-NEXT: /* 5 */       MCD::OPC_CheckField, 2, 2, 0, 6, 0, // Skip to: 17
@@ -43,10 +54,6 @@ def InstB : TestInstruction {
 
 // CHECK: if (!Check(S, DecodeInstB(MI, insn, Address, Decoder))) { DecodeComplete = false; return MCDisassembler::Fail; }
 
-// CHECK:       unsigned NumToSkip = *Ptr++;
-// CHECK-NEXT:  NumToSkip |= (*Ptr++) << 8;
-// CHECK-NEXT:  return NumToSkip;
-
 // CHECK-LARGE:      /* 0 */       MCD::OPC_ExtractField, 4, 4,  // Inst{7-4} ...
 // CHECK-LARGE-NEXT: /* 3 */       MCD::OPC_FilterValueOrFail, 0,
 // CHECK-LARGE-NEXT: /* 5 */       MCD::OPC_CheckField, 2, 2, 0, 7, 0, 0, // Skip to: 19
@@ -55,8 +62,3 @@ def InstB : TestInstruction {
 // CHECK-LARGE-NEXT: /* 23 */      MCD::OPC_Fail,
 
 // CHECK-LARGE: if (!Check(S, DecodeInstB(MI, insn, Address, Decoder))) { DecodeComplete = false; return MCDisassembler::Fail; }
-
-// CHECK-LARGE:       unsigned NumToSkip = *Ptr++;
-// CHECK-LARGE-NEXT:  NumToSkip |= (*Ptr++) << 8;
-// CHECK-LARGE-NEXT:  NumToSkip |= (*Ptr++) << 16;
-// CHECK-LARGE-NEXT:  return NumToSkip;
diff --git a/llvm/utils/TableGen/DecoderEmitter.cpp b/llvm/utils/TableGen/DecoderEmitter.cpp
index d582309a6fd4a..10f72ffa5b172 100644
--- a/llvm/utils/TableGen/DecoderEmitter.cpp
+++ b/llvm/utils/TableGen/DecoderEmitter.cpp
@@ -23,6 +23,7 @@
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/SmallBitVector.h"
+#include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/ADT/StringExtras.h"
@@ -216,6 +217,16 @@ struct EncodingIDAndOpcode {
 using EncodingIDsVec = std::vector<EncodingIDAndOpcode>;
 using NamespacesHwModesMap = std::map<std::string, std::set<StringRef>>;
 
+// Result of parsing the `DecoderOptions` field in the Target instruction set.
+using BitwidthSet = SmallSet<unsigned, 4>;
+struct DecoderOption {
+  StringRef CPPType;
+  BitwidthSet Bitwidths;
+
+  DecoderOption(StringRef CPPType, BitwidthSet Bitwidths)
+      : CPPType(CPPType), Bitwidths(std::move(Bitwidths)) {}
+};
+
 class DecoderEmitter {
   const RecordKeeper &RK;
   std::vector<EncodingAndInst> NumberedEncodings;
@@ -234,12 +245,14 @@ class DecoderEmitter {
   void emitPredicateFunction(formatted_raw_ostream &OS,
                              PredicateSet &Predicates, indent Indent) const;
   void emitDecoderFunction(formatted_raw_ostream &OS, DecoderSet &Decoders,
-                           indent Indent) const;
+                           StringRef SpecializedInsnType, indent Indent) const;
 
   // run - Output the code emitter
   void run(raw_ostream &o);
 
 private:
+  SmallVector<DecoderOption> parseDecoderOptions(BitwidthSet &InstrBitwidths);
+
   CodeGenTarget Target;
 
 public:
@@ -1066,26 +1079,41 @@ void DecoderEmitter::emitPredicateFunction(formatted_raw_ostream &OS,
   OS << Indent << "}\n\n";
 }
 
+static void emitTemplate(formatted_raw_ostream &OS,
+                         StringRef SpecializedInsnType) {
+  if (SpecializedInsnType.empty())
+    OS << "template <typename InsnType>\n";
+}
+
+static StringRef getInsnType(StringRef SpecializedInsnType) {
+  return SpecializedInsnType.empty() ? "InsnType" : SpecializedInsnType;
+}
+
 void DecoderEmitter::emitDecoderFunction(formatted_raw_ostream &OS,
                                          DecoderSet &Decoders,
+                                         StringRef SpecializedInsnType,
                                          indent Indent) const {
   // The decoder function is just a big switch statement or a table of function
   // pointers based on the input decoder index.
 
+  StringRef InsnType = getInsnType(SpecializedInsnType);
+
   // TODO: When InsnType is large, using uint64_t limits all fields to 64 bits
   // It would be better for emitBinaryParser to use a 64-bit tmp whenever
   // possible but fall back to an InsnType-sized tmp for truly large fields.
-  StringRef TmpTypeDecl =
-      "using TmpType = std::conditional_t<std::is_integral<InsnType>::value, "
-      "InsnType, uint64_t>;\n";
-  StringRef DecodeParams =
-      "DecodeStatus S, InsnType insn, MCInst &MI, uint64_t Address, const "
-      "MCDisassembler *Decoder, bool &DecodeComplete";
+  auto TmpTypeDecl = formatv(
+      "using TmpType = std::conditional_t<std::is_integral<{0}>::value, {0}, "
+      "uint64_t>;\n",
+      InsnType);
+  auto DecodeParams =
+      formatv("DecodeStatus S, const {} &insn, MCInst &MI, uint64_t Address, "
+              "const MCDisassembler *Decoder, bool &DecodeComplete",
+              InsnType);
 
   if (UseFnTableInDecodeToMCInst) {
     // Emit a function for each case first.
     for (const auto &[Index, Decoder] : enumerate(Decoders)) {
-      OS << Indent << "template <typename InsnType>\n";
+      emitTemplate(OS, SpecializedInsnType);
       OS << Indent << "DecodeStatus decodeFn" << Index << "(" << DecodeParams
          << ") {\n";
       Indent += 2;
@@ -1099,7 +1127,7 @@ void DecoderEmitter::emitDecoderFunction(formatted_raw_ostream &OS,
   }
 
   OS << Indent << "// Handling " << Decoders.size() << " cases.\n";
-  OS << Indent << "template <typename InsnType>\n";
+  emitTemplate(OS, SpecializedInsnType);
   OS << Indent << "static DecodeStatus decodeToMCInst(unsigned Idx, "
      << DecodeParams << ") {\n";
   Indent += 2;
@@ -2174,12 +2202,19 @@ populateInstruction(const CodeGenTarget &Target, const Record &EncodingDef,
   return Bits.getNumBits();
 }
 
+static bool isKnownIntegralType(StringRef InsnType) {
+  // Fixed-width unsigned types that take the integral code paths.
+  return llvm::is_contained({"uint8_t", "uint16_t", "uint32_t", "uint64_t"},
+                            InsnType);
+}
+
 // emitFieldFromInstruction - Emit the templated helper function
 // fieldFromInstruction().
 // On Windows we make sure that this function is not inlined when
 // using the VS compiler. It has a bug which causes the function
 // to be optimized out in some circumstances. See llvm.org/pr38292
-static void emitFieldFromInstruction(formatted_raw_ostream &OS) {
+static void emitFieldFromInstruction(formatted_raw_ostream &OS,
+                                     StringRef SpecializedInsnType) {
   OS << R"(
 // Helper functions for extracting fields from encoded instructions.
 // InsnType must either be integral or an APInt-like object that must:
@@ -2190,56 +2225,125 @@ static void emitFieldFromInstruction(formatted_raw_ostream &OS) {
 // * Support the ~, &, ==, and != operators with other objects of the same type
 // * Support the != and bitwise & with uint64_t
 // * Support put (<<) to raw_ostream&
-template <typename InsnType>
+
+// Helper macro to disable inlining of `fieldFromInstruction`.
 #if defined(_MSC_VER) && !defined(__clang__)
-__declspec(noinline)
+#define DEC_EMIT_NO_INLINE __declspec(noinline)
+#else
+#define DEC_EMIT_NO_INLINE
 #endif
-static std::enable_if_t<std::is_integral<InsnType>::value, InsnType>
-fieldFromInstruction(const InsnType &insn, unsigned startBit,
-                     unsigned numBits) {
+
+)";
+  StringRef InsnType = getInsnType(SpecializedInsnType);
+
+  // If InsnType is not a template type argument, we cannot use std::enable_if_t
+  // to enable or disable one of the versions of `fieldFromInstruction`. Use a
+  // set of pre-defined strings to detect which version of
+  // `fieldFromInstruction` to emit.
+  bool IsIntegralType = isKnownIntegralType(InsnType);
+  bool GenerateTemplatedForm = SpecializedInsnType.empty();
+
+  if (GenerateTemplatedForm || IsIntegralType) {
+    if (GenerateTemplatedForm) {
+      emitTemplate(OS, SpecializedInsnType);
+      OS << formatv("DEC_EMIT_NO_INLINE static "
+                    "std::enable_if_t<std::is_integral<{0}>::value, {0}>\n",
+                    InsnType);
+    } else {
+      OS << formatv("DEC_EMIT_NO_INLINE static {} ", InsnType);
+    }
+
+    OS << formatv(
+        R"(fieldFromInstruction(const {0} &insn, unsigned startBit, unsigned numBits) {{
   assert(startBit + numBits <= 64 && "Cannot support >64-bit extractions!");
-  assert(startBit + numBits <= (sizeof(InsnType) * 8) &&
+  assert(startBit + numBits <= (sizeof({0}) * 8) &&
          "Instruction field out of bounds!");
-  InsnType fieldMask;
-  if (numBits == sizeof(InsnType) * 8)
-    fieldMask = (InsnType)(-1LL);
+  {0} fieldMask;
+  if (numBits == sizeof({0}) * 8)
+    fieldMask = ({0})(-1LL);
   else
-    fieldMask = (((InsnType)1 << numBits) - 1) << startBit;
+    fieldMask = ((({0})1 << numBits) - 1) << startBit;
   return (insn & fieldMask) >> startBit;
 }
 
-template <typename InsnType>
-static std::enable_if_t<!std::is_integral<InsnType>::value, uint64_t>
-fieldFromInstruction(const InsnType &insn, unsigned startBit,
-                     unsigned numBits) {
+)",
+        InsnType);
+  } // if (GenerateTemplatedForm || IsIntegralType) {
+
+  if (GenerateTemplatedForm || !IsIntegralType) {
+    if (GenerateTemplatedForm) {
+      emitTemplate(OS, SpecializedInsnType);
+      OS << formatv(
+          "static std::enable_if_t<!std::is_integral<{0}>::value, uint64_t>\n",
+          InsnType);
+    } else {
+      OS << formatv("static uint64_t ");
+    }
+
+    OS << formatv(R"(fieldFromInstruction(const {0} &insn, unsigned startBit,
+                     unsigned numBits) {{
   return insn.extractBitsAsZExtValue(numBits, startBit);
 }
-)";
+)",
+                  InsnType);
+  } // if (GenerateTemplatedForm || !IsIntegralType)
+  OS << "#undef DEC_EMIT_NO_INLINE\n";
 }
 
 // emitInsertBits - Emit the templated helper function insertBits().
-static void emitInsertBits(formatted_raw_ostream &OS) {
+static void emitInsertBits(formatted_raw_ostream &OS,
+                           StringRef SpecializedInsnType) {
+  bool GenerateTemplatedForm = SpecializedInsnType.empty();
+  StringRef InsnType = getInsnType(SpecializedInsnType);
+  bool IsIntegralType = isKnownIntegralType(InsnType);
+
+  auto FuncDecl = formatv(R"([[maybe_unused]]
+static void insertBits({0} &field, {0} bits, unsigned startBit,
+                       unsigned numBits) {{)",
+                          InsnType);
+
   OS << R"(
 // Helper function for inserting bits extracted from an encoded instruction into
 // a field.
-template <typename InsnType>
-static void insertBits(InsnType &field, InsnType bits, unsigned startBit,
-                       unsigned numBits) {
-  if constexpr (std::is_integral<InsnType>::value) {
+)";
+  if (GenerateTemplatedForm) {
+    emitTemplate(OS, SpecializedInsnType);
+    OS << FuncDecl;
+    OS << formatv(R"(
+  if constexpr (std::is_integral<{0}>::value) {{
     assert(startBit + numBits <= sizeof field * 8);
     (void)numBits;
-    field |= (InsnType)bits << startBit;
-  } else {
+    field |= ({0})bits << startBit;
+  } else {{
     field.insertBits(bits, startBit, numBits);
   }
 }
+)",
+                  InsnType);
+  } else if (IsIntegralType) {
+    OS << FuncDecl;
+    OS << formatv(R"(
+  assert(startBit + numBits <= sizeof field * 8);
+  (void)numBits;
+  field |= ({0})bits << startBit;
+}
+)",
+                  InsnType);
+  } else {
+    // Code for !IsIntegralType
+    OS << FuncDecl;
+    OS << R"(
+  field.insertBits(bits, startBit, numBits);
+}
 )";
+  }
+  OS << '\n';
 }
 
-// emitDecodeInstruction - Emit the templated helper function
-// decodeInstruction().
+// emitDecodeInstruction - Emit the entry function decodeInstruction().
 static void emitDecodeInstruction(formatted_raw_ostream &OS, bool IsVarLenInst,
-                                  unsigned OpcodeMask) {
+                                  unsigned OpcodeMask,
+                                  StringRef SpecializedInsnType) {
   const bool HasTryDecode = OpcodeMask & ((1 << MCD::OPC_TryDecode) |
                                           (1 << MCD::OPC_TryDecodeOrFail));
   const bool HasCheckPredicate =
@@ -2247,19 +2351,19 @@ static void emitDecodeInstruction(formatted_raw_ostream &OS, bool IsVarLenInst,
       ((1 << MCD::OPC_CheckPredicate) | (1 << MCD::OPC_CheckPredicateOrFail));
   const bool HasSoftFail = OpcodeMask & (1 << MCD::OPC_SoftFail);
 
-  OS << R"(
-static unsigned decodeNumToSkip(const uint8_t *&Ptr) {
-  unsigned NumToSkip = *Ptr++;
-  NumToSkip |= (*Ptr++) << 8;
-)";
-  if (getNumToSkipInBytes() == 3)
-    OS << "  NumToSkip |= (*Ptr++) << 16;\n";
-  OS << R"(  return NumToSkip;
-}
+  StringRef InsnType = getInsnType(SpecializedInsnType);
 
-template <typename InsnType>
-static DecodeStatus decodeInstruction(const uint8_t DecodeTable[], MCInst &MI,
-                                      InsnType insn, uint64_t Address,
+  emitTemplate(OS, SpecializedInsnType);
+  OS << R"(
+static DecodeStatus decodeInstruction(const uint8_t DecodeTable[], MCInst &MI, )";
+  // For variable length instructions, use a non-const reference to match the
+  // signature of the `makeUp` function passed in.
+  if (IsVarLenInst)
+    OS << InsnType << " &insn,";
+  else
+    OS << "const " << InsnType << " &insn,";
+  OS << R"(
+                                      uint64_t Address,
                                       const MCDisassembler *DisAsm,
                                       const MCSubtargetInfo &STI)";
   if (IsVarLenInst) {
@@ -2463,16 +2567,31 @@ static DecodeStatus decodeInstruction(const uint8_t DecodeTable[], MCInst &MI,
 )";
 }
 
+static void emitCommonFunctions(formatted_raw_ostream &OS) {
+  OS << R"(
 // Helper to propagate SoftFail status. Returns false if the status is Fail;
 // callers are expected to early-exit in that condition. (Note, the '&' operator
 // is correct to propagate the values of this enum; see comment on 'enum
 // DecodeStatus'.)
-static void emitCheck(formatted_raw_ostream &OS) {
-  OS << R"(
 static bool Check(DecodeStatus &Out, DecodeStatus In) {
   Out = static_cast<DecodeStatus>(Out & In);
   return Out != MCDisassembler::Fail;
 }
+)";
+
+  OS << R"(
+// Helper to decode the `NumToSkip` value encoded in the decoder table.
+static unsigned decodeNumToSkip(const uint8_t *&Ptr) {
+  unsigned NumToSkip = *Ptr++;
+  NumToSkip |= (*Ptr++) << 8;
+)";
+  if (getNumToSkipInBytes() == 3)
+    OS << "  NumToSkip |= (*Ptr++) << 16;\n";
+  OS << R"(  return NumToSkip;
+}
+
+// Forward declaration.
+[[maybe_unused]] static bool checkDecoderPredicate(unsigned Idx, const FeatureBitset &Bits);
 
 )";
 }
@@ -2537,6 +2656,92 @@ handleHwModesUnrelatedEncodings(const CodeGenInstruction *Instr,
     break;
   }
 }
+
+// Parses the `DecoderOptions` field of the target's instruction set.
+//
+// Returns one DecoderOption per list entry. If the list is empty, returns a
+// single option with an empty CPPType and no bitwidths, which triggers
+// generation of templated code.
+//
+// `InstrBitwidths` holds the bitwidths of the instructions being decoded.
+// Bitwidths claimed by an option are erased from it, so the caller's set is
+// modified. A fatal error is issued if a C++ type or a bitwidth is listed
+// more than once, if a bitwidth does not fit in `unsigned` or matches no
+// instruction, or if an instruction bitwidth is not covered by any option.
+SmallVector<DecoderOption>
+DecoderEmitter::parseDecoderOptions(BitwidthSet &InstrBitwidths) {
+  SmallVector<DecoderOption> Parsed;
+
+  const Record *InstructionSet = Target.getInstructionSet();
+  std::vector<const Record *> DecoderOptions =
+      InstructionSet->getValueAsListOfDefs("DecoderOptions");
+
+  if (DecoderOptions.empty()) {
+    // If no `DecoderOptions` is specified in the instruction info, create one
+    // with empty values, which will trigger generation of templated code.
+    Parsed.emplace_back(StringRef(""), BitwidthSet{});
+    return Parsed;
+  }
+
+  Parsed.reserve(DecoderOptions.size());
+  // Bitwidths claimed so far across all options, used to reject duplicates.
+  BitwidthSet OptionBitwidths;
+
+  for (const Record *Option : DecoderOptions) {
+    // Use field locations for error reporting.
+    SMLoc CPPTypesLoc = Option->getFieldLoc("CPPType");
+    SMLoc BitwidthsLoc = Option->getFieldLoc("Bitwidths");
+
+    StringRef CPPType = Option->getValueAsString("CPPType");
+    if (CPPType.empty())
+      PrintFatalError(CPPTypesLoc,
+                      "CPP Type cannot be empty in DecoderOptions");
+
+    // Helper and decoder functions are emitted once per option without
+    // templates, so a repeated type would redefine them in the output.
+    if (llvm::any_of(Parsed, [CPPType](const DecoderOption &Opt) {
+          return Opt.CPPType == CPPType;
+        }))
+      PrintFatalError(CPPTypesLoc,
+                      "CPP Type " + CPPType + " already specified.");
+
+    const ListInit *BWL = Option->getValueAsListInit("Bitwidths");
+    if (!BWL || BWL->empty())
+      PrintFatalError(BitwidthsLoc,
+                      "No bitwidths specified for CPPType : " + CPPType);
+
+    BitwidthSet Bitwidths;
+    for (int64_t Bitwidth : BWL->getAsListOfInts()) {
+      // The sets store `unsigned`; reject values that would change when
+      // narrowed instead of silently matching a different bitwidth.
+      const auto Width = static_cast<unsigned>(Bitwidth);
+      if (static_cast<int64_t>(Width) != Bitwidth)
+        PrintFatalError(BitwidthsLoc,
+                        "Invalid bitwidth " + Twine(Bitwidth) + " specified.");
+
+      if (!OptionBitwidths.insert(Width).second)
+        PrintFatalError(BitwidthsLoc,
+                        "Bitwidth " + Twine(Bitwidth) + " already specified.");
+
+      if (!InstrBitwidths.contains(Width))
+        PrintFatalError(BitwidthsLoc, "No instruction of bitwidth " +
+                                          Twine(Bitwidth) + " supported.");
+      InstrBitwidths.erase(Width);
+      Bitwidths.insert(Width);
+    }
+    Parsed.emplace_back(CPPType, std::move(Bitwidths));
+  }
+
+  if (!InstrBitwidths.empty()) {
+    // FIXME: Add PrintFatalError that accepts a location and a function_ref.
+    PrintFatalError([&InstrBitwidths](raw_ostream &OS) {
+      OS << "Bitwidth(s) ";
+      interleaveComma(InstrBitwidths, OS);
+      OS << " missing in DecoderOptions";
+    });
+  }
+  return Parsed;
+}
 
 // Emits disassembler code for instruction decoding.
 void DecoderEmitter::run(raw_ostream &o) {
@@ -2554,9 +2731,7 @@ void DecoderEmitter::run(raw_ostream &o) {
 namespace {
 )";
 
-  emitFieldFromInstruction(OS);
-  emitInsertBits(OS);
-  emitCheck(OS);
+  emitCommonFunctions(OS);
 
   Target.reverseBitsForLittleEndianEncoding();
 
@@ -2606,7 +2781,7 @@ namespace {
       OpcMap;
   std::map<unsigned, std::vector<OperandInfo>> Operands;
   std::vector<unsigned> InstrLen;
-  bool IsVarLenInst = Target.hasVariableLengthEncodings();
+  const bool IsVarLenInst = Target.hasVariableLengthEncodings();
   unsigned MaxInstLen = 0;
 
   for (const auto &[NEI, NumberedEncoding] : enumerate(NumberedEncodings)) {
@@ -2649,57 +2824,87 @@ namespace {
     }
   }
 
-  DecoderTableInfo TableInfo;
-  unsigned OpcodeMask = 0;
-  for (const auto &[NSAndByteSize, EncodingIDs] : OpcMap) {
-    const std::string &DecoderNamespace = NSAndByteSize.first;
-    const unsigned BitWidth = 8 * NSAndByteSize.second;
-    // Emit the decoder for this namespace+width combination.
-    FilterChooser FC(NumberedEncodings, EncodingIDs, Operands,
-                     IsVarLenInst ? MaxInstLen : BitWidth, this);
-
-    // The decode table is cleared for each top level decoder function. The
-    // predicates and decoders themselves, however, are shared across all
-    // decoders to give more opportunities for uniqueing.
-    TableInfo.Table.clear();
-    TableInfo.FixupStack.clear();
-    TableInfo.FixupStack.emplace_back();
-    FC.emitTableEntries(TableInfo);
-    // Any NumToSkip fixups in the top level scope can resolve to the
-    // OPC_Fail at the end of the table.
-    assert(TableInfo.FixupStack.size() == 1 && "fixup stack phasing error!");
-    // Resolve any NumToSkip fixups in the current scope.
-    resolveTableFixups(TableInfo.Table, TableInfo.FixupStack.back(),
-                       TableInfo.Table.size());
-    TableInfo.FixupStack.clear();
-
-    TableInfo.Table.push_back(MCD::OPC_Fail);
-
-    // Print the table to the output stream.
-    OpcodeMask |= emitTable(OS, TableInfo.Table, indent(0), FC.getBitWidth(),
-                            DecoderNamespace, EncodingIDs);
-  }
-
   // For variable instruction, we emit a instruction length table
   // to let the decoder know how long the instructions are.
   // You can see example usage in M68k's disassembler.
   if (IsVarLenInst)
     emitInstrLenTable(OS, InstrLen);
 
-  const bool HasCheckPredicate =
-      OpcodeMask &
-      ((1 << MCD::OPC_CheckPredicate) | (1 << MCD::OPC_CheckPredicateOrFail));
+  // Collect the bitwidths instructions are decoded with; variable length
+  // encodings all use MaxInstLen, matching the table emission loop below.
+  BitwidthSet InstrBitwidths;
+  for (const auto &[NSAndByteSize, _] : OpcMap)
+    InstrBitwidths.insert(IsVarLenInst ? MaxInstLen
+                                       : 8 * NSAndByteSize.second);
+  SmallVector<DecoderOption> DecoderOptions =
+      parseDecoderOptions(InstrBitwidths);
+
+  DecoderTableInfo TableInfo;
+  bool HasCheckPredicate = false;
+  for (const auto &[CPPType, Bitwidths] : DecoderOptions) {
+    // Reset the Decoders for each non-templated type.
+    TableInfo.Decoders.clear();
+    unsigned OpcodeMask = 0;
+
+    if (!CPPType.empty()) {
+      OS << "// ------------------------------------------------------------\n";
+      OS << "// Decoder tables and functions for bitwidths: ";
+      interleaveComma(Bitwidths, OS);
+      OS << "\n// Using InsnType = " << CPPType << '\n';
+    }
+
+    emitFieldFromInstruction(OS, CPPType);
+    emitInsertBits(OS, CPPType);
+
+    for (const auto &[NSAndByteSize, EncodingIDs] : OpcMap) {
+      const std::string &DecoderNamespace = NSAndByteSize.first;
+      const unsigned InstrBitwidth =
+          IsVarLenInst ? MaxInstLen : 8 * NSAndByteSize.second;
+
+      // Only handle instruction of the non-templated bitwidth size when
+      // non-templated bitwidth option is enabled.
+      if (!Bitwidths.empty() && !Bitwidths.contains(InstrBitwidth))
+        continue;
+
+      // Emit the decoder for this namespace+width combination.
+      FilterChooser FC(NumberedEncodings, EncodingIDs, Operands,
+                       IsVarLenInst ? MaxInstLen : InstrBitwidth, this);
+
+      // The decode table is cleared for each top level decoder function. The
+      // predicates and decoders themselves, however, are shared across all
+      // decoders to give more opportunities for uniqueing.
+      TableInfo.Table.clear();
+      TableInfo.FixupStack.clear();
+      TableInfo.FixupStack.emplace_back();
+      FC.emitTableEntries(TableInfo);
+      // Any NumToSkip fixups in the top level scope can resolve to the
+      // OPC_Fail at the end of the table.
+      assert(TableInfo.FixupStack.size() == 1 && "fixup stack phasing error!");
+      // Resolve any NumToSkip fixups in the current scope.
+      resolveTableFixups(TableInfo.Table, TableInfo.FixupStack.back(),
+                         TableInfo.Table.size());
+      TableInfo.FixupStack.clear();
+
+      TableInfo.Table.push_back(MCD::OPC_Fail);
+
+      // Print the table to the output stream.
+      OpcodeMask |= emitTable(OS, TableInfo.Table, indent(0), FC.getBitWidth(),
+                              DecoderNamespace, EncodingIDs);
+    }
+
+    // Emit the decoder function for this BitWidth.
+    emitDecoderFunction(OS, TableInfo.Decoders, CPPType, indent(0));
+
+    HasCheckPredicate |= OpcodeMask & ((1 << MCD::OPC_CheckPredicate) |
+                                       (1 << MCD::OPC_CheckPredicateOrFail));
+
+    emitDecodeInstruction(OS, IsVarLenInst, OpcodeMask, CPPType);
+  }
 
   // Emit the predicate function.
   if (HasCheckPredicate)
     emitPredicateFunction(OS, TableInfo.Predicates, indent(0));
 
-  // Emit the decoder function.
-  emitDecoderFunction(OS, TableInfo.Decoders, indent(0));
-
-  // Emit the main entry point for the decoder, decodeInstruction().
-  emitDecodeInstruction(OS, IsVarLenInst, OpcodeMask);
-
   OS << "\n} // namespace\n";
 }