Skip to content

Commit e6fb973

Browse files
committed
[Hexagon][llvm-objdump] Improve disassembly of Hexagon bundles
Hexagon instructions are VLIW "bundles" of up to four instruction words, encoded as a single MCInst with operands for each sub-instruction. Previously, the disassembler's getInstruction() returned the full bundle, which made it difficult to work with in llvm-objdump. For example, since all instructions are bundles, and bundles do not branch, branch targets could not be printed.

This patch modifies the Hexagon disassembler to return individual sub-instructions instead of entire bundles, enabling correct printing of branch targets and relocations. It also introduces `MCDisassembler::getInstructionBundle` for cases where the full bundle is still needed.

By default, llvm-objdump separates instructions with newlines. However, this does not work well for Hexagon syntax:

  {
    inst1
    inst2
    inst3
    inst4 <branch>
  } :endloop0

Instructions may be followed by a closing brace, a closing brace with `:endloop`, or a newline. Branches must appear within the braces. To address this, `PrettyPrinter::getInstructionSeparator()` is added and overridden for Hexagon.
1 parent 00f6d6a commit e6fb973

File tree

7 files changed

+261
-125
lines changed

7 files changed

+261
-125
lines changed

llvm/include/llvm/MC/MCDisassembler/MCDisassembler.h

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -136,6 +136,18 @@ class LLVM_ABI MCDisassembler {
136136
ArrayRef<uint8_t> Bytes, uint64_t Address,
137137
raw_ostream &CStream) const = 0;
138138

139+
/// Returns the disassembly of an instruction bundle for VLIW architectures
140+
/// like Hexagon.
141+
///
142+
/// \param Instr - An MCInst to populate with the contents of
143+
/// the Bundle with sub-instructions encoded as Inst operands.
/// \param Size - Output byte count. The Hexagon override sets it to the
/// length of the decoded bundle on success.
/// \param Bytes - Raw bytes to decode from (presumably starting at
/// Address, as for getInstruction -- confirm against callers).
/// \param Address - Address associated with the first byte of Bytes.
/// \param CStream - Stream that receives disassembler comments.
/// \return This default implementation always returns Fail; targets that
/// model bundles must override it.
144+
virtual DecodeStatus getInstructionBundle(MCInst &Instr, uint64_t &Size,
145+
ArrayRef<uint8_t> Bytes,
146+
uint64_t Address,
147+
raw_ostream &CStream) const {
148+
return Fail;
149+
}
150+
139151
/// Used to perform separate target specific disassembly for a particular
140152
/// symbol. May parse any prelude that precedes instructions after the
141153
/// start of a symbol, or the entire symbol.

llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp

Lines changed: 85 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -43,12 +43,12 @@ namespace {
4343
class HexagonDisassembler : public MCDisassembler {
4444
public:
4545
std::unique_ptr<MCInstrInfo const> const MCII;
46-
std::unique_ptr<MCInst *> CurrentBundle;
46+
mutable std::unique_ptr<MCInst> CurrentBundle;
4747
mutable MCInst const *CurrentExtender;
4848

4949
HexagonDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx,
5050
MCInstrInfo const *MCII)
51-
: MCDisassembler(STI, Ctx), MCII(MCII), CurrentBundle(new MCInst *),
51+
: MCDisassembler(STI, Ctx), MCII(MCII), CurrentBundle(nullptr),
5252
CurrentExtender(nullptr) {}
5353

5454
DecodeStatus getSingleInstruction(MCInst &Instr, MCInst &MCB,
@@ -57,7 +57,23 @@ class HexagonDisassembler : public MCDisassembler {
5757
DecodeStatus getInstruction(MCInst &Instr, uint64_t &Size,
5858
ArrayRef<uint8_t> Bytes, uint64_t Address,
5959
raw_ostream &CStream) const override;
60+
61+
DecodeStatus getInstructionBundle(MCInst &Instr, uint64_t &Size,
62+
ArrayRef<uint8_t> Bytes, uint64_t Address,
63+
raw_ostream &CStream) const override;
64+
6065
void remapInstruction(MCInst &Instr) const;
66+
67+
private:
68+
bool makeBundle(ArrayRef<uint8_t> Bytes, uint64_t Address,
69+
uint64_t &BytesToSkip, raw_ostream &CS) const;
70+
71+
/// Drops the cached bundle and clears the cursor into it, so that the next
/// getInstruction() call decodes a fresh bundle. CurrentInstruction points at
/// an operand of *CurrentBundle, so it is nulled together with the bundle to
/// avoid leaving it dangling.
void resetBundle() const {
72+
CurrentBundle.reset();
73+
CurrentInstruction = nullptr;
74+
}
75+
76+
mutable MCOperand *CurrentInstruction = nullptr;
6177
};
6278

6379
static uint64_t fullValue(HexagonDisassembler const &Disassembler, MCInst &MI,
@@ -171,43 +187,88 @@ LLVMInitializeHexagonDisassembler() {
171187
createHexagonDisassembler);
172188
}
173189

174-
DecodeStatus HexagonDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
175-
ArrayRef<uint8_t> Bytes,
176-
uint64_t Address,
177-
raw_ostream &CS) const {
178-
CommentStream = &CS;
179-
180-
DecodeStatus Result = DecodeStatus::Success;
190+
/// Decodes one packet from the front of \p Bytes into CurrentBundle.
///
/// Instruction words are decoded one at a time with getSingleInstruction()
/// until it reports the packet complete, then the bundle is validated with
/// HexagonMCChecker and passed through remapInstruction().
///
/// \param BytesToSkip - Advanced by HEXAGON_INSTR_SIZE for every word
/// consumed. It is only incremented here, never reset, so callers must
/// zero it first.
/// \return true when a complete, valid bundle is available in CurrentBundle;
/// false on truncated input, a decode failure, an oversized packet or a
/// failed packet check. CurrentBundle is left allocated on failure; the
/// callers in this file call resetBundle() in that case.
bool HexagonDisassembler::makeBundle(ArrayRef<uint8_t> Bytes, uint64_t Address,
191+
uint64_t &BytesToSkip,
192+
raw_ostream &CS) const {
181193
bool Complete = false;
182-
Size = 0;
194+
DecodeStatus Result = DecodeStatus::Success;
183195

184-
*CurrentBundle = &MI;
185-
MI.setOpcode(Hexagon::BUNDLE);
186-
MI.addOperand(MCOperand::createImm(0));
196+
// Start a fresh BUNDLE whose operand 0 is an immediate; the decoded
// sub-instructions are appended after it as Inst operands.
CurrentBundle.reset(new MCInst);
197+
CurrentBundle->setOpcode(Hexagon::BUNDLE);
198+
CurrentBundle->addOperand(MCOperand::createImm(0));
187199
while (Result == Success && !Complete) {
188200
if (Bytes.size() < HEXAGON_INSTR_SIZE)
189-
return MCDisassembler::Fail;
201+
return false;
190202
MCInst *Inst = getContext().createMCInst();
191-
Result = getSingleInstruction(*Inst, MI, Bytes, Address, CS, Complete);
192-
MI.addOperand(MCOperand::createInst(Inst));
193-
Size += HEXAGON_INSTR_SIZE;
203+
Result = getSingleInstruction(*Inst, *CurrentBundle, Bytes, Address, CS,
204+
Complete);
205+
CurrentBundle->addOperand(MCOperand::createInst(Inst));
206+
BytesToSkip += HEXAGON_INSTR_SIZE;
194207
Bytes = Bytes.slice(HEXAGON_INSTR_SIZE);
195208
}
196209
if (Result == MCDisassembler::Fail)
197-
return Result;
198-
if (Size > HEXAGON_MAX_PACKET_SIZE)
199-
return MCDisassembler::Fail;
210+
return false;
211+
if (BytesToSkip > HEXAGON_MAX_PACKET_SIZE)
212+
return false;
200213

201214
const auto ArchSTI = Hexagon_MC::getArchSubtarget(&STI);
202215
const auto STI_ = (ArchSTI != nullptr) ? *ArchSTI : STI;
203-
HexagonMCChecker Checker(getContext(), *MCII, STI_, MI,
216+
HexagonMCChecker Checker(getContext(), *MCII, STI_, *CurrentBundle,
204217
*getContext().getRegisterInfo(), false);
205218
if (!Checker.check())
206-
return MCDisassembler::Fail;
207-
remapInstruction(MI);
219+
return false;
220+
remapInstruction(*CurrentBundle);
221+
return true;
222+
}
223+
224+
/// Returns one sub-instruction of the current packet per call.
///
/// When no bundle is cached, the whole packet is decoded with makeBundle()
/// and kept in CurrentBundle. Each call then copies the sub-instruction under
/// CurrentInstruction into \p MI, reports HEXAGON_INSTR_SIZE as \p Size and
/// advances the cursor. The cache is dropped after the last sub-instruction.
///
/// On failure \p Size is the number of bytes makeBundle() consumed before
/// giving up, and the cached state is reset.
///
/// NOTE(review): while a bundle is cached, \p Bytes and \p Address are not
/// read, so callers are presumably expected to walk a packet sequentially
/// without seeking -- confirm against llvm-objdump's usage.
DecodeStatus HexagonDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
225+
ArrayRef<uint8_t> Bytes,
226+
uint64_t Address,
227+
raw_ostream &CS) const {
228+
CommentStream = &CS;
229+
230+
Size = 0;
231+
uint64_t BytesToSkip = 0;
232+
233+
if (!CurrentBundle) {
234+
if (!makeBundle(Bytes, Address, BytesToSkip, CS)) {
235+
Size = BytesToSkip;
236+
resetBundle();
237+
return MCDisassembler::Fail;
238+
}
239+
// Operand 0 of the bundle is the immediate added by makeBundle(); the
// sub-instructions start at operand 1.
CurrentInstruction = (CurrentBundle->begin() + 1);
240+
}
241+
242+
MI = *(CurrentInstruction->getInst());
243+
Size = HEXAGON_INSTR_SIZE;
244+
// Last sub-instruction handed out: drop the cache so the next call decodes
// a new packet.
if (++CurrentInstruction == CurrentBundle->end())
245+
resetBundle();
208246
return MCDisassembler::Success;
209247
}
210248

249+
/// Decodes a whole packet and returns it as a single BUNDLE MCInst.
///
/// On success \p MI receives a copy of the decoded bundle and \p Size is
/// HEXAGON_INSTR_SIZE times the bundle's instruction count. On failure
/// \p Size is the number of bytes makeBundle() consumed before giving up.
/// The cached bundle is reset before returning in both cases, so this call
/// leaves no state behind for getInstruction().
DecodeStatus HexagonDisassembler::getInstructionBundle(MCInst &MI,
250+
uint64_t &Size,
251+
ArrayRef<uint8_t> Bytes,
252+
uint64_t Address,
253+
raw_ostream &CS) const {
254+
CommentStream = &CS;
255+
Size = 0;
256+
uint64_t BytesToSkip = 0;
257+
// Must not be called while getInstruction() is part-way through a cached
// bundle; makeBundle() below would overwrite it.
assert(!CurrentBundle);
258+
259+
if (!makeBundle(Bytes, Address, BytesToSkip, CS)) {
260+
Size = BytesToSkip;
261+
resetBundle();
262+
return MCDisassembler::Fail;
263+
}
264+
265+
MI = *CurrentBundle;
266+
Size = HEXAGON_INSTR_SIZE * HexagonMCInstrInfo::bundleSize(MI);
267+
resetBundle();
268+
269+
return Success;
270+
}
271+
211272
void HexagonDisassembler::remapInstruction(MCInst &Instr) const {
212273
for (auto I: HexagonMCInstrInfo::bundleInstructions(Instr)) {
213274
auto &MI = const_cast<MCInst &>(*I.getInst());
@@ -482,7 +543,7 @@ DecodeStatus HexagonDisassembler::getSingleInstruction(MCInst &MI, MCInst &MCB,
482543
unsigned Offset = 1;
483544
bool Vector = HexagonMCInstrInfo::isVector(*MCII, MI);
484545
bool PrevVector = false;
485-
auto Instructions = HexagonMCInstrInfo::bundleInstructions(**CurrentBundle);
546+
auto Instructions = HexagonMCInstrInfo::bundleInstructions(*CurrentBundle);
486547
auto i = Instructions.end() - 1;
487548
for (auto n = Instructions.begin() - 1;; --i, ++Offset) {
488549
if (i == n)

llvm/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.cpp

Lines changed: 11 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -33,30 +33,17 @@ void HexagonInstPrinter::printRegName(raw_ostream &O, MCRegister Reg) {
3333
/// Prints a single Hexagon instruction to \p OS.
///
/// After this commit \p MI is one sub-instruction rather than a BUNDLE: the
/// bundle asserts, the per-instruction loop, the trailing "\n" and the
/// ":endloop" suffixes are removed from this function. A duplex is printed as
/// its operand-1 instruction, a '\v' separator, then its operand-0
/// instruction. HasExtender is updated so that it reflects whether the
/// instruction just printed is an immediate extender.
void HexagonInstPrinter::printInst(const MCInst *MI, uint64_t Address,
3434
StringRef Annot, const MCSubtargetInfo &STI,
3535
raw_ostream &OS) {
36-
assert(HexagonMCInstrInfo::isBundle(*MI));
37-
assert(HexagonMCInstrInfo::bundleSize(*MI) <= HEXAGON_PACKET_SIZE);
38-
assert(HexagonMCInstrInfo::bundleSize(*MI) > 0);
39-
HasExtender = false;
40-
for (auto const &I : HexagonMCInstrInfo::bundleInstructions(*MI)) {
41-
MCInst const &MCI = *I.getInst();
42-
if (HexagonMCInstrInfo::isDuplex(MII, MCI)) {
43-
printInstruction(MCI.getOperand(1).getInst(), Address, OS);
44-
OS << '\v';
45-
HasExtender = false;
46-
printInstruction(MCI.getOperand(0).getInst(), Address, OS);
47-
} else
48-
printInstruction(&MCI, Address, OS);
49-
HasExtender = HexagonMCInstrInfo::isImmext(MCI);
50-
OS << "\n";
51-
}
52-
53-
bool IsLoop0 = HexagonMCInstrInfo::isInnerLoop(*MI);
54-
bool IsLoop1 = HexagonMCInstrInfo::isOuterLoop(*MI);
55-
if (IsLoop0) {
56-
OS << (IsLoop1 ? " :endloop01" : " :endloop0");
57-
} else if (IsLoop1) {
58-
OS << " :endloop1";
59-
}
36+
if (HexagonMCInstrInfo::isDuplex(MII, *MI)) {
37+
printInstruction(MI->getOperand(1).getInst(), Address, OS);
38+
OS << '\v';
39+
HasExtender = false;
40+
printInstruction(MI->getOperand(0).getInst(), Address, OS);
41+
} else
42+
printInstruction(MI, Address, OS);
43+
HasExtender = HexagonMCInstrInfo::isImmext(*MI);
44+
// NOTE(review): this applies the encoding parse-bits mask to getOpcode();
// confirm that the opcode value is meant to carry parse bits here.
if ((MI->getOpcode() & HexagonII::INST_PARSE_MASK) ==
45+
HexagonII::INST_PARSE_PACKET_END)
46+
HasExtender = false;
6047
}
6148

6249
void HexagonInstPrinter::printOperand(MCInst const *MI, unsigned OpNo,

llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp

Lines changed: 16 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -252,8 +252,21 @@ class HexagonTargetAsmStreamer : public HexagonTargetStreamer {
252252
std::string Buffer;
253253
{
254254
raw_string_ostream TempStream(Buffer);
255-
InstPrinter.printInst(&Inst, Address, "", STI, TempStream);
255+
for (auto &I : HexagonMCInstrInfo::bundleInstructions(Inst)) {
256+
InstPrinter.printInst(I.getInst(), Address, "", STI, TempStream);
257+
TempStream << "\n";
258+
}
259+
}
260+
261+
std::string LoopString = "";
262+
bool IsLoop0 = HexagonMCInstrInfo::isInnerLoop(Inst);
263+
bool IsLoop1 = HexagonMCInstrInfo::isOuterLoop(Inst);
264+
if (IsLoop0) {
265+
LoopString += (IsLoop1 ? " :endloop01" : " :endloop0");
266+
} else if (IsLoop1) {
267+
LoopString += " :endloop1";
256268
}
269+
257270
StringRef Contents(Buffer);
258271
auto PacketBundle = Contents.rsplit('\n');
259272
auto HeadTail = PacketBundle.first.split('\n');
@@ -275,9 +288,9 @@ class HexagonTargetAsmStreamer : public HexagonTargetStreamer {
275288
}
276289

277290
if (HexagonMCInstrInfo::isMemReorderDisabled(Inst))
278-
OS << "\n\t} :mem_noshuf" << PacketBundle.second;
291+
OS << "\n\t} :mem_noshuf" << LoopString;
279292
else
280-
OS << "\t}" << PacketBundle.second;
293+
OS << "\t}" << LoopString;
281294
}
282295

283296
void finish() override { finishAttributeSection(); }
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
## Check that branch targets are printed within instruction packets for Hexagon
2+
3+
--- !ELF
4+
FileHeader:
5+
Class: ELFCLASS32
6+
Data: ELFDATA2LSB
7+
Type: ET_REL
8+
Machine: EM_HEXAGON
9+
Flags: [ EF_HEXAGON_MACH_V68, EF_HEXAGON_ISA_V68 ]
10+
Sections:
11+
- Name: .text
12+
Type: SHT_PROGBITS
13+
Flags: [ SHF_ALLOC, SHF_EXECINSTR ]
14+
AddressAlign: 0x10
15+
Content: 00C09DA000C000781EC01E9600C09DA000C0005A1EC01E96
16+
...
17+
18+
# RUN: yaml2obj %s | llvm-objdump -d - | FileCheck %s
19+
20+
# CHECK: 00000000 <.text>:
21+
# CHECK-NEXT: 0: 00 c0 9d a0 a09dc000 { allocframe(#0x0) }
22+
# CHECK-NEXT: 4: 00 c0 00 78 7800c000 { r0 = #0x0 }
23+
# CHECK-NEXT: 8: 1e c0 1e 96 961ec01e { dealloc_return }
24+
# CHECK-NEXT: c: 00 c0 9d a0 a09dc000 { allocframe(#0x0) }
25+
# CHECK-NEXT: 10: 00 c0 00 5a 5a00c000 { call 0x10 <.text+0x10> }
26+
# CHECK-NEXT: 14: 1e c0 1e 96 961ec01e { dealloc_return }

llvm/tools/llvm-mc/Disassembler.cpp

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,11 @@ static bool PrintInsts(const MCDisassembler &DisAsm, const ByteArrayTy &Bytes,
4545
MCInst Inst;
4646

4747
MCDisassembler::DecodeStatus S;
48-
S = DisAsm.getInstruction(Inst, Size, Data.slice(Index), Index, nulls());
48+
if (STI.getTargetTriple().getArch() == Triple::hexagon)
49+
S = DisAsm.getInstructionBundle(Inst, Size, Data.slice(Index), Index,
50+
nulls());
51+
else
52+
S = DisAsm.getInstruction(Inst, Size, Data.slice(Index), Index, nulls());
4953
switch (S) {
5054
case MCDisassembler::Fail:
5155
SM.PrintMessage(SMLoc::getFromPointer(Bytes.second[Index]),

0 commit comments

Comments
 (0)