LLDB  mainline
DisassemblerLLVMC.cpp
Go to the documentation of this file.
1 //===-- DisassemblerLLVMC.cpp ---------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "DisassemblerLLVMC.h"
10 
11 #include "llvm-c/Disassembler.h"
12 #include "llvm/ADT/SmallString.h"
13 #include "llvm/ADT/StringExtras.h"
14 #include "llvm/MC/MCAsmInfo.h"
15 #include "llvm/MC/MCContext.h"
16 #include "llvm/MC/MCDisassembler/MCDisassembler.h"
17 #include "llvm/MC/MCDisassembler/MCExternalSymbolizer.h"
18 #include "llvm/MC/MCDisassembler/MCRelocationInfo.h"
19 #include "llvm/MC/MCInst.h"
20 #include "llvm/MC/MCInstPrinter.h"
21 #include "llvm/MC/MCInstrInfo.h"
22 #include "llvm/MC/MCRegisterInfo.h"
23 #include "llvm/MC/MCSubtargetInfo.h"
24 #include "llvm/MC/MCTargetOptions.h"
25 #include "llvm/MC/TargetRegistry.h"
26 #include "llvm/Support/AArch64TargetParser.h"
27 #include "llvm/Support/ErrorHandling.h"
28 #include "llvm/Support/ScopedPrinter.h"
29 #include "llvm/Support/TargetSelect.h"
30 
31 #include "lldb/Core/Address.h"
32 #include "lldb/Core/Module.h"
35 #include "lldb/Target/Process.h"
38 #include "lldb/Target/StackFrame.h"
39 #include "lldb/Target/Target.h"
41 #include "lldb/Utility/LLDBLog.h"
42 #include "lldb/Utility/Log.h"
44 #include "lldb/Utility/Stream.h"
45 
46 using namespace lldb;
47 using namespace lldb_private;
48 
50 
52 public:
53  static std::unique_ptr<MCDisasmInstance>
54  Create(const char *triple, const char *cpu, const char *features_str,
55  unsigned flavor, DisassemblerLLVMC &owner);
56 
57  ~MCDisasmInstance() = default;
58 
59  uint64_t GetMCInst(const uint8_t *opcode_data, size_t opcode_data_len,
60  lldb::addr_t pc, llvm::MCInst &mc_inst) const;
61  void PrintMCInst(llvm::MCInst &mc_inst, std::string &inst_string,
62  std::string &comments_string);
63  void SetStyle(bool use_hex_immed, HexImmediateStyle hex_style);
64  bool CanBranch(llvm::MCInst &mc_inst) const;
65  bool HasDelaySlot(llvm::MCInst &mc_inst) const;
66  bool IsCall(llvm::MCInst &mc_inst) const;
67  bool IsLoad(llvm::MCInst &mc_inst) const;
68  bool IsAuthenticated(llvm::MCInst &mc_inst) const;
69 
70 private:
71  MCDisasmInstance(std::unique_ptr<llvm::MCInstrInfo> &&instr_info_up,
72  std::unique_ptr<llvm::MCRegisterInfo> &&reg_info_up,
73  std::unique_ptr<llvm::MCSubtargetInfo> &&subtarget_info_up,
74  std::unique_ptr<llvm::MCAsmInfo> &&asm_info_up,
75  std::unique_ptr<llvm::MCContext> &&context_up,
76  std::unique_ptr<llvm::MCDisassembler> &&disasm_up,
77  std::unique_ptr<llvm::MCInstPrinter> &&instr_printer_up);
78 
79  std::unique_ptr<llvm::MCInstrInfo> m_instr_info_up;
80  std::unique_ptr<llvm::MCRegisterInfo> m_reg_info_up;
81  std::unique_ptr<llvm::MCSubtargetInfo> m_subtarget_info_up;
82  std::unique_ptr<llvm::MCAsmInfo> m_asm_info_up;
83  std::unique_ptr<llvm::MCContext> m_context_up;
84  std::unique_ptr<llvm::MCDisassembler> m_disasm_up;
85  std::unique_ptr<llvm::MCInstPrinter> m_instr_printer_up;
86 };
87 
88 namespace x86 {
89 
90 /// These are the three values deciding instruction control flow kind.
91 /// The InstructionLengthDecode function decodes an instruction and returns this struct.
92 ///
93 /// primary_opcode
94 /// Primary opcode of the instruction.
95 /// For one-byte opcode instruction, it's the first byte after prefix.
96 /// For two- and three-byte opcodes, it's the second byte.
97 ///
98 /// opcode_len
99 /// The length of opcode in bytes. Valid opcode lengths are 1, 2, or 3.
100 ///
101 /// modrm
102 /// ModR/M byte of the instruction.
103 /// Bits[7:6] indicate MOD. Bits[5:3] specify a register and R/M bits[2:0]
104 /// may contain a register or specify an addressing mode, depending on MOD.
106  uint8_t primary_opcode;
107  uint8_t opcode_len;
108  uint8_t modrm;
109 };
110 
111 /// Determine the InstructionControlFlowKind based on opcode and modrm bytes.
112 /// Refer to http://ref.x86asm.net/coder.html for the full list of opcode and
113 /// instruction set.
114 ///
115 /// \param[in] opcode_and_modrm
116 /// Contains primary_opcode byte, its length, and ModR/M byte.
117 /// Refer to the struct InstructionOpcodeAndModrm for details.
118 ///
119 /// \return
120 /// The control flow kind of the instruction or
121 /// eInstructionControlFlowKindOther if the instruction doesn't affect
122 /// the control flow of the program.
125  uint8_t opcode = opcode_and_modrm.primary_opcode;
126  uint8_t opcode_len = opcode_and_modrm.opcode_len;
127  uint8_t modrm = opcode_and_modrm.modrm;
128 
129  if (opcode_len > 2)
131 
132  if (opcode >= 0x70 && opcode <= 0x7F) {
133  if (opcode_len == 1)
135  else
137  }
138 
139  if (opcode >= 0x80 && opcode <= 0x8F) {
140  if (opcode_len == 2)
142  else
144  }
145 
146  switch (opcode) {
147  case 0x9A:
148  if (opcode_len == 1)
150  break;
151  case 0xFF:
152  if (opcode_len == 1) {
153  uint8_t modrm_reg = (modrm >> 3) & 7;
154  if (modrm_reg == 2)
156  else if (modrm_reg == 3)
158  else if (modrm_reg == 4)
160  else if (modrm_reg == 5)
162  }
163  break;
164  case 0xE8:
165  if (opcode_len == 1)
167  break;
168  case 0xCD:
169  case 0xCC:
170  case 0xCE:
171  case 0xF1:
172  if (opcode_len == 1)
174  break;
175  case 0xCF:
176  if (opcode_len == 1)
178  break;
179  case 0xE9:
180  case 0xEB:
181  if (opcode_len == 1)
183  break;
184  case 0xEA:
185  if (opcode_len == 1)
187  break;
188  case 0xE3:
189  case 0xE0:
190  case 0xE1:
191  case 0xE2:
192  if (opcode_len == 1)
194  break;
195  case 0xC3:
196  case 0xC2:
197  if (opcode_len == 1)
199  break;
200  case 0xCB:
201  case 0xCA:
202  if (opcode_len == 1)
204  break;
205  case 0x05:
206  case 0x34:
207  if (opcode_len == 2)
209  break;
210  case 0x35:
211  case 0x07:
212  if (opcode_len == 2)
214  break;
215  case 0x01:
216  if (opcode_len == 2) {
217  switch (modrm) {
218  case 0xc1:
220  case 0xc2:
221  case 0xc3:
223  default:
224  break;
225  }
226  }
227  break;
228  default:
229  break;
230  }
231 
233 }
234 
235 /// Decode an instruction into opcode, modrm and opcode_len.
236 /// Refer to http://ref.x86asm.net/coder.html for the instruction bytes layout.
237 /// Opcodes in x86 are generally the first byte of instruction, though two-byte
238 /// instructions and prefixes exist. ModR/M is the byte following the opcode
239 /// and adds additional information for how the instruction is executed.
240 ///
241 /// \param[in] inst_bytes
242 /// Raw bytes of the instruction
243 ///
244 ///
245 /// \param[in] bytes_len
246 /// The length of the inst_bytes array.
247 ///
248 /// \param[in] is_exec_mode_64b
249 /// If true, the execution mode is 64 bit.
250 ///
251 /// \return
252 /// Returns decoded instruction as struct InstructionOpcodeAndModrm, holding
253 /// primary_opcode, opcode_len and modrm byte. Refer to the struct definition
254 /// for more details.
255 /// Otherwise if the given instruction is invalid, returns None.
256 llvm::Optional<InstructionOpcodeAndModrm>
257 InstructionLengthDecode(const uint8_t *inst_bytes, int bytes_len,
258  bool is_exec_mode_64b) {
259  int op_idx = 0;
260  bool prefix_done = false;
261  InstructionOpcodeAndModrm ret = {0, 0, 0};
262 
263  // In most cases, the primary_opcode is the first byte of the instruction
264  // but some instructions have a prefix to be skipped for these calculations.
265  // The following mapping is inspired from libipt's instruction decoding logic
266  // in `src/pt_ild.c`
267  while (!prefix_done) {
268  if (op_idx >= bytes_len)
269  return llvm::None;
270 
271  ret.primary_opcode = inst_bytes[op_idx];
272  switch (ret.primary_opcode) {
273  // prefix_ignore
274  case 0x26:
275  case 0x2e:
276  case 0x36:
277  case 0x3e:
278  case 0x64:
279  case 0x65:
280  // prefix_osz, prefix_asz
281  case 0x66:
282  case 0x67:
283  // prefix_lock, prefix_f2, prefix_f3
284  case 0xf0:
285  case 0xf2:
286  case 0xf3:
287  op_idx++;
288  break;
289 
290  // prefix_rex
291  case 0x40:
292  case 0x41:
293  case 0x42:
294  case 0x43:
295  case 0x44:
296  case 0x45:
297  case 0x46:
298  case 0x47:
299  case 0x48:
300  case 0x49:
301  case 0x4a:
302  case 0x4b:
303  case 0x4c:
304  case 0x4d:
305  case 0x4e:
306  case 0x4f:
307  if (is_exec_mode_64b)
308  op_idx++;
309  else
310  prefix_done = true;
311  break;
312 
313  // prefix_vex_c4, c5
314  case 0xc5:
315  if (!is_exec_mode_64b && (inst_bytes[op_idx + 1] & 0xc0) != 0xc0) {
316  prefix_done = true;
317  break;
318  }
319 
320  ret.opcode_len = 2;
321  ret.primary_opcode = inst_bytes[op_idx + 2];
322  ret.modrm = inst_bytes[op_idx + 3];
323  return ret;
324 
325  case 0xc4:
326  if (!is_exec_mode_64b && (inst_bytes[op_idx + 1] & 0xc0) != 0xc0) {
327  prefix_done = true;
328  break;
329  }
330  ret.opcode_len = inst_bytes[op_idx + 1] & 0x1f;
331  ret.primary_opcode = inst_bytes[op_idx + 3];
332  ret.modrm = inst_bytes[op_idx + 4];
333  return ret;
334 
335  // prefix_evex
336  case 0x62:
337  if (!is_exec_mode_64b && (inst_bytes[op_idx + 1] & 0xc0) != 0xc0) {
338  prefix_done = true;
339  break;
340  }
341  ret.opcode_len = inst_bytes[op_idx + 1] & 0x03;
342  ret.primary_opcode = inst_bytes[op_idx + 4];
343  ret.modrm = inst_bytes[op_idx + 5];
344  return ret;
345 
346  default:
347  prefix_done = true;
348  break;
349  }
350  } // prefix done
351 
352  ret.primary_opcode = inst_bytes[op_idx];
353  ret.modrm = inst_bytes[op_idx + 1];
354  ret.opcode_len = 1;
355 
356  // If the first opcode is 0F, it's two- or three- byte opcodes.
357  if (ret.primary_opcode == 0x0F) {
358  ret.primary_opcode = inst_bytes[++op_idx]; // get the next byte
359 
360  if (ret.primary_opcode == 0x38) {
361  ret.opcode_len = 3;
362  ret.primary_opcode = inst_bytes[++op_idx]; // get the next byte
363  ret.modrm = inst_bytes[op_idx + 1];
364  } else if (ret.primary_opcode == 0x3A) {
365  ret.opcode_len = 3;
366  ret.primary_opcode = inst_bytes[++op_idx];
367  ret.modrm = inst_bytes[op_idx + 1];
368  } else if ((ret.primary_opcode & 0xf8) == 0x38) {
369  ret.opcode_len = 0;
370  ret.primary_opcode = inst_bytes[++op_idx];
371  ret.modrm = inst_bytes[op_idx + 1];
372  } else if (ret.primary_opcode == 0x0F) {
373  ret.opcode_len = 3;
374  // opcode is 0x0F, no needs to update
375  ret.modrm = inst_bytes[op_idx + 1];
376  } else {
377  ret.opcode_len = 2;
378  ret.modrm = inst_bytes[op_idx + 1];
379  }
380  }
381 
382  return ret;
383 }
384 
386  Opcode m_opcode) {
387  llvm::Optional<InstructionOpcodeAndModrm> ret = llvm::None;
388 
389  if (m_opcode.GetOpcodeBytes() == nullptr || m_opcode.GetByteSize() <= 0) {
390  // x86_64 and i386 instructions are categorized as Opcode::Type::eTypeBytes
392  }
393 
394  // Opcode bytes will be decoded into primary_opcode, modrm and opcode length.
395  // These are the three values deciding instruction control flow kind.
396  ret = InstructionLengthDecode((const uint8_t *)m_opcode.GetOpcodeBytes(),
397  m_opcode.GetByteSize(), is_exec_mode_64b);
398  if (!ret)
400  else
401  return MapOpcodeIntoControlFlowKind(ret.value());
402 }
403 
404 } // namespace x86
405 
407 public:
409  const lldb_private::Address &address,
410  AddressClass addr_class)
411  : Instruction(address, addr_class),
412  m_disasm_wp(std::static_pointer_cast<DisassemblerLLVMC>(
413  disasm.shared_from_this())) {}
414 
415  ~InstructionLLVMC() override = default;
416 
417  bool DoesBranch() override {
418  VisitInstruction();
419  return m_does_branch;
420  }
421 
422  bool HasDelaySlot() override {
423  VisitInstruction();
424  return m_has_delay_slot;
425  }
426 
427  bool IsLoad() override {
428  VisitInstruction();
429  return m_is_load;
430  }
431 
432  bool IsAuthenticated() override {
433  VisitInstruction();
434  return m_is_authenticated;
435  }
436 
438  DisassemblerScope disasm(*this);
439  return GetDisasmToUse(is_alternate_isa, disasm);
440  }
441 
442  size_t Decode(const lldb_private::Disassembler &disassembler,
443  const lldb_private::DataExtractor &data,
444  lldb::offset_t data_offset) override {
445  // All we have to do is read the opcode which can be easy for some
446  // architectures
447  bool got_op = false;
448  DisassemblerScope disasm(*this);
449  if (disasm) {
450  const ArchSpec &arch = disasm->GetArchitecture();
451  const lldb::ByteOrder byte_order = data.GetByteOrder();
452 
453  const uint32_t min_op_byte_size = arch.GetMinimumOpcodeByteSize();
454  const uint32_t max_op_byte_size = arch.GetMaximumOpcodeByteSize();
455  if (min_op_byte_size == max_op_byte_size) {
456  // Fixed size instructions, just read that amount of data.
457  if (!data.ValidOffsetForDataOfSize(data_offset, min_op_byte_size))
458  return false;
459 
460  switch (min_op_byte_size) {
461  case 1:
462  m_opcode.SetOpcode8(data.GetU8(&data_offset), byte_order);
463  got_op = true;
464  break;
465 
466  case 2:
467  m_opcode.SetOpcode16(data.GetU16(&data_offset), byte_order);
468  got_op = true;
469  break;
470 
471  case 4:
472  m_opcode.SetOpcode32(data.GetU32(&data_offset), byte_order);
473  got_op = true;
474  break;
475 
476  case 8:
477  m_opcode.SetOpcode64(data.GetU64(&data_offset), byte_order);
478  got_op = true;
479  break;
480 
481  default:
482  m_opcode.SetOpcodeBytes(data.PeekData(data_offset, min_op_byte_size),
483  min_op_byte_size);
484  got_op = true;
485  break;
486  }
487  }
488  if (!got_op) {
489  bool is_alternate_isa = false;
490  DisassemblerLLVMC::MCDisasmInstance *mc_disasm_ptr =
491  GetDisasmToUse(is_alternate_isa, disasm);
492 
493  const llvm::Triple::ArchType machine = arch.GetMachine();
494  if (machine == llvm::Triple::arm || machine == llvm::Triple::thumb) {
495  if (machine == llvm::Triple::thumb || is_alternate_isa) {
496  uint32_t thumb_opcode = data.GetU16(&data_offset);
497  if ((thumb_opcode & 0xe000) != 0xe000 ||
498  ((thumb_opcode & 0x1800u) == 0)) {
499  m_opcode.SetOpcode16(thumb_opcode, byte_order);
500  m_is_valid = true;
501  } else {
502  thumb_opcode <<= 16;
503  thumb_opcode |= data.GetU16(&data_offset);
504  m_opcode.SetOpcode16_2(thumb_opcode, byte_order);
505  m_is_valid = true;
506  }
507  } else {
508  m_opcode.SetOpcode32(data.GetU32(&data_offset), byte_order);
509  m_is_valid = true;
510  }
511  } else {
512  // The opcode isn't evenly sized, so we need to actually use the llvm
513  // disassembler to parse it and get the size.
514  uint8_t *opcode_data =
515  const_cast<uint8_t *>(data.PeekData(data_offset, 1));
516  const size_t opcode_data_len = data.BytesLeft(data_offset);
517  const addr_t pc = m_address.GetFileAddress();
518  llvm::MCInst inst;
519 
520  const size_t inst_size =
521  mc_disasm_ptr->GetMCInst(opcode_data, opcode_data_len, pc, inst);
522  if (inst_size == 0)
523  m_opcode.Clear();
524  else {
525  m_opcode.SetOpcodeBytes(opcode_data, inst_size);
526  m_is_valid = true;
527  }
528  }
529  }
530  return m_opcode.GetByteSize();
531  }
532  return 0;
533  }
534 
535  void AppendComment(std::string &description) {
536  if (m_comment.empty())
537  m_comment.swap(description);
538  else {
539  m_comment.append(", ");
540  m_comment.append(description);
541  }
542  }
543 
546  DisassemblerScope disasm(*this, exe_ctx);
547  if (disasm){
548  if (disasm->GetArchitecture().GetMachine() == llvm::Triple::x86)
549  return x86::GetControlFlowKind(/*is_64b=*/false, m_opcode);
550  else if (disasm->GetArchitecture().GetMachine() == llvm::Triple::x86_64)
551  return x86::GetControlFlowKind(/*is_64b=*/true, m_opcode);
552  }
553 
555  }
556 
558  const lldb_private::ExecutionContext *exe_ctx) override {
559  DataExtractor data;
560  const AddressClass address_class = GetAddressClass();
561 
562  if (m_opcode.GetData(data)) {
563  std::string out_string;
564  std::string comment_string;
565 
566  DisassemblerScope disasm(*this, exe_ctx);
567  if (disasm) {
569 
570  if (address_class == AddressClass::eCodeAlternateISA)
571  mc_disasm_ptr = disasm->m_alternate_disasm_up.get();
572  else
573  mc_disasm_ptr = disasm->m_disasm_up.get();
574 
575  lldb::addr_t pc = m_address.GetFileAddress();
576  m_using_file_addr = true;
577 
578  const bool data_from_file = disasm->m_data_from_file;
579  bool use_hex_immediates = true;
580  Disassembler::HexImmediateStyle hex_style = Disassembler::eHexStyleC;
581 
582  if (exe_ctx) {
583  Target *target = exe_ctx->GetTargetPtr();
584  if (target) {
585  use_hex_immediates = target->GetUseHexImmediates();
586  hex_style = target->GetHexImmediateStyle();
587 
588  if (!data_from_file) {
589  const lldb::addr_t load_addr = m_address.GetLoadAddress(target);
590  if (load_addr != LLDB_INVALID_ADDRESS) {
591  pc = load_addr;
592  m_using_file_addr = false;
593  }
594  }
595  }
596  }
597 
598  const uint8_t *opcode_data = data.GetDataStart();
599  const size_t opcode_data_len = data.GetByteSize();
600  llvm::MCInst inst;
601  size_t inst_size =
602  mc_disasm_ptr->GetMCInst(opcode_data, opcode_data_len, pc, inst);
603 
604  if (inst_size > 0) {
605  mc_disasm_ptr->SetStyle(use_hex_immediates, hex_style);
606  mc_disasm_ptr->PrintMCInst(inst, out_string, comment_string);
607 
608  if (!comment_string.empty()) {
609  AppendComment(comment_string);
610  }
611  }
612 
613  if (inst_size == 0) {
614  m_comment.assign("unknown opcode");
615  inst_size = m_opcode.GetByteSize();
616  StreamString mnemonic_strm;
617  lldb::offset_t offset = 0;
618  lldb::ByteOrder byte_order = data.GetByteOrder();
619  switch (inst_size) {
620  case 1: {
621  const uint8_t uval8 = data.GetU8(&offset);
622  m_opcode.SetOpcode8(uval8, byte_order);
623  m_opcode_name.assign(".byte");
624  mnemonic_strm.Printf("0x%2.2x", uval8);
625  } break;
626  case 2: {
627  const uint16_t uval16 = data.GetU16(&offset);
628  m_opcode.SetOpcode16(uval16, byte_order);
629  m_opcode_name.assign(".short");
630  mnemonic_strm.Printf("0x%4.4x", uval16);
631  } break;
632  case 4: {
633  const uint32_t uval32 = data.GetU32(&offset);
634  m_opcode.SetOpcode32(uval32, byte_order);
635  m_opcode_name.assign(".long");
636  mnemonic_strm.Printf("0x%8.8x", uval32);
637  } break;
638  case 8: {
639  const uint64_t uval64 = data.GetU64(&offset);
640  m_opcode.SetOpcode64(uval64, byte_order);
641  m_opcode_name.assign(".quad");
642  mnemonic_strm.Printf("0x%16.16" PRIx64, uval64);
643  } break;
644  default:
645  if (inst_size == 0)
646  return;
647  else {
648  const uint8_t *bytes = data.PeekData(offset, inst_size);
649  if (bytes == nullptr)
650  return;
651  m_opcode_name.assign(".byte");
652  m_opcode.SetOpcodeBytes(bytes, inst_size);
653  mnemonic_strm.Printf("0x%2.2x", bytes[0]);
654  for (uint32_t i = 1; i < inst_size; ++i)
655  mnemonic_strm.Printf(" 0x%2.2x", bytes[i]);
656  }
657  break;
658  }
659  m_mnemonics = std::string(mnemonic_strm.GetString());
660  return;
661  }
662 
663  static RegularExpression s_regex(
664  llvm::StringRef("[ \t]*([^ ^\t]+)[ \t]*([^ ^\t].*)?"));
665 
666  llvm::SmallVector<llvm::StringRef, 4> matches;
667  if (s_regex.Execute(out_string, &matches)) {
668  m_opcode_name = matches[1].str();
669  m_mnemonics = matches[2].str();
670  }
671  }
672  }
673  }
674 
675  bool IsValid() const { return m_is_valid; }
676 
677  bool UsingFileAddress() const { return m_using_file_addr; }
678  size_t GetByteSize() const { return m_opcode.GetByteSize(); }
679 
680  /// Grants exclusive access to the disassembler and initializes it with the
681  /// given InstructionLLVMC and an optional ExecutionContext.
683  std::shared_ptr<DisassemblerLLVMC> m_disasm;
684 
685  public:
687  InstructionLLVMC &i,
688  const lldb_private::ExecutionContext *exe_ctx = nullptr)
689  : m_disasm(i.m_disasm_wp.lock()) {
690  m_disasm->m_mutex.lock();
691  m_disasm->m_inst = &i;
692  m_disasm->m_exe_ctx = exe_ctx;
693  }
694  ~DisassemblerScope() { m_disasm->m_mutex.unlock(); }
695 
696  /// Evaluates to true if this scope contains a valid disassembler.
697  operator bool() const { return static_cast<bool>(m_disasm); }
698 
699  std::shared_ptr<DisassemblerLLVMC> operator->() { return m_disasm; }
700  };
701 
702  static llvm::StringRef::const_iterator
703  ConsumeWhitespace(llvm::StringRef::const_iterator osi,
704  llvm::StringRef::const_iterator ose) {
705  while (osi != ose) {
706  switch (*osi) {
707  default:
708  return osi;
709  case ' ':
710  case '\t':
711  break;
712  }
713  ++osi;
714  }
715 
716  return osi;
717  }
718 
719  static std::pair<bool, llvm::StringRef::const_iterator>
720  ConsumeChar(llvm::StringRef::const_iterator osi, const char c,
721  llvm::StringRef::const_iterator ose) {
722  bool found = false;
723 
724  osi = ConsumeWhitespace(osi, ose);
725  if (osi != ose && *osi == c) {
726  found = true;
727  ++osi;
728  }
729 
730  return std::make_pair(found, osi);
731  }
732 
733  static std::pair<Operand, llvm::StringRef::const_iterator>
734  ParseRegisterName(llvm::StringRef::const_iterator osi,
735  llvm::StringRef::const_iterator ose) {
736  Operand ret;
737  ret.m_type = Operand::Type::Register;
738  std::string str;
739 
740  osi = ConsumeWhitespace(osi, ose);
741 
742  while (osi != ose) {
743  if (*osi >= '0' && *osi <= '9') {
744  if (str.empty()) {
745  return std::make_pair(Operand(), osi);
746  } else {
747  str.push_back(*osi);
748  }
749  } else if (*osi >= 'a' && *osi <= 'z') {
750  str.push_back(*osi);
751  } else {
752  switch (*osi) {
753  default:
754  if (str.empty()) {
755  return std::make_pair(Operand(), osi);
756  } else {
757  ret.m_register = ConstString(str);
758  return std::make_pair(ret, osi);
759  }
760  case '%':
761  if (!str.empty()) {
762  return std::make_pair(Operand(), osi);
763  }
764  break;
765  }
766  }
767  ++osi;
768  }
769 
770  ret.m_register = ConstString(str);
771  return std::make_pair(ret, osi);
772  }
773 
774  static std::pair<Operand, llvm::StringRef::const_iterator>
775  ParseImmediate(llvm::StringRef::const_iterator osi,
776  llvm::StringRef::const_iterator ose) {
777  Operand ret;
778  ret.m_type = Operand::Type::Immediate;
779  std::string str;
780  bool is_hex = false;
781 
782  osi = ConsumeWhitespace(osi, ose);
783 
784  while (osi != ose) {
785  if (*osi >= '0' && *osi <= '9') {
786  str.push_back(*osi);
787  } else if (*osi >= 'a' && *osi <= 'f') {
788  if (is_hex) {
789  str.push_back(*osi);
790  } else {
791  return std::make_pair(Operand(), osi);
792  }
793  } else {
794  switch (*osi) {
795  default:
796  if (str.empty()) {
797  return std::make_pair(Operand(), osi);
798  } else {
799  ret.m_immediate = strtoull(str.c_str(), nullptr, 0);
800  return std::make_pair(ret, osi);
801  }
802  case 'x':
803  if (!str.compare("0")) {
804  is_hex = true;
805  str.push_back(*osi);
806  } else {
807  return std::make_pair(Operand(), osi);
808  }
809  break;
810  case '#':
811  case '$':
812  if (!str.empty()) {
813  return std::make_pair(Operand(), osi);
814  }
815  break;
816  case '-':
817  if (str.empty()) {
818  ret.m_negative = true;
819  } else {
820  return std::make_pair(Operand(), osi);
821  }
822  }
823  }
824  ++osi;
825  }
826 
827  ret.m_immediate = strtoull(str.c_str(), nullptr, 0);
828  return std::make_pair(ret, osi);
829  }
830 
831  // -0x5(%rax,%rax,2)
832  static std::pair<Operand, llvm::StringRef::const_iterator>
833  ParseIntelIndexedAccess(llvm::StringRef::const_iterator osi,
834  llvm::StringRef::const_iterator ose) {
835  std::pair<Operand, llvm::StringRef::const_iterator> offset_and_iterator =
836  ParseImmediate(osi, ose);
837  if (offset_and_iterator.first.IsValid()) {
838  osi = offset_and_iterator.second;
839  }
840 
841  bool found = false;
842  std::tie(found, osi) = ConsumeChar(osi, '(', ose);
843  if (!found) {
844  return std::make_pair(Operand(), osi);
845  }
846 
847  std::pair<Operand, llvm::StringRef::const_iterator> base_and_iterator =
848  ParseRegisterName(osi, ose);
849  if (base_and_iterator.first.IsValid()) {
850  osi = base_and_iterator.second;
851  } else {
852  return std::make_pair(Operand(), osi);
853  }
854 
855  std::tie(found, osi) = ConsumeChar(osi, ',', ose);
856  if (!found) {
857  return std::make_pair(Operand(), osi);
858  }
859 
860  std::pair<Operand, llvm::StringRef::const_iterator> index_and_iterator =
861  ParseRegisterName(osi, ose);
862  if (index_and_iterator.first.IsValid()) {
863  osi = index_and_iterator.second;
864  } else {
865  return std::make_pair(Operand(), osi);
866  }
867 
868  std::tie(found, osi) = ConsumeChar(osi, ',', ose);
869  if (!found) {
870  return std::make_pair(Operand(), osi);
871  }
872 
873  std::pair<Operand, llvm::StringRef::const_iterator>
874  multiplier_and_iterator = ParseImmediate(osi, ose);
875  if (index_and_iterator.first.IsValid()) {
876  osi = index_and_iterator.second;
877  } else {
878  return std::make_pair(Operand(), osi);
879  }
880 
881  std::tie(found, osi) = ConsumeChar(osi, ')', ose);
882  if (!found) {
883  return std::make_pair(Operand(), osi);
884  }
885 
886  Operand product;
887  product.m_type = Operand::Type::Product;
888  product.m_children.push_back(index_and_iterator.first);
889  product.m_children.push_back(multiplier_and_iterator.first);
890 
891  Operand index;
892  index.m_type = Operand::Type::Sum;
893  index.m_children.push_back(base_and_iterator.first);
894  index.m_children.push_back(product);
895 
896  if (offset_and_iterator.first.IsValid()) {
897  Operand offset;
898  offset.m_type = Operand::Type::Sum;
899  offset.m_children.push_back(offset_and_iterator.first);
900  offset.m_children.push_back(index);
901 
902  Operand deref;
903  deref.m_type = Operand::Type::Dereference;
904  deref.m_children.push_back(offset);
905  return std::make_pair(deref, osi);
906  } else {
907  Operand deref;
908  deref.m_type = Operand::Type::Dereference;
909  deref.m_children.push_back(index);
910  return std::make_pair(deref, osi);
911  }
912  }
913 
914  // -0x10(%rbp)
915  static std::pair<Operand, llvm::StringRef::const_iterator>
916  ParseIntelDerefAccess(llvm::StringRef::const_iterator osi,
917  llvm::StringRef::const_iterator ose) {
918  std::pair<Operand, llvm::StringRef::const_iterator> offset_and_iterator =
919  ParseImmediate(osi, ose);
920  if (offset_and_iterator.first.IsValid()) {
921  osi = offset_and_iterator.second;
922  }
923 
924  bool found = false;
925  std::tie(found, osi) = ConsumeChar(osi, '(', ose);
926  if (!found) {
927  return std::make_pair(Operand(), osi);
928  }
929 
930  std::pair<Operand, llvm::StringRef::const_iterator> base_and_iterator =
931  ParseRegisterName(osi, ose);
932  if (base_and_iterator.first.IsValid()) {
933  osi = base_and_iterator.second;
934  } else {
935  return std::make_pair(Operand(), osi);
936  }
937 
938  std::tie(found, osi) = ConsumeChar(osi, ')', ose);
939  if (!found) {
940  return std::make_pair(Operand(), osi);
941  }
942 
943  if (offset_and_iterator.first.IsValid()) {
944  Operand offset;
945  offset.m_type = Operand::Type::Sum;
946  offset.m_children.push_back(offset_and_iterator.first);
947  offset.m_children.push_back(base_and_iterator.first);
948 
949  Operand deref;
950  deref.m_type = Operand::Type::Dereference;
951  deref.m_children.push_back(offset);
952  return std::make_pair(deref, osi);
953  } else {
954  Operand deref;
955  deref.m_type = Operand::Type::Dereference;
956  deref.m_children.push_back(base_and_iterator.first);
957  return std::make_pair(deref, osi);
958  }
959  }
960 
961  // [sp, #8]!
962  static std::pair<Operand, llvm::StringRef::const_iterator>
963  ParseARMOffsetAccess(llvm::StringRef::const_iterator osi,
964  llvm::StringRef::const_iterator ose) {
965  bool found = false;
966  std::tie(found, osi) = ConsumeChar(osi, '[', ose);
967  if (!found) {
968  return std::make_pair(Operand(), osi);
969  }
970 
971  std::pair<Operand, llvm::StringRef::const_iterator> base_and_iterator =
972  ParseRegisterName(osi, ose);
973  if (base_and_iterator.first.IsValid()) {
974  osi = base_and_iterator.second;
975  } else {
976  return std::make_pair(Operand(), osi);
977  }
978 
979  std::tie(found, osi) = ConsumeChar(osi, ',', ose);
980  if (!found) {
981  return std::make_pair(Operand(), osi);
982  }
983 
984  std::pair<Operand, llvm::StringRef::const_iterator> offset_and_iterator =
985  ParseImmediate(osi, ose);
986  if (offset_and_iterator.first.IsValid()) {
987  osi = offset_and_iterator.second;
988  }
989 
990  std::tie(found, osi) = ConsumeChar(osi, ']', ose);
991  if (!found) {
992  return std::make_pair(Operand(), osi);
993  }
994 
995  Operand offset;
996  offset.m_type = Operand::Type::Sum;
997  offset.m_children.push_back(offset_and_iterator.first);
998  offset.m_children.push_back(base_and_iterator.first);
999 
1000  Operand deref;
1001  deref.m_type = Operand::Type::Dereference;
1002  deref.m_children.push_back(offset);
1003  return std::make_pair(deref, osi);
1004  }
1005 
1006  // [sp]
1007  static std::pair<Operand, llvm::StringRef::const_iterator>
1008  ParseARMDerefAccess(llvm::StringRef::const_iterator osi,
1009  llvm::StringRef::const_iterator ose) {
1010  bool found = false;
1011  std::tie(found, osi) = ConsumeChar(osi, '[', ose);
1012  if (!found) {
1013  return std::make_pair(Operand(), osi);
1014  }
1015 
1016  std::pair<Operand, llvm::StringRef::const_iterator> base_and_iterator =
1017  ParseRegisterName(osi, ose);
1018  if (base_and_iterator.first.IsValid()) {
1019  osi = base_and_iterator.second;
1020  } else {
1021  return std::make_pair(Operand(), osi);
1022  }
1023 
1024  std::tie(found, osi) = ConsumeChar(osi, ']', ose);
1025  if (!found) {
1026  return std::make_pair(Operand(), osi);
1027  }
1028 
1029  Operand deref;
1030  deref.m_type = Operand::Type::Dereference;
1031  deref.m_children.push_back(base_and_iterator.first);
1032  return std::make_pair(deref, osi);
1033  }
1034 
1035  static void DumpOperand(const Operand &op, Stream &s) {
1036  switch (op.m_type) {
1037  case Operand::Type::Dereference:
1038  s.PutCString("*");
1039  DumpOperand(op.m_children[0], s);
1040  break;
1041  case Operand::Type::Immediate:
1042  if (op.m_negative) {
1043  s.PutCString("-");
1044  }
1045  s.PutCString(llvm::to_string(op.m_immediate));
1046  break;
1047  case Operand::Type::Invalid:
1048  s.PutCString("Invalid");
1049  break;
1050  case Operand::Type::Product:
1051  s.PutCString("(");
1052  DumpOperand(op.m_children[0], s);
1053  s.PutCString("*");
1054  DumpOperand(op.m_children[1], s);
1055  s.PutCString(")");
1056  break;
1057  case Operand::Type::Register:
1059  break;
1060  case Operand::Type::Sum:
1061  s.PutCString("(");
1062  DumpOperand(op.m_children[0], s);
1063  s.PutCString("+");
1064  DumpOperand(op.m_children[1], s);
1065  s.PutCString(")");
1066  break;
1067  }
1068  }
1069 
1071  llvm::SmallVectorImpl<Instruction::Operand> &operands) override {
1072  const char *operands_string = GetOperands(nullptr);
1073 
1074  if (!operands_string) {
1075  return false;
1076  }
1077 
1078  llvm::StringRef operands_ref(operands_string);
1079 
1080  llvm::StringRef::const_iterator osi = operands_ref.begin();
1081  llvm::StringRef::const_iterator ose = operands_ref.end();
1082 
1083  while (osi != ose) {
1084  Operand operand;
1085  llvm::StringRef::const_iterator iter;
1086 
1087  if ((std::tie(operand, iter) = ParseIntelIndexedAccess(osi, ose),
1088  operand.IsValid()) ||
1089  (std::tie(operand, iter) = ParseIntelDerefAccess(osi, ose),
1090  operand.IsValid()) ||
1091  (std::tie(operand, iter) = ParseARMOffsetAccess(osi, ose),
1092  operand.IsValid()) ||
1093  (std::tie(operand, iter) = ParseARMDerefAccess(osi, ose),
1094  operand.IsValid()) ||
1095  (std::tie(operand, iter) = ParseRegisterName(osi, ose),
1096  operand.IsValid()) ||
1097  (std::tie(operand, iter) = ParseImmediate(osi, ose),
1098  operand.IsValid())) {
1099  osi = iter;
1100  operands.push_back(operand);
1101  } else {
1102  return false;
1103  }
1104 
1105  std::pair<bool, llvm::StringRef::const_iterator> found_and_iter =
1106  ConsumeChar(osi, ',', ose);
1107  if (found_and_iter.first) {
1108  osi = found_and_iter.second;
1109  }
1110 
1111  osi = ConsumeWhitespace(osi, ose);
1112  }
1113 
1114  DisassemblerSP disasm_sp = m_disasm_wp.lock();
1115 
1116  if (disasm_sp && operands.size() > 1) {
1117  // TODO tie this into the MC Disassembler's notion of clobbers.
1118  switch (disasm_sp->GetArchitecture().GetMachine()) {
1119  default:
1120  break;
1121  case llvm::Triple::x86:
1122  case llvm::Triple::x86_64:
1123  operands[operands.size() - 1].m_clobbered = true;
1124  break;
1125  case llvm::Triple::arm:
1126  operands[0].m_clobbered = true;
1127  break;
1128  }
1129  }
1130 
1131  if (Log *log = GetLog(LLDBLog::Process)) {
1132  StreamString ss;
1133 
1134  ss.Printf("[%s] expands to %zu operands:\n", operands_string,
1135  operands.size());
1136  for (const Operand &operand : operands) {
1137  ss.PutCString(" ");
1138  DumpOperand(operand, ss);
1139  ss.PutCString("\n");
1140  }
1141 
1142  log->PutString(ss.GetString());
1143  }
1144 
1145  return true;
1146  }
1147 
1148  bool IsCall() override {
1149  VisitInstruction();
1150  return m_is_call;
1151  }
1152 
1153 protected:
1154  std::weak_ptr<DisassemblerLLVMC> m_disasm_wp;
1155 
1156  bool m_is_valid = false;
1157  bool m_using_file_addr = false;
1158  bool m_has_visited_instruction = false;
1159 
1160  // Be conservative. If we didn't understand the instruction, say it:
1161  // - Might branch
1162  // - Does not have a delay slot
1163  // - Is not a call
1164  // - Is not a load
1165  // - Is not an authenticated instruction
1166  bool m_does_branch = true;
1167  bool m_has_delay_slot = false;
1168  bool m_is_call = false;
1169  bool m_is_load = false;
1170  bool m_is_authenticated = false;
1171 
1173  if (m_has_visited_instruction)
1174  return;
1175 
1176  DisassemblerScope disasm(*this);
1177  if (!disasm)
1178  return;
1179 
1180  DataExtractor data;
1181  if (!m_opcode.GetData(data))
1182  return;
1183 
1184  bool is_alternate_isa;
1185  lldb::addr_t pc = m_address.GetFileAddress();
1186  DisassemblerLLVMC::MCDisasmInstance *mc_disasm_ptr =
1187  GetDisasmToUse(is_alternate_isa, disasm);
1188  const uint8_t *opcode_data = data.GetDataStart();
1189  const size_t opcode_data_len = data.GetByteSize();
1190  llvm::MCInst inst;
1191  const size_t inst_size =
1192  mc_disasm_ptr->GetMCInst(opcode_data, opcode_data_len, pc, inst);
1193  if (inst_size == 0)
1194  return;
1195 
1196  m_has_visited_instruction = true;
1197  m_does_branch = mc_disasm_ptr->CanBranch(inst);
1198  m_has_delay_slot = mc_disasm_ptr->HasDelaySlot(inst);
1199  m_is_call = mc_disasm_ptr->IsCall(inst);
1200  m_is_load = mc_disasm_ptr->IsLoad(inst);
1201  m_is_authenticated = mc_disasm_ptr->IsAuthenticated(inst);
1202  }
1203 
1204 private:
1206  GetDisasmToUse(bool &is_alternate_isa, DisassemblerScope &disasm) {
1207  is_alternate_isa = false;
1208  if (disasm) {
1209  if (disasm->m_alternate_disasm_up) {
1210  const AddressClass address_class = GetAddressClass();
1211 
1212  if (address_class == AddressClass::eCodeAlternateISA) {
1213  is_alternate_isa = true;
1214  return disasm->m_alternate_disasm_up.get();
1215  }
1216  }
1217  return disasm->m_disasm_up.get();
1218  }
1219  return nullptr;
1220  }
1221 };
1222 
1223 std::unique_ptr<DisassemblerLLVMC::MCDisasmInstance>
1224 DisassemblerLLVMC::MCDisasmInstance::Create(const char *triple, const char *cpu,
1225  const char *features_str,
1226  unsigned flavor,
1227  DisassemblerLLVMC &owner) {
1228  using Instance = std::unique_ptr<DisassemblerLLVMC::MCDisasmInstance>;
1229 
1231  const llvm::Target *curr_target =
1232  llvm::TargetRegistry::lookupTarget(triple, Status);
1233  if (!curr_target)
1234  return Instance();
1235 
1236  std::unique_ptr<llvm::MCInstrInfo> instr_info_up(
1237  curr_target->createMCInstrInfo());
1238  if (!instr_info_up)
1239  return Instance();
1240 
1241  std::unique_ptr<llvm::MCRegisterInfo> reg_info_up(
1242  curr_target->createMCRegInfo(triple));
1243  if (!reg_info_up)
1244  return Instance();
1245 
1246  std::unique_ptr<llvm::MCSubtargetInfo> subtarget_info_up(
1247  curr_target->createMCSubtargetInfo(triple, cpu, features_str));
1248  if (!subtarget_info_up)
1249  return Instance();
1250 
1251  llvm::MCTargetOptions MCOptions;
1252  std::unique_ptr<llvm::MCAsmInfo> asm_info_up(
1253  curr_target->createMCAsmInfo(*reg_info_up, triple, MCOptions));
1254  if (!asm_info_up)
1255  return Instance();
1256 
1257  std::unique_ptr<llvm::MCContext> context_up(
1258  new llvm::MCContext(llvm::Triple(triple), asm_info_up.get(),
1259  reg_info_up.get(), subtarget_info_up.get()));
1260  if (!context_up)
1261  return Instance();
1262 
1263  std::unique_ptr<llvm::MCDisassembler> disasm_up(
1264  curr_target->createMCDisassembler(*subtarget_info_up, *context_up));
1265  if (!disasm_up)
1266  return Instance();
1267 
1268  std::unique_ptr<llvm::MCRelocationInfo> rel_info_up(
1269  curr_target->createMCRelocationInfo(triple, *context_up));
1270  if (!rel_info_up)
1271  return Instance();
1272 
1273  std::unique_ptr<llvm::MCSymbolizer> symbolizer_up(
1274  curr_target->createMCSymbolizer(
1275  triple, nullptr, DisassemblerLLVMC::SymbolLookupCallback, &owner,
1276  context_up.get(), std::move(rel_info_up)));
1277  disasm_up->setSymbolizer(std::move(symbolizer_up));
1278 
1279  unsigned asm_printer_variant =
1280  flavor == ~0U ? asm_info_up->getAssemblerDialect() : flavor;
1281 
1282  std::unique_ptr<llvm::MCInstPrinter> instr_printer_up(
1283  curr_target->createMCInstPrinter(llvm::Triple{triple},
1284  asm_printer_variant, *asm_info_up,
1285  *instr_info_up, *reg_info_up));
1286  if (!instr_printer_up)
1287  return Instance();
1288 
1289  return Instance(
1290  new MCDisasmInstance(std::move(instr_info_up), std::move(reg_info_up),
1291  std::move(subtarget_info_up), std::move(asm_info_up),
1292  std::move(context_up), std::move(disasm_up),
1293  std::move(instr_printer_up)));
1294 }
1295 
1297  std::unique_ptr<llvm::MCInstrInfo> &&instr_info_up,
1298  std::unique_ptr<llvm::MCRegisterInfo> &&reg_info_up,
1299  std::unique_ptr<llvm::MCSubtargetInfo> &&subtarget_info_up,
1300  std::unique_ptr<llvm::MCAsmInfo> &&asm_info_up,
1301  std::unique_ptr<llvm::MCContext> &&context_up,
1302  std::unique_ptr<llvm::MCDisassembler> &&disasm_up,
1303  std::unique_ptr<llvm::MCInstPrinter> &&instr_printer_up)
1304  : m_instr_info_up(std::move(instr_info_up)),
1305  m_reg_info_up(std::move(reg_info_up)),
1306  m_subtarget_info_up(std::move(subtarget_info_up)),
1307  m_asm_info_up(std::move(asm_info_up)),
1308  m_context_up(std::move(context_up)), m_disasm_up(std::move(disasm_up)),
1309  m_instr_printer_up(std::move(instr_printer_up)) {
1312 }
1313 
1315  const uint8_t *opcode_data, size_t opcode_data_len, lldb::addr_t pc,
1316  llvm::MCInst &mc_inst) const {
1317  llvm::ArrayRef<uint8_t> data(opcode_data, opcode_data_len);
1318  llvm::MCDisassembler::DecodeStatus status;
1319 
1320  uint64_t new_inst_size;
1321  status = m_disasm_up->getInstruction(mc_inst, new_inst_size, data, pc,
1322  llvm::nulls());
1323  if (status == llvm::MCDisassembler::Success)
1324  return new_inst_size;
1325  else
1326  return 0;
1327 }
1328 
1330  llvm::MCInst &mc_inst, std::string &inst_string,
1331  std::string &comments_string) {
1332  llvm::raw_string_ostream inst_stream(inst_string);
1333  llvm::raw_string_ostream comments_stream(comments_string);
1334 
1335  m_instr_printer_up->setCommentStream(comments_stream);
1336  m_instr_printer_up->printInst(&mc_inst, 0, llvm::StringRef(),
1337  *m_subtarget_info_up, inst_stream);
1338  m_instr_printer_up->setCommentStream(llvm::nulls());
1339  comments_stream.flush();
1340 
1341  static std::string g_newlines("\r\n");
1342 
1343  for (size_t newline_pos = 0;
1344  (newline_pos = comments_string.find_first_of(g_newlines, newline_pos)) !=
1345  comments_string.npos;
1346  /**/) {
1347  comments_string.replace(comments_string.begin() + newline_pos,
1348  comments_string.begin() + newline_pos + 1, 1, ' ');
1349  }
1350 }
1351 
1353  bool use_hex_immed, HexImmediateStyle hex_style) {
1354  m_instr_printer_up->setPrintImmHex(use_hex_immed);
1355  switch (hex_style) {
1356  case eHexStyleC:
1357  m_instr_printer_up->setPrintHexStyle(llvm::HexStyle::C);
1358  break;
1359  case eHexStyleAsm:
1360  m_instr_printer_up->setPrintHexStyle(llvm::HexStyle::Asm);
1361  break;
1362  }
1363 }
1364 
1366  llvm::MCInst &mc_inst) const {
1367  return m_instr_info_up->get(mc_inst.getOpcode())
1368  .mayAffectControlFlow(mc_inst, *m_reg_info_up);
1369 }
1370 
1372  llvm::MCInst &mc_inst) const {
1373  return m_instr_info_up->get(mc_inst.getOpcode()).hasDelaySlot();
1374 }
1375 
1376 bool DisassemblerLLVMC::MCDisasmInstance::IsCall(llvm::MCInst &mc_inst) const {
1377  return m_instr_info_up->get(mc_inst.getOpcode()).isCall();
1378 }
1379 
1380 bool DisassemblerLLVMC::MCDisasmInstance::IsLoad(llvm::MCInst &mc_inst) const {
1381  return m_instr_info_up->get(mc_inst.getOpcode()).mayLoad();
1382 }
1383 
1385  llvm::MCInst &mc_inst) const {
1386  auto InstrDesc = m_instr_info_up->get(mc_inst.getOpcode());
1387 
1388  // Treat software auth traps (brk 0xc470 + aut key, where 0x70 == 'p', 0xc4
1389  // == 'a' + 'c') as authenticated instructions for reporting purposes, in
1390  // addition to the standard authenticated instructions specified in ARMv8.3.
1391  bool IsBrkC47x = false;
1392  if (InstrDesc.isTrap() && mc_inst.getNumOperands() == 1) {
1393  const llvm::MCOperand &Op0 = mc_inst.getOperand(0);
1394  if (Op0.isImm() && Op0.getImm() >= 0xc470 && Op0.getImm() <= 0xc474)
1395  IsBrkC47x = true;
1396  }
1397 
1398  return InstrDesc.isAuthenticated() || IsBrkC47x;
1399 }
1400 
1402  const char *flavor_string)
1403  : Disassembler(arch, flavor_string), m_exe_ctx(nullptr), m_inst(nullptr),
1405  m_adrp_insn() {
1406  if (!FlavorValidForArchSpec(arch, m_flavor.c_str())) {
1407  m_flavor.assign("default");
1408  }
1409 
1410  unsigned flavor = ~0U;
1411  llvm::Triple triple = arch.GetTriple();
1412 
1413  // So far the only supported flavor is "intel" on x86. The base class will
1414  // set this correctly coming in.
1415  if (triple.getArch() == llvm::Triple::x86 ||
1416  triple.getArch() == llvm::Triple::x86_64) {
1417  if (m_flavor == "intel") {
1418  flavor = 1;
1419  } else if (m_flavor == "att") {
1420  flavor = 0;
1421  }
1422  }
1423 
1424  ArchSpec thumb_arch(arch);
1425  if (triple.getArch() == llvm::Triple::arm) {
1426  std::string thumb_arch_name(thumb_arch.GetTriple().getArchName().str());
1427  // Replace "arm" with "thumb" so we get all thumb variants correct
1428  if (thumb_arch_name.size() > 3) {
1429  thumb_arch_name.erase(0, 3);
1430  thumb_arch_name.insert(0, "thumb");
1431  } else {
1432  thumb_arch_name = "thumbv9.3a";
1433  }
1434  thumb_arch.GetTriple().setArchName(llvm::StringRef(thumb_arch_name));
1435  }
1436 
1437  // If no sub architecture specified then use the most recent arm architecture
1438  // so the disassembler will return all instructions. Without it we will see a
1439  // lot of unknown opcodes if the code uses instructions which are not
1440  // available in the oldest arm version (which is used when no sub architecture
1441  // is specified).
1442  if (triple.getArch() == llvm::Triple::arm &&
1443  triple.getSubArch() == llvm::Triple::NoSubArch)
1444  triple.setArchName("armv9.3a");
1445 
1446  std::string features_str;
1447  const char *triple_str = triple.getTriple().c_str();
1448 
1449  // ARM Cortex M0-M7 devices only execute thumb instructions
1450  if (arch.IsAlwaysThumbInstructions()) {
1451  triple_str = thumb_arch.GetTriple().getTriple().c_str();
1452  features_str += "+fp-armv8,";
1453  }
1454 
1455  const char *cpu = "";
1456 
1457  switch (arch.GetCore()) {
1458  case ArchSpec::eCore_mips32:
1459  case ArchSpec::eCore_mips32el:
1460  cpu = "mips32";
1461  break;
1462  case ArchSpec::eCore_mips32r2:
1463  case ArchSpec::eCore_mips32r2el:
1464  cpu = "mips32r2";
1465  break;
1466  case ArchSpec::eCore_mips32r3:
1467  case ArchSpec::eCore_mips32r3el:
1468  cpu = "mips32r3";
1469  break;
1470  case ArchSpec::eCore_mips32r5:
1471  case ArchSpec::eCore_mips32r5el:
1472  cpu = "mips32r5";
1473  break;
1474  case ArchSpec::eCore_mips32r6:
1475  case ArchSpec::eCore_mips32r6el:
1476  cpu = "mips32r6";
1477  break;
1478  case ArchSpec::eCore_mips64:
1479  case ArchSpec::eCore_mips64el:
1480  cpu = "mips64";
1481  break;
1482  case ArchSpec::eCore_mips64r2:
1483  case ArchSpec::eCore_mips64r2el:
1484  cpu = "mips64r2";
1485  break;
1486  case ArchSpec::eCore_mips64r3:
1487  case ArchSpec::eCore_mips64r3el:
1488  cpu = "mips64r3";
1489  break;
1490  case ArchSpec::eCore_mips64r5:
1491  case ArchSpec::eCore_mips64r5el:
1492  cpu = "mips64r5";
1493  break;
1494  case ArchSpec::eCore_mips64r6:
1495  case ArchSpec::eCore_mips64r6el:
1496  cpu = "mips64r6";
1497  break;
1498  default:
1499  cpu = "";
1500  break;
1501  }
1502 
1503  if (arch.IsMIPS()) {
1504  uint32_t arch_flags = arch.GetFlags();
1505  if (arch_flags & ArchSpec::eMIPSAse_msa)
1506  features_str += "+msa,";
1507  if (arch_flags & ArchSpec::eMIPSAse_dsp)
1508  features_str += "+dsp,";
1509  if (arch_flags & ArchSpec::eMIPSAse_dspr2)
1510  features_str += "+dspr2,";
1511  }
1512 
1513  // If any AArch64 variant, enable latest ISA with all extensions.
1514  if (triple.isAArch64()) {
1515  features_str += "+all,";
1516 
1517  if (triple.getVendor() == llvm::Triple::Apple)
1518  cpu = "apple-latest";
1519  }
1520 
1521  if (triple.isRISCV()) {
1522  uint32_t arch_flags = arch.GetFlags();
1523  if (arch_flags & ArchSpec::eRISCV_rvc)
1524  features_str += "+c,";
1525  if (arch_flags & ArchSpec::eRISCV_rve)
1526  features_str += "+e,";
1527  if ((arch_flags & ArchSpec::eRISCV_float_abi_single) ==
1528  ArchSpec::eRISCV_float_abi_single)
1529  features_str += "+f,";
1530  if ((arch_flags & ArchSpec::eRISCV_float_abi_double) ==
1531  ArchSpec::eRISCV_float_abi_double)
1532  features_str += "+f,+d,";
1533  if ((arch_flags & ArchSpec::eRISCV_float_abi_quad) ==
1534  ArchSpec::eRISCV_float_abi_quad)
1535  features_str += "+f,+d,+q,";
1536  // FIXME: how do we detect features such as `+a`, `+m`?
1537  }
1538 
1539  // We use m_disasm_up.get() to tell whether we are valid or not, so if this
1540  // isn't good for some reason, we won't be valid and FindPlugin will fail and
1541  // we won't get used.
1542  m_disasm_up = MCDisasmInstance::Create(triple_str, cpu, features_str.c_str(),
1543  flavor, *this);
1544 
1545  llvm::Triple::ArchType llvm_arch = triple.getArch();
1546 
1547  // For arm CPUs that can execute arm or thumb instructions, also create a
1548  // thumb instruction disassembler.
1549  if (llvm_arch == llvm::Triple::arm) {
1550  std::string thumb_triple(thumb_arch.GetTriple().getTriple());
1552  MCDisasmInstance::Create(thumb_triple.c_str(), "", features_str.c_str(),
1553  flavor, *this);
1554  if (!m_alternate_disasm_up)
1555  m_disasm_up.reset();
1556 
1557  } else if (arch.IsMIPS()) {
1558  /* Create alternate disassembler for MIPS16 and microMIPS */
1559  uint32_t arch_flags = arch.GetFlags();
1560  if (arch_flags & ArchSpec::eMIPSAse_mips16)
1561  features_str += "+mips16,";
1562  else if (arch_flags & ArchSpec::eMIPSAse_micromips)
1563  features_str += "+micromips,";
1564 
1566  triple_str, cpu, features_str.c_str(), flavor, *this);
1567  if (!m_alternate_disasm_up)
1568  m_disasm_up.reset();
1569  }
1570 }
1571 
1573 
1575  const char *flavor) {
1576  if (arch.GetTriple().getArch() != llvm::Triple::UnknownArch) {
1577  std::unique_ptr<DisassemblerLLVMC> disasm_up(
1578  new DisassemblerLLVMC(arch, flavor));
1579 
1580  if (disasm_up.get() && disasm_up->IsValid())
1581  return disasm_up.release();
1582  }
1583  return nullptr;
1584 }
1585 
1587  const DataExtractor &data,
1588  lldb::offset_t data_offset,
1589  size_t num_instructions,
1590  bool append, bool data_from_file) {
1591  if (!append)
1593 
1594  if (!IsValid())
1595  return 0;
1596 
1597  m_data_from_file = data_from_file;
1598  uint32_t data_cursor = data_offset;
1599  const size_t data_byte_size = data.GetByteSize();
1600  uint32_t instructions_parsed = 0;
1601  Address inst_addr(base_addr);
1602 
1603  while (data_cursor < data_byte_size &&
1604  instructions_parsed < num_instructions) {
1605 
1606  AddressClass address_class = AddressClass::eCode;
1607 
1609  address_class = inst_addr.GetAddressClass();
1610 
1611  InstructionSP inst_sp(
1612  new InstructionLLVMC(*this, inst_addr, address_class));
1613 
1614  if (!inst_sp)
1615  break;
1616 
1617  uint32_t inst_size = inst_sp->Decode(*this, data, data_cursor);
1618 
1619  if (inst_size == 0)
1620  break;
1621 
1622  m_instruction_list.Append(inst_sp);
1623  data_cursor += inst_size;
1624  inst_addr.Slide(inst_size);
1625  instructions_parsed++;
1626  }
1627 
1628  return data_cursor - data_offset;
1629 }
1630 
1632  PluginManager::RegisterPlugin(GetPluginNameStatic(),
1633  "Disassembler that uses LLVM MC to disassemble "
1634  "i386, x86_64, ARM, and ARM64.",
1635  CreateInstance);
1636 
1637  llvm::InitializeAllTargetInfos();
1638  llvm::InitializeAllTargetMCs();
1639  llvm::InitializeAllAsmParsers();
1640  llvm::InitializeAllDisassemblers();
1641 }
1642 
1644  PluginManager::UnregisterPlugin(CreateInstance);
1645 }
1646 
1647 int DisassemblerLLVMC::OpInfoCallback(void *disassembler, uint64_t pc,
1648  uint64_t offset, uint64_t size,
1649  int tag_type, void *tag_bug) {
1650  return static_cast<DisassemblerLLVMC *>(disassembler)
1651  ->OpInfo(pc, offset, size, tag_type, tag_bug);
1652 }
1653 
1654 const char *DisassemblerLLVMC::SymbolLookupCallback(void *disassembler,
1655  uint64_t value,
1656  uint64_t *type, uint64_t pc,
1657  const char **name) {
1658  return static_cast<DisassemblerLLVMC *>(disassembler)
1659  ->SymbolLookup(value, type, pc, name);
1660 }
1661 
1663  const lldb_private::ArchSpec &arch, const char *flavor) {
1664  llvm::Triple triple = arch.GetTriple();
1665  if (flavor == nullptr || strcmp(flavor, "default") == 0)
1666  return true;
1667 
1668  if (triple.getArch() == llvm::Triple::x86 ||
1669  triple.getArch() == llvm::Triple::x86_64) {
1670  return strcmp(flavor, "intel") == 0 || strcmp(flavor, "att") == 0;
1671  } else
1672  return false;
1673 }
1674 
1675 bool DisassemblerLLVMC::IsValid() const { return m_disasm_up.operator bool(); }
1676 
1677 int DisassemblerLLVMC::OpInfo(uint64_t PC, uint64_t Offset, uint64_t Size,
1678  int tag_type, void *tag_bug) {
1679  switch (tag_type) {
1680  default:
1681  break;
1682  case 1:
1683  memset(tag_bug, 0, sizeof(::LLVMOpInfo1));
1684  break;
1685  }
1686  return 0;
1687 }
1688 
1689 const char *DisassemblerLLVMC::SymbolLookup(uint64_t value, uint64_t *type_ptr,
1690  uint64_t pc, const char **name) {
1691  if (*type_ptr) {
1692  if (m_exe_ctx && m_inst) {
1693  // std::string remove_this_prior_to_checkin;
1694  Target *target = m_exe_ctx ? m_exe_ctx->GetTargetPtr() : nullptr;
1695  Address value_so_addr;
1696  Address pc_so_addr;
1697  if (target->GetArchitecture().GetMachine() == llvm::Triple::aarch64 ||
1698  target->GetArchitecture().GetMachine() == llvm::Triple::aarch64_be ||
1699  target->GetArchitecture().GetMachine() == llvm::Triple::aarch64_32) {
1700  if (*type_ptr == LLVMDisassembler_ReferenceType_In_ARM64_ADRP) {
1701  m_adrp_address = pc;
1702  m_adrp_insn = value;
1703  *name = nullptr;
1704  *type_ptr = LLVMDisassembler_ReferenceType_InOut_None;
1705  return nullptr;
1706  }
1707  // If this instruction is an ADD and
1708  // the previous instruction was an ADRP and
1709  // the ADRP's register and this ADD's register are the same,
1710  // then this is a pc-relative address calculation.
1711  if (*type_ptr == LLVMDisassembler_ReferenceType_In_ARM64_ADDXri &&
1712  m_adrp_insn && m_adrp_address == pc - 4 &&
1713  (m_adrp_insn.value() & 0x1f) == ((value >> 5) & 0x1f)) {
1714  uint32_t addxri_inst;
1715  uint64_t adrp_imm, addxri_imm;
1716  // Get immlo and immhi bits, OR them together to get the ADRP imm
1717  // value.
1718  adrp_imm = ((m_adrp_insn.value() & 0x00ffffe0) >> 3) |
1719  ((m_adrp_insn.value() >> 29) & 0x3);
1720  // if high bit of immhi after right-shifting set, sign extend
1721  if (adrp_imm & (1ULL << 20))
1722  adrp_imm |= ~((1ULL << 21) - 1);
1723 
1724  addxri_inst = value;
1725  addxri_imm = (addxri_inst >> 10) & 0xfff;
1726  // check if 'sh' bit is set, shift imm value up if so
1727  // (this would make no sense, ADRP already gave us this part)
1728  if ((addxri_inst >> (12 + 5 + 5)) & 1)
1729  addxri_imm <<= 12;
1730  value = (m_adrp_address & 0xfffffffffffff000LL) + (adrp_imm << 12) +
1731  addxri_imm;
1732  }
1734  m_adrp_insn.reset();
1735  }
1736 
1737  if (m_inst->UsingFileAddress()) {
1738  ModuleSP module_sp(m_inst->GetAddress().GetModule());
1739  if (module_sp) {
1740  module_sp->ResolveFileAddress(value, value_so_addr);
1741  module_sp->ResolveFileAddress(pc, pc_so_addr);
1742  }
1743  } else if (target && !target->GetSectionLoadList().IsEmpty()) {
1744  target->GetSectionLoadList().ResolveLoadAddress(value, value_so_addr);
1745  target->GetSectionLoadList().ResolveLoadAddress(pc, pc_so_addr);
1746  }
1747 
1748  SymbolContext sym_ctx;
1749  const SymbolContextItem resolve_scope =
1750  eSymbolContextFunction | eSymbolContextSymbol;
1751  if (pc_so_addr.IsValid() && pc_so_addr.GetModule()) {
1752  pc_so_addr.GetModule()->ResolveSymbolContextForAddress(
1753  pc_so_addr, resolve_scope, sym_ctx);
1754  }
1755 
1756  if (value_so_addr.IsValid() && value_so_addr.GetSection()) {
1757  StreamString ss;
1758 
1759  bool format_omitting_current_func_name = false;
1760  if (sym_ctx.symbol || sym_ctx.function) {
1761  AddressRange range;
1762  if (sym_ctx.GetAddressRange(resolve_scope, 0, false, range) &&
1763  range.GetBaseAddress().IsValid() &&
1764  range.ContainsLoadAddress(value_so_addr, target)) {
1765  format_omitting_current_func_name = true;
1766  }
1767  }
1768 
1769  // If the "value" address (the target address we're symbolicating) is
1770  // inside the same SymbolContext as the current instruction pc
1771  // (pc_so_addr), don't print the full function name - just print it
1772  // with DumpStyleNoFunctionName style, e.g. "<+36>".
1773  if (format_omitting_current_func_name) {
1774  value_so_addr.Dump(&ss, target, Address::DumpStyleNoFunctionName,
1775  Address::DumpStyleSectionNameOffset);
1776  } else {
1777  value_so_addr.Dump(
1778  &ss, target,
1779  Address::DumpStyleResolvedDescriptionNoFunctionArguments,
1780  Address::DumpStyleSectionNameOffset);
1781  }
1782 
1783  if (!ss.GetString().empty()) {
1784  // If Address::Dump returned a multi-line description, most commonly
1785  // seen when we have multiple levels of inlined functions at an
1786  // address, only show the first line.
1787  std::string str = std::string(ss.GetString());
1788  size_t first_eol_char = str.find_first_of("\r\n");
1789  if (first_eol_char != std::string::npos) {
1790  str.erase(first_eol_char);
1791  }
1792  m_inst->AppendComment(str);
1793  }
1794  }
1795  }
1796  }
1797 
1798  // TODO: llvm-objdump sets the type_ptr to the
1799  // LLVMDisassembler_ReferenceType_Out_* values
1800  // based on where value_so_addr is pointing, with
1801  // Mach-O specific augmentations in MachODump.cpp. e.g.
1802  // see what AArch64ExternalSymbolizer::tryAddingSymbolicOperand
1803  // handles.
1804  *type_ptr = LLVMDisassembler_ReferenceType_InOut_None;
1805  *name = nullptr;
1806  return nullptr;
1807 }
RegularExpression.h
DisassemblerLLVMC::m_inst
InstructionLLVMC * m_inst
Definition: DisassemblerLLVMC.h:72
lldb_private::AddressRange::GetBaseAddress
Address & GetBaseAddress()
Get accessor for the base address of the range.
Definition: AddressRange.h:209
lldb_private::ArchSpec::GetMinimumOpcodeByteSize
uint32_t GetMinimumOpcodeByteSize() const
Definition: ArchSpec.cpp:917
lldb::eInstructionControlFlowKindFarCall
@ eInstructionControlFlowKindFarCall
The instruction is a call-like far transfer.
Definition: lldb-enumerations.h:991
InstructionLLVMC::IsValid
bool IsValid() const
Definition: DisassemblerLLVMC.cpp:675
InstructionLLVMC::ParseIntelDerefAccess
static std::pair< Operand, llvm::StringRef::const_iterator > ParseIntelDerefAccess(llvm::StringRef::const_iterator osi, llvm::StringRef::const_iterator ose)
Definition: DisassemblerLLVMC.cpp:916
DisassemblerLLVMC::m_disasm_up
std::unique_ptr< MCDisasmInstance > m_disasm_up
Definition: DisassemblerLLVMC.h:85
lldb_private::ExecutionContext
Definition: ExecutionContext.h:292
lldb_private::ArchSpec
Definition: ArchSpec.h:33
lldb_private::ArchSpec::IsMIPS
bool IsMIPS() const
if MIPS architecture return true.
Definition: ArchSpec.cpp:546
lldb_private::Instruction::Operand::m_immediate
lldb::addr_t m_immediate
Definition: Disassembler.h:208
lldb_private::RegularExpression
Definition: RegularExpression.h:18
lldb_private::Instruction::Operand::m_negative
bool m_negative
Definition: Disassembler.h:210
InstructionLLVMC::UsingFileAddress
bool UsingFileAddress() const
Definition: DisassemblerLLVMC.cpp:677
lldb_private::ArchSpec::GetMaximumOpcodeByteSize
uint32_t GetMaximumOpcodeByteSize() const
Definition: ArchSpec.cpp:924
lldb_private::Address::IsValid
bool IsValid() const
Check if the object state is valid.
Definition: Address.h:345
lldb_private::Instruction::Operand::m_register
ConstString m_register
Definition: Disassembler.h:209
lldb_private::Opcode
Definition: Opcode.h:29
lldb_private::ArchSpec::GetMachine
llvm::Triple::ArchType GetMachine() const
Returns a machine family for the current architecture.
Definition: ArchSpec.cpp:668
DisassemblerLLVMC::MCDisasmInstance::m_asm_info_up
std::unique_ptr< llvm::MCAsmInfo > m_asm_info_up
Definition: DisassemblerLLVMC.cpp:82
lldb_private::AddressClass
AddressClass
Definition: lldb-private-enumerations.h:48
DisassemblerLLVMC::OpInfo
int OpInfo(uint64_t PC, uint64_t Offset, uint64_t Size, int TagType, void *TagBug)
Definition: DisassemblerLLVMC.cpp:1677
DisassemblerLLVMC::MCDisasmInstance::MCDisasmInstance
MCDisasmInstance(std::unique_ptr< llvm::MCInstrInfo > &&instr_info_up, std::unique_ptr< llvm::MCRegisterInfo > &&reg_info_up, std::unique_ptr< llvm::MCSubtargetInfo > &&subtarget_info_up, std::unique_ptr< llvm::MCAsmInfo > &&asm_info_up, std::unique_ptr< llvm::MCContext > &&context_up, std::unique_ptr< llvm::MCDisassembler > &&disasm_up, std::unique_ptr< llvm::MCInstPrinter > &&instr_printer_up)
Definition: DisassemblerLLVMC.cpp:1296
lldb_private::SymbolContext::GetAddressRange
bool GetAddressRange(uint32_t scope, uint32_t range_idx, bool use_inline_block_range, AddressRange &range) const
Get the address range contained within a symbol context.
Definition: SymbolContext.cpp:380
lldb_private::ArchSpec::GetCore
Core GetCore() const
Definition: ArchSpec.h:434
DisassemblerLLVMC::MCDisasmInstance::m_reg_info_up
std::unique_ptr< llvm::MCRegisterInfo > m_reg_info_up
Definition: DisassemblerLLVMC.cpp:80
InstructionLLVMC::DisassemblerScope
Grants exclusive access to the disassembler and initializes it with the given InstructionLLVMC and an...
Definition: DisassemblerLLVMC.cpp:682
x86::InstructionOpcodeAndModrm::primary_opcode
uint8_t primary_opcode
Definition: DisassemblerLLVMC.cpp:106
Module.h
lldb_private::DataExtractor::PeekData
const uint8_t * PeekData(lldb::offset_t offset, lldb::offset_t length) const
Peek at a bytes at offset.
Definition: DataExtractor.h:832
DisassemblerLLVMC::MCDisasmInstance::m_subtarget_info_up
std::unique_ptr< llvm::MCSubtargetInfo > m_subtarget_info_up
Definition: DisassemblerLLVMC.cpp:81
DisassemblerLLVMC::SymbolLookup
const char * SymbolLookup(uint64_t ReferenceValue, uint64_t *ReferenceType, uint64_t ReferencePC, const char **ReferenceName)
Definition: DisassemblerLLVMC.cpp:1689
lldb_private::ArchSpec::GetFlags
uint32_t GetFlags() const
Definition: ArchSpec.h:531
x86::GetControlFlowKind
lldb::InstructionControlFlowKind GetControlFlowKind(bool is_exec_mode_64b, Opcode m_opcode)
Definition: DisassemblerLLVMC.cpp:385
DisassemblerLLVMC::DisassemblerLLVMC
DisassemblerLLVMC(const lldb_private::ArchSpec &arch, const char *flavor)
Definition: DisassemblerLLVMC.cpp:1401
SectionLoadList.h
InstructionLLVMC::ConsumeChar
static std::pair< bool, llvm::StringRef::const_iterator > ConsumeChar(llvm::StringRef::const_iterator osi, const char c, llvm::StringRef::const_iterator ose)
Definition: DisassemblerLLVMC.cpp:720
lldb::offset_t
uint64_t offset_t
Definition: lldb-types.h:87
InstructionLLVMC
Definition: DisassemblerLLVMC.cpp:406
StackFrame.h
lldb_private::Instruction
Definition: Disassembler.h:58
lldb_private::Stream
Definition: Stream.h:28
lldb_private::ArchSpec::GetTriple
llvm::Triple & GetTriple()
Architecture triple accessor.
Definition: ArchSpec.h:455
lldb::addr_t
uint64_t addr_t
Definition: lldb-types.h:83
pc
@ pc
Definition: CompactUnwindInfo.cpp:1251
lldb_private::Instruction::Operand::m_children
std::vector< Operand > m_children
Definition: Disassembler.h:207
DisassemblerLLVMC::~DisassemblerLLVMC
~DisassemblerLLVMC() override
InstructionLLVMC::IsAuthenticated
bool IsAuthenticated() override
Definition: DisassemblerLLVMC.cpp:432
lldb_private::SymbolContext
Definition: SymbolContext.h:33
lldb_private::Target
Definition: Target.h:467
lldb_private::DataExtractor::GetU64
uint64_t GetU64(lldb::offset_t *offset_ptr) const
Extract a uint64_t value from *offset_ptr.
Definition: DataExtractor.cpp:474
DisassemblerLLVMC::MCDisasmInstance::m_instr_printer_up
std::unique_ptr< llvm::MCInstPrinter > m_instr_printer_up
Definition: DisassemblerLLVMC.cpp:85
lldb_private::StreamString::GetString
llvm::StringRef GetString() const
Definition: StreamString.cpp:51
InstructionLLVMC::DisassemblerScope::~DisassemblerScope
~DisassemblerScope()
Definition: DisassemblerLLVMC.cpp:694
InstructionLLVMC::DumpOperand
static void DumpOperand(const Operand &op, Stream &s)
Definition: DisassemblerLLVMC.cpp:1035
Process.h
InstructionLLVMC::ParseRegisterName
static std::pair< Operand, llvm::StringRef::const_iterator > ParseRegisterName(llvm::StringRef::const_iterator osi, llvm::StringRef::const_iterator ose)
Definition: DisassemblerLLVMC.cpp:734
DisassemblerLLVMC::m_adrp_address
lldb::addr_t m_adrp_address
Definition: DisassemblerLLVMC.h:79
DisassemblerLLVMC::FlavorValidForArchSpec
bool FlavorValidForArchSpec(const lldb_private::ArchSpec &arch, const char *flavor) override
Definition: DisassemblerLLVMC.cpp:1662
DisassemblerLLVMC::GetPluginNameStatic
static llvm::StringRef GetPluginNameStatic()
Definition: DisassemblerLLVMC.h:35
DisassemblerLLVMC::IsValid
bool IsValid() const
Definition: DisassemblerLLVMC.cpp:1675
InstructionLLVMC::ParseOperands
bool ParseOperands(llvm::SmallVectorImpl< Instruction::Operand > &operands) override
Definition: DisassemblerLLVMC.cpp:1070
Target.h
lldb::eInstructionControlFlowKindReturn
@ eInstructionControlFlowKindReturn
The instruction is a near (function) return.
Definition: lldb-enumerations.h:984
lldb::eInstructionControlFlowKindCall
@ eInstructionControlFlowKindCall
The instruction is a near (function) call.
Definition: lldb-enumerations.h:982
lldb_private::Opcode::GetOpcodeBytes
const void * GetOpcodeBytes() const
Definition: Opcode.h:198
InstructionLLVMC::GetDisasmToUse
DisassemblerLLVMC::MCDisasmInstance * GetDisasmToUse(bool &is_alternate_isa)
Definition: DisassemblerLLVMC.cpp:437
lldb_private::Disassembler::m_flavor
std::string m_flavor
Definition: Disassembler.h:547
lldb_private::SymbolContext::symbol
Symbol * symbol
The Symbol for a given query.
Definition: SymbolContext.h:323
DisassemblerLLVMC::MCDisasmInstance::m_instr_info_up
std::unique_ptr< llvm::MCInstrInfo > m_instr_info_up
Definition: DisassemblerLLVMC.cpp:79
InstructionLLVMC::Decode
size_t Decode(const lldb_private::Disassembler &disassembler, const lldb_private::DataExtractor &data, lldb::offset_t data_offset) override
Definition: DisassemblerLLVMC.cpp:442
lldb_private::Disassembler::eHexStyleC
@ eHexStyleC
Definition: Disassembler.h:399
InstructionLLVMC::ParseARMOffsetAccess
static std::pair< Operand, llvm::StringRef::const_iterator > ParseARMOffsetAccess(llvm::StringRef::const_iterator osi, llvm::StringRef::const_iterator ose)
Definition: DisassemblerLLVMC.cpp:963
x86::InstructionOpcodeAndModrm::modrm
uint8_t modrm
Definition: DisassemblerLLVMC.cpp:108
lldb_private::Address::GetAddressClass
AddressClass GetAddressClass() const
Definition: Address.cpp:1026
lldb_private::DataExtractor
Definition: DataExtractor.h:48
lldb_private::ConstString::GetStringRef
llvm::StringRef GetStringRef() const
Get the string value as a llvm::StringRef.
Definition: ConstString.h:202
lldb_private::Disassembler::eHexStyleAsm
@ eHexStyleAsm
Definition: Disassembler.h:400
Log.h
x86::InstructionOpcodeAndModrm
These are the three values deciding instruction control flow kind.
Definition: DisassemblerLLVMC.cpp:105
DisassemblerLLVMC::DecodeInstructions
size_t DecodeInstructions(const lldb_private::Address &base_addr, const lldb_private::DataExtractor &data, lldb::offset_t data_offset, size_t num_instructions, bool append, bool data_from_file) override
Definition: DisassemblerLLVMC.cpp:1586
DisassemblerLLVMC::m_alternate_disasm_up
std::unique_ptr< MCDisasmInstance > m_alternate_disasm_up
Definition: DisassemblerLLVMC.h:87
InstructionLLVMC::VisitInstruction
void VisitInstruction()
Definition: DisassemblerLLVMC.cpp:1172
DisassemblerLLVMC::MCDisasmInstance::IsLoad
bool IsLoad(llvm::MCInst &mc_inst) const
Definition: DisassemblerLLVMC.cpp:1380
InstructionLLVMC::IsLoad
bool IsLoad() override
Definition: DisassemblerLLVMC.cpp:427
DisassemblerLLVMC
Definition: DisassemblerLLVMC.h:23
lldb::eInstructionControlFlowKindCondJump
@ eInstructionControlFlowKindCondJump
The instruction is a near conditional jump.
Definition: lldb-enumerations.h:988
lldb_private::SymbolContext::function
Function * function
The Function for a given query.
Definition: SymbolContext.h:320
lldb::InstructionControlFlowKind
InstructionControlFlowKind
Architecture-agnostic categorization of instructions for traversing the control flow of a trace.
Definition: lldb-enumerations.h:975
x86::InstructionLengthDecode
llvm::Optional< InstructionOpcodeAndModrm > InstructionLengthDecode(const uint8_t *inst_bytes, int bytes_len, bool is_exec_mode_64b)
Decode an instruction into opcode, modrm and opcode_len.
Definition: DisassemblerLLVMC.cpp:257
lldb_private::DataExtractor::GetDataStart
const uint8_t * GetDataStart() const
Get the data start pointer.
Definition: DataExtractor.h:422
lldb_private::Instruction::GetAddress
const Address & GetAddress() const
Definition: Disassembler.h:65
lldb_private::DataExtractor::GetByteOrder
lldb::ByteOrder GetByteOrder() const
Get the current byte order value.
Definition: DataExtractor.h:594
lldb_private::ConstString
Definition: ConstString.h:40
DisassemblerLLVMC::m_adrp_insn
llvm::Optional< uint32_t > m_adrp_insn
Definition: DisassemblerLLVMC.h:80
DisassemblerLLVMC.h
lldb_private::StreamString
Definition: StreamString.h:23
lldb_private::DataExtractor::GetU8
uint8_t GetU8(lldb::offset_t *offset_ptr) const
Extract a uint8_t value from *offset_ptr.
Definition: DataExtractor.cpp:316
lldb_private::AddressRange
Definition: AddressRange.h:25
lldb_private::Instruction::Operand
Definition: Disassembler.h:198
lldb_private::Disassembler::m_instruction_list
InstructionList m_instruction_list
Definition: Disassembler.h:545
string
string(SUBSTRING ${p} 10 -1 pStripped) if($
Definition: Plugins/CMakeLists.txt:40
lldb::eInstructionControlFlowKindFarJump
@ eInstructionControlFlowKindFarJump
The instruction is a jump-like far transfer.
Definition: lldb-enumerations.h:997
lldb_private::Address::GetSection
lldb::SectionSP GetSection() const
Get const accessor for the section.
Definition: Address.h:429
lldb_private::SectionLoadList::ResolveLoadAddress
bool ResolveLoadAddress(lldb::addr_t load_addr, Address &so_addr, bool allow_section_end=false) const
Definition: SectionLoadList.cpp:209
Address.h
InstructionLLVMC::m_disasm_wp
std::weak_ptr< DisassemblerLLVMC > m_disasm_wp
Definition: DisassemblerLLVMC.cpp:1154
DisassemblerLLVMC::MCDisasmInstance::IsAuthenticated
bool IsAuthenticated(llvm::MCInst &mc_inst) const
Definition: DisassemblerLLVMC.cpp:1384
arm64_dwarf::x3
@ x3
Definition: ARM64_DWARF_Registers.h:20
x86::InstructionOpcodeAndModrm::opcode_len
uint8_t opcode_len
Definition: DisassemblerLLVMC.cpp:107
DisassemblerLLVMC::Initialize
static void Initialize()
Definition: DisassemblerLLVMC.cpp:1631
InstructionLLVMC::HasDelaySlot
bool HasDelaySlot() override
Definition: DisassemblerLLVMC.cpp:422
InstructionLLVMC::IsCall
bool IsCall() override
Definition: DisassemblerLLVMC.cpp:1148
InstructionLLVMC::DoesBranch
bool DoesBranch() override
Definition: DisassemblerLLVMC.cpp:417
DisassemblerLLVMC::MCDisasmInstance::m_disasm_up
std::unique_ptr< llvm::MCDisassembler > m_disasm_up
Definition: DisassemblerLLVMC.cpp:84
lldb_private::SectionLoadList::IsEmpty
bool IsEmpty() const
Definition: SectionLoadList.cpp:38
lldb::eInstructionControlFlowKindOther
@ eInstructionControlFlowKindOther
The instruction is something not listed below, i.e.
Definition: lldb-enumerations.h:980
lldb_private::Target::GetArchitecture
const ArchSpec & GetArchitecture() const
Definition: Target.h:984
DisassemblerLLVMC::CreateInstance
static lldb_private::Disassembler * CreateInstance(const lldb_private::ArchSpec &arch, const char *flavor)
Definition: DisassemblerLLVMC.cpp:1574
DisassemblerLLVMC::SymbolLookupCallback
static const char * SymbolLookupCallback(void *DisInfo, uint64_t ReferenceValue, uint64_t *ReferenceType, uint64_t ReferencePC, const char **ReferenceName)
Definition: DisassemblerLLVMC.cpp:1654
lldb_private::Status
Definition: Status.h:44
DisassemblerLLVMC::MCDisasmInstance::Create
static std::unique_ptr< MCDisasmInstance > Create(const char *triple, const char *cpu, const char *features_str, unsigned flavor, DisassemblerLLVMC &owner)
Definition: DisassemblerLLVMC.cpp:1224
InstructionLLVMC::ParseImmediate
static std::pair< Operand, llvm::StringRef::const_iterator > ParseImmediate(llvm::StringRef::const_iterator osi, llvm::StringRef::const_iterator ose)
Definition: DisassemblerLLVMC.cpp:775
lldb::eInstructionControlFlowKindJump
@ eInstructionControlFlowKindJump
The instruction is a near unconditional jump.
Definition: lldb-enumerations.h:986
lldb_private::Address::Dump
bool Dump(Stream *s, ExecutionContextScope *exe_scope, DumpStyle style, DumpStyle fallback_style=DumpStyleInvalid, uint32_t addr_byte_size=UINT32_MAX, bool all_ranges=false) const
Dump a description of this object to a Stream.
Definition: Address.cpp:406
DisassemblerLLVMC::OpInfoCallback
static int OpInfoCallback(void *DisInfo, uint64_t PC, uint64_t Offset, uint64_t Size, int TagType, void *TagBug)
Definition: DisassemblerLLVMC.cpp:1647
uint32_t
DisassemblerLLVMC::m_exe_ctx
const lldb_private::ExecutionContext * m_exe_ctx
Definition: DisassemblerLLVMC.h:71
lldb_private::ArchSpec::IsAlwaysThumbInstructions
bool IsAlwaysThumbInstructions() const
Detect whether this architecture uses thumb code exclusively.
Definition: ArchSpec.cpp:1433
lldb_private::Address
Definition: Address.h:59
lldb::eInstructionControlFlowKindFarReturn
@ eInstructionControlFlowKindFarReturn
The instruction is a return-like far transfer.
Definition: lldb-enumerations.h:994
InstructionLLVMC::DisassemblerScope::DisassemblerScope
DisassemblerScope(InstructionLLVMC &i, const lldb_private::ExecutionContext *exe_ctx=nullptr)
Definition: DisassemblerLLVMC.cpp:686
lldb_private::TargetProperties::GetHexImmediateStyle
Disassembler::HexImmediateStyle GetHexImmediateStyle() const
Definition: Target.cpp:4519
lldb_private::Disassembler::HexImmediateStyle
HexImmediateStyle
Definition: Disassembler.h:398
x86
Definition: DisassemblerLLVMC.cpp:88
lldb_private::Opcode::GetByteSize
uint32_t GetByteSize() const
Definition: Opcode.h:202
InstructionLLVMC::InstructionLLVMC
InstructionLLVMC(DisassemblerLLVMC &disasm, const lldb_private::Address &address, AddressClass addr_class)
Definition: DisassemblerLLVMC.cpp:408
InstructionLLVMC::ParseARMDerefAccess
static std::pair< Operand, llvm::StringRef::const_iterator > ParseARMDerefAccess(llvm::StringRef::const_iterator osi, llvm::StringRef::const_iterator ose)
Definition: DisassemblerLLVMC.cpp:1008
DisassemblerLLVMC::InstructionLLVMC
friend class InstructionLLVMC
Definition: DisassemblerLLVMC.h:49
uint16_t
DisassemblerLLVMC::MCDisasmInstance
Definition: DisassemblerLLVMC.cpp:51
lldb_private::DataExtractor::GetU32
uint32_t GetU32(lldb::offset_t *offset_ptr) const
Extract a uint32_t value from *offset_ptr.
Definition: DataExtractor.cpp:425
lldb_private::Target::GetSectionLoadList
SectionLoadList & GetSectionLoadList()
Definition: Target.h:1092
LLDB_INVALID_ADDRESS
#define LLDB_INVALID_ADDRESS
Definition: lldb-defines.h:74
lldb_private::Instruction::Operand::m_type
enum lldb_private::Instruction::Operand::Type m_type
lldb_private::TargetProperties::GetUseHexImmediates
bool GetUseHexImmediates() const
Definition: Target.cpp:4488
DataExtractor.h
lldb_private::Stream::Printf
size_t Printf(const char *format,...) __attribute__((format(printf
Output printf formatted output to the stream.
Definition: Stream.cpp:107
lldb_private::InstructionList::Clear
void Clear()
Definition: Disassembler.cpp:1021
DisassemblerLLVMC::MCDisasmInstance::CanBranch
bool CanBranch(llvm::MCInst &mc_inst) const
Definition: DisassemblerLLVMC.cpp:1365
SymbolContext.h
DisassemblerLLVMC::m_data_from_file
bool m_data_from_file
Definition: DisassemblerLLVMC.h:74
lldb_private::DataExtractor::BytesLeft
lldb::offset_t BytesLeft(lldb::offset_t offset) const
Definition: DataExtractor.h:976
lldb_private::Disassembler
Definition: Disassembler.h:384
lldb::eInstructionControlFlowKindUnknown
@ eInstructionControlFlowKindUnknown
The instruction could not be classified.
Definition: lldb-enumerations.h:977
lldb_private
A class that represents a running process on the host machine.
Definition: SBCommandInterpreterRunOptions.h:16
lldb_private::DataExtractor::GetU16
uint16_t GetU16(lldb::offset_t *offset_ptr) const
Extract a uint16_t value from *offset_ptr.
Definition: DataExtractor.cpp:347
DisassemblerLLVMC::Terminate
static void Terminate()
Definition: DisassemblerLLVMC.cpp:1643
lldb_private::RegularExpression::Execute
bool Execute(llvm::StringRef string, llvm::SmallVectorImpl< llvm::StringRef > *matches=nullptr) const
Execute a regular expression match using the compiled regular expression that is already in this obje...
Definition: RegularExpression.cpp:23
lldb_private::Address::GetModule
lldb::ModuleSP GetModule() const
Get accessor for the module for this address.
Definition: Address.cpp:283
InstructionLLVMC::ParseIntelIndexedAccess
static std::pair< Operand, llvm::StringRef::const_iterator > ParseIntelIndexedAccess(llvm::StringRef::const_iterator osi, llvm::StringRef::const_iterator ose)
Definition: DisassemblerLLVMC.cpp:833
lldb_private::InstructionList::Append
void Append(lldb::InstructionSP &inst_sp)
Definition: Disassembler.cpp:1023
Stream.h
LLDB_PLUGIN_DEFINE
#define LLDB_PLUGIN_DEFINE(PluginName)
Definition: PluginManager.h:31
lldb_private::Log
Definition: Log.h:115
lldb_private::Stream::PutCString
size_t PutCString(llvm::StringRef cstr)
Output a C string to the stream.
Definition: Stream.cpp:63
x86::MapOpcodeIntoControlFlowKind
lldb::InstructionControlFlowKind MapOpcodeIntoControlFlowKind(InstructionOpcodeAndModrm opcode_and_modrm)
Determine the InstructionControlFlowKind based on opcode and modrm bytes.
Definition: DisassemblerLLVMC.cpp:124
llvm::SmallVectorImpl
Definition: Disassembler.h:42
InstructionLLVMC::GetDisasmToUse
DisassemblerLLVMC::MCDisasmInstance * GetDisasmToUse(bool &is_alternate_isa, DisassemblerScope &disasm)
Definition: DisassemblerLLVMC.cpp:1206
DisassemblerLLVMC::MCDisasmInstance::IsCall
bool IsCall(llvm::MCInst &mc_inst) const
Definition: DisassemblerLLVMC.cpp:1376
lldb_private::GetLog
Log * GetLog(Cat mask)
Retrieve the Log object for the channel associated with the given log enum.
Definition: Log.h:308
lldb_private::AddressRange::ContainsLoadAddress
bool ContainsLoadAddress(const Address &so_addr, Target *target) const
Check if a section offset so_addr when represented as a load address is contained within this object'...
Definition: AddressRange.cpp:93
lldb_private::DataExtractor::GetByteSize
uint64_t GetByteSize() const
Get the number of bytes contained in this object.
Definition: DataExtractor.h:270
lldb_private::Instruction::Operand::IsValid
bool IsValid()
Definition: Disassembler.h:213
lldb_private::Address::Slide
bool Slide(int64_t offset)
Definition: Address.h:449
lldb_private::DataExtractor::ValidOffsetForDataOfSize
bool ValidOffsetForDataOfSize(lldb::offset_t offset, lldb::offset_t length) const
Test the availability of length bytes of data from offset.
Definition: DataExtractor.h:965
lldb
Definition: SBAddress.h:15
InstructionLLVMC::DisassemblerScope::operator->
std::shared_ptr< DisassemblerLLVMC > operator->()
Definition: DisassemblerLLVMC.cpp:699
DisassemblerLLVMC::MCDisasmInstance::SetStyle
void SetStyle(bool use_hex_immed, HexImmediateStyle hex_style)
Definition: DisassemblerLLVMC.cpp:1352
InstructionLLVMC::DisassemblerScope::m_disasm
std::shared_ptr< DisassemblerLLVMC > m_disasm
Definition: DisassemblerLLVMC.cpp:683
RegisterContext.h
InstructionLLVMC::ConsumeWhitespace
static llvm::StringRef::const_iterator ConsumeWhitespace(llvm::StringRef::const_iterator osi, llvm::StringRef::const_iterator ose)
Definition: DisassemblerLLVMC.cpp:703
DisassemblerLLVMC::MCDisasmInstance::m_context_up
std::unique_ptr< llvm::MCContext > m_context_up
Definition: DisassemblerLLVMC.cpp:83
LLDBLog.h
InstructionLLVMC::CalculateMnemonicOperandsAndComment
void CalculateMnemonicOperandsAndComment(const lldb_private::ExecutionContext *exe_ctx) override
Definition: DisassemblerLLVMC.cpp:557
InstructionLLVMC::GetByteSize
size_t GetByteSize() const
Definition: DisassemblerLLVMC.cpp:678
lldb_private::ExecutionContext::GetTargetPtr
Target * GetTargetPtr() const
Returns a pointer to the target object.
Definition: ExecutionContext.cpp:198
ExecutionContext.h
InstructionLLVMC::AppendComment
void AppendComment(std::string &description)
Definition: DisassemblerLLVMC.cpp:535
DisassemblerLLVMC::MCDisasmInstance::PrintMCInst
void PrintMCInst(llvm::MCInst &mc_inst, std::string &inst_string, std::string &comments_string)
Definition: DisassemblerLLVMC.cpp:1329
DisassemblerLLVMC::MCDisasmInstance::HasDelaySlot
bool HasDelaySlot(llvm::MCInst &mc_inst) const
Definition: DisassemblerLLVMC.cpp:1371
InstructionLLVMC::GetControlFlowKind
lldb::InstructionControlFlowKind GetControlFlowKind(const lldb_private::ExecutionContext *exe_ctx) override
Definition: DisassemblerLLVMC.cpp:545
lldb::ByteOrder
ByteOrder
Byte ordering definitions.
Definition: lldb-enumerations.h:138
DisassemblerLLVMC::MCDisasmInstance::GetMCInst
uint64_t GetMCInst(const uint8_t *opcode_data, size_t opcode_data_len, lldb::addr_t pc, llvm::MCInst &mc_inst) const
Definition: DisassemblerLLVMC.cpp:1314