LLDB  mainline
DisassemblerLLVMC.cpp
Go to the documentation of this file.
1 //===-- DisassemblerLLVMC.cpp -----------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "DisassemblerLLVMC.h"
10 
11 #include "llvm-c/Disassembler.h"
12 #include "llvm/ADT/SmallString.h"
13 #include "llvm/MC/MCAsmInfo.h"
14 #include "llvm/MC/MCContext.h"
15 #include "llvm/MC/MCDisassembler/MCDisassembler.h"
16 #include "llvm/MC/MCDisassembler/MCExternalSymbolizer.h"
17 #include "llvm/MC/MCDisassembler/MCRelocationInfo.h"
18 #include "llvm/MC/MCInst.h"
19 #include "llvm/MC/MCInstPrinter.h"
20 #include "llvm/MC/MCInstrInfo.h"
21 #include "llvm/MC/MCRegisterInfo.h"
22 #include "llvm/MC/MCSubtargetInfo.h"
23 #include "llvm/Support/ErrorHandling.h"
24 #include "llvm/Support/ScopedPrinter.h"
25 #include "llvm/Support/TargetRegistry.h"
26 #include "llvm/Support/TargetSelect.h"
27 
28 #include "lldb/Core/Address.h"
29 #include "lldb/Core/Module.h"
32 #include "lldb/Target/Process.h"
35 #include "lldb/Target/StackFrame.h"
36 #include "lldb/Target/Target.h"
38 #include "lldb/Utility/Log.h"
40 #include "lldb/Utility/Stream.h"
41 
42 using namespace lldb;
43 using namespace lldb_private;
44 
46 public:
47  static std::unique_ptr<MCDisasmInstance>
48  Create(const char *triple, const char *cpu, const char *features_str,
49  unsigned flavor, DisassemblerLLVMC &owner);
50 
51  ~MCDisasmInstance() = default;
52 
53  uint64_t GetMCInst(const uint8_t *opcode_data, size_t opcode_data_len,
54  lldb::addr_t pc, llvm::MCInst &mc_inst) const;
55  void PrintMCInst(llvm::MCInst &mc_inst, std::string &inst_string,
56  std::string &comments_string);
57  void SetStyle(bool use_hex_immed, HexImmediateStyle hex_style);
58  bool CanBranch(llvm::MCInst &mc_inst) const;
59  bool HasDelaySlot(llvm::MCInst &mc_inst) const;
60  bool IsCall(llvm::MCInst &mc_inst) const;
61 
62 private:
63  MCDisasmInstance(std::unique_ptr<llvm::MCInstrInfo> &&instr_info_up,
64  std::unique_ptr<llvm::MCRegisterInfo> &&reg_info_up,
65  std::unique_ptr<llvm::MCSubtargetInfo> &&subtarget_info_up,
66  std::unique_ptr<llvm::MCAsmInfo> &&asm_info_up,
67  std::unique_ptr<llvm::MCContext> &&context_up,
68  std::unique_ptr<llvm::MCDisassembler> &&disasm_up,
69  std::unique_ptr<llvm::MCInstPrinter> &&instr_printer_up);
70 
71  std::unique_ptr<llvm::MCInstrInfo> m_instr_info_up;
72  std::unique_ptr<llvm::MCRegisterInfo> m_reg_info_up;
73  std::unique_ptr<llvm::MCSubtargetInfo> m_subtarget_info_up;
74  std::unique_ptr<llvm::MCAsmInfo> m_asm_info_up;
75  std::unique_ptr<llvm::MCContext> m_context_up;
76  std::unique_ptr<llvm::MCDisassembler> m_disasm_up;
77  std::unique_ptr<llvm::MCInstPrinter> m_instr_printer_up;
78 };
79 
81 public:
83  const lldb_private::Address &address,
84  AddressClass addr_class)
85  : Instruction(address, addr_class),
87  disasm.shared_from_this())),
88  m_does_branch(eLazyBoolCalculate), m_has_delay_slot(eLazyBoolCalculate),
89  m_is_call(eLazyBoolCalculate), m_is_valid(false),
90  m_using_file_addr(false) {}
91 
92  ~InstructionLLVMC() override = default;
93 
94  bool DoesBranch() override {
95  if (m_does_branch == eLazyBoolCalculate) {
96  DisassemblerScope disasm(*this);
97  if (disasm) {
98  DataExtractor data;
99  if (m_opcode.GetData(data)) {
100  bool is_alternate_isa;
101  lldb::addr_t pc = m_address.GetFileAddress();
102 
103  DisassemblerLLVMC::MCDisasmInstance *mc_disasm_ptr =
104  GetDisasmToUse(is_alternate_isa, disasm);
105  const uint8_t *opcode_data = data.GetDataStart();
106  const size_t opcode_data_len = data.GetByteSize();
107  llvm::MCInst inst;
108  const size_t inst_size =
109  mc_disasm_ptr->GetMCInst(opcode_data, opcode_data_len, pc, inst);
110  // Be conservative, if we didn't understand the instruction, say it
111  // might branch...
112  if (inst_size == 0)
113  m_does_branch = eLazyBoolYes;
114  else {
115  const bool can_branch = mc_disasm_ptr->CanBranch(inst);
116  if (can_branch)
117  m_does_branch = eLazyBoolYes;
118  else
119  m_does_branch = eLazyBoolNo;
120  }
121  }
122  }
123  }
124  return m_does_branch == eLazyBoolYes;
125  }
126 
127  bool HasDelaySlot() override {
128  if (m_has_delay_slot == eLazyBoolCalculate) {
129  DisassemblerScope disasm(*this);
130  if (disasm) {
131  DataExtractor data;
132  if (m_opcode.GetData(data)) {
133  bool is_alternate_isa;
134  lldb::addr_t pc = m_address.GetFileAddress();
135 
136  DisassemblerLLVMC::MCDisasmInstance *mc_disasm_ptr =
137  GetDisasmToUse(is_alternate_isa, disasm);
138  const uint8_t *opcode_data = data.GetDataStart();
139  const size_t opcode_data_len = data.GetByteSize();
140  llvm::MCInst inst;
141  const size_t inst_size =
142  mc_disasm_ptr->GetMCInst(opcode_data, opcode_data_len, pc, inst);
143  // if we didn't understand the instruction, say it doesn't have a
144  // delay slot...
145  if (inst_size == 0)
146  m_has_delay_slot = eLazyBoolNo;
147  else {
148  const bool has_delay_slot = mc_disasm_ptr->HasDelaySlot(inst);
149  if (has_delay_slot)
150  m_has_delay_slot = eLazyBoolYes;
151  else
152  m_has_delay_slot = eLazyBoolNo;
153  }
154  }
155  }
156  }
157  return m_has_delay_slot == eLazyBoolYes;
158  }
159 
161  DisassemblerScope disasm(*this);
162  return GetDisasmToUse(is_alternate_isa, disasm);
163  }
164 
165  size_t Decode(const lldb_private::Disassembler &disassembler,
166  const lldb_private::DataExtractor &data,
167  lldb::offset_t data_offset) override {
168  // All we have to do is read the opcode which can be easy for some
169  // architectures
170  bool got_op = false;
171  DisassemblerScope disasm(*this);
172  if (disasm) {
173  const ArchSpec &arch = disasm->GetArchitecture();
174  const lldb::ByteOrder byte_order = data.GetByteOrder();
175 
176  const uint32_t min_op_byte_size = arch.GetMinimumOpcodeByteSize();
177  const uint32_t max_op_byte_size = arch.GetMaximumOpcodeByteSize();
178  if (min_op_byte_size == max_op_byte_size) {
179  // Fixed size instructions, just read that amount of data.
180  if (!data.ValidOffsetForDataOfSize(data_offset, min_op_byte_size))
181  return false;
182 
183  switch (min_op_byte_size) {
184  case 1:
185  m_opcode.SetOpcode8(data.GetU8(&data_offset), byte_order);
186  got_op = true;
187  break;
188 
189  case 2:
190  m_opcode.SetOpcode16(data.GetU16(&data_offset), byte_order);
191  got_op = true;
192  break;
193 
194  case 4:
195  m_opcode.SetOpcode32(data.GetU32(&data_offset), byte_order);
196  got_op = true;
197  break;
198 
199  case 8:
200  m_opcode.SetOpcode64(data.GetU64(&data_offset), byte_order);
201  got_op = true;
202  break;
203 
204  default:
205  m_opcode.SetOpcodeBytes(data.PeekData(data_offset, min_op_byte_size),
206  min_op_byte_size);
207  got_op = true;
208  break;
209  }
210  }
211  if (!got_op) {
212  bool is_alternate_isa = false;
213  DisassemblerLLVMC::MCDisasmInstance *mc_disasm_ptr =
214  GetDisasmToUse(is_alternate_isa, disasm);
215 
216  const llvm::Triple::ArchType machine = arch.GetMachine();
217  if (machine == llvm::Triple::arm || machine == llvm::Triple::thumb) {
218  if (machine == llvm::Triple::thumb || is_alternate_isa) {
219  uint32_t thumb_opcode = data.GetU16(&data_offset);
220  if ((thumb_opcode & 0xe000) != 0xe000 ||
221  ((thumb_opcode & 0x1800u) == 0)) {
222  m_opcode.SetOpcode16(thumb_opcode, byte_order);
223  m_is_valid = true;
224  } else {
225  thumb_opcode <<= 16;
226  thumb_opcode |= data.GetU16(&data_offset);
227  m_opcode.SetOpcode16_2(thumb_opcode, byte_order);
228  m_is_valid = true;
229  }
230  } else {
231  m_opcode.SetOpcode32(data.GetU32(&data_offset), byte_order);
232  m_is_valid = true;
233  }
234  } else {
235  // The opcode isn't evenly sized, so we need to actually use the llvm
236  // disassembler to parse it and get the size.
237  uint8_t *opcode_data =
238  const_cast<uint8_t *>(data.PeekData(data_offset, 1));
239  const size_t opcode_data_len = data.BytesLeft(data_offset);
240  const addr_t pc = m_address.GetFileAddress();
241  llvm::MCInst inst;
242 
243  const size_t inst_size =
244  mc_disasm_ptr->GetMCInst(opcode_data, opcode_data_len, pc, inst);
245  if (inst_size == 0)
246  m_opcode.Clear();
247  else {
248  m_opcode.SetOpcodeBytes(opcode_data, inst_size);
249  m_is_valid = true;
250  }
251  }
252  }
253  return m_opcode.GetByteSize();
254  }
255  return 0;
256  }
257 
258  void AppendComment(std::string &description) {
259  if (m_comment.empty())
260  m_comment.swap(description);
261  else {
262  m_comment.append(", ");
263  m_comment.append(description);
264  }
265  }
266 
268  const lldb_private::ExecutionContext *exe_ctx) override {
269  DataExtractor data;
270  const AddressClass address_class = GetAddressClass();
271 
272  if (m_opcode.GetData(data)) {
273  std::string out_string;
274  std::string comment_string;
275 
276  DisassemblerScope disasm(*this, exe_ctx);
277  if (disasm) {
279 
280  if (address_class == AddressClass::eCodeAlternateISA)
281  mc_disasm_ptr = disasm->m_alternate_disasm_up.get();
282  else
283  mc_disasm_ptr = disasm->m_disasm_up.get();
284 
285  lldb::addr_t pc = m_address.GetFileAddress();
286  m_using_file_addr = true;
287 
288  const bool data_from_file = disasm->m_data_from_file;
289  bool use_hex_immediates = true;
290  Disassembler::HexImmediateStyle hex_style = Disassembler::eHexStyleC;
291 
292  if (exe_ctx) {
293  Target *target = exe_ctx->GetTargetPtr();
294  if (target) {
295  use_hex_immediates = target->GetUseHexImmediates();
296  hex_style = target->GetHexImmediateStyle();
297 
298  if (!data_from_file) {
299  const lldb::addr_t load_addr = m_address.GetLoadAddress(target);
300  if (load_addr != LLDB_INVALID_ADDRESS) {
301  pc = load_addr;
302  m_using_file_addr = false;
303  }
304  }
305  }
306  }
307 
308  const uint8_t *opcode_data = data.GetDataStart();
309  const size_t opcode_data_len = data.GetByteSize();
310  llvm::MCInst inst;
311  size_t inst_size =
312  mc_disasm_ptr->GetMCInst(opcode_data, opcode_data_len, pc, inst);
313 
314  if (inst_size > 0) {
315  mc_disasm_ptr->SetStyle(use_hex_immediates, hex_style);
316  mc_disasm_ptr->PrintMCInst(inst, out_string, comment_string);
317 
318  if (!comment_string.empty()) {
319  AppendComment(comment_string);
320  }
321  }
322 
323  if (inst_size == 0) {
324  m_comment.assign("unknown opcode");
325  inst_size = m_opcode.GetByteSize();
326  StreamString mnemonic_strm;
327  lldb::offset_t offset = 0;
328  lldb::ByteOrder byte_order = data.GetByteOrder();
329  switch (inst_size) {
330  case 1: {
331  const uint8_t uval8 = data.GetU8(&offset);
332  m_opcode.SetOpcode8(uval8, byte_order);
333  m_opcode_name.assign(".byte");
334  mnemonic_strm.Printf("0x%2.2x", uval8);
335  } break;
336  case 2: {
337  const uint16_t uval16 = data.GetU16(&offset);
338  m_opcode.SetOpcode16(uval16, byte_order);
339  m_opcode_name.assign(".short");
340  mnemonic_strm.Printf("0x%4.4x", uval16);
341  } break;
342  case 4: {
343  const uint32_t uval32 = data.GetU32(&offset);
344  m_opcode.SetOpcode32(uval32, byte_order);
345  m_opcode_name.assign(".long");
346  mnemonic_strm.Printf("0x%8.8x", uval32);
347  } break;
348  case 8: {
349  const uint64_t uval64 = data.GetU64(&offset);
350  m_opcode.SetOpcode64(uval64, byte_order);
351  m_opcode_name.assign(".quad");
352  mnemonic_strm.Printf("0x%16.16" PRIx64, uval64);
353  } break;
354  default:
355  if (inst_size == 0)
356  return;
357  else {
358  const uint8_t *bytes = data.PeekData(offset, inst_size);
359  if (bytes == NULL)
360  return;
361  m_opcode_name.assign(".byte");
362  m_opcode.SetOpcodeBytes(bytes, inst_size);
363  mnemonic_strm.Printf("0x%2.2x", bytes[0]);
364  for (uint32_t i = 1; i < inst_size; ++i)
365  mnemonic_strm.Printf(" 0x%2.2x", bytes[i]);
366  }
367  break;
368  }
369  m_mnemonics = mnemonic_strm.GetString();
370  return;
371  } else {
372  if (m_does_branch == eLazyBoolCalculate) {
373  const bool can_branch = mc_disasm_ptr->CanBranch(inst);
374  if (can_branch)
375  m_does_branch = eLazyBoolYes;
376  else
377  m_does_branch = eLazyBoolNo;
378  }
379  }
380 
381  static RegularExpression s_regex(
382  llvm::StringRef("[ \t]*([^ ^\t]+)[ \t]*([^ ^\t].*)?"));
383 
384  RegularExpression::Match matches(3);
385 
386  if (s_regex.Execute(out_string, &matches)) {
387  matches.GetMatchAtIndex(out_string.c_str(), 1, m_opcode_name);
388  matches.GetMatchAtIndex(out_string.c_str(), 2, m_mnemonics);
389  }
390  }
391  }
392  }
393 
394  bool IsValid() const { return m_is_valid; }
395 
396  bool UsingFileAddress() const { return m_using_file_addr; }
397  size_t GetByteSize() const { return m_opcode.GetByteSize(); }
398 
399  /// Grants exclusive access to the disassembler and initializes it with the
400  /// given InstructionLLVMC and an optional ExecutionContext.
402  std::shared_ptr<DisassemblerLLVMC> m_disasm;
403 
404  public:
406  InstructionLLVMC &i,
407  const lldb_private::ExecutionContext *exe_ctx = nullptr)
408  : m_disasm(i.m_disasm_wp.lock()) {
409  m_disasm->m_mutex.lock();
410  m_disasm->m_inst = &i;
411  m_disasm->m_exe_ctx = exe_ctx;
412  }
413  ~DisassemblerScope() { m_disasm->m_mutex.unlock(); }
414 
415  /// Evaluates to true if this scope contains a valid disassembler.
416  operator bool() const { return static_cast<bool>(m_disasm); }
417 
418  std::shared_ptr<DisassemblerLLVMC> operator->() { return m_disasm; }
419  };
420 
421  static llvm::StringRef::const_iterator
422  ConsumeWhitespace(llvm::StringRef::const_iterator osi,
423  llvm::StringRef::const_iterator ose) {
424  while (osi != ose) {
425  switch (*osi) {
426  default:
427  return osi;
428  case ' ':
429  case '\t':
430  break;
431  }
432  ++osi;
433  }
434 
435  return osi;
436  }
437 
438  static std::pair<bool, llvm::StringRef::const_iterator>
439  ConsumeChar(llvm::StringRef::const_iterator osi, const char c,
440  llvm::StringRef::const_iterator ose) {
441  bool found = false;
442 
443  osi = ConsumeWhitespace(osi, ose);
444  if (osi != ose && *osi == c) {
445  found = true;
446  ++osi;
447  }
448 
449  return std::make_pair(found, osi);
450  }
451 
452  static std::pair<Operand, llvm::StringRef::const_iterator>
453  ParseRegisterName(llvm::StringRef::const_iterator osi,
454  llvm::StringRef::const_iterator ose) {
455  Operand ret;
456  ret.m_type = Operand::Type::Register;
457  std::string str;
458 
459  osi = ConsumeWhitespace(osi, ose);
460 
461  while (osi != ose) {
462  if (*osi >= '0' && *osi <= '9') {
463  if (str.empty()) {
464  return std::make_pair(Operand(), osi);
465  } else {
466  str.push_back(*osi);
467  }
468  } else if (*osi >= 'a' && *osi <= 'z') {
469  str.push_back(*osi);
470  } else {
471  switch (*osi) {
472  default:
473  if (str.empty()) {
474  return std::make_pair(Operand(), osi);
475  } else {
476  ret.m_register = ConstString(str);
477  return std::make_pair(ret, osi);
478  }
479  case '%':
480  if (!str.empty()) {
481  return std::make_pair(Operand(), osi);
482  }
483  break;
484  }
485  }
486  ++osi;
487  }
488 
489  ret.m_register = ConstString(str);
490  return std::make_pair(ret, osi);
491  }
492 
493  static std::pair<Operand, llvm::StringRef::const_iterator>
494  ParseImmediate(llvm::StringRef::const_iterator osi,
495  llvm::StringRef::const_iterator ose) {
496  Operand ret;
497  ret.m_type = Operand::Type::Immediate;
498  std::string str;
499  bool is_hex = false;
500 
501  osi = ConsumeWhitespace(osi, ose);
502 
503  while (osi != ose) {
504  if (*osi >= '0' && *osi <= '9') {
505  str.push_back(*osi);
506  } else if (*osi >= 'a' && *osi <= 'f') {
507  if (is_hex) {
508  str.push_back(*osi);
509  } else {
510  return std::make_pair(Operand(), osi);
511  }
512  } else {
513  switch (*osi) {
514  default:
515  if (str.empty()) {
516  return std::make_pair(Operand(), osi);
517  } else {
518  ret.m_immediate = strtoull(str.c_str(), nullptr, 0);
519  return std::make_pair(ret, osi);
520  }
521  case 'x':
522  if (!str.compare("0")) {
523  is_hex = true;
524  str.push_back(*osi);
525  } else {
526  return std::make_pair(Operand(), osi);
527  }
528  break;
529  case '#':
530  case '$':
531  if (!str.empty()) {
532  return std::make_pair(Operand(), osi);
533  }
534  break;
535  case '-':
536  if (str.empty()) {
537  ret.m_negative = true;
538  } else {
539  return std::make_pair(Operand(), osi);
540  }
541  }
542  }
543  ++osi;
544  }
545 
546  ret.m_immediate = strtoull(str.c_str(), nullptr, 0);
547  return std::make_pair(ret, osi);
548  }
549 
550  // -0x5(%rax,%rax,2)
551  static std::pair<Operand, llvm::StringRef::const_iterator>
552  ParseIntelIndexedAccess(llvm::StringRef::const_iterator osi,
553  llvm::StringRef::const_iterator ose) {
554  std::pair<Operand, llvm::StringRef::const_iterator> offset_and_iterator =
555  ParseImmediate(osi, ose);
556  if (offset_and_iterator.first.IsValid()) {
557  osi = offset_and_iterator.second;
558  }
559 
560  bool found = false;
561  std::tie(found, osi) = ConsumeChar(osi, '(', ose);
562  if (!found) {
563  return std::make_pair(Operand(), osi);
564  }
565 
566  std::pair<Operand, llvm::StringRef::const_iterator> base_and_iterator =
567  ParseRegisterName(osi, ose);
568  if (base_and_iterator.first.IsValid()) {
569  osi = base_and_iterator.second;
570  } else {
571  return std::make_pair(Operand(), osi);
572  }
573 
574  std::tie(found, osi) = ConsumeChar(osi, ',', ose);
575  if (!found) {
576  return std::make_pair(Operand(), osi);
577  }
578 
579  std::pair<Operand, llvm::StringRef::const_iterator> index_and_iterator =
580  ParseRegisterName(osi, ose);
581  if (index_and_iterator.first.IsValid()) {
582  osi = index_and_iterator.second;
583  } else {
584  return std::make_pair(Operand(), osi);
585  }
586 
587  std::tie(found, osi) = ConsumeChar(osi, ',', ose);
588  if (!found) {
589  return std::make_pair(Operand(), osi);
590  }
591 
592  std::pair<Operand, llvm::StringRef::const_iterator>
593  multiplier_and_iterator = ParseImmediate(osi, ose);
594  if (index_and_iterator.first.IsValid()) {
595  osi = index_and_iterator.second;
596  } else {
597  return std::make_pair(Operand(), osi);
598  }
599 
600  std::tie(found, osi) = ConsumeChar(osi, ')', ose);
601  if (!found) {
602  return std::make_pair(Operand(), osi);
603  }
604 
605  Operand product;
606  product.m_type = Operand::Type::Product;
607  product.m_children.push_back(index_and_iterator.first);
608  product.m_children.push_back(multiplier_and_iterator.first);
609 
610  Operand index;
611  index.m_type = Operand::Type::Sum;
612  index.m_children.push_back(base_and_iterator.first);
613  index.m_children.push_back(product);
614 
615  if (offset_and_iterator.first.IsValid()) {
616  Operand offset;
617  offset.m_type = Operand::Type::Sum;
618  offset.m_children.push_back(offset_and_iterator.first);
619  offset.m_children.push_back(index);
620 
621  Operand deref;
622  deref.m_type = Operand::Type::Dereference;
623  deref.m_children.push_back(offset);
624  return std::make_pair(deref, osi);
625  } else {
626  Operand deref;
627  deref.m_type = Operand::Type::Dereference;
628  deref.m_children.push_back(index);
629  return std::make_pair(deref, osi);
630  }
631  }
632 
633  // -0x10(%rbp)
634  static std::pair<Operand, llvm::StringRef::const_iterator>
635  ParseIntelDerefAccess(llvm::StringRef::const_iterator osi,
636  llvm::StringRef::const_iterator ose) {
637  std::pair<Operand, llvm::StringRef::const_iterator> offset_and_iterator =
638  ParseImmediate(osi, ose);
639  if (offset_and_iterator.first.IsValid()) {
640  osi = offset_and_iterator.second;
641  }
642 
643  bool found = false;
644  std::tie(found, osi) = ConsumeChar(osi, '(', ose);
645  if (!found) {
646  return std::make_pair(Operand(), osi);
647  }
648 
649  std::pair<Operand, llvm::StringRef::const_iterator> base_and_iterator =
650  ParseRegisterName(osi, ose);
651  if (base_and_iterator.first.IsValid()) {
652  osi = base_and_iterator.second;
653  } else {
654  return std::make_pair(Operand(), osi);
655  }
656 
657  std::tie(found, osi) = ConsumeChar(osi, ')', ose);
658  if (!found) {
659  return std::make_pair(Operand(), osi);
660  }
661 
662  if (offset_and_iterator.first.IsValid()) {
663  Operand offset;
664  offset.m_type = Operand::Type::Sum;
665  offset.m_children.push_back(offset_and_iterator.first);
666  offset.m_children.push_back(base_and_iterator.first);
667 
668  Operand deref;
669  deref.m_type = Operand::Type::Dereference;
670  deref.m_children.push_back(offset);
671  return std::make_pair(deref, osi);
672  } else {
673  Operand deref;
674  deref.m_type = Operand::Type::Dereference;
675  deref.m_children.push_back(base_and_iterator.first);
676  return std::make_pair(deref, osi);
677  }
678  }
679 
680  // [sp, #8]!
681  static std::pair<Operand, llvm::StringRef::const_iterator>
682  ParseARMOffsetAccess(llvm::StringRef::const_iterator osi,
683  llvm::StringRef::const_iterator ose) {
684  bool found = false;
685  std::tie(found, osi) = ConsumeChar(osi, '[', ose);
686  if (!found) {
687  return std::make_pair(Operand(), osi);
688  }
689 
690  std::pair<Operand, llvm::StringRef::const_iterator> base_and_iterator =
691  ParseRegisterName(osi, ose);
692  if (base_and_iterator.first.IsValid()) {
693  osi = base_and_iterator.second;
694  } else {
695  return std::make_pair(Operand(), osi);
696  }
697 
698  std::tie(found, osi) = ConsumeChar(osi, ',', ose);
699  if (!found) {
700  return std::make_pair(Operand(), osi);
701  }
702 
703  std::pair<Operand, llvm::StringRef::const_iterator> offset_and_iterator =
704  ParseImmediate(osi, ose);
705  if (offset_and_iterator.first.IsValid()) {
706  osi = offset_and_iterator.second;
707  }
708 
709  std::tie(found, osi) = ConsumeChar(osi, ']', ose);
710  if (!found) {
711  return std::make_pair(Operand(), osi);
712  }
713 
714  Operand offset;
715  offset.m_type = Operand::Type::Sum;
716  offset.m_children.push_back(offset_and_iterator.first);
717  offset.m_children.push_back(base_and_iterator.first);
718 
719  Operand deref;
720  deref.m_type = Operand::Type::Dereference;
721  deref.m_children.push_back(offset);
722  return std::make_pair(deref, osi);
723  }
724 
725  // [sp]
726  static std::pair<Operand, llvm::StringRef::const_iterator>
727  ParseARMDerefAccess(llvm::StringRef::const_iterator osi,
728  llvm::StringRef::const_iterator ose) {
729  bool found = false;
730  std::tie(found, osi) = ConsumeChar(osi, '[', ose);
731  if (!found) {
732  return std::make_pair(Operand(), osi);
733  }
734 
735  std::pair<Operand, llvm::StringRef::const_iterator> base_and_iterator =
736  ParseRegisterName(osi, ose);
737  if (base_and_iterator.first.IsValid()) {
738  osi = base_and_iterator.second;
739  } else {
740  return std::make_pair(Operand(), osi);
741  }
742 
743  std::tie(found, osi) = ConsumeChar(osi, ']', ose);
744  if (!found) {
745  return std::make_pair(Operand(), osi);
746  }
747 
748  Operand deref;
749  deref.m_type = Operand::Type::Dereference;
750  deref.m_children.push_back(base_and_iterator.first);
751  return std::make_pair(deref, osi);
752  }
753 
754  static void DumpOperand(const Operand &op, Stream &s) {
755  switch (op.m_type) {
756  case Operand::Type::Dereference:
757  s.PutCString("*");
758  DumpOperand(op.m_children[0], s);
759  break;
760  case Operand::Type::Immediate:
761  if (op.m_negative) {
762  s.PutCString("-");
763  }
764  s.PutCString(llvm::to_string(op.m_immediate));
765  break;
766  case Operand::Type::Invalid:
767  s.PutCString("Invalid");
768  break;
769  case Operand::Type::Product:
770  s.PutCString("(");
771  DumpOperand(op.m_children[0], s);
772  s.PutCString("*");
773  DumpOperand(op.m_children[1], s);
774  s.PutCString(")");
775  break;
776  case Operand::Type::Register:
778  break;
779  case Operand::Type::Sum:
780  s.PutCString("(");
781  DumpOperand(op.m_children[0], s);
782  s.PutCString("+");
783  DumpOperand(op.m_children[1], s);
784  s.PutCString(")");
785  break;
786  }
787  }
788 
790  llvm::SmallVectorImpl<Instruction::Operand> &operands) override {
791  const char *operands_string = GetOperands(nullptr);
792 
793  if (!operands_string) {
794  return false;
795  }
796 
797  llvm::StringRef operands_ref(operands_string);
798 
799  llvm::StringRef::const_iterator osi = operands_ref.begin();
800  llvm::StringRef::const_iterator ose = operands_ref.end();
801 
802  while (osi != ose) {
803  Operand operand;
804  llvm::StringRef::const_iterator iter;
805 
806  if ((std::tie(operand, iter) = ParseIntelIndexedAccess(osi, ose),
807  operand.IsValid()) ||
808  (std::tie(operand, iter) = ParseIntelDerefAccess(osi, ose),
809  operand.IsValid()) ||
810  (std::tie(operand, iter) = ParseARMOffsetAccess(osi, ose),
811  operand.IsValid()) ||
812  (std::tie(operand, iter) = ParseARMDerefAccess(osi, ose),
813  operand.IsValid()) ||
814  (std::tie(operand, iter) = ParseRegisterName(osi, ose),
815  operand.IsValid()) ||
816  (std::tie(operand, iter) = ParseImmediate(osi, ose),
817  operand.IsValid())) {
818  osi = iter;
819  operands.push_back(operand);
820  } else {
821  return false;
822  }
823 
824  std::pair<bool, llvm::StringRef::const_iterator> found_and_iter =
825  ConsumeChar(osi, ',', ose);
826  if (found_and_iter.first) {
827  osi = found_and_iter.second;
828  }
829 
830  osi = ConsumeWhitespace(osi, ose);
831  }
832 
833  DisassemblerSP disasm_sp = m_disasm_wp.lock();
834 
835  if (disasm_sp && operands.size() > 1) {
836  // TODO tie this into the MC Disassembler's notion of clobbers.
837  switch (disasm_sp->GetArchitecture().GetMachine()) {
838  default:
839  break;
840  case llvm::Triple::x86:
841  case llvm::Triple::x86_64:
842  operands[operands.size() - 1].m_clobbered = true;
843  break;
844  case llvm::Triple::arm:
845  operands[0].m_clobbered = true;
846  break;
847  }
848  }
849 
850  if (Log *log =
852  StreamString ss;
853 
854  ss.Printf("[%s] expands to %zu operands:\n", operands_string,
855  operands.size());
856  for (const Operand &operand : operands) {
857  ss.PutCString(" ");
858  DumpOperand(operand, ss);
859  ss.PutCString("\n");
860  }
861 
862  log->PutString(ss.GetString());
863  }
864 
865  return true;
866  }
867 
868  bool IsCall() override {
869  if (m_is_call == eLazyBoolCalculate) {
870  DisassemblerScope disasm(*this);
871  if (disasm) {
872  DataExtractor data;
873  if (m_opcode.GetData(data)) {
874  bool is_alternate_isa;
875  lldb::addr_t pc = m_address.GetFileAddress();
876 
877  DisassemblerLLVMC::MCDisasmInstance *mc_disasm_ptr =
878  GetDisasmToUse(is_alternate_isa, disasm);
879  const uint8_t *opcode_data = data.GetDataStart();
880  const size_t opcode_data_len = data.GetByteSize();
881  llvm::MCInst inst;
882  const size_t inst_size =
883  mc_disasm_ptr->GetMCInst(opcode_data, opcode_data_len, pc, inst);
884  if (inst_size == 0) {
885  m_is_call = eLazyBoolNo;
886  } else {
887  if (mc_disasm_ptr->IsCall(inst))
888  m_is_call = eLazyBoolYes;
889  else
890  m_is_call = eLazyBoolNo;
891  }
892  }
893  }
894  }
895  return m_is_call == eLazyBoolYes;
896  }
897 
898 protected:
899  std::weak_ptr<DisassemblerLLVMC> m_disasm_wp;
905 
906 private:
908  GetDisasmToUse(bool &is_alternate_isa, DisassemblerScope &disasm) {
909  is_alternate_isa = false;
910  if (disasm) {
911  if (disasm->m_alternate_disasm_up) {
912  const AddressClass address_class = GetAddressClass();
913 
914  if (address_class == AddressClass::eCodeAlternateISA) {
915  is_alternate_isa = true;
916  return disasm->m_alternate_disasm_up.get();
917  }
918  }
919  return disasm->m_disasm_up.get();
920  }
921  return nullptr;
922  }
923 };
924 
925 std::unique_ptr<DisassemblerLLVMC::MCDisasmInstance>
926 DisassemblerLLVMC::MCDisasmInstance::Create(const char *triple, const char *cpu,
927  const char *features_str,
928  unsigned flavor,
929  DisassemblerLLVMC &owner) {
930  using Instance = std::unique_ptr<DisassemblerLLVMC::MCDisasmInstance>;
931 
932  std::string Status;
933  const llvm::Target *curr_target =
934  llvm::TargetRegistry::lookupTarget(triple, Status);
935  if (!curr_target)
936  return Instance();
937 
938  std::unique_ptr<llvm::MCInstrInfo> instr_info_up(
939  curr_target->createMCInstrInfo());
940  if (!instr_info_up)
941  return Instance();
942 
943  std::unique_ptr<llvm::MCRegisterInfo> reg_info_up(
944  curr_target->createMCRegInfo(triple));
945  if (!reg_info_up)
946  return Instance();
947 
948  std::unique_ptr<llvm::MCSubtargetInfo> subtarget_info_up(
949  curr_target->createMCSubtargetInfo(triple, cpu, features_str));
950  if (!subtarget_info_up)
951  return Instance();
952 
953  std::unique_ptr<llvm::MCAsmInfo> asm_info_up(
954  curr_target->createMCAsmInfo(*reg_info_up, triple));
955  if (!asm_info_up)
956  return Instance();
957 
958  std::unique_ptr<llvm::MCContext> context_up(
959  new llvm::MCContext(asm_info_up.get(), reg_info_up.get(), 0));
960  if (!context_up)
961  return Instance();
962 
963  std::unique_ptr<llvm::MCDisassembler> disasm_up(
964  curr_target->createMCDisassembler(*subtarget_info_up, *context_up));
965  if (!disasm_up)
966  return Instance();
967 
968  std::unique_ptr<llvm::MCRelocationInfo> rel_info_up(
969  curr_target->createMCRelocationInfo(triple, *context_up));
970  if (!rel_info_up)
971  return Instance();
972 
973  std::unique_ptr<llvm::MCSymbolizer> symbolizer_up(
974  curr_target->createMCSymbolizer(
975  triple, nullptr, DisassemblerLLVMC::SymbolLookupCallback, &owner,
976  context_up.get(), std::move(rel_info_up)));
977  disasm_up->setSymbolizer(std::move(symbolizer_up));
978 
979  unsigned asm_printer_variant =
980  flavor == ~0U ? asm_info_up->getAssemblerDialect() : flavor;
981 
982  std::unique_ptr<llvm::MCInstPrinter> instr_printer_up(
983  curr_target->createMCInstPrinter(llvm::Triple{triple},
984  asm_printer_variant, *asm_info_up,
985  *instr_info_up, *reg_info_up));
986  if (!instr_printer_up)
987  return Instance();
988 
989  return Instance(
990  new MCDisasmInstance(std::move(instr_info_up), std::move(reg_info_up),
991  std::move(subtarget_info_up), std::move(asm_info_up),
992  std::move(context_up), std::move(disasm_up),
993  std::move(instr_printer_up)));
994 }
995 
996 DisassemblerLLVMC::MCDisasmInstance::MCDisasmInstance(
997  std::unique_ptr<llvm::MCInstrInfo> &&instr_info_up,
998  std::unique_ptr<llvm::MCRegisterInfo> &&reg_info_up,
999  std::unique_ptr<llvm::MCSubtargetInfo> &&subtarget_info_up,
1000  std::unique_ptr<llvm::MCAsmInfo> &&asm_info_up,
1001  std::unique_ptr<llvm::MCContext> &&context_up,
1002  std::unique_ptr<llvm::MCDisassembler> &&disasm_up,
1003  std::unique_ptr<llvm::MCInstPrinter> &&instr_printer_up)
1004  : m_instr_info_up(std::move(instr_info_up)),
1005  m_reg_info_up(std::move(reg_info_up)),
1006  m_subtarget_info_up(std::move(subtarget_info_up)),
1007  m_asm_info_up(std::move(asm_info_up)),
1008  m_context_up(std::move(context_up)), m_disasm_up(std::move(disasm_up)),
1009  m_instr_printer_up(std::move(instr_printer_up)) {
1010  assert(m_instr_info_up && m_reg_info_up && m_subtarget_info_up &&
1011  m_asm_info_up && m_context_up && m_disasm_up && m_instr_printer_up);
1012 }
1013 
1015  const uint8_t *opcode_data, size_t opcode_data_len, lldb::addr_t pc,
1016  llvm::MCInst &mc_inst) const {
1017  llvm::ArrayRef<uint8_t> data(opcode_data, opcode_data_len);
1018  llvm::MCDisassembler::DecodeStatus status;
1019 
1020  uint64_t new_inst_size;
1021  status = m_disasm_up->getInstruction(mc_inst, new_inst_size, data, pc,
1022  llvm::nulls(), llvm::nulls());
1023  if (status == llvm::MCDisassembler::Success)
1024  return new_inst_size;
1025  else
1026  return 0;
1027 }
1028 
1030  llvm::MCInst &mc_inst, std::string &inst_string,
1031  std::string &comments_string) {
1032  llvm::raw_string_ostream inst_stream(inst_string);
1033  llvm::raw_string_ostream comments_stream(comments_string);
1034 
1035  m_instr_printer_up->setCommentStream(comments_stream);
1036  m_instr_printer_up->printInst(&mc_inst, inst_stream, llvm::StringRef(),
1037  *m_subtarget_info_up);
1038  m_instr_printer_up->setCommentStream(llvm::nulls());
1039  comments_stream.flush();
1040 
1041  static std::string g_newlines("\r\n");
1042 
1043  for (size_t newline_pos = 0;
1044  (newline_pos = comments_string.find_first_of(g_newlines, newline_pos)) !=
1045  comments_string.npos;
1046  /**/) {
1047  comments_string.replace(comments_string.begin() + newline_pos,
1048  comments_string.begin() + newline_pos + 1, 1, ' ');
1049  }
1050 }
1051 
1053  bool use_hex_immed, HexImmediateStyle hex_style) {
1054  m_instr_printer_up->setPrintImmHex(use_hex_immed);
1055  switch (hex_style) {
1056  case eHexStyleC:
1057  m_instr_printer_up->setPrintHexStyle(llvm::HexStyle::C);
1058  break;
1059  case eHexStyleAsm:
1060  m_instr_printer_up->setPrintHexStyle(llvm::HexStyle::Asm);
1061  break;
1062  }
1063 }
1064 
1066  llvm::MCInst &mc_inst) const {
1067  return m_instr_info_up->get(mc_inst.getOpcode())
1068  .mayAffectControlFlow(mc_inst, *m_reg_info_up);
1069 }
1070 
1072  llvm::MCInst &mc_inst) const {
1073  return m_instr_info_up->get(mc_inst.getOpcode()).hasDelaySlot();
1074 }
1075 
1076 bool DisassemblerLLVMC::MCDisasmInstance::IsCall(llvm::MCInst &mc_inst) const {
1077  return m_instr_info_up->get(mc_inst.getOpcode()).isCall();
1078 }
1079 
1081  const char *flavor_string)
1082  : Disassembler(arch, flavor_string), m_exe_ctx(NULL), m_inst(NULL),
1083  m_data_from_file(false) {
1084  if (!FlavorValidForArchSpec(arch, m_flavor.c_str())) {
1085  m_flavor.assign("default");
1086  }
1087 
1088  unsigned flavor = ~0U;
1089  llvm::Triple triple = arch.GetTriple();
1090 
1091  // So far the only supported flavor is "intel" on x86. The base class will
1092  // set this correctly coming in.
1093  if (triple.getArch() == llvm::Triple::x86 ||
1094  triple.getArch() == llvm::Triple::x86_64) {
1095  if (m_flavor == "intel") {
1096  flavor = 1;
1097  } else if (m_flavor == "att") {
1098  flavor = 0;
1099  }
1100  }
1101 
1102  ArchSpec thumb_arch(arch);
1103  if (triple.getArch() == llvm::Triple::arm) {
1104  std::string thumb_arch_name(thumb_arch.GetTriple().getArchName().str());
1105  // Replace "arm" with "thumb" so we get all thumb variants correct
1106  if (thumb_arch_name.size() > 3) {
1107  thumb_arch_name.erase(0, 3);
1108  thumb_arch_name.insert(0, "thumb");
1109  } else {
1110  thumb_arch_name = "thumbv8.2a";
1111  }
1112  thumb_arch.GetTriple().setArchName(llvm::StringRef(thumb_arch_name));
1113  }
1114 
1115  // If no sub architecture specified then use the most recent arm architecture
1116  // so the disassembler will return all instruction. Without it we will see a
1117  // lot of unknow opcode in case the code uses instructions which are not
1118  // available in the oldest arm version (used when no sub architecture is
1119  // specified)
1120  if (triple.getArch() == llvm::Triple::arm &&
1121  triple.getSubArch() == llvm::Triple::NoSubArch)
1122  triple.setArchName("armv8.2a");
1123 
1124  std::string features_str = "";
1125  const char *triple_str = triple.getTriple().c_str();
1126 
1127  // ARM Cortex M0-M7 devices only execute thumb instructions
1128  if (arch.IsAlwaysThumbInstructions()) {
1129  triple_str = thumb_arch.GetTriple().getTriple().c_str();
1130  features_str += "+fp-armv8,";
1131  }
1132 
1133  const char *cpu = "";
1134 
1135  switch (arch.GetCore()) {
1136  case ArchSpec::eCore_mips32:
1137  case ArchSpec::eCore_mips32el:
1138  cpu = "mips32";
1139  break;
1140  case ArchSpec::eCore_mips32r2:
1141  case ArchSpec::eCore_mips32r2el:
1142  cpu = "mips32r2";
1143  break;
1144  case ArchSpec::eCore_mips32r3:
1145  case ArchSpec::eCore_mips32r3el:
1146  cpu = "mips32r3";
1147  break;
1148  case ArchSpec::eCore_mips32r5:
1149  case ArchSpec::eCore_mips32r5el:
1150  cpu = "mips32r5";
1151  break;
1152  case ArchSpec::eCore_mips32r6:
1153  case ArchSpec::eCore_mips32r6el:
1154  cpu = "mips32r6";
1155  break;
1156  case ArchSpec::eCore_mips64:
1157  case ArchSpec::eCore_mips64el:
1158  cpu = "mips64";
1159  break;
1160  case ArchSpec::eCore_mips64r2:
1161  case ArchSpec::eCore_mips64r2el:
1162  cpu = "mips64r2";
1163  break;
1164  case ArchSpec::eCore_mips64r3:
1165  case ArchSpec::eCore_mips64r3el:
1166  cpu = "mips64r3";
1167  break;
1168  case ArchSpec::eCore_mips64r5:
1169  case ArchSpec::eCore_mips64r5el:
1170  cpu = "mips64r5";
1171  break;
1172  case ArchSpec::eCore_mips64r6:
1173  case ArchSpec::eCore_mips64r6el:
1174  cpu = "mips64r6";
1175  break;
1176  default:
1177  cpu = "";
1178  break;
1179  }
1180 
1181  if (triple.getArch() == llvm::Triple::mips ||
1182  triple.getArch() == llvm::Triple::mipsel ||
1183  triple.getArch() == llvm::Triple::mips64 ||
1184  triple.getArch() == llvm::Triple::mips64el) {
1185  uint32_t arch_flags = arch.GetFlags();
1186  if (arch_flags & ArchSpec::eMIPSAse_msa)
1187  features_str += "+msa,";
1188  if (arch_flags & ArchSpec::eMIPSAse_dsp)
1189  features_str += "+dsp,";
1190  if (arch_flags & ArchSpec::eMIPSAse_dspr2)
1191  features_str += "+dspr2,";
1192  }
1193 
1194  // If any AArch64 variant, enable the ARMv8.2 ISA extensions so we can
1195  // disassemble newer instructions.
1196  if (triple.getArch() == llvm::Triple::aarch64)
1197  features_str += "+v8.2a";
1198 
1199  if (triple.getArch() == llvm::Triple::aarch64
1200  && triple.getVendor() == llvm::Triple::Apple) {
1201  cpu = "apple-latest";
1202  }
1203 
1204  // We use m_disasm_up.get() to tell whether we are valid or not, so if this
1205  // isn't good for some reason, we won't be valid and FindPlugin will fail and
1206  // we won't get used.
1207  m_disasm_up = MCDisasmInstance::Create(triple_str, cpu, features_str.c_str(),
1208  flavor, *this);
1209 
1210  llvm::Triple::ArchType llvm_arch = triple.getArch();
1211 
1212  // For arm CPUs that can execute arm or thumb instructions, also create a
1213  // thumb instruction disassembler.
1214  if (llvm_arch == llvm::Triple::arm) {
1215  std::string thumb_triple(thumb_arch.GetTriple().getTriple());
1217  MCDisasmInstance::Create(thumb_triple.c_str(), "", features_str.c_str(),
1218  flavor, *this);
1219  if (!m_alternate_disasm_up)
1220  m_disasm_up.reset();
1221 
1222  } else if (llvm_arch == llvm::Triple::mips ||
1223  llvm_arch == llvm::Triple::mipsel ||
1224  llvm_arch == llvm::Triple::mips64 ||
1225  llvm_arch == llvm::Triple::mips64el) {
1226  /* Create alternate disassembler for MIPS16 and microMIPS */
1227  uint32_t arch_flags = arch.GetFlags();
1228  if (arch_flags & ArchSpec::eMIPSAse_mips16)
1229  features_str += "+mips16,";
1230  else if (arch_flags & ArchSpec::eMIPSAse_micromips)
1231  features_str += "+micromips,";
1232 
1234  triple_str, cpu, features_str.c_str(), flavor, *this);
1235  if (!m_alternate_disasm_up)
1236  m_disasm_up.reset();
1237  }
1238 }
1239 
1241 
1243  const char *flavor) {
1244  if (arch.GetTriple().getArch() != llvm::Triple::UnknownArch) {
1245  std::unique_ptr<DisassemblerLLVMC> disasm_up(
1246  new DisassemblerLLVMC(arch, flavor));
1247 
1248  if (disasm_up.get() && disasm_up->IsValid())
1249  return disasm_up.release();
1250  }
1251  return NULL;
1252 }
1253 
1255  const DataExtractor &data,
1256  lldb::offset_t data_offset,
1257  size_t num_instructions,
1258  bool append, bool data_from_file) {
1259  if (!append)
1261 
1262  if (!IsValid())
1263  return 0;
1264 
1265  m_data_from_file = data_from_file;
1266  uint32_t data_cursor = data_offset;
1267  const size_t data_byte_size = data.GetByteSize();
1268  uint32_t instructions_parsed = 0;
1269  Address inst_addr(base_addr);
1270 
1271  while (data_cursor < data_byte_size &&
1272  instructions_parsed < num_instructions) {
1273 
1274  AddressClass address_class = AddressClass::eCode;
1275 
1277  address_class = inst_addr.GetAddressClass();
1278 
1279  InstructionSP inst_sp(
1280  new InstructionLLVMC(*this, inst_addr, address_class));
1281 
1282  if (!inst_sp)
1283  break;
1284 
1285  uint32_t inst_size = inst_sp->Decode(*this, data, data_cursor);
1286 
1287  if (inst_size == 0)
1288  break;
1289 
1290  m_instruction_list.Append(inst_sp);
1291  data_cursor += inst_size;
1292  inst_addr.Slide(inst_size);
1293  instructions_parsed++;
1294  }
1295 
1296  return data_cursor - data_offset;
1297 }
1298 
1300  PluginManager::RegisterPlugin(GetPluginNameStatic(),
1301  "Disassembler that uses LLVM MC to disassemble "
1302  "i386, x86_64, ARM, and ARM64.",
1303  CreateInstance);
1304 
1305  llvm::InitializeAllTargetInfos();
1306  llvm::InitializeAllTargetMCs();
1307  llvm::InitializeAllAsmParsers();
1308  llvm::InitializeAllDisassemblers();
1309 }
1310 
1312  PluginManager::UnregisterPlugin(CreateInstance);
1313 }
1314 
1316  static ConstString g_name("llvm-mc");
1317  return g_name;
1318 }
1319 
1320 int DisassemblerLLVMC::OpInfoCallback(void *disassembler, uint64_t pc,
1321  uint64_t offset, uint64_t size,
1322  int tag_type, void *tag_bug) {
1323  return static_cast<DisassemblerLLVMC *>(disassembler)
1324  ->OpInfo(pc, offset, size, tag_type, tag_bug);
1325 }
1326 
1327 const char *DisassemblerLLVMC::SymbolLookupCallback(void *disassembler,
1328  uint64_t value,
1329  uint64_t *type, uint64_t pc,
1330  const char **name) {
1331  return static_cast<DisassemblerLLVMC *>(disassembler)
1332  ->SymbolLookup(value, type, pc, name);
1333 }
1334 
1336  const lldb_private::ArchSpec &arch, const char *flavor) {
1337  llvm::Triple triple = arch.GetTriple();
1338  if (flavor == NULL || strcmp(flavor, "default") == 0)
1339  return true;
1340 
1341  if (triple.getArch() == llvm::Triple::x86 ||
1342  triple.getArch() == llvm::Triple::x86_64) {
1343  return strcmp(flavor, "intel") == 0 || strcmp(flavor, "att") == 0;
1344  } else
1345  return false;
1346 }
1347 
1348 bool DisassemblerLLVMC::IsValid() const { return m_disasm_up.operator bool(); }
1349 
1350 int DisassemblerLLVMC::OpInfo(uint64_t PC, uint64_t Offset, uint64_t Size,
1351  int tag_type, void *tag_bug) {
1352  switch (tag_type) {
1353  default:
1354  break;
1355  case 1:
1356  memset(tag_bug, 0, sizeof(::LLVMOpInfo1));
1357  break;
1358  }
1359  return 0;
1360 }
1361 
1362 const char *DisassemblerLLVMC::SymbolLookup(uint64_t value, uint64_t *type_ptr,
1363  uint64_t pc, const char **name) {
1364  if (*type_ptr) {
1365  if (m_exe_ctx && m_inst) {
1366  // std::string remove_this_prior_to_checkin;
1367  Target *target = m_exe_ctx ? m_exe_ctx->GetTargetPtr() : NULL;
1368  Address value_so_addr;
1369  Address pc_so_addr;
1370  if (m_inst->UsingFileAddress()) {
1371  ModuleSP module_sp(m_inst->GetAddress().GetModule());
1372  if (module_sp) {
1373  module_sp->ResolveFileAddress(value, value_so_addr);
1374  module_sp->ResolveFileAddress(pc, pc_so_addr);
1375  }
1376  } else if (target && !target->GetSectionLoadList().IsEmpty()) {
1377  target->GetSectionLoadList().ResolveLoadAddress(value, value_so_addr);
1378  target->GetSectionLoadList().ResolveLoadAddress(pc, pc_so_addr);
1379  }
1380 
1381  SymbolContext sym_ctx;
1382  const SymbolContextItem resolve_scope =
1383  eSymbolContextFunction | eSymbolContextSymbol;
1384  if (pc_so_addr.IsValid() && pc_so_addr.GetModule()) {
1385  pc_so_addr.GetModule()->ResolveSymbolContextForAddress(
1386  pc_so_addr, resolve_scope, sym_ctx);
1387  }
1388 
1389  if (value_so_addr.IsValid() && value_so_addr.GetSection()) {
1390  StreamString ss;
1391 
1392  bool format_omitting_current_func_name = false;
1393  if (sym_ctx.symbol || sym_ctx.function) {
1394  AddressRange range;
1395  if (sym_ctx.GetAddressRange(resolve_scope, 0, false, range) &&
1396  range.GetBaseAddress().IsValid() &&
1397  range.ContainsLoadAddress(value_so_addr, target)) {
1398  format_omitting_current_func_name = true;
1399  }
1400  }
1401 
1402  // If the "value" address (the target address we're symbolicating) is
1403  // inside the same SymbolContext as the current instruction pc
1404  // (pc_so_addr), don't print the full function name - just print it
1405  // with DumpStyleNoFunctionName style, e.g. "<+36>".
1406  if (format_omitting_current_func_name) {
1407  value_so_addr.Dump(&ss, target, Address::DumpStyleNoFunctionName,
1408  Address::DumpStyleSectionNameOffset);
1409  } else {
1410  value_so_addr.Dump(
1411  &ss, target,
1412  Address::DumpStyleResolvedDescriptionNoFunctionArguments,
1413  Address::DumpStyleSectionNameOffset);
1414  }
1415 
1416  if (!ss.GetString().empty()) {
1417  // If Address::Dump returned a multi-line description, most commonly
1418  // seen when we have multiple levels of inlined functions at an
1419  // address, only show the first line.
1420  std::string str = ss.GetString();
1421  size_t first_eol_char = str.find_first_of("\r\n");
1422  if (first_eol_char != std::string::npos) {
1423  str.erase(first_eol_char);
1424  }
1425  m_inst->AppendComment(str);
1426  }
1427  }
1428  }
1429  }
1430 
1431  *type_ptr = LLVMDisassembler_ReferenceType_InOut_None;
1432  *name = NULL;
1433  return NULL;
1434 }
1435 
1436 // PluginInterface protocol
1438 
size_t PutCString(llvm::StringRef cstr)
Output a C string to the stream.
Definition: Stream.cpp:61
uint32_t GetFlags() const
Definition: ArchSpec.h:501
const char * SymbolLookup(uint64_t ReferenceValue, uint64_t *ReferenceType, uint64_t ReferencePC, const char **ReferenceName)
A data extractor class.
Definition: DataExtractor.h:47
Core GetCore() const
Definition: ArchSpec.h:410
#define LIBLLDB_LOG_PROCESS
Definition: Logging.h:15
Enumerations for broadcasting.
Definition: SBLaunchInfo.h:14
InstructionLLVMC * m_inst
int OpInfo(uint64_t PC, uint64_t Offset, uint64_t Size, int TagType, void *TagBug)
const char * AsCString(const char *value_if_empty=nullptr) const
Get the string value as a C string.
Definition: ConstString.h:224
A stream class that can stream formatted output to a file.
Definition: Stream.h:28
uint32_t GetU32(lldb::offset_t *offset_ptr) const
Extract a uint32_t value from *offset_ptr.
~DisassemblerLLVMC() override
Defines a symbol context baton that can be handed other debug core functions.
Definition: SymbolContext.h:33
DisassemblerLLVMC(const lldb_private::ArchSpec &arch, const char *flavor)
std::unique_ptr< MCDisasmInstance > m_disasm_up
bool Dump(Stream *s, ExecutionContextScope *exe_scope, DumpStyle style, DumpStyle fallback_style=DumpStyleInvalid, uint32_t addr_byte_size=UINT32_MAX) const
Dump a description of this object to a Stream.
Definition: Address.cpp:374
enum lldb_private::Instruction::Operand::Type m_type
"lldb/Utility/RegularExpression.h" A C++ wrapper class for regex.
An architecture specification class.
Definition: ArchSpec.h:32
static std::pair< Operand, llvm::StringRef::const_iterator > ParseImmediate(llvm::StringRef::const_iterator osi, llvm::StringRef::const_iterator ose)
static std::pair< Operand, llvm::StringRef::const_iterator > ParseIntelIndexedAccess(llvm::StringRef::const_iterator osi, llvm::StringRef::const_iterator ose)
"lldb/Target/ExecutionContext.h" A class that contains an execution context.
bool IsCall() override
lldb_private::ConstString GetPluginName() override
bool DoesBranch() override
bool HasDelaySlot() override
bool IsAlwaysThumbInstructions() const
Detect whether this architecture uses thumb code exclusively.
Definition: ArchSpec.cpp:1428
InstructionList m_instruction_list
Definition: Disassembler.h:536
Grants exclusive access to the disassembler and initializes it with the given InstructionLLVMC and an...
static std::pair< Operand, llvm::StringRef::const_iterator > ParseARMDerefAccess(llvm::StringRef::const_iterator osi, llvm::StringRef::const_iterator ose)
Symbol * symbol
The Symbol for a given query.
llvm::Triple & GetTriple()
Architecture triple accessor.
Definition: ArchSpec.h:431
std::vector< Operand > m_children
Definition: Disassembler.h:209
Function * function
The Function for a given query.
InstructionLLVMC(DisassemblerLLVMC &disasm, const lldb_private::Address &address, AddressClass addr_class)
bool HasDelaySlot(llvm::MCInst &mc_inst) const
bool ContainsLoadAddress(const Address &so_addr, Target *target) const
Check if a section offset so_addr when represented as a load address is contained within this object'...
static std::pair< Operand, llvm::StringRef::const_iterator > ParseIntelDerefAccess(llvm::StringRef::const_iterator osi, llvm::StringRef::const_iterator ose)
void PrintMCInst(llvm::MCInst &mc_inst, std::string &inst_string, std::string &comments_string)
bool Slide(int64_t offset)
Definition: Address.h:430
AddressClass GetAddressClass() const
Definition: Address.cpp:983
Target * GetTargetPtr() const
Returns a pointer to the target object.
#define LLDB_INVALID_ADDRESS
Invalid value definitions.
Definition: lldb-defines.h:85
bool UsingFileAddress() const
bool GetMatchAtIndex(llvm::StringRef s, uint32_t idx, std::string &match_str) const
uint64_t offset_t
Definition: lldb-types.h:87
size_t DecodeInstructions(const lldb_private::Address &base_addr, const lldb_private::DataExtractor &data, lldb::offset_t data_offset, size_t num_instructions, bool append, bool data_from_file) override
std::shared_ptr< DisassemblerLLVMC > operator->()
Log * GetLogIfAllCategoriesSet(uint32_t mask)
Definition: Logging.cpp:57
uint32_t GetMinimumOpcodeByteSize() const
Definition: ArchSpec.cpp:963
bool GetUseHexImmediates() const
Definition: Target.cpp:4016
SectionLoadList & GetSectionLoadList()
Definition: Target.h:1012
bool FlavorValidForArchSpec(const lldb_private::ArchSpec &arch, const char *flavor) override
lldb::ByteOrder GetByteOrder() const
Get the current byte order value.
static std::unique_ptr< MCDisasmInstance > Create(const char *triple, const char *cpu, const char *features_str, unsigned flavor, DisassemblerLLVMC &owner)
size_t GetByteSize() const
llvm::StringRef GetString() const
bool ResolveLoadAddress(lldb::addr_t load_addr, Address &so_addr, bool allow_section_end=false) const
std::unique_ptr< MCDisasmInstance > m_alternate_disasm_up
bool ValidOffsetForDataOfSize(lldb::offset_t offset, lldb::offset_t length) const
Test the availability of length bytes of data from offset.
size_t Printf(const char *format,...) __attribute__((format(printf
Output printf formatted output to the stream.
Definition: Stream.cpp:106
static lldb_private::ConstString GetPluginNameStatic()
static const char * SymbolLookupCallback(void *DisInfo, uint64_t ReferenceValue, uint64_t *ReferenceType, uint64_t ReferencePC, const char **ReferenceName)
DisassemblerScope(InstructionLLVMC &i, const lldb_private::ExecutionContext *exe_ctx=nullptr)
static llvm::StringRef::const_iterator ConsumeWhitespace(llvm::StringRef::const_iterator osi, llvm::StringRef::const_iterator ose)
bool IsValid() const
Check if the object state is valid.
Definition: Address.h:343
A section + offset based address class.
Definition: Address.h:80
void CalculateMnemonicOperandsAndComment(const lldb_private::ExecutionContext *exe_ctx) override
void AppendComment(std::string &description)
void Append(lldb::InstructionSP &inst_sp)
bool CanBranch(llvm::MCInst &mc_inst) const
const lldb_private::ExecutionContext * m_exe_ctx
uint64_t GetByteSize() const
Get the number of bytes contained in this object.
friend class InstructionLLVMC
uint32_t GetPluginVersion() override
static lldb_private::Disassembler * CreateInstance(const lldb_private::ArchSpec &arch, const char *flavor)
uint64_t addr_t
Definition: lldb-types.h:83
static int OpInfoCallback(void *DisInfo, uint64_t PC, uint64_t Offset, uint64_t Size, int TagType, void *TagBug)
const uint8_t * GetDataStart() const
Get the data start pointer.
A uniqued constant string class.
Definition: ConstString.h:38
static std::pair< Operand, llvm::StringRef::const_iterator > ParseARMOffsetAccess(llvm::StringRef::const_iterator osi, llvm::StringRef::const_iterator ose)
lldb::ModuleSP GetModule() const
Get accessor for the module for this address.
Definition: Address.cpp:264
bool Execute(llvm::StringRef string, Match *match=nullptr) const
Executes a regular expression.
Definition: SBAddress.h:15
bool ParseOperands(llvm::SmallVectorImpl< Instruction::Operand > &operands) override
SharingPtr< T > static_pointer_cast(const SharingPtr< U > &r)
Definition: SharingPtr.h:355
lldb::offset_t BytesLeft(lldb::offset_t offset) const
void SetStyle(bool use_hex_immed, HexImmediateStyle hex_style)
bool GetAddressRange(uint32_t scope, uint32_t range_idx, bool use_inline_block_range, AddressRange &range) const
Get the address range contained within a symbol context.
size_t Decode(const lldb_private::Disassembler &disassembler, const lldb_private::DataExtractor &data, lldb::offset_t data_offset) override
Address & GetBaseAddress()
Get accessor for the base address of the range.
Definition: AddressRange.h:220
uint8_t GetU8(lldb::offset_t *offset_ptr) const
Extract a uint8_t value from *offset_ptr.
static std::pair< bool, llvm::StringRef::const_iterator > ConsumeChar(llvm::StringRef::const_iterator osi, const char c, llvm::StringRef::const_iterator ose)
lldb::SectionSP GetSection() const
Get const accessor for the section.
Definition: Address.h:410
uint32_t GetMaximumOpcodeByteSize() const
Definition: ArchSpec.cpp:970
DisassemblerLLVMC::MCDisasmInstance * GetDisasmToUse(bool &is_alternate_isa)
std::weak_ptr< DisassemblerLLVMC > m_disasm_wp
A section + offset based address range class.
Definition: AddressRange.h:32
static std::pair< Operand, llvm::StringRef::const_iterator > ParseRegisterName(llvm::StringRef::const_iterator osi, llvm::StringRef::const_iterator ose)
uint16_t GetU16(lldb::offset_t *offset_ptr) const
Extract a uint16_t value from *offset_ptr.
bool IsCall(llvm::MCInst &mc_inst) const
llvm::Triple::ArchType GetMachine() const
Returns a machine family for the current architecture.
Definition: ArchSpec.cpp:726
static void DumpOperand(const Operand &op, Stream &s)
const Address & GetAddress() const
Definition: Disassembler.h:84
Disassembler::HexImmediateStyle GetHexImmediateStyle() const
Definition: Target.cpp:4047
uint64_t GetMCInst(const uint8_t *opcode_data, size_t opcode_data_len, lldb::addr_t pc, llvm::MCInst &mc_inst) const
uint64_t GetU64(lldb::offset_t *offset_ptr) const
Extract a uint64_t value from *offset_ptr.
An error handling class.
Definition: Status.h:44
const uint8_t * PeekData(lldb::offset_t offset, lldb::offset_t length) const
Peek at bytes at offset.