LLDB  mainline
Disassembler.h
Go to the documentation of this file.
1 //===-- Disassembler.h ------------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef liblldb_Disassembler_h_
10 #define liblldb_Disassembler_h_
11 
12 #include "lldb/Core/Address.h"
14 #include "lldb/Core/FormatEntity.h"
15 #include "lldb/Core/Opcode.h"
18 #include "lldb/Symbol/LineEntry.h"
20 #include "lldb/Utility/ArchSpec.h"
22 #include "lldb/Utility/FileSpec.h"
23 #include "lldb/lldb-defines.h"
24 #include "lldb/lldb-forward.h"
26 #include "lldb/lldb-types.h"
27 
28 #include "llvm/ADT/StringRef.h"
29 
30 #include <functional>
31 #include <map>
32 #include <memory>
33 #include <set>
34 #include <string>
35 #include <vector>
36 
37 #include <stddef.h>
38 #include <stdint.h>
39 #include <stdio.h>
40 
// Forward declarations, grouped per namespace. Only the names are introduced
// here; none of these types is defined in this header.
namespace lldb_private {
class AddressRange;
class DataExtractor;
class Debugger;
class Disassembler;
class Module;
class Stream;
class SymbolContext;
class SymbolContextList;
class Target;
struct RegisterInfo;
} // namespace lldb_private

namespace llvm {
template <typename T> class SmallVectorImpl;
} // namespace llvm
74 
75 namespace lldb_private {
76 
77 class Instruction {
78 public:
79  Instruction(const Address &address,
81 
82  virtual ~Instruction();
83 
84  const Address &GetAddress() const { return m_address; }
85 
86  const char *GetMnemonic(const ExecutionContext *exe_ctx) {
87  CalculateMnemonicOperandsAndCommentIfNeeded(exe_ctx);
88  return m_opcode_name.c_str();
89  }
90 
91  const char *GetOperands(const ExecutionContext *exe_ctx) {
92  CalculateMnemonicOperandsAndCommentIfNeeded(exe_ctx);
93  return m_mnemonics.c_str();
94  }
95 
96  const char *GetComment(const ExecutionContext *exe_ctx) {
97  CalculateMnemonicOperandsAndCommentIfNeeded(exe_ctx);
98  return m_comment.c_str();
99  }
100 
101  virtual void
102  CalculateMnemonicOperandsAndComment(const ExecutionContext *exe_ctx) = 0;
103 
104  AddressClass GetAddressClass();
105 
106  void SetAddress(const Address &addr) {
107  // Invalidate the address class to lazily discover it if we need to.
108  m_address_class = AddressClass::eInvalid;
109  m_address = addr;
110  }
111 
112  /// Dump the text representation of this Instruction to a Stream
113  ///
114  /// Print the (optional) address, (optional) bytes, opcode,
115  /// operands, and instruction comments to a stream.
116  ///
117  /// \param[in] s
118  /// The Stream to add the text to.
119  ///
120  /// \param[in] show_address
121  /// Whether the address (using disassembly_addr_format_spec formatting)
122  /// should be printed.
123  ///
124  /// \param[in] show_bytes
125  /// Whether the bytes of the assembly instruction should be printed.
126  ///
127  /// \param[in] max_opcode_byte_size
128  /// The size (in bytes) of the largest instruction in the list that
129  /// we are printing (for text justification/alignment purposes)
130  /// Only needed if show_bytes is true.
131  ///
132  /// \param[in] exe_ctx
133  /// The current execution context, if available. May be used in
134  /// the assembling of the operands+comments for this instruction.
135  /// Pass NULL if not applicable.
136  ///
137  /// \param[in] sym_ctx
138  /// The SymbolContext for this instruction.
139  /// Pass NULL if not available/computed.
140  /// Only needed if show_address is true.
141  ///
142  /// \param[in] prev_sym_ctx
143  /// The SymbolContext for the previous instruction. Depending on
144  /// the disassembly address format specification, a change in
145  /// Symbol / Function may mean that a line is printed with the new
146  /// symbol/function name.
147  /// Pass NULL if unavailable, or if this is the first instruction of
148  /// the InstructionList.
149  /// Only needed if show_address is true.
150  ///
151  /// \param[in] disassembly_addr_format
152  /// The format specification for how addresses are printed.
153  /// Only needed if show_address is true.
154  ///
155  /// \param[in] max_address_text_size
156  /// The length of the longest address string at the start of the
157  /// disassembly line that will be printed (the
158  /// Debugger::FormatDisassemblerAddress() string)
159  /// so this method can properly align the instruction opcodes.
160  /// May be 0 to indicate no indentation/alignment of the opcodes.
161  virtual void Dump(Stream *s, uint32_t max_opcode_byte_size, bool show_address,
162  bool show_bytes, const ExecutionContext *exe_ctx,
163  const SymbolContext *sym_ctx,
164  const SymbolContext *prev_sym_ctx,
165  const FormatEntity::Entry *disassembly_addr_format,
166  size_t max_address_text_size);
167 
168  virtual bool DoesBranch() = 0;
169 
170  virtual bool HasDelaySlot();
171 
172  bool CanSetBreakpoint ();
173 
174  virtual size_t Decode(const Disassembler &disassembler,
175  const DataExtractor &data,
176  lldb::offset_t data_offset) = 0;
177 
178  virtual void SetDescription(llvm::StringRef) {
179  } // May be overridden in sub-classes that have descriptions.
180 
181  lldb::OptionValueSP ReadArray(FILE *in_file, Stream *out_stream,
182  OptionValue::Type data_type);
183 
184  lldb::OptionValueSP ReadDictionary(FILE *in_file, Stream *out_stream);
185 
186  bool DumpEmulation(const ArchSpec &arch);
187 
188  virtual bool TestEmulation(Stream *stream, const char *test_file_name);
189 
190  bool Emulate(const ArchSpec &arch, uint32_t evaluate_options, void *baton,
191  EmulateInstruction::ReadMemoryCallback read_mem_callback,
192  EmulateInstruction::WriteMemoryCallback write_mem_calback,
194  EmulateInstruction::WriteRegisterCallback write_reg_callback);
195 
196  const Opcode &GetOpcode() const { return m_opcode; }
197 
198  uint32_t GetData(DataExtractor &data);
199 
200  struct Operand {
201  enum class Type {
202  Invalid = 0,
203  Register,
204  Immediate,
205  Dereference,
206  Sum,
207  Product
208  } m_type = Type::Invalid;
209  std::vector<Operand> m_children;
210  lldb::addr_t m_immediate = 0;
212  bool m_negative = false;
213  bool m_clobbered = false;
214 
215  bool IsValid() { return m_type != Type::Invalid; }
216 
217  static Operand BuildRegister(ConstString &r);
218  static Operand BuildImmediate(lldb::addr_t imm, bool neg);
219  static Operand BuildImmediate(int64_t imm);
220  static Operand BuildDereference(const Operand &ref);
221  static Operand BuildSum(const Operand &lhs, const Operand &rhs);
222  static Operand BuildProduct(const Operand &lhs, const Operand &rhs);
223  };
224 
226  return false;
227  }
228 
229  virtual bool IsCall() { return false; }
230 
231 protected:
232  Address m_address; // The section offset address of this instruction
// We include an address class in the Instruction class to
// allow the instruction to specify
// AddressClass::eCodeAlternateISA (currently used for
// Thumb), and also to specify data (AddressClass::eData).
// The usual value will be AddressClass::eCode, but often
// when disassembling memory, you might run into data.
// This can help us to disassemble appropriately.
240 private:
241  AddressClass m_address_class; // Use GetAddressClass () accessor function!
242 
243 protected:
244  Opcode m_opcode; // The opcode for this instruction
245  std::string m_opcode_name;
246  std::string m_mnemonics;
247  std::string m_comment;
249 
250  void
252  if (!m_calculated_strings) {
253  m_calculated_strings = true;
254  CalculateMnemonicOperandsAndComment(exe_ctx);
255  }
256  }
257 };
258 
259 namespace OperandMatchers {
260 std::function<bool(const Instruction::Operand &)>
261 MatchBinaryOp(std::function<bool(const Instruction::Operand &)> base,
262  std::function<bool(const Instruction::Operand &)> left,
263  std::function<bool(const Instruction::Operand &)> right);
264 
265 std::function<bool(const Instruction::Operand &)>
266 MatchUnaryOp(std::function<bool(const Instruction::Operand &)> base,
267  std::function<bool(const Instruction::Operand &)> child);
268 
269 std::function<bool(const Instruction::Operand &)>
270 MatchRegOp(const RegisterInfo &info);
271 
272 std::function<bool(const Instruction::Operand &)> FetchRegOp(ConstString &reg);
273 
274 std::function<bool(const Instruction::Operand &)> MatchImmOp(int64_t imm);
275 
276 std::function<bool(const Instruction::Operand &)> FetchImmOp(int64_t &imm);
277 
278 std::function<bool(const Instruction::Operand &)>
280 }
281 
283 public:
284  InstructionList();
285  ~InstructionList();
286 
287  size_t GetSize() const;
288 
289  uint32_t GetMaxOpcocdeByteSize() const;
290 
291  lldb::InstructionSP GetInstructionAtIndex(size_t idx) const;
292 
293  uint32_t GetIndexOfNextBranchInstruction(uint32_t start,
294  Target &target) const;
295 
296  uint32_t GetIndexOfInstructionAtLoadAddress(lldb::addr_t load_addr,
297  Target &target);
298 
299  uint32_t GetIndexOfInstructionAtAddress(const Address &addr);
300 
301  void Clear();
302 
303  void Append(lldb::InstructionSP &inst_sp);
304 
305  void Dump(Stream *s, bool show_address, bool show_bytes,
306  const ExecutionContext *exe_ctx);
307 
308 private:
309  typedef std::vector<lldb::InstructionSP> collection;
310  typedef collection::iterator iterator;
311  typedef collection::const_iterator const_iterator;
312 
313  collection m_instructions;
314 };
315 
317 public:
319 
320  ~PseudoInstruction() override;
321 
322  bool DoesBranch() override;
323 
324  bool HasDelaySlot() override;
325 
327  const ExecutionContext *exe_ctx) override {
328  // TODO: fill this in and put opcode name into Instruction::m_opcode_name,
329  // mnemonic into Instruction::m_mnemonics, and any comment into
330  // Instruction::m_comment
331  }
332 
333  size_t Decode(const Disassembler &disassembler, const DataExtractor &data,
334  lldb::offset_t data_offset) override;
335 
336  void SetOpcode(size_t opcode_size, void *opcode_data);
337 
338  void SetDescription(llvm::StringRef description) override;
339 
340 protected:
341  std::string m_description;
342 
343  DISALLOW_COPY_AND_ASSIGN(PseudoInstruction);
344 };
345 
346 class Disassembler : public std::enable_shared_from_this<Disassembler>,
347  public PluginInterface {
348 public:
// Option flags. Apart from eOptionNone, every enumerator is a distinct
// single bit.
enum {
  eOptionNone = 0u,
  eOptionShowBytes = 1u << 0,
  eOptionRawOuput = 1u << 1,
  // Mark the source line that contains the current PC (mixed mode only)
  eOptionMarkPCSourceLine = 1u << 2,
  // Mark the disassembly line that contains the PC
  eOptionMarkPCAddress = 1u << 3
};
358 
362  };
363 
// FindPlugin should be lax about the flavor string (it is too annoying to
// have various internal uses of the disassembler fail because the global
// flavor string gets set wrong). Instead, if you get a flavor string you
// don't understand, use the default. Folks who care to check can use the
// FlavorValidForArchSpec method on the disassembler they got back.
369  static lldb::DisassemblerSP
370  FindPlugin(const ArchSpec &arch, const char *flavor, const char *plugin_name);
371 
372  // This version will use the value in the Target settings if flavor is NULL;
373  static lldb::DisassemblerSP
374  FindPluginForTarget(const lldb::TargetSP target_sp, const ArchSpec &arch,
375  const char *flavor, const char *plugin_name);
376 
377  static lldb::DisassemblerSP
378  DisassembleRange(const ArchSpec &arch, const char *plugin_name,
379  const char *flavor, const ExecutionContext &exe_ctx,
380  const AddressRange &disasm_range, bool prefer_file_cache);
381 
382  static lldb::DisassemblerSP
383  DisassembleBytes(const ArchSpec &arch, const char *plugin_name,
384  const char *flavor, const Address &start, const void *bytes,
385  size_t length, uint32_t max_num_instructions,
386  bool data_from_file);
387 
388  static bool Disassemble(Debugger &debugger, const ArchSpec &arch,
389  const char *plugin_name, const char *flavor,
390  const ExecutionContext &exe_ctx,
391  const AddressRange &range, uint32_t num_instructions,
392  bool mixed_source_and_assembly,
393  uint32_t num_mixed_context_lines, uint32_t options,
394  Stream &strm);
395 
396  static bool Disassemble(Debugger &debugger, const ArchSpec &arch,
397  const char *plugin_name, const char *flavor,
398  const ExecutionContext &exe_ctx, const Address &start,
399  uint32_t num_instructions,
400  bool mixed_source_and_assembly,
401  uint32_t num_mixed_context_lines, uint32_t options,
402  Stream &strm);
403 
404  static size_t
405  Disassemble(Debugger &debugger, const ArchSpec &arch, const char *plugin_name,
406  const char *flavor, const ExecutionContext &exe_ctx,
407  SymbolContextList &sc_list, uint32_t num_instructions,
408  bool mixed_source_and_assembly, uint32_t num_mixed_context_lines,
409  uint32_t options, Stream &strm);
410 
411  static bool
412  Disassemble(Debugger &debugger, const ArchSpec &arch, const char *plugin_name,
413  const char *flavor, const ExecutionContext &exe_ctx,
414  ConstString name, Module *module,
415  uint32_t num_instructions, bool mixed_source_and_assembly,
416  uint32_t num_mixed_context_lines, uint32_t options, Stream &strm);
417 
418  static bool
419  Disassemble(Debugger &debugger, const ArchSpec &arch, const char *plugin_name,
420  const char *flavor, const ExecutionContext &exe_ctx,
421  uint32_t num_instructions, bool mixed_source_and_assembly,
422  uint32_t num_mixed_context_lines, uint32_t options, Stream &strm);
423 
424  // Constructors and Destructors
425  Disassembler(const ArchSpec &arch, const char *flavor);
426  ~Disassembler() override;
427 
428  typedef const char *(*SummaryCallback)(const Instruction &inst,
429  ExecutionContext *exe_context,
430  void *user_data);
431 
432  static bool PrintInstructions(Disassembler *disasm_ptr, Debugger &debugger,
433  const ArchSpec &arch,
434  const ExecutionContext &exe_ctx,
435  uint32_t num_instructions,
436  bool mixed_source_and_assembly,
437  uint32_t num_mixed_context_lines,
438  uint32_t options, Stream &strm);
439 
440  size_t ParseInstructions(const ExecutionContext *exe_ctx,
441  const AddressRange &range, Stream *error_strm_ptr,
442  bool prefer_file_cache);
443 
444  size_t ParseInstructions(const ExecutionContext *exe_ctx,
445  const Address &range, uint32_t num_instructions,
446  bool prefer_file_cache);
447 
448  virtual size_t DecodeInstructions(const Address &base_addr,
449  const DataExtractor &data,
450  lldb::offset_t data_offset,
451  size_t num_instructions, bool append,
452  bool data_from_file) = 0;
453 
454  InstructionList &GetInstructionList();
455 
456  const InstructionList &GetInstructionList() const;
457 
458  const ArchSpec &GetArchitecture() const { return m_arch; }
459 
460  const char *GetFlavor() const { return m_flavor.c_str(); }
461 
462  virtual bool FlavorValidForArchSpec(const lldb_private::ArchSpec &arch,
463  const char *flavor) = 0;
464 
465 protected:
466  // SourceLine and SourceLinesToDisplay structures are only used in the mixed
467  // source and assembly display methods internal to this class.
468 
469  struct SourceLine {
473 
474  SourceLine() : file(), line(LLDB_INVALID_LINE_NUMBER), column(0) {}
475 
476  bool operator==(const SourceLine &rhs) const {
477  return file == rhs.file && line == rhs.line && rhs.column == column;
478  }
479 
480  bool operator!=(const SourceLine &rhs) const {
481  return file != rhs.file || line != rhs.line || column != rhs.column;
482  }
483 
484  bool IsValid() const { return line != LLDB_INVALID_LINE_NUMBER; }
485  };
486 
488  std::vector<SourceLine> lines;
489 
490  // index of the "current" source line, if we want to highlight that when
491  // displaying the source lines. (as opposed to the surrounding source
492  // lines provided to give context)
494 
495  // Whether to print a blank line at the end of the source lines.
497 
499  : lines(), current_source_line(-1), print_source_context_end_eol(true) {
500  }
501  };
502 
503  // Get the function's declaration line number, hopefully a line number
504  // earlier than the opening curly brace at the start of the function body.
505  static SourceLine GetFunctionDeclLineEntry(const SymbolContext &sc);
506 
507  // Add the provided SourceLine to the map of filenames-to-source-lines-seen.
508  static void AddLineToSourceLineTables(
509  SourceLine &line,
510  std::map<FileSpec, std::set<uint32_t>> &source_lines_seen);
511 
512  // Given a source line, determine if we should print it when we're doing
513  // mixed source & assembly output. We're currently using the
514  // target.process.thread.step-avoid-regexp setting (which is used for
515  // stepping over inlined STL functions by default) to determine what source
516  // lines to avoid showing.
517  //
518  // Returns true if this source line should be elided (if the source line
519  // should not be displayed).
520  static bool
521  ElideMixedSourceAndDisassemblyLine(const ExecutionContext &exe_ctx,
522  const SymbolContext &sc, SourceLine &line);
523 
524  static bool
526  const SymbolContext &sc, LineEntry &line) {
527  SourceLine sl;
528  sl.file = line.file;
529  sl.line = line.line;
530  sl.column = line.column;
531  return ElideMixedSourceAndDisassemblyLine(exe_ctx, sc, sl);
532  };
533 
534  // Classes that inherit from Disassembler can see and modify these
538  std::string m_flavor;
539 
540 private:
541  // For Disassembler only
542  DISALLOW_COPY_AND_ASSIGN(Disassembler);
543 };
544 
545 } // namespace lldb_private
546 
547 #endif // liblldb_Disassembler_h_
A class to manage flag bits.
Definition: Debugger.h:82
A line table entry class.
Definition: LineEntry.h:20
A data extractor class.
Definition: DataExtractor.h:47
Defines a list of symbol context objects.
Enumerations for broadcasting.
Definition: SBLaunchInfo.h:14
Definition: Debugger.h:71
A stream class that can stream formatted output to a file.
Definition: Stream.h:28
Defines a symbol context baton that can be handed to other debug core functions.
Definition: SymbolContext.h:33
size_t(* WriteMemoryCallback)(EmulateInstruction *instruction, void *baton, const Context &context, lldb::addr_t addr, const void *dst, size_t length)
A file utility class.
Definition: FileSpec.h:55
An architecture specification class.
Definition: ArchSpec.h:32
"lldb/Target/ExecutionContext.h" A class that contains an execution context.
void CalculateMnemonicOperandsAndComment(const ExecutionContext *exe_ctx) override
Definition: Disassembler.h:326
const char * GetMnemonic(const ExecutionContext *exe_ctx)
Definition: Disassembler.h:86
InstructionList m_instruction_list
Definition: Disassembler.h:536
const char * GetOperands(const ExecutionContext *exe_ctx)
Definition: Disassembler.h:91
bool operator!=(const SourceLine &rhs) const
Definition: Disassembler.h:480
std::vector< Operand > m_children
Definition: Disassembler.h:209
std::function< bool(const Instruction::Operand &)> MatchUnaryOp(std::function< bool(const Instruction::Operand &)> base, std::function< bool(const Instruction::Operand &)> child)
Definition: Disassembler.h:266
size_t(* ReadMemoryCallback)(EmulateInstruction *instruction, void *baton, const Context &context, lldb::addr_t addr, void *dst, size_t length)
const Opcode & GetOpcode() const
Definition: Disassembler.h:196
uint64_t offset_t
Definition: lldb-types.h:87
std::function< bool(const Instruction::Operand &)> FetchRegOp(ConstString &reg)
Definition: Disassembler.h:272
virtual void SetDescription(llvm::StringRef)
Definition: Disassembler.h:178
uint16_t column
The column number of the source line, or zero if there is no column information.
Definition: LineEntry.h:156
std::function< bool(const Instruction::Operand &)> MatchBinaryOp(std::function< bool(const Instruction::Operand &)> base, std::function< bool(const Instruction::Operand &)> left, std::function< bool(const Instruction::Operand &)> right)
Definition: Disassembler.h:261
A class that describes an executable image and its associated object and symbol files.
Definition: Module.h:109
std::function< bool(const Instruction::Operand &)> MatchOpType(Instruction::Operand::Type type)
Definition: Disassembler.h:279
FileSpec file
The source file, possibly mapped by the target.source-map setting.
Definition: LineEntry.h:151
uint32_t line
The source line number, or zero if there is no line number information.
Definition: LineEntry.h:154
void CalculateMnemonicOperandsAndCommentIfNeeded(const ExecutionContext *exe_ctx)
Definition: Disassembler.h:251
virtual bool ParseOperands(llvm::SmallVectorImpl< Operand > &operands)
Definition: Disassembler.h:225
std::function< bool(const Instruction::Operand &)> FetchImmOp(int64_t &imm)
Definition: Disassembler.h:276
A section + offset based address class.
Definition: Address.h:80
const ArchSpec & GetArchitecture() const
Definition: Disassembler.h:458
std::function< bool(const Instruction::Operand &)> MatchRegOp(const RegisterInfo &info)
Definition: Disassembler.h:270
std::function< bool(const Instruction::Operand &)> MatchImmOp(int64_t imm)
Definition: Disassembler.h:274
bool(* WriteRegisterCallback)(EmulateInstruction *instruction, void *baton, const Context &context, const RegisterInfo *reg_info, const RegisterValue &reg_value)
uint64_t addr_t
Definition: lldb-types.h:83
A uniqued constant string class.
Definition: ConstString.h:38
bool operator==(const SourceLine &rhs) const
Definition: Disassembler.h:476
static bool ElideMixedSourceAndDisassemblyLine(const ExecutionContext &exe_ctx, const SymbolContext &sc, LineEntry &line)
Definition: Disassembler.h:525
const char * GetFlavor() const
Definition: Disassembler.h:460
void SetAddress(const Address &addr)
Definition: Disassembler.h:106
const char * GetComment(const ExecutionContext *exe_ctx)
Definition: Disassembler.h:96
#define LLDB_INVALID_LINE_NUMBER
Definition: lldb-defines.h:97
A section + offset based address range class.
Definition: AddressRange.h:32
const Address & GetAddress() const
Definition: Disassembler.h:84
bool(* ReadRegisterCallback)(EmulateInstruction *instruction, void *baton, const RegisterInfo *reg_info, RegisterValue &reg_value)