LLDB  mainline
x86AssemblyInspectionEngine.cpp
Go to the documentation of this file.
1 //===-- x86AssemblyInspectionEngine.cpp -----------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
10 
11 #include <memory>
12 
13 #include "llvm-c/Disassembler.h"
14 
15 #include "lldb/Core/Address.h"
16 #include "lldb/Symbol/UnwindPlan.h"
19 
20 using namespace lldb_private;
21 using namespace lldb;
22 
24  : m_cur_insn(nullptr), m_machine_ip_regnum(LLDB_INVALID_REGNUM),
25  m_machine_sp_regnum(LLDB_INVALID_REGNUM),
26  m_machine_fp_regnum(LLDB_INVALID_REGNUM),
27  m_lldb_ip_regnum(LLDB_INVALID_REGNUM),
28  m_lldb_sp_regnum(LLDB_INVALID_REGNUM),
29  m_lldb_fp_regnum(LLDB_INVALID_REGNUM),
30 
31  m_reg_map(), m_arch(arch), m_cpu(k_cpu_unspecified), m_wordsize(-1),
32  m_register_map_initialized(false), m_disasm_context() {
34  ::LLVMCreateDisasm(arch.GetTriple().getTriple().c_str(), nullptr,
35  /*TagType=*/1, nullptr, nullptr);
36 }
37 
39  ::LLVMDisasmDispose(m_disasm_context);
40 }
41 
42 void x86AssemblyInspectionEngine::Initialize(RegisterContextSP &reg_ctx) {
44  m_wordsize = -1;
46 
47  const llvm::Triple::ArchType cpu = m_arch.GetMachine();
48  if (cpu == llvm::Triple::x86)
49  m_cpu = k_i386;
50  else if (cpu == llvm::Triple::x86_64)
51  m_cpu = k_x86_64;
52 
53  if (m_cpu == k_cpu_unspecified)
54  return;
55 
56  if (reg_ctx.get() == nullptr)
57  return;
58 
59  if (m_cpu == k_i386) {
64  m_wordsize = 4;
65 
66  struct lldb_reg_info reginfo;
67  reginfo.name = "eax";
68  m_reg_map[k_machine_eax] = reginfo;
69  reginfo.name = "edx";
70  m_reg_map[k_machine_edx] = reginfo;
71  reginfo.name = "esp";
72  m_reg_map[k_machine_esp] = reginfo;
73  reginfo.name = "esi";
74  m_reg_map[k_machine_esi] = reginfo;
75  reginfo.name = "eip";
76  m_reg_map[k_machine_eip] = reginfo;
77  reginfo.name = "ecx";
78  m_reg_map[k_machine_ecx] = reginfo;
79  reginfo.name = "ebx";
80  m_reg_map[k_machine_ebx] = reginfo;
81  reginfo.name = "ebp";
82  m_reg_map[k_machine_ebp] = reginfo;
83  reginfo.name = "edi";
84  m_reg_map[k_machine_edi] = reginfo;
85  } else {
90  m_wordsize = 8;
91 
92  struct lldb_reg_info reginfo;
93  reginfo.name = "rax";
94  m_reg_map[k_machine_rax] = reginfo;
95  reginfo.name = "rdx";
96  m_reg_map[k_machine_rdx] = reginfo;
97  reginfo.name = "rsp";
98  m_reg_map[k_machine_rsp] = reginfo;
99  reginfo.name = "rsi";
100  m_reg_map[k_machine_rsi] = reginfo;
101  reginfo.name = "r8";
102  m_reg_map[k_machine_r8] = reginfo;
103  reginfo.name = "r10";
104  m_reg_map[k_machine_r10] = reginfo;
105  reginfo.name = "r12";
106  m_reg_map[k_machine_r12] = reginfo;
107  reginfo.name = "r14";
108  m_reg_map[k_machine_r14] = reginfo;
109  reginfo.name = "rip";
110  m_reg_map[k_machine_rip] = reginfo;
111  reginfo.name = "rcx";
112  m_reg_map[k_machine_rcx] = reginfo;
113  reginfo.name = "rbx";
114  m_reg_map[k_machine_rbx] = reginfo;
115  reginfo.name = "rbp";
116  m_reg_map[k_machine_rbp] = reginfo;
117  reginfo.name = "rdi";
118  m_reg_map[k_machine_rdi] = reginfo;
119  reginfo.name = "r9";
120  m_reg_map[k_machine_r9] = reginfo;
121  reginfo.name = "r11";
122  m_reg_map[k_machine_r11] = reginfo;
123  reginfo.name = "r13";
124  m_reg_map[k_machine_r13] = reginfo;
125  reginfo.name = "r15";
126  m_reg_map[k_machine_r15] = reginfo;
127  }
128 
129  for (MachineRegnumToNameAndLLDBRegnum::iterator it = m_reg_map.begin();
130  it != m_reg_map.end(); ++it) {
131  const RegisterInfo *ri = reg_ctx->GetRegisterInfoByName(it->second.name);
132  if (ri)
133  it->second.lldb_regnum = ri->kinds[eRegisterKindLLDB];
134  }
135 
136  uint32_t lldb_regno;
138  m_lldb_sp_regnum = lldb_regno;
140  m_lldb_fp_regnum = lldb_regno;
142  m_lldb_alt_fp_regnum = lldb_regno;
144  m_lldb_ip_regnum = lldb_regno;
145 
147 }
148 
150  std::vector<lldb_reg_info> &reg_info) {
152  m_wordsize = -1;
154 
155  const llvm::Triple::ArchType cpu = m_arch.GetMachine();
156  if (cpu == llvm::Triple::x86)
157  m_cpu = k_i386;
158  else if (cpu == llvm::Triple::x86_64)
159  m_cpu = k_x86_64;
160 
161  if (m_cpu == k_cpu_unspecified)
162  return;
163 
164  if (m_cpu == k_i386) {
169  m_wordsize = 4;
170 
171  struct lldb_reg_info reginfo;
172  reginfo.name = "eax";
173  m_reg_map[k_machine_eax] = reginfo;
174  reginfo.name = "edx";
175  m_reg_map[k_machine_edx] = reginfo;
176  reginfo.name = "esp";
177  m_reg_map[k_machine_esp] = reginfo;
178  reginfo.name = "esi";
179  m_reg_map[k_machine_esi] = reginfo;
180  reginfo.name = "eip";
181  m_reg_map[k_machine_eip] = reginfo;
182  reginfo.name = "ecx";
183  m_reg_map[k_machine_ecx] = reginfo;
184  reginfo.name = "ebx";
185  m_reg_map[k_machine_ebx] = reginfo;
186  reginfo.name = "ebp";
187  m_reg_map[k_machine_ebp] = reginfo;
188  reginfo.name = "edi";
189  m_reg_map[k_machine_edi] = reginfo;
190  } else {
195  m_wordsize = 8;
196 
197  struct lldb_reg_info reginfo;
198  reginfo.name = "rax";
199  m_reg_map[k_machine_rax] = reginfo;
200  reginfo.name = "rdx";
201  m_reg_map[k_machine_rdx] = reginfo;
202  reginfo.name = "rsp";
203  m_reg_map[k_machine_rsp] = reginfo;
204  reginfo.name = "rsi";
205  m_reg_map[k_machine_rsi] = reginfo;
206  reginfo.name = "r8";
207  m_reg_map[k_machine_r8] = reginfo;
208  reginfo.name = "r10";
209  m_reg_map[k_machine_r10] = reginfo;
210  reginfo.name = "r12";
211  m_reg_map[k_machine_r12] = reginfo;
212  reginfo.name = "r14";
213  m_reg_map[k_machine_r14] = reginfo;
214  reginfo.name = "rip";
215  m_reg_map[k_machine_rip] = reginfo;
216  reginfo.name = "rcx";
217  m_reg_map[k_machine_rcx] = reginfo;
218  reginfo.name = "rbx";
219  m_reg_map[k_machine_rbx] = reginfo;
220  reginfo.name = "rbp";
221  m_reg_map[k_machine_rbp] = reginfo;
222  reginfo.name = "rdi";
223  m_reg_map[k_machine_rdi] = reginfo;
224  reginfo.name = "r9";
225  m_reg_map[k_machine_r9] = reginfo;
226  reginfo.name = "r11";
227  m_reg_map[k_machine_r11] = reginfo;
228  reginfo.name = "r13";
229  m_reg_map[k_machine_r13] = reginfo;
230  reginfo.name = "r15";
231  m_reg_map[k_machine_r15] = reginfo;
232  }
233 
234  for (MachineRegnumToNameAndLLDBRegnum::iterator it = m_reg_map.begin();
235  it != m_reg_map.end(); ++it) {
236  for (size_t i = 0; i < reg_info.size(); ++i) {
237  if (::strcmp(reg_info[i].name, it->second.name) == 0) {
238  it->second.lldb_regnum = reg_info[i].lldb_regnum;
239  break;
240  }
241  }
242  }
243 
244  uint32_t lldb_regno;
246  m_lldb_sp_regnum = lldb_regno;
248  m_lldb_fp_regnum = lldb_regno;
250  m_lldb_alt_fp_regnum = lldb_regno;
252  m_lldb_ip_regnum = lldb_regno;
253 
255 }
256 
257 // This function expects an x86 native register number (i.e. the bits stripped
258 // out of the actual instruction), not an lldb register number.
259 //
260 // FIXME: This is ABI dependent, it shouldn't be hardcoded here.
261 
263  if (m_cpu == k_i386) {
264  switch (machine_regno) {
265  case k_machine_ebx:
266  case k_machine_ebp: // not actually a nonvolatile but often treated as such
267  // by convention
268  case k_machine_esi:
269  case k_machine_edi:
270  case k_machine_esp:
271  return true;
272  default:
273  return false;
274  }
275  }
276  if (m_cpu == k_x86_64) {
277  switch (machine_regno) {
278  case k_machine_rbx:
279  case k_machine_rsp:
280  case k_machine_rbp: // not actually a nonvolatile but often treated as such
281  // by convention
282  case k_machine_r12:
283  case k_machine_r13:
284  case k_machine_r14:
285  case k_machine_r15:
286  return true;
287  default:
288  return false;
289  }
290  }
291  return false;
292 }
293 
294 // Macro to detect if this is a REX mode prefix byte.
295 #define REX_W_PREFIX_P(opcode) (((opcode) & (~0x5)) == 0x48)
296 
297 // The high bit which should be added to the source register number (the "R"
298 // bit)
299 #define REX_W_SRCREG(opcode) (((opcode)&0x4) >> 2)
300 
301 // The high bit which should be added to the destination register number (the
302 // "B" bit)
303 #define REX_W_DSTREG(opcode) ((opcode)&0x1)
304 
305 // pushq %rbp [0x55]
307  uint8_t *p = m_cur_insn;
308  return *p == 0x55;
309 }
310 
311 // pushq $0 ; the first instruction in start() [0x6a 0x00]
313  uint8_t *p = m_cur_insn;
314  return *p == 0x6a && *(p + 1) == 0x0;
315 }
316 
317 // pushq $imm  (opcode 0x68 or 0x6a; the immediate value is not inspected)
318 // pushl $imm
320  uint8_t *p = m_cur_insn;
321  return *p == 0x68 || *p == 0x6a;
322 }
323 
324 // pushl imm8(%esp)
325 //
326 // e.g. 0xff 0x74 0x24 0x20 - 'pushl 0x20(%esp)' (same byte pattern for 'pushq
327 // 0x20(%rsp)' in an x86_64 program)
328 //
329 // 0xff (with opcode bits '6' in next byte, PUSH r/m32) 0x74 (ModR/M byte with
330 // three bits used to specify the opcode)
331 // mod == b01, opcode == b110, R/M == b100
332 // "+disp8"
333 // 0x24 (SIB byte - scaled index = 0, r32 == esp) 0x20 imm8 value
334 
336  if (*m_cur_insn == 0xff) {
337  // Get the 3 opcode bits from the ModR/M byte
338  uint8_t opcode = (*(m_cur_insn + 1) >> 3) & 7;
339  if (opcode == 6) {
340  // I'm only looking for 0xff /6 here - I
341  // don't really care what value is being pushed, just that we're pushing
342  // a 32/64 bit value on to the stack is enough.
343  return true;
344  }
345  }
346  return false;
347 }
348 
349 // instructions only valid in 32-bit mode:
350 // 0x0e - push cs
351 // 0x16 - push ss
352 // 0x1e - push ds
353 // 0x06 - push es
355  uint8_t p = *m_cur_insn;
356  if (m_wordsize == 4) {
357  if (p == 0x0e || p == 0x16 || p == 0x1e || p == 0x06)
358  return true;
359  }
360  return false;
361 }
362 
363 // pushq %rbx
364 // pushl %ebx
366  uint8_t *p = m_cur_insn;
367  int regno_prefix_bit = 0;
368  // If we have a rex prefix byte, check to see if a B bit is set
369  if (m_wordsize == 8 && (*p & 0xfe) == 0x40) {
370  regno_prefix_bit = (*p & 1) << 3;
371  p++;
372  }
373  if (*p >= 0x50 && *p <= 0x57) {
374  regno = (*p - 0x50) | regno_prefix_bit;
375  return true;
376  }
377  return false;
378 }
379 
380 // movq %rsp, %rbp [0x48 0x8b 0xec] or [0x48 0x89 0xe5]
381 // movl %esp, %ebp [0x8b 0xec] or [0x89 0xe5]
383  uint8_t *p = m_cur_insn;
384  if (m_wordsize == 8 && *p == 0x48)
385  p++;
386  if (*(p) == 0x8b && *(p + 1) == 0xec)
387  return true;
388  if (*(p) == 0x89 && *(p + 1) == 0xe5)
389  return true;
390  return false;
391 }
392 
393 // movq %rsp, %rbx [0x48 0x8b 0xdc] or [0x48 0x89 0xe3]
394 // movl %esp, %ebx [0x8b 0xdc] or [0x89 0xe3]
396  uint8_t *p = m_cur_insn;
397  if (m_wordsize == 8 && *p == 0x48)
398  p++;
399  if (*(p) == 0x8b && *(p + 1) == 0xdc)
400  return true;
401  if (*(p) == 0x89 && *(p + 1) == 0xe3)
402  return true;
403  return false;
404 }
405 
406 // movq %rbp, %rsp [0x48 0x8b 0xe5] or [0x48 0x89 0xec]
407 // movl %ebp, %esp [0x8b 0xe5] or [0x89 0xec]
409  uint8_t *p = m_cur_insn;
410  if (m_wordsize == 8 && *p == 0x48)
411  p++;
412  if (*(p) == 0x8b && *(p + 1) == 0xe5)
413  return true;
414  if (*(p) == 0x89 && *(p + 1) == 0xec)
415  return true;
416  return false;
417 }
418 
419 // movq %rbx, %rsp [0x48 0x8b 0xe3] or [0x48 0x89 0xdc]
420 // movl %ebx, %esp [0x8b 0xe3] or [0x89 0xdc]
422  uint8_t *p = m_cur_insn;
423  if (m_wordsize == 8 && *p == 0x48)
424  p++;
425  if (*(p) == 0x8b && *(p + 1) == 0xe3)
426  return true;
427  if (*(p) == 0x89 && *(p + 1) == 0xdc)
428  return true;
429  return false;
430 }
431 
432 // subq $0x20, %rsp
434  uint8_t *p = m_cur_insn;
435  if (m_wordsize == 8 && *p == 0x48)
436  p++;
437  // 8-bit immediate operand
438  if (*p == 0x83 && *(p + 1) == 0xec) {
439  amount = (int8_t) * (p + 2);
440  return true;
441  }
442  // 32-bit immediate operand
443  if (*p == 0x81 && *(p + 1) == 0xec) {
444  amount = (int32_t)extract_4(p + 2);
445  return true;
446  }
447  return false;
448 }
449 
450 // addq $0x20, %rsp
452  uint8_t *p = m_cur_insn;
453  if (m_wordsize == 8 && *p == 0x48)
454  p++;
455  // 8-bit immediate operand
456  if (*p == 0x83 && *(p + 1) == 0xc4) {
457  amount = (int8_t) * (p + 2);
458  return true;
459  }
460  // 32-bit immediate operand
461  if (*p == 0x81 && *(p + 1) == 0xc4) {
462  amount = (int32_t)extract_4(p + 2);
463  return true;
464  }
465  return false;
466 }
467 
468 // lea esp, [esp - 0x28]
469 // lea esp, [esp + 0x28]
471  uint8_t *p = m_cur_insn;
472  if (m_wordsize == 8 && *p == 0x48)
473  p++;
474 
475  // Check opcode
476  if (*p != 0x8d)
477  return false;
478 
479  // 8 bit displacement
480  if (*(p + 1) == 0x64 && (*(p + 2) & 0x3f) == 0x24) {
481  amount = (int8_t) * (p + 3);
482  return true;
483  }
484 
485  // 32 bit displacement
486  if (*(p + 1) == 0xa4 && (*(p + 2) & 0x3f) == 0x24) {
487  amount = (int32_t)extract_4(p + 3);
488  return true;
489  }
490 
491  return false;
492 }
493 
494 // lea -0x28(%ebp), %esp
495 // (32-bit and 64-bit variants, 8-bit and 32-bit displacement)
497  uint8_t *p = m_cur_insn;
498  if (m_wordsize == 8 && *p == 0x48)
499  p++;
500 
501  // Check opcode
502  if (*p != 0x8d)
503  return false;
504  ++p;
505 
506  // 8 bit displacement
507  if (*p == 0x65) {
508  amount = (int8_t)p[1];
509  return true;
510  }
511 
512  // 32 bit displacement
513  if (*p == 0xa5) {
514  amount = (int32_t)extract_4(p + 1);
515  return true;
516  }
517 
518  return false;
519 }
520 
521 // lea -0x28(%ebx), %esp
522 // (32-bit and 64-bit variants, 8-bit and 32-bit displacement)
524  uint8_t *p = m_cur_insn;
525  if (m_wordsize == 8 && *p == 0x48)
526  p++;
527 
528  // Check opcode
529  if (*p != 0x8d)
530  return false;
531  ++p;
532 
533  // 8 bit displacement
534  if (*p == 0x63) {
535  amount = (int8_t)p[1];
536  return true;
537  }
538 
539  // 32 bit displacement
540  if (*p == 0xa3) {
541  amount = (int32_t)extract_4(p + 1);
542  return true;
543  }
544 
545  return false;
546 }
547 
548 // and -0xfffffff0, %esp
549 // (32-bit and 64-bit variants, 8-bit and 32-bit immediate)
551  uint8_t *p = m_cur_insn;
552  if (m_wordsize == 8 && *p == 0x48)
553  p++;
554 
555  if (*p != 0x81 && *p != 0x83)
556  return false;
557 
558  return *++p == 0xe4;
559 }
560 
561 // popq %rbx
562 // popl %ebx
564  uint8_t *p = m_cur_insn;
565  int regno_prefix_bit = 0;
566  // If we have a rex prefix byte, check to see if a B bit is set
567  if (m_wordsize == 8 && (*p & 0xfe) == 0x40) {
568  regno_prefix_bit = (*p & 1) << 3;
569  p++;
570  }
571  if (*p >= 0x58 && *p <= 0x5f) {
572  regno = (*p - 0x58) | regno_prefix_bit;
573  return true;
574  }
575  return false;
576 }
577 
578 // popq %rbp [0x5d]
579 // popl %ebp [0x5d]
581  uint8_t *p = m_cur_insn;
582  return (*p == 0x5d);
583 }
584 
585 // instructions valid only in 32-bit mode:
586 // 0x1f - pop ds
587 // 0x07 - pop es
588 // 0x17 - pop ss
590  uint8_t p = *m_cur_insn;
591  if (m_wordsize == 4) {
592  if (p == 0x1f || p == 0x07 || p == 0x17)
593  return true;
594  }
595  return false;
596 }
597 
598 // leave [0xc9]
600  uint8_t *p = m_cur_insn;
601  return (*p == 0xc9);
602 }
603 
604 // call $0 [0xe8 0x0 0x0 0x0 0x0]
606  uint8_t *p = m_cur_insn;
607  return (*p == 0xe8) && (*(p + 1) == 0x0) && (*(p + 2) == 0x0) &&
608  (*(p + 3) == 0x0) && (*(p + 4) == 0x0);
609 }
610 
611 // Look for an instruction sequence storing a nonvolatile register on to the
612 // stack frame.
613 
614 // movq %rax, -0x10(%rbp) [0x48 0x89 0x45 0xf0]
615 // movl %eax, -0xc(%ebp) [0x89 0x45 0xf4]
616 
617 // The offset value returned in rbp_offset will be positive -- but it must be
618 // subtracted from the frame base register to get the actual location. The
619 // positive value returned for the offset is a convention used elsewhere for
620 // CFA offsets et al.
621 
623  int &regno, int &rbp_offset) {
624  uint8_t *p = m_cur_insn;
625  int src_reg_prefix_bit = 0;
626  int target_reg_prefix_bit = 0;
627 
628  if (m_wordsize == 8 && REX_W_PREFIX_P(*p)) {
629  src_reg_prefix_bit = REX_W_SRCREG(*p) << 3;
630  target_reg_prefix_bit = REX_W_DSTREG(*p) << 3;
631  if (target_reg_prefix_bit == 1) {
632  // rbp/ebp don't need a prefix bit - we know this isn't the reg we care
633  // about.
634  return false;
635  }
636  p++;
637  }
638 
639  if (*p == 0x89) {
640  /* Mask off the 3-5 bits which indicate the destination register
641  if this is a ModR/M byte. */
642  int opcode_destreg_masked_out = *(p + 1) & (~0x38);
643 
644  /* Is this a ModR/M byte with Mod bits 01 and R/M bits 101
645  and three bits between them, e.g. 01nnn101
646  We're looking for a destination of ebp-disp8 or ebp-disp32. */
647  int immsize;
648  if (opcode_destreg_masked_out == 0x45)
649  immsize = 2;
650  else if (opcode_destreg_masked_out == 0x85)
651  immsize = 4;
652  else
653  return false;
654 
655  int offset = 0;
656  if (immsize == 2)
657  offset = (int8_t) * (p + 2);
658  if (immsize == 4)
659  offset = (uint32_t)extract_4(p + 2);
660  if (offset > 0)
661  return false;
662 
663  regno = ((*(p + 1) >> 3) & 0x7) | src_reg_prefix_bit;
664  rbp_offset = offset > 0 ? offset : -offset;
665  return true;
666  }
667  return false;
668 }
669 
670 // Returns true if this is a jmp instruction where we can't
671 // know the destination address statically.
672 //
673 // ff e0 jmpq *%rax
674 // ff e1 jmpq *%rcx
675 // ff 60 28 jmpq *0x28(%rax)
676 // ff 60 60 jmpq *0x60(%rax)
678  if (*m_cur_insn != 0xff)
679  return false;
680 
681  // The second byte is a ModR/M /4 byte, strip off the registers
682  uint8_t second_byte_sans_reg = *(m_cur_insn + 1) & ~7;
683 
684  // Don't handle 0x24 disp32, because the target address is
685  // knowable statically - pc_rel_branch_or_jump_p() will
686  // return the target address.
687 
688  // [reg]
689  if (second_byte_sans_reg == 0x20)
690  return true;
691 
692  // [reg]+disp8
693  if (second_byte_sans_reg == 0x60)
694  return true;
695 
696  // [reg]+disp32
697  if (second_byte_sans_reg == 0xa0)
698  return true;
699 
700  // reg
701  if (second_byte_sans_reg == 0xe0)
702  return true;
703 
704  // disp32
705  // jumps to an address stored in memory, the value can't be cached
706  // in an unwind plan.
707  if (second_byte_sans_reg == 0x24)
708  return true;
709 
710  // use SIB byte
711  // ff 24 fe jmpq *(%rsi,%rdi,8)
712  if (second_byte_sans_reg == 0x24)
713  return true;
714 
715  return false;
716 }
717 
718 // Detect branches to fixed pc-relative offsets.
719 // Returns the offset from the address of the next instruction
720 // that may be branch/jumped to.
721 //
722 // Cannot determine the offset of a JMP that jumps to the address in
723 // a register ("jmpq *%rax") or offset from a register value
724 // ("jmpq *0x28(%rax)"), this method will return false on those
725 // instructions.
726 //
727 // These instructions all end in either a relative 8/16/32 bit value
728 // depending on the instruction and the current execution mode of the
729 // inferior process. Once we know the size of the opcode instruction,
730 // we can use the total instruction length to determine the size of
731 // the relative offset without having to compute it correctly.
732 
734  const int instruction_length, int &offset)
735 {
736  int opcode_size = 0;
737 
738  uint8_t b1 = m_cur_insn[0];
739 
740  switch (b1) {
741  case 0x77: // JA/JNBE rel8
742  case 0x73: // JAE/JNB/JNC rel8
743  case 0x72: // JB/JC/JNAE rel8
744  case 0x76: // JBE/JNA rel8
745  case 0xe3: // JCXZ/JECXZ/JRCXZ rel8
746  case 0x74: // JE/JZ rel8
747  case 0x7f: // JG/JNLE rel8
748  case 0x7d: // JGE/JNL rel8
749  case 0x7c: // JL/JNGE rel8
750  case 0x7e: // JNG/JLE rel8
751  case 0x71: // JNO rel8
752  case 0x7b: // JNP/JPO rel8
753  case 0x79: // JNS rel8
754  case 0x75: // JNE/JNZ rel8
755  case 0x70: // JO rel8
756  case 0x7a: // JP/JPE rel8
757  case 0x78: // JS rel8
758  case 0xeb: // JMP rel8
759  case 0xe9: // JMP rel16/rel32
760  opcode_size = 1;
761  break;
762  default:
763  break;
764  }
765  if (b1 == 0x0f && opcode_size == 0) {
766  uint8_t b2 = m_cur_insn[1];
767  switch (b2) {
768  case 0x87: // JA/JNBE rel16/rel32
769  case 0x86: // JBE/JNA rel16/rel32
770  case 0x84: // JE/JZ rel16/rel32
771  case 0x8f: // JG/JNLE rel16/rel32
772  case 0x8d: // JNL/JGE rel16/rel32
773  case 0x8e: // JLE rel16/rel32
774  case 0x82: // JB/JC/JNAE rel16/rel32
775  case 0x83: // JAE/JNB/JNC rel16/rel32
776  case 0x85: // JNE/JNZ rel16/rel32
777  case 0x8c: // JL/JNGE rel16/rel32
778  case 0x81: // JNO rel16/rel32
779  case 0x8b: // JNP/JPO rel16/rel32
780  case 0x89: // JNS rel16/rel32
781  case 0x80: // JO rel16/rel32
782  case 0x8a: // JP rel16/rel32
783  case 0x88: // JS rel16/rel32
784  opcode_size = 2;
785  break;
786  default:
787  break;
788  }
789  }
790 
791  if (opcode_size == 0)
792  return false;
793 
794  offset = 0;
795  if (instruction_length - opcode_size == 1) {
796  int8_t rel8 = (int8_t) *(m_cur_insn + opcode_size);
797  offset = rel8;
798  } else if (instruction_length - opcode_size == 2) {
799  int16_t rel16 = extract_2_signed (m_cur_insn + opcode_size);
800  offset = rel16;
801  } else if (instruction_length - opcode_size == 4) {
802  int32_t rel32 = extract_4_signed (m_cur_insn + opcode_size);
803  offset = rel32;
804  } else {
805  return false;
806  }
807  return true;
808 }
809 
810 // Returns true if this instruction is an intra-function branch or jump -
811 // a branch/jump within the bounds of this same function.
812 // Cannot predict where a jump through a register value ("jmpq *%rax")
813 // will go, so it will return false on that instruction.
815  const addr_t current_func_text_offset,
816  const AddressRange &func_range,
817  const int instruction_length,
818  addr_t &target_insn_offset) {
819  int offset;
820  if (pc_rel_branch_or_jump_p (instruction_length, offset) && offset != 0) {
821  addr_t next_pc_value = current_func_text_offset + instruction_length;
822  if (offset < 0 && addr_t(-offset) > current_func_text_offset) {
823  // Branch target is before the start of this function
824  return false;
825  }
826  if (offset + next_pc_value > func_range.GetByteSize()) {
827  // Branch targets outside this function's bounds
828  return false;
829  }
830  // This instruction branches to target_insn_offset (byte offset into the function)
831  target_insn_offset = next_pc_value + offset;
832  return true;
833  }
834  return false;
835 }
836 
837 // Returns true if this instruction is an inter-function branch or jump - a
838 // branch/jump to another function.
839 // Cannot predict where a jump through a register value ("jmpq *%rax")
840 // will go, so it will return false on that instruction.
842  const addr_t current_func_text_offset,
843  const AddressRange &func_range,
844  const int instruction_length) {
845  int offset;
846  addr_t target_insn_offset;
847  if (pc_rel_branch_or_jump_p (instruction_length, offset)) {
848  return !local_branch_p(current_func_text_offset,func_range,instruction_length,target_insn_offset);
849  }
850  return false;
851 }
852 
853 // ret [0xc3] or [0xcb] or [0xc2 imm16] or [0xca imm16]
855  uint8_t *p = m_cur_insn;
856  return *p == 0xc3 || *p == 0xc2 || *p == 0xca || *p == 0xcb;
857 }
858 
860  uint16_t v = 0;
861  for (int i = 1; i >= 0; i--)
862  v = (v << 8) | b[i];
863  return v;
864 }
865 
867  int16_t v = 0;
868  for (int i = 1; i >= 0; i--)
869  v = (v << 8) | b[i];
870  return v;
871 }
872 
874  uint32_t v = 0;
875  for (int i = 3; i >= 0; i--)
876  v = (v << 8) | b[i];
877  return v;
878 }
879 
881  int32_t v = 0;
882  for (int i = 3; i >= 0; i--)
883  v = (v << 8) | b[i];
884  return v;
885 }
886 
887 
889  int &length,
890  uint32_t buffer_remaining_bytes) {
891 
892  uint32_t max_op_byte_size = std::min(buffer_remaining_bytes, m_arch.GetMaximumOpcodeByteSize());
893  llvm::SmallVector<uint8_t, 32> opcode_data;
894  opcode_data.resize(max_op_byte_size);
895 
896  char out_string[512];
897  const size_t inst_size =
898  ::LLVMDisasmInstruction(m_disasm_context, insn_p, max_op_byte_size, 0,
899  out_string, sizeof(out_string));
900 
901  length = inst_size;
902  return true;
903 }
904 
906  int machine_regno, uint32_t &lldb_regno) {
907  MachineRegnumToNameAndLLDBRegnum::iterator it = m_reg_map.find(machine_regno);
908  if (it != m_reg_map.end()) {
909  lldb_regno = it->second.lldb_regnum;
910  return true;
911  }
912  return false;
913 }
914 
916  uint8_t *data, size_t size, AddressRange &func_range,
917  UnwindPlan &unwind_plan) {
918  unwind_plan.Clear();
919 
920  if (data == nullptr || size == 0)
921  return false;
922 
924  return false;
925 
926  addr_t current_func_text_offset = 0;
927  int current_sp_bytes_offset_from_fa = 0;
928  bool is_aligned = false;
929  UnwindPlan::Row::RegisterLocation initial_regloc;
931 
932  unwind_plan.SetPlanValidAddressRange(func_range);
933  unwind_plan.SetRegisterKind(eRegisterKindLLDB);
934 
935  // At the start of the function, find the CFA by adding wordsize to the SP
936  // register
937  row->SetOffset(current_func_text_offset);
938  row->GetCFAValue().SetIsRegisterPlusOffset(m_lldb_sp_regnum, m_wordsize);
939 
940  // caller's stack pointer value before the call insn is the CFA address
941  initial_regloc.SetIsCFAPlusOffset(0);
942  row->SetRegisterInfo(m_lldb_sp_regnum, initial_regloc);
943 
944  // saved instruction pointer can be found at CFA - wordsize.
945  current_sp_bytes_offset_from_fa = m_wordsize;
946  initial_regloc.SetAtCFAPlusOffset(-current_sp_bytes_offset_from_fa);
947  row->SetRegisterInfo(m_lldb_ip_regnum, initial_regloc);
948 
949  unwind_plan.AppendRow(row);
950 
951  // Allocate a new Row, populate it with the existing Row contents.
952  UnwindPlan::Row *newrow = new UnwindPlan::Row;
953  *newrow = *row.get();
954  row.reset(newrow);
955 
956  // Track which registers have been saved so far in the prologue. If we see
957  // another push of that register, it's not part of the prologue. The register
958  // numbers used here are the machine register #'s (i386_register_numbers,
959  // x86_64_register_numbers).
960  std::vector<bool> saved_registers(32, false);
961 
962  // Once the prologue has completed we'll save a copy of the unwind
963  // instructions If there is an epilogue in the middle of the function, after
964  // that epilogue we'll reinstate the unwind setup -- we assume that some code
965  // path jumps over the mid-function epilogue
966 
967  UnwindPlan::RowSP prologue_completed_row; // copy of prologue row of CFI
968  int prologue_completed_sp_bytes_offset_from_cfa; // The sp value before the
969  // epilogue started executed
970  bool prologue_completed_is_aligned;
971  std::vector<bool> prologue_completed_saved_registers;
972 
973  while (current_func_text_offset < size) {
974  int stack_offset, insn_len;
975  int machine_regno; // register numbers masked directly out of instructions
976  uint32_t lldb_regno; // register numbers in lldb's eRegisterKindLLDB
977  // numbering scheme
978 
979  bool in_epilogue = false; // we're in the middle of an epilogue sequence
980  bool row_updated = false; // The UnwindPlan::Row 'row' has been updated
981 
982  m_cur_insn = data + current_func_text_offset;
983  if (!instruction_length(m_cur_insn, insn_len, size - current_func_text_offset)
984  || insn_len == 0
985  || insn_len > kMaxInstructionByteSize) {
986  // An unrecognized/junk instruction
987  break;
988  }
989 
990  auto &cfa_value = row->GetCFAValue();
991  auto &afa_value = row->GetAFAValue();
992  auto fa_value_ptr = is_aligned ? &afa_value : &cfa_value;
993 
994  if (mov_rsp_rbp_pattern_p()) {
995  if (fa_value_ptr->GetRegisterNumber() == m_lldb_sp_regnum) {
996  fa_value_ptr->SetIsRegisterPlusOffset(
997  m_lldb_fp_regnum, fa_value_ptr->GetOffset());
998  row_updated = true;
999  }
1000  }
1001 
1002  else if (mov_rsp_rbx_pattern_p()) {
1003  if (fa_value_ptr->GetRegisterNumber() == m_lldb_sp_regnum) {
1004  fa_value_ptr->SetIsRegisterPlusOffset(
1005  m_lldb_alt_fp_regnum, fa_value_ptr->GetOffset());
1006  row_updated = true;
1007  }
1008  }
1009 
1010  else if (and_rsp_pattern_p()) {
1011  current_sp_bytes_offset_from_fa = 0;
1012  afa_value.SetIsRegisterPlusOffset(
1013  m_lldb_sp_regnum, current_sp_bytes_offset_from_fa);
1014  fa_value_ptr = &afa_value;
1015  is_aligned = true;
1016  row_updated = true;
1017  }
1018 
1019  else if (mov_rbp_rsp_pattern_p()) {
1020  if (is_aligned && cfa_value.GetRegisterNumber() == m_lldb_fp_regnum)
1021  {
1022  is_aligned = false;
1023  fa_value_ptr = &cfa_value;
1024  afa_value.SetUnspecified();
1025  row_updated = true;
1026  }
1027  if (fa_value_ptr->GetRegisterNumber() == m_lldb_fp_regnum)
1028  current_sp_bytes_offset_from_fa = fa_value_ptr->GetOffset();
1029  }
1030 
1031  else if (mov_rbx_rsp_pattern_p()) {
1032  if (is_aligned && cfa_value.GetRegisterNumber() == m_lldb_alt_fp_regnum)
1033  {
1034  is_aligned = false;
1035  fa_value_ptr = &cfa_value;
1036  afa_value.SetUnspecified();
1037  row_updated = true;
1038  }
1039  if (fa_value_ptr->GetRegisterNumber() == m_lldb_alt_fp_regnum)
1040  current_sp_bytes_offset_from_fa = fa_value_ptr->GetOffset();
1041  }
1042 
1043  // This is the start() function (or a pthread equivalent), it starts with a
1044  // pushl $0x0 which puts the saved pc value of 0 on the stack. In this
1045  // case we want to pretend we didn't see a stack movement at all --
1046  // normally the saved pc value is already on the stack by the time the
1047  // function starts executing.
1048  else if (push_0_pattern_p()) {
1049  }
1050 
1051  else if (push_reg_p(machine_regno)) {
1052  current_sp_bytes_offset_from_fa += m_wordsize;
1053  // the PUSH instruction has moved the stack pointer - if the FA is set
1054  // in terms of the stack pointer, we need to add a new row of
1055  // instructions.
1056  if (fa_value_ptr->GetRegisterNumber() == m_lldb_sp_regnum) {
1057  fa_value_ptr->SetOffset(current_sp_bytes_offset_from_fa);
1058  row_updated = true;
1059  }
1060  // record where non-volatile (callee-saved, spilled) registers are saved
1061  // on the stack
1062  if (nonvolatile_reg_p(machine_regno) &&
1063  machine_regno_to_lldb_regno(machine_regno, lldb_regno) &&
1064  !saved_registers[machine_regno]) {
1066  if (is_aligned)
1067  regloc.SetAtAFAPlusOffset(-current_sp_bytes_offset_from_fa);
1068  else
1069  regloc.SetAtCFAPlusOffset(-current_sp_bytes_offset_from_fa);
1070  row->SetRegisterInfo(lldb_regno, regloc);
1071  saved_registers[machine_regno] = true;
1072  row_updated = true;
1073  }
1074  }
1075 
1076  else if (pop_reg_p(machine_regno)) {
1077  current_sp_bytes_offset_from_fa -= m_wordsize;
1078 
1079  if (nonvolatile_reg_p(machine_regno) &&
1080  machine_regno_to_lldb_regno(machine_regno, lldb_regno) &&
1081  saved_registers[machine_regno]) {
1082  saved_registers[machine_regno] = false;
1083  row->RemoveRegisterInfo(lldb_regno);
1084 
1085  if (lldb_regno == fa_value_ptr->GetRegisterNumber()) {
1086  fa_value_ptr->SetIsRegisterPlusOffset(
1087  m_lldb_sp_regnum, fa_value_ptr->GetOffset());
1088  }
1089 
1090  in_epilogue = true;
1091  row_updated = true;
1092  }
1093 
1094  // the POP instruction has moved the stack pointer - if the FA is set in
1095  // terms of the stack pointer, we need to add a new row of instructions.
1096  if (fa_value_ptr->GetRegisterNumber() == m_lldb_sp_regnum) {
1097  fa_value_ptr->SetIsRegisterPlusOffset(
1098  m_lldb_sp_regnum, current_sp_bytes_offset_from_fa);
1099  row_updated = true;
1100  }
1101  }
1102 
1103  else if (pop_misc_reg_p()) {
1104  current_sp_bytes_offset_from_fa -= m_wordsize;
1105  if (fa_value_ptr->GetRegisterNumber() == m_lldb_sp_regnum) {
1106  fa_value_ptr->SetIsRegisterPlusOffset(
1107  m_lldb_sp_regnum, current_sp_bytes_offset_from_fa);
1108  row_updated = true;
1109  }
1110  }
1111 
1112  // The LEAVE instruction moves the value from rbp into rsp and pops a value
1113  // off the stack into rbp (restoring the caller's rbp value). It is the
1114  // opposite of ENTER, or 'push rbp, mov rsp rbp'.
1115  else if (leave_pattern_p()) {
1116  if (saved_registers[m_machine_fp_regnum]) {
1117  saved_registers[m_machine_fp_regnum] = false;
1118  row->RemoveRegisterInfo(m_lldb_fp_regnum);
1119 
1120  row_updated = true;
1121  }
1122 
1123  if (is_aligned && cfa_value.GetRegisterNumber() == m_lldb_fp_regnum)
1124  {
1125  is_aligned = false;
1126  fa_value_ptr = &cfa_value;
1127  afa_value.SetUnspecified();
1128  row_updated = true;
1129  }
1130 
1131  if (fa_value_ptr->GetRegisterNumber() == m_lldb_fp_regnum)
1132  {
1133  fa_value_ptr->SetIsRegisterPlusOffset(
1134  m_lldb_sp_regnum, fa_value_ptr->GetOffset());
1135 
1136  current_sp_bytes_offset_from_fa = fa_value_ptr->GetOffset();
1137  }
1138 
1139  current_sp_bytes_offset_from_fa -= m_wordsize;
1140 
1141  if (fa_value_ptr->GetRegisterNumber() == m_lldb_sp_regnum) {
1142  fa_value_ptr->SetIsRegisterPlusOffset(
1143  m_lldb_sp_regnum, current_sp_bytes_offset_from_fa);
1144  row_updated = true;
1145  }
1146 
1147  in_epilogue = true;
1148  }
1149 
1150  else if (mov_reg_to_local_stack_frame_p(machine_regno, stack_offset) &&
1151  nonvolatile_reg_p(machine_regno) &&
1152  machine_regno_to_lldb_regno(machine_regno, lldb_regno) &&
1153  !saved_registers[machine_regno]) {
1154  saved_registers[machine_regno] = true;
1155 
1157 
1158  // stack_offset for 'movq %r15, -80(%rbp)' will be 80. In the Row, we
1159  // want to express this as the offset from the FA. If the frame base is
1160  // rbp (like the above instruction), the FA offset for rbp is probably
1161  // 16. So we want to say that the value is stored at the FA address -
1162  // 96.
1163  if (is_aligned)
1164  regloc.SetAtAFAPlusOffset(-(stack_offset + fa_value_ptr->GetOffset()));
1165  else
1166  regloc.SetAtCFAPlusOffset(-(stack_offset + fa_value_ptr->GetOffset()));
1167 
1168  row->SetRegisterInfo(lldb_regno, regloc);
1169 
1170  row_updated = true;
1171  }
1172 
1173  else if (sub_rsp_pattern_p(stack_offset)) {
1174  current_sp_bytes_offset_from_fa += stack_offset;
1175  if (fa_value_ptr->GetRegisterNumber() == m_lldb_sp_regnum) {
1176  fa_value_ptr->SetOffset(current_sp_bytes_offset_from_fa);
1177  row_updated = true;
1178  }
1179  }
1180 
1181  else if (add_rsp_pattern_p(stack_offset)) {
1182  current_sp_bytes_offset_from_fa -= stack_offset;
1183  if (fa_value_ptr->GetRegisterNumber() == m_lldb_sp_regnum) {
1184  fa_value_ptr->SetOffset(current_sp_bytes_offset_from_fa);
1185  row_updated = true;
1186  }
1187  in_epilogue = true;
1188  }
1189 
1190  else if (push_extended_pattern_p() || push_imm_pattern_p() ||
1191  push_misc_reg_p()) {
1192  current_sp_bytes_offset_from_fa += m_wordsize;
1193  if (fa_value_ptr->GetRegisterNumber() == m_lldb_sp_regnum) {
1194  fa_value_ptr->SetOffset(current_sp_bytes_offset_from_fa);
1195  row_updated = true;
1196  }
1197  }
1198 
1199  else if (lea_rsp_pattern_p(stack_offset)) {
1200  current_sp_bytes_offset_from_fa -= stack_offset;
1201  if (fa_value_ptr->GetRegisterNumber() == m_lldb_sp_regnum) {
1202  fa_value_ptr->SetOffset(current_sp_bytes_offset_from_fa);
1203  row_updated = true;
1204  }
1205  if (stack_offset > 0)
1206  in_epilogue = true;
1207  }
1208 
1209  else if (lea_rbp_rsp_pattern_p(stack_offset)) {
1210  if (is_aligned &&
1211  cfa_value.GetRegisterNumber() == m_lldb_fp_regnum) {
1212  is_aligned = false;
1213  fa_value_ptr = &cfa_value;
1214  afa_value.SetUnspecified();
1215  row_updated = true;
1216  }
1217  if (fa_value_ptr->GetRegisterNumber() == m_lldb_fp_regnum) {
1218  current_sp_bytes_offset_from_fa =
1219  fa_value_ptr->GetOffset() - stack_offset;
1220  }
1221  }
1222 
1223  else if (lea_rbx_rsp_pattern_p(stack_offset)) {
1224  if (is_aligned &&
1225  cfa_value.GetRegisterNumber() == m_lldb_alt_fp_regnum) {
1226  is_aligned = false;
1227  fa_value_ptr = &cfa_value;
1228  afa_value.SetUnspecified();
1229  row_updated = true;
1230  }
1231  if (fa_value_ptr->GetRegisterNumber() == m_lldb_alt_fp_regnum) {
1232  current_sp_bytes_offset_from_fa = fa_value_ptr->GetOffset() - stack_offset;
1233  }
1234  }
1235 
1236  else if (prologue_completed_row.get() &&
1237  (ret_pattern_p() ||
1238  non_local_branch_p (current_func_text_offset, func_range, insn_len) ||
1239  jmp_to_reg_p())) {
1240  // Check if the current instruction is the end of an epilogue sequence,
1241  // and if so, re-instate the prologue-completed unwind state.
1242 
1243  // The current instruction is a branch/jump outside this function,
1244  // a ret, or a jump through a register value which we cannot
1245  // determine the effects of. Verify that the stack frame state
1246  // has been unwound to the same as it was at function entry to avoid
1247  // mis-identifying a JMP instruction as an epilogue.
1249  if (row->GetRegisterInfo(m_lldb_sp_regnum, sp) &&
1250  row->GetRegisterInfo(m_lldb_ip_regnum, pc)) {
1251  // Any ret instruction variant is definitely indicative of an
1252  // epilogue; for other insn patterns verify that we're back to
1253  // the original unwind state.
1254  if (ret_pattern_p() ||
1255  (sp.IsCFAPlusOffset() && sp.GetOffset() == 0 &&
1256  pc.IsAtCFAPlusOffset() && pc.GetOffset() == -m_wordsize)) {
1257  // Reinstate the saved prologue setup for any instructions that come
1258  // after the epilogue
1259 
1260  UnwindPlan::Row *newrow = new UnwindPlan::Row;
1261  *newrow = *prologue_completed_row.get();
1262  row.reset(newrow);
1263  current_sp_bytes_offset_from_fa =
1264  prologue_completed_sp_bytes_offset_from_cfa;
1265  is_aligned = prologue_completed_is_aligned;
1266 
1267  saved_registers.clear();
1268  saved_registers.resize(prologue_completed_saved_registers.size(), false);
1269  for (size_t i = 0; i < prologue_completed_saved_registers.size(); ++i) {
1270  saved_registers[i] = prologue_completed_saved_registers[i];
1271  }
1272 
1273  in_epilogue = true;
1274  row_updated = true;
1275  }
1276  }
1277  }
1278 
1279  // call next instruction
1280  // call 0
1281  // => pop %ebx
1282  // This is used in i386 programs to get the PIC base address for finding
1283  // global data
1284  else if (call_next_insn_pattern_p()) {
1285  current_sp_bytes_offset_from_fa += m_wordsize;
1286  if (fa_value_ptr->GetRegisterNumber() == m_lldb_sp_regnum) {
1287  fa_value_ptr->SetOffset(current_sp_bytes_offset_from_fa);
1288  row_updated = true;
1289  }
1290  }
1291 
1292  if (row_updated) {
1293  if (current_func_text_offset + insn_len < size) {
1294  row->SetOffset(current_func_text_offset + insn_len);
1295  unwind_plan.AppendRow(row);
1296  // Allocate a new Row, populate it with the existing Row contents.
1297  newrow = new UnwindPlan::Row;
1298  *newrow = *row.get();
1299  row.reset(newrow);
1300  }
1301  }
1302 
1303  if (!in_epilogue && row_updated) {
1304  // If we're not in an epilogue sequence, save the updated Row
1305  UnwindPlan::Row *newrow = new UnwindPlan::Row;
1306  *newrow = *row.get();
1307  prologue_completed_row.reset(newrow);
1308 
1309  prologue_completed_saved_registers.clear();
1310  prologue_completed_saved_registers.resize(saved_registers.size(), false);
1311  for (size_t i = 0; i < saved_registers.size(); ++i) {
1312  prologue_completed_saved_registers[i] = saved_registers[i];
1313  }
1314  }
1315 
1316  // We may change the sp value without adding a new Row necessarily -- keep
1317  // track of it either way.
1318  if (!in_epilogue) {
1319  prologue_completed_sp_bytes_offset_from_cfa =
1320  current_sp_bytes_offset_from_fa;
1321  prologue_completed_is_aligned = is_aligned;
1322  }
1323 
1324  m_cur_insn = m_cur_insn + insn_len;
1325  current_func_text_offset += insn_len;
1326  }
1327 
1328  unwind_plan.SetSourceName("assembly insn profiling");
1329  unwind_plan.SetSourcedFromCompiler(eLazyBoolNo);
1332 
1333  return true;
1334 }
1335 
// AugmentUnwindPlanFromCallSite: walk the instruction bytes in
// [data, data + size) and add rows to an existing unwind_plan after
// instructions that adjust the stack pointer (while the CFA is sp-based) or
// that end a frame-pointer-based epilogue, so the plan also covers those
// locations.
//
// data, size  - instruction bytes to scan.
// func_range  - its base address must be valid; it is also recorded as the
//               plan's valid address range before returning true.
// unwind_plan - modified in place. Must have at least two rows, the first at
//               offset 0 with CFA = sp + m_wordsize.
// reg_ctx     - used to convert register numbers when the plan is not in
//               eRegisterKindLLDB numbering; may be null only if the plan
//               already uses eRegisterKindLLDB.
//
// Returns false when one of the precondition checks below fails; otherwise
// true, even if scanning stopped early or no rows were added.
1337  uint8_t *data, size_t size, AddressRange &func_range,
1338  UnwindPlan &unwind_plan, RegisterContextSP &reg_ctx) {
1339  Address addr_start = func_range.GetBaseAddress();
1340  if (!addr_start.IsValid())
1341  return false;
1342 
1343  // We either need a live RegisterContext, or we need the UnwindPlan to
1344  // already be in the lldb register numbering scheme.
1345  if (reg_ctx.get() == nullptr &&
1346  unwind_plan.GetRegisterKind() != eRegisterKindLLDB)
1347  return false;
1348 
1349  // Is original unwind_plan valid?
1350  // unwind_plan should have at least one row which is ABI-default (CFA
1351  // register is sp), and another row in mid-function.
1352  if (unwind_plan.GetRowCount() < 2)
1353  return false;
1354 
1355  UnwindPlan::RowSP first_row = unwind_plan.GetRowAtIndex(0);
1356  if (first_row->GetOffset() != 0)
1357  return false;
// Express the first row's CFA register in LLDB numbering so it can be
// compared against m_lldb_sp_regnum.
1358  uint32_t cfa_reg = first_row->GetCFAValue().GetRegisterNumber();
1359  if (unwind_plan.GetRegisterKind() != eRegisterKindLLDB) {
1360  cfa_reg = reg_ctx->ConvertRegisterKindToRegisterNumber(
1361  unwind_plan.GetRegisterKind(),
1362  first_row->GetCFAValue().GetRegisterNumber());
1363  }
1364  if (cfa_reg != m_lldb_sp_regnum ||
1365  first_row->GetCFAValue().GetOffset() != m_wordsize)
1366  return false;
1367 
// NOTE(review): -1 presumably selects the plan's final row (hence the
// variable name); confirm against UnwindPlan::GetRowForFunctionOffset.
1368  UnwindPlan::RowSP original_last_row = unwind_plan.GetRowForFunctionOffset(-1);
1369 
// offset: byte position within data; row_id: index of the next original row
// not yet passed; row: working copy of the unwind state in effect at offset.
1370  size_t offset = 0;
1371  int row_id = 1;
1372  bool unwind_plan_updated = false;
1373  UnwindPlan::RowSP row(new UnwindPlan::Row(*first_row));
1374 
1375  // After a mid-function epilogue we will need to re-insert the original
1376  // unwind rules so unwinds work for the remainder of the function. These
1377  // aren't common with clang/gcc on x86 but it is possible.
1378  bool reinstate_unwind_state = false;
1379 
1380  while (offset < size) {
1381  m_cur_insn = data + offset;
1382  int insn_len;
1383  if (!instruction_length(m_cur_insn, insn_len, size - offset) ||
1384  insn_len == 0 || insn_len > kMaxInstructionByteSize) {
1385  // An unrecognized/junk instruction.
1386  break;
1387  }
1388 
// From here on m_cur_insn still points at the instruction just measured,
// while offset refers to the byte after it, which is the offset given to any
// row created for this instruction.
1389  // Advance offsets.
1390  offset += insn_len;
1391 
1392  // offset is pointing beyond the bounds of the function; stop looping.
// (The continue re-tests the loop condition, which is now false.)
1393  if (offset >= size)
1394  continue;
1395 
// The previous instruction ended an epilogue: append a copy of the original
// last row at this offset and make it the working row again.
1396  if (reinstate_unwind_state) {
1397  UnwindPlan::RowSP new_row(new UnwindPlan::Row());
1398  *new_row = *original_last_row;
1399  new_row->SetOffset(offset);
1400  unwind_plan.AppendRow(new_row);
1401  row = std::make_shared<UnwindPlan::Row>();
1402  *row = *new_row;
1403  reinstate_unwind_state = false;
1404  unwind_plan_updated = true;
1405  continue;
1406  }
1407 
1408  // If we already have one row for this instruction, we can continue.
1409  while (row_id < unwind_plan.GetRowCount() &&
1410  unwind_plan.GetRowAtIndex(row_id)->GetOffset() <= offset) {
1411  row_id++;
1412  }
1413  UnwindPlan::RowSP original_row = unwind_plan.GetRowAtIndex(row_id - 1);
1414  if (original_row->GetOffset() == offset) {
1415  *row = *original_row;
1416  continue;
1417  }
1418 
// NOTE(review): row_id starts at 1 and is only ever incremented in this
// function, so this branch looks unreachable as written; it also sits after
// the GetRowAtIndex(row_id - 1) call above. Confirm whether it can be removed.
1419  if (row_id == 0) {
1420  // If we are here, compiler didn't generate CFI for prologue. This won't
1421  // happen to GCC or clang. In this case, bail out directly.
1422  return false;
1423  }
1424 
1425  // Inspect the instruction to check if we need a new row for it.
1426  cfa_reg = row->GetCFAValue().GetRegisterNumber();
1427  if (unwind_plan.GetRegisterKind() != eRegisterKindLLDB) {
1428  cfa_reg = reg_ctx->ConvertRegisterKindToRegisterNumber(
1429  unwind_plan.GetRegisterKind(),
1430  row->GetCFAValue().GetRegisterNumber());
1431  }
// Each case below that matches moves the working row to this offset, adjusts
// its CFA offset by the amount the stack pointer moved, and inserts a copy of
// the row into the plan.
1432  if (cfa_reg == m_lldb_sp_regnum) {
1433  // CFA register is sp.
1434 
1435  // call next instruction
1436  // call 0
1437  // => pop %ebx
1438  if (call_next_insn_pattern_p()) {
1439  row->SetOffset(offset);
1440  row->GetCFAValue().IncOffset(m_wordsize);
1441 
1442  UnwindPlan::RowSP new_row(new UnwindPlan::Row(*row));
1443  unwind_plan.InsertRow(new_row);
1444  unwind_plan_updated = true;
1445  continue;
1446  }
1447 
1448  // push/pop register
1449  int regno;
1450  if (push_reg_p(regno)) {
1451  row->SetOffset(offset);
1452  row->GetCFAValue().IncOffset(m_wordsize);
1453 
1454  UnwindPlan::RowSP new_row(new UnwindPlan::Row(*row));
1455  unwind_plan.InsertRow(new_row);
1456  unwind_plan_updated = true;
1457  continue;
1458  }
1459  if (pop_reg_p(regno)) {
1460  // Technically, this might be a nonvolatile register recover in
1461  // epilogue. We should reset RegisterInfo for the register. But in
1462  // practice, previous rule for the register is still valid... So we
1463  // ignore this case.
1464 
1465  row->SetOffset(offset);
1466  row->GetCFAValue().IncOffset(-m_wordsize);
1467 
1468  UnwindPlan::RowSP new_row(new UnwindPlan::Row(*row));
1469  unwind_plan.InsertRow(new_row);
1470  unwind_plan_updated = true;
1471  continue;
1472  }
1473 
1474  if (pop_misc_reg_p()) {
1475  row->SetOffset(offset);
1476  row->GetCFAValue().IncOffset(-m_wordsize);
1477 
1478  UnwindPlan::RowSP new_row(new UnwindPlan::Row(*row));
1479  unwind_plan.InsertRow(new_row);
1480  unwind_plan_updated = true;
1481  continue;
1482  }
1483 
1484  // push imm
1485  if (push_imm_pattern_p()) {
1486  row->SetOffset(offset);
1487  row->GetCFAValue().IncOffset(m_wordsize);
1488  UnwindPlan::RowSP new_row(new UnwindPlan::Row(*row));
1489  unwind_plan.InsertRow(new_row);
1490  unwind_plan_updated = true;
1491  continue;
1492  }
1493 
// NOTE(review): the line that opens this block (listing line 1495) is not
// present in this listing; the closing brace at 1502 belongs to it.
1494  // push extended
1496  row->SetOffset(offset);
1497  row->GetCFAValue().IncOffset(m_wordsize);
1498  UnwindPlan::RowSP new_row(new UnwindPlan::Row(*row));
1499  unwind_plan.InsertRow(new_row);
1500  unwind_plan_updated = true;
1501  continue;
1502  }
1503 
1504  // add/sub %rsp/%esp
1505  int amount;
1506  if (add_rsp_pattern_p(amount)) {
1507  row->SetOffset(offset);
1508  row->GetCFAValue().IncOffset(-amount);
1509 
1510  UnwindPlan::RowSP new_row(new UnwindPlan::Row(*row));
1511  unwind_plan.InsertRow(new_row);
1512  unwind_plan_updated = true;
1513  continue;
1514  }
1515  if (sub_rsp_pattern_p(amount)) {
1516  row->SetOffset(offset);
1517  row->GetCFAValue().IncOffset(amount);
1518 
1519  UnwindPlan::RowSP new_row(new UnwindPlan::Row(*row));
1520  unwind_plan.InsertRow(new_row);
1521  unwind_plan_updated = true;
1522  continue;
1523  }
1524 
1525  // lea %rsp, [%rsp + $offset]
1526  if (lea_rsp_pattern_p(amount)) {
1527  row->SetOffset(offset);
1528  row->GetCFAValue().IncOffset(-amount);
1529 
1530  UnwindPlan::RowSP new_row(new UnwindPlan::Row(*row));
1531  unwind_plan.InsertRow(new_row);
1532  unwind_plan_updated = true;
1533  continue;
1534  }
1535 
// A ret while the CFA is sp-based: no row is added here, but the original
// last row is re-appended at the next instruction boundary.
1536  if (ret_pattern_p()) {
1537  reinstate_unwind_state = true;
1538  continue;
1539  }
1540  } else if (cfa_reg == m_lldb_fp_regnum) {
1541  // CFA register is fp.
1542 
1543  // The only case we care about is epilogue:
1544  // [0x5d] pop %rbp/%ebp
1545  // => [0xc3] ret
1546  if (pop_rbp_pattern_p() || leave_pattern_p()) {
// Advance one byte so that ret_pattern_p() presumably examines the following
// instruction. NOTE(review): this assumes the matched pop/leave encoding is
// exactly one byte long; confirm against the pattern helpers.
1547  m_cur_insn++;
1548  if (ret_pattern_p()) {
1549  row->SetOffset(offset);
1550  row->GetCFAValue().SetIsRegisterPlusOffset(
1551  first_row->GetCFAValue().GetRegisterNumber(), m_wordsize);
1552 
1553  UnwindPlan::RowSP new_row(new UnwindPlan::Row(*row));
1554  unwind_plan.InsertRow(new_row);
1555  unwind_plan_updated = true;
1556  reinstate_unwind_state = true;
1557  continue;
1558  }
1559  }
1560  } else {
1561  // CFA register is not sp or fp.
1562 
1563  // This must be hand-written assembly.
1564  // Just trust eh_frame and assume we have finished.
1565  break;
1566  }
1567  }
1568 
1569  unwind_plan.SetPlanValidAddressRange(func_range);
// Only relabel the plan when at least one row was actually added.
1570  if (unwind_plan_updated) {
1571  std::string unwind_plan_source(unwind_plan.GetSourceName().AsCString());
1572  unwind_plan_source += " plus augmentation from assembly parsing";
1573  unwind_plan.SetSourceName(unwind_plan_source.c_str());
1574  unwind_plan.SetSourcedFromCompiler(eLazyBoolNo);
1576  }
1577  return true;
1578 }
1579 
// FindFirstNonPrologueInstruction: scan forward from the start of data and
// report, through offset, how many leading bytes belong to instructions
// accepted by the prologue pattern checks in the loop below.
//
// data, size - instruction bytes to scan.
// offset     - out: reset to 0, then advanced past each accepted instruction.
//
// Returns true once scanning stops (end of buffer, an instruction whose
// length cannot be determined, or the first instruction matching none of the
// patterns). NOTE(review): the early "return false" is guarded by a condition
// (listing line 1584) that is not present in this listing.
1581  uint8_t *data, size_t size, size_t &offset) {
1582  offset = 0;
1583 
1585  return false;
1586 
1587  while (offset < size) {
// regno and scratch only receive out-values from the pattern helpers; they
// are not read afterwards in this function.
1588  int regno;
1589  int insn_len;
1590  int scratch;
1591 
1592  m_cur_insn = data + offset;
1593  if (!instruction_length(m_cur_insn, insn_len, size - offset)
1594  || insn_len > kMaxInstructionByteSize
1595  || insn_len == 0) {
1596  // An error parsing the instruction, i.e. probably data/garbage - stop
1597  // scanning
1598  break;
1599  }
1600 
// NOTE(review): the first line of this condition (listing line 1601) is not
// present in this listing. Of the visible alternatives, the lea-to-sp pattern
// is only accepted when it is the very first instruction (offset == 0).
1602  sub_rsp_pattern_p(scratch) || push_reg_p(regno) ||
1603  mov_reg_to_local_stack_frame_p(regno, scratch) ||
1604  (lea_rsp_pattern_p(scratch) && offset == 0)) {
1605  offset += insn_len;
1606  continue;
1607  }
1608  //
1609  // Unknown non-prologue instruction - stop scanning
1610  break;
1611  }
1612 
1613  return true;
1614 }
bool non_local_branch_p(const lldb::addr_t current_func_text_offset, const lldb_private::AddressRange &func_range, const int instruction_length)
bool GetNonCallSiteUnwindPlanFromAssembly(uint8_t *data, size_t size, lldb_private::AddressRange &func_range, lldb_private::UnwindPlan &unwind_plan)
Create an UnwindPlan for a "non-call site" stack frame situation.
lldb::RegisterKind GetRegisterKind() const
Definition: UnwindPlan.h:418
One of the two initialize methods that can be called on this object; they must be called before any o...
void SetSourceName(const char *)
Definition: UnwindPlan.cpp:546
A class that represents a running process on the host machine.
lldb::addr_t GetByteSize() const
Get accessor for the byte size of this range.
Definition: AddressRange.h:222
const char * AsCString(const char *value_if_empty=nullptr) const
Get the string value as a C string.
Definition: ConstString.h:223
void InsertRow(const RowSP &row_sp, bool replace_existing=false)
Definition: UnwindPlan.cpp:365
bool pc_rel_branch_or_jump_p(const int instruction_length, int &offset)
bool FindFirstNonPrologueInstruction(uint8_t *data, size_t size, size_t &offset)
An architecture specification class.
Definition: ArchSpec.h:33
const UnwindPlan::RowSP GetRowAtIndex(uint32_t idx) const
Definition: UnwindPlan.cpp:402
bool machine_regno_to_lldb_regno(int machine_regno, uint32_t &lldb_regno)
llvm::Triple & GetTriple()
Architecture triple accessor.
Definition: ArchSpec.h:434
void SetPlanValidAddressRange(const AddressRange &range)
Definition: UnwindPlan.cpp:426
lldb's internal register numbers
std::shared_ptr< Row > RowSP
Definition: UnwindPlan.h:377
void AppendRow(const RowSP &row_sp)
Definition: UnwindPlan.cpp:357
#define REX_W_SRCREG(opcode)
#define REX_W_DSTREG(opcode)
bool instruction_length(uint8_t *insn, int &length, uint32_t buffer_remaining_bytes)
bool mov_reg_to_local_stack_frame_p(int &regno, int &rbp_offset)
void SetRegisterKind(lldb::RegisterKind kind)
Definition: UnwindPlan.h:420
bool IsValid() const
Check if the object state is valid.
Definition: Address.h:332
A section + offset based address class.
Definition: Address.h:59
bool AugmentUnwindPlanFromCallSite(uint8_t *data, size_t size, lldb_private::AddressRange &func_range, lldb_private::UnwindPlan &unwind_plan, lldb::RegisterContextSP &reg_ctx)
Take an existing UnwindPlan, probably from eh_frame which may be missing description of the epilogue ...
x86AssemblyInspectionEngine(const lldb_private::ArchSpec &arch)
default ctor
void SetUnwindPlanValidAtAllInstructions(lldb_private::LazyBool valid_at_all_insn)
Definition: UnwindPlan.h:473
lldb_private::ConstString GetSourceName() const
Definition: UnwindPlan.cpp:550
uint64_t addr_t
Definition: lldb-types.h:83
void Initialize(lldb::RegisterContextSP &reg_ctx)
One of the two initialize methods that can be called on this object; they must be called before any o...
Definition: SBAddress.h:15
Address & GetBaseAddress()
Get accessor for the base address of the range.
Definition: AddressRange.h:210
uint32_t GetMaximumOpcodeByteSize() const
Definition: ArchSpec.cpp:961
UnwindPlan::RowSP GetRowForFunctionOffset(int offset) const
Definition: UnwindPlan.cpp:380
A section + offset based address range class.
Definition: AddressRange.h:25
#define REX_W_PREFIX_P(opcode)
void SetSourcedFromCompiler(lldb_private::LazyBool from_compiler)
Definition: UnwindPlan.h:461
bool local_branch_p(const lldb::addr_t current_func_text_offset, const lldb_private::AddressRange &func_range, const int instruction_length, lldb::addr_t &target_insn_offset)
llvm::Triple::ArchType GetMachine() const
Returns a machine family for the current architecture.
Definition: ArchSpec.cpp:714
#define LLDB_INVALID_REGNUM
Definition: lldb-defines.h:90
void SetUnwindPlanForSignalTrap(lldb_private::LazyBool is_for_signal_trap)
Definition: UnwindPlan.h:485