LLDB  mainline
ObjectFileMachO.cpp
Go to the documentation of this file.
1 //===-- ObjectFileMachO.cpp -----------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "llvm/ADT/StringRef.h"
10 
15 #include "lldb/Core/Debugger.h"
16 #include "lldb/Core/FileSpecList.h"
17 #include "lldb/Core/Module.h"
18 #include "lldb/Core/ModuleSpec.h"
20 #include "lldb/Core/Progress.h"
21 #include "lldb/Core/Section.h"
22 #include "lldb/Core/StreamFile.h"
23 #include "lldb/Host/Host.h"
26 #include "lldb/Symbol/ObjectFile.h"
29 #include "lldb/Target/Platform.h"
30 #include "lldb/Target/Process.h"
32 #include "lldb/Target/Target.h"
33 #include "lldb/Target/Thread.h"
34 #include "lldb/Target/ThreadList.h"
35 #include "lldb/Utility/ArchSpec.h"
37 #include "lldb/Utility/FileSpec.h"
38 #include "lldb/Utility/Log.h"
39 #include "lldb/Utility/RangeMap.h"
41 #include "lldb/Utility/Status.h"
43 #include "lldb/Utility/Timer.h"
44 #include "lldb/Utility/UUID.h"
45 
46 #include "lldb/Host/SafeMachO.h"
47 
48 #include "llvm/ADT/DenseSet.h"
49 #include "llvm/Support/FormatVariadic.h"
50 #include "llvm/Support/MemoryBuffer.h"
51 
52 #include "ObjectFileMachO.h"
53 
54 #if defined(__APPLE__)
55 #include <TargetConditionals.h>
56 // GetLLDBSharedCacheUUID() needs to call dlsym()
57 #include <dlfcn.h>
58 #endif
59 
60 #ifndef __APPLE__
62 #else
63 #include <uuid/uuid.h>
64 #endif
65 
66 #include <bitset>
67 #include <memory>
68 
69 // Unfortunately the signpost header pulls in the system MachO header, too.
70 #ifdef CPU_TYPE_ARM
71 #undef CPU_TYPE_ARM
72 #endif
73 #ifdef CPU_TYPE_ARM64
74 #undef CPU_TYPE_ARM64
75 #endif
76 #ifdef CPU_TYPE_ARM64_32
77 #undef CPU_TYPE_ARM64_32
78 #endif
79 #ifdef CPU_TYPE_I386
80 #undef CPU_TYPE_I386
81 #endif
82 #ifdef CPU_TYPE_X86_64
83 #undef CPU_TYPE_X86_64
84 #endif
85 #ifdef MH_DYLINKER
86 #undef MH_DYLINKER
87 #endif
88 #ifdef MH_OBJECT
89 #undef MH_OBJECT
90 #endif
91 #ifdef LC_VERSION_MIN_MACOSX
92 #undef LC_VERSION_MIN_MACOSX
93 #endif
94 #ifdef LC_VERSION_MIN_IPHONEOS
95 #undef LC_VERSION_MIN_IPHONEOS
96 #endif
97 #ifdef LC_VERSION_MIN_TVOS
98 #undef LC_VERSION_MIN_TVOS
99 #endif
100 #ifdef LC_VERSION_MIN_WATCHOS
101 #undef LC_VERSION_MIN_WATCHOS
102 #endif
103 #ifdef LC_BUILD_VERSION
104 #undef LC_BUILD_VERSION
105 #endif
106 #ifdef PLATFORM_MACOS
107 #undef PLATFORM_MACOS
108 #endif
109 #ifdef PLATFORM_MACCATALYST
110 #undef PLATFORM_MACCATALYST
111 #endif
112 #ifdef PLATFORM_IOS
113 #undef PLATFORM_IOS
114 #endif
115 #ifdef PLATFORM_IOSSIMULATOR
116 #undef PLATFORM_IOSSIMULATOR
117 #endif
118 #ifdef PLATFORM_TVOS
119 #undef PLATFORM_TVOS
120 #endif
121 #ifdef PLATFORM_TVOSSIMULATOR
122 #undef PLATFORM_TVOSSIMULATOR
123 #endif
124 #ifdef PLATFORM_WATCHOS
125 #undef PLATFORM_WATCHOS
126 #endif
127 #ifdef PLATFORM_WATCHOSSIMULATOR
128 #undef PLATFORM_WATCHOSSIMULATOR
129 #endif
130 
131 #define THUMB_ADDRESS_BIT_MASK 0xfffffffffffffffeull
132 using namespace lldb;
133 using namespace lldb_private;
134 using namespace llvm::MachO;
135 
137 
138 // Some structure definitions needed for parsing the dyld shared cache files
139 // found on iOS devices.
140 
  // NOTE(review): the opening `struct ... {` line of this definition and a few
  // members are absent from this listing (the embedded line numbers jump
  // 144 -> 147, 147 -> 150 and 151 -> 154), so only the visible fields are
  // described here.
142  char magic[16]; // e.g. "dyld_v0 i386", "dyld_v1 armv7", etc.
143  uint32_t mappingOffset; // file offset to first dyld_cache_mapping_info
144  uint32_t mappingCount; // number of dyld_cache_mapping_info entries
  // presumably the address the cache was built to have dyld loaded at --
  // TODO confirm against the dyld shared cache format.
147  uint64_t dyldBaseAddress;
  // presumably the file offset and byte size of the slide info -- TODO confirm.
150  uint64_t slideInfoOffset;
151  uint64_t slideInfoSize;
154  uint8_t uuid[16]; // v1 and above, also recorded in dyld_all_image_infos v13
155  // and later
156 };
157 
  // NOTE(review): the opening `struct ... {` line and the members after
  // fileOffset are absent from this listing (embedded numbering jumps
  // 161 -> 164). Presumably this is the dyld_cache_mapping_info record that
  // the header's mappingOffset/mappingCount comments refer to -- TODO confirm.
159  uint64_t address;
160  uint64_t size;
161  uint64_t fileOffset;
164 };
165 
173 };
178 };
179 
180 static void PrintRegisterValue(RegisterContext *reg_ctx, const char *name,
181  const char *alt_name, size_t reg_byte_size,
182  Stream &data) {
183  const RegisterInfo *reg_info = reg_ctx->GetRegisterInfoByName(name);
184  if (reg_info == nullptr)
185  reg_info = reg_ctx->GetRegisterInfoByName(alt_name);
186  if (reg_info) {
187  lldb_private::RegisterValue reg_value;
188  if (reg_ctx->ReadRegister(reg_info, reg_value)) {
189  if (reg_info->byte_size >= reg_byte_size)
190  data.Write(reg_value.GetBytes(), reg_byte_size);
191  else {
192  data.Write(reg_value.GetBytes(), reg_info->byte_size);
193  for (size_t i = 0, n = reg_byte_size - reg_info->byte_size; i < n; ++i)
194  data.PutChar(0);
195  }
196  return;
197  }
198  }
199  // Just write zeros if all else fails
200  for (size_t i = 0; i < reg_byte_size; ++i)
201  data.PutChar(0);
202 }
203 
205 public:
207  const DataExtractor &data)
208  : RegisterContextDarwin_x86_64(thread, 0) {
  // NOTE(review): the first line of this constructor's signature is absent
  // from this listing. The body fills in the register state once, by parsing
  // `data`.
209  SetRegisterDataFrom_LC_THREAD(data);
210  }
211 
  // Intentionally empty: the constructor loads the register values once from
  // the data it is given, and the DoRead*/DoWrite* overrides of this class do
  // not fetch or store anything, so there is no cached state to drop.
212  void InvalidateAllRegisters() override {
213  // Do nothing... registers are always valid...
214  }
215 
  // NOTE(review): the signature line of this function is absent from this
  // listing; presumably this is the body of SetRegisterDataFrom_LC_THREAD,
  // which the constructor calls with `data`.
  //
  // Walks `data` as a sequence of records, each starting with a 32-bit flavor
  // followed by a 32-bit count, and copies the recognized register sets into
  // gpr / exc.
217  lldb::offset_t offset = 0;
  // Mark every register set as unread until its record has been parsed.
218  SetError(GPRRegSet, Read, -1);
219  SetError(FPURegSet, Read, -1);
220  SetError(EXCRegSet, Read, -1);
221  bool done = false;
222 
223  while (!done) {
224  int flavor = data.GetU32(&offset);
  // A flavor of 0 ends parsing.
225  if (flavor == 0)
226  done = true;
227  else {
228  uint32_t i;
229  uint32_t count = data.GetU32(&offset);
230  switch (flavor) {
231  case GPRRegSet:
  // NOTE(review): `count` is taken straight from `data` and used, without
  // an upper bound, as the number of 64-bit values stored starting at
  // gpr.rax. Create_LC_THREAD writes GPRWordCount in this position followed
  // by 21 eight-byte registers -- confirm the unit of `count` and that this
  // loop cannot store past the end of `gpr`.
232  for (i = 0; i < count; ++i)
233  (&gpr.rax)[i] = data.GetU64(&offset);
234  SetError(GPRRegSet, Read, 0);
  // NOTE(review): parsing stops after the GPR record, so an EXC record that
  // follows it (as Create_LC_THREAD emits) is never read -- confirm intended.
235  done = true;
236 
237  break;
238  case FPURegSet:
239  // TODO: fill in FPU regs....
240  // SetError (FPURegSet, Read, -1);
241  done = true;
242 
243  break;
244  case EXCRegSet:
  // Two 32-bit values followed by one 64-bit value.
245  exc.trapno = data.GetU32(&offset);
246  exc.err = data.GetU32(&offset);
247  exc.faultvaddr = data.GetU64(&offset);
248  SetError(EXCRegSet, Read, 0);
249  done = true;
250  break;
251  case 7:
252  case 8:
253  case 9:
254  // fancy flavors that encapsulate the above flavors...
  // Only the flavor and count words are consumed here; the loop then treats
  // the next 32-bit value as another flavor.
255  break;
256 
257  default:
  // Unrecognized flavor: stop parsing.
258  done = true;
259  break;
260  }
261  }
262  }
263  }
264 
265  static bool Create_LC_THREAD(Thread *thread, Stream &data) {
266  RegisterContextSP reg_ctx_sp(thread->GetRegisterContext());
267  if (reg_ctx_sp) {
268  RegisterContext *reg_ctx = reg_ctx_sp.get();
269 
270  data.PutHex32(GPRRegSet); // Flavor
271  data.PutHex32(GPRWordCount);
272  PrintRegisterValue(reg_ctx, "rax", nullptr, 8, data);
273  PrintRegisterValue(reg_ctx, "rbx", nullptr, 8, data);
274  PrintRegisterValue(reg_ctx, "rcx", nullptr, 8, data);
275  PrintRegisterValue(reg_ctx, "rdx", nullptr, 8, data);
276  PrintRegisterValue(reg_ctx, "rdi", nullptr, 8, data);
277  PrintRegisterValue(reg_ctx, "rsi", nullptr, 8, data);
278  PrintRegisterValue(reg_ctx, "rbp", nullptr, 8, data);
279  PrintRegisterValue(reg_ctx, "rsp", nullptr, 8, data);
280  PrintRegisterValue(reg_ctx, "r8", nullptr, 8, data);
281  PrintRegisterValue(reg_ctx, "r9", nullptr, 8, data);
282  PrintRegisterValue(reg_ctx, "r10", nullptr, 8, data);
283  PrintRegisterValue(reg_ctx, "r11", nullptr, 8, data);
284  PrintRegisterValue(reg_ctx, "r12", nullptr, 8, data);
285  PrintRegisterValue(reg_ctx, "r13", nullptr, 8, data);
286  PrintRegisterValue(reg_ctx, "r14", nullptr, 8, data);
287  PrintRegisterValue(reg_ctx, "r15", nullptr, 8, data);
288  PrintRegisterValue(reg_ctx, "rip", nullptr, 8, data);
289  PrintRegisterValue(reg_ctx, "rflags", nullptr, 8, data);
290  PrintRegisterValue(reg_ctx, "cs", nullptr, 8, data);
291  PrintRegisterValue(reg_ctx, "fs", nullptr, 8, data);
292  PrintRegisterValue(reg_ctx, "gs", nullptr, 8, data);
293 
294  // // Write out the FPU registers
295  // const size_t fpu_byte_size = sizeof(FPU);
296  // size_t bytes_written = 0;
297  // data.PutHex32 (FPURegSet);
298  // data.PutHex32 (fpu_byte_size/sizeof(uint64_t));
299  // bytes_written += data.PutHex32(0); // uint32_t pad[0]
300  // bytes_written += data.PutHex32(0); // uint32_t pad[1]
301  // bytes_written += WriteRegister (reg_ctx, "fcw", "fctrl", 2,
302  // data); // uint16_t fcw; // "fctrl"
303  // bytes_written += WriteRegister (reg_ctx, "fsw" , "fstat", 2,
304  // data); // uint16_t fsw; // "fstat"
305  // bytes_written += WriteRegister (reg_ctx, "ftw" , "ftag", 1,
306  // data); // uint8_t ftw; // "ftag"
307  // bytes_written += data.PutHex8 (0); // uint8_t pad1;
308  // bytes_written += WriteRegister (reg_ctx, "fop" , NULL, 2,
309  // data); // uint16_t fop; // "fop"
310  // bytes_written += WriteRegister (reg_ctx, "fioff", "ip", 4,
311  // data); // uint32_t ip; // "fioff"
312  // bytes_written += WriteRegister (reg_ctx, "fiseg", NULL, 2,
313  // data); // uint16_t cs; // "fiseg"
314  // bytes_written += data.PutHex16 (0); // uint16_t pad2;
315  // bytes_written += WriteRegister (reg_ctx, "dp", "fooff" , 4,
316  // data); // uint32_t dp; // "fooff"
317  // bytes_written += WriteRegister (reg_ctx, "foseg", NULL, 2,
318  // data); // uint16_t ds; // "foseg"
319  // bytes_written += data.PutHex16 (0); // uint16_t pad3;
320  // bytes_written += WriteRegister (reg_ctx, "mxcsr", NULL, 4,
321  // data); // uint32_t mxcsr;
322  // bytes_written += WriteRegister (reg_ctx, "mxcsrmask", NULL,
323  // 4, data);// uint32_t mxcsrmask;
324  // bytes_written += WriteRegister (reg_ctx, "stmm0", NULL,
325  // sizeof(MMSReg), data);
326  // bytes_written += WriteRegister (reg_ctx, "stmm1", NULL,
327  // sizeof(MMSReg), data);
328  // bytes_written += WriteRegister (reg_ctx, "stmm2", NULL,
329  // sizeof(MMSReg), data);
330  // bytes_written += WriteRegister (reg_ctx, "stmm3", NULL,
331  // sizeof(MMSReg), data);
332  // bytes_written += WriteRegister (reg_ctx, "stmm4", NULL,
333  // sizeof(MMSReg), data);
334  // bytes_written += WriteRegister (reg_ctx, "stmm5", NULL,
335  // sizeof(MMSReg), data);
336  // bytes_written += WriteRegister (reg_ctx, "stmm6", NULL,
337  // sizeof(MMSReg), data);
338  // bytes_written += WriteRegister (reg_ctx, "stmm7", NULL,
339  // sizeof(MMSReg), data);
340  // bytes_written += WriteRegister (reg_ctx, "xmm0" , NULL,
341  // sizeof(XMMReg), data);
342  // bytes_written += WriteRegister (reg_ctx, "xmm1" , NULL,
343  // sizeof(XMMReg), data);
344  // bytes_written += WriteRegister (reg_ctx, "xmm2" , NULL,
345  // sizeof(XMMReg), data);
346  // bytes_written += WriteRegister (reg_ctx, "xmm3" , NULL,
347  // sizeof(XMMReg), data);
348  // bytes_written += WriteRegister (reg_ctx, "xmm4" , NULL,
349  // sizeof(XMMReg), data);
350  // bytes_written += WriteRegister (reg_ctx, "xmm5" , NULL,
351  // sizeof(XMMReg), data);
352  // bytes_written += WriteRegister (reg_ctx, "xmm6" , NULL,
353  // sizeof(XMMReg), data);
354  // bytes_written += WriteRegister (reg_ctx, "xmm7" , NULL,
355  // sizeof(XMMReg), data);
356  // bytes_written += WriteRegister (reg_ctx, "xmm8" , NULL,
357  // sizeof(XMMReg), data);
358  // bytes_written += WriteRegister (reg_ctx, "xmm9" , NULL,
359  // sizeof(XMMReg), data);
360  // bytes_written += WriteRegister (reg_ctx, "xmm10", NULL,
361  // sizeof(XMMReg), data);
362  // bytes_written += WriteRegister (reg_ctx, "xmm11", NULL,
363  // sizeof(XMMReg), data);
364  // bytes_written += WriteRegister (reg_ctx, "xmm12", NULL,
365  // sizeof(XMMReg), data);
366  // bytes_written += WriteRegister (reg_ctx, "xmm13", NULL,
367  // sizeof(XMMReg), data);
368  // bytes_written += WriteRegister (reg_ctx, "xmm14", NULL,
369  // sizeof(XMMReg), data);
370  // bytes_written += WriteRegister (reg_ctx, "xmm15", NULL,
371  // sizeof(XMMReg), data);
372  //
373  // // Fill rest with zeros
374  // for (size_t i=0, n = fpu_byte_size - bytes_written; i<n; ++
375  // i)
376  // data.PutChar(0);
377 
378  // Write out the EXC registers
379  data.PutHex32(EXCRegSet);
380  data.PutHex32(EXCWordCount);
381  PrintRegisterValue(reg_ctx, "trapno", nullptr, 4, data);
382  PrintRegisterValue(reg_ctx, "err", nullptr, 4, data);
383  PrintRegisterValue(reg_ctx, "faultvaddr", nullptr, 8, data);
384  return true;
385  }
386  return false;
387  }
388 
389 protected:
  // The six Do{Read,Write}{GPR,FPU,EXC} overrides below do no work: each one
  // ignores its arguments, leaves the register set it is handed untouched and
  // returns 0. (Presumably 0 reports success to the base class -- TODO
  // confirm.)
390  int DoReadGPR(lldb::tid_t tid, int flavor, GPR &gpr) override { return 0; }
391 
392  int DoReadFPU(lldb::tid_t tid, int flavor, FPU &fpu) override { return 0; }
393 
394  int DoReadEXC(lldb::tid_t tid, int flavor, EXC &exc) override { return 0; }
395 
396  int DoWriteGPR(lldb::tid_t tid, int flavor, const GPR &gpr) override {
397  return 0;
398  }
399 
400  int DoWriteFPU(lldb::tid_t tid, int flavor, const FPU &fpu) override {
401  return 0;
402  }
403 
404  int DoWriteEXC(lldb::tid_t tid, int flavor, const EXC &exc) override {
405  return 0;
406  }
407 };
408 
410 public:
412  const DataExtractor &data)
413  : RegisterContextDarwin_i386(thread, 0) {
  // NOTE(review): the first line of this constructor's signature is absent
  // from this listing. The body fills in the register state once, by parsing
  // `data`.
414  SetRegisterDataFrom_LC_THREAD(data);
415  }
416 
  // Intentionally empty: the constructor loads the register values once from
  // the data it is given, and the DoRead*/DoWrite* overrides of this class do
  // not fetch or store anything, so there is no cached state to drop.
417  void InvalidateAllRegisters() override {
418  // Do nothing... registers are always valid...
419  }
420 
  // NOTE(review): the signature line of this function is absent from this
  // listing; presumably this is the body of SetRegisterDataFrom_LC_THREAD,
  // which the constructor calls with `data`.
  //
  // Walks `data` as a sequence of records, each starting with a 32-bit flavor
  // followed by a 32-bit count, and copies the recognized register sets into
  // gpr / exc.
422  lldb::offset_t offset = 0;
  // Mark every register set as unread until its record has been parsed.
423  SetError(GPRRegSet, Read, -1);
424  SetError(FPURegSet, Read, -1);
425  SetError(EXCRegSet, Read, -1);
426  bool done = false;
427 
428  while (!done) {
429  int flavor = data.GetU32(&offset);
  // A flavor of 0 ends parsing.
430  if (flavor == 0)
431  done = true;
432  else {
433  uint32_t i;
434  uint32_t count = data.GetU32(&offset);
435  switch (flavor) {
436  case GPRRegSet:
  // NOTE(review): `count` is taken straight from `data` and bounds the
  // number of 32-bit stores starting at gpr.eax without being checked
  // against the size of `gpr` -- confirm it cannot store past the end.
437  for (i = 0; i < count; ++i)
438  (&gpr.eax)[i] = data.GetU32(&offset);
439  SetError(GPRRegSet, Read, 0);
  // NOTE(review): parsing stops after the GPR record, so an EXC record that
  // follows it (as Create_LC_THREAD emits) is never read -- confirm intended.
440  done = true;
441 
442  break;
443  case FPURegSet:
444  // TODO: fill in FPU regs....
445  // SetError (FPURegSet, Read, -1);
446  done = true;
447 
448  break;
449  case EXCRegSet:
  // Three consecutive 32-bit values.
450  exc.trapno = data.GetU32(&offset);
451  exc.err = data.GetU32(&offset);
452  exc.faultvaddr = data.GetU32(&offset);
453  SetError(EXCRegSet, Read, 0);
454  done = true;
455  break;
456  case 7:
457  case 8:
458  case 9:
459  // fancy flavors that encapsulate the above flavors...
  // Only the flavor and count words are consumed here; the loop then treats
  // the next 32-bit value as another flavor.
460  break;
461 
462  default:
  // Unrecognized flavor: stop parsing.
463  done = true;
464  break;
465  }
466  }
467  }
468  }
469 
470  static bool Create_LC_THREAD(Thread *thread, Stream &data) {
471  RegisterContextSP reg_ctx_sp(thread->GetRegisterContext());
472  if (reg_ctx_sp) {
473  RegisterContext *reg_ctx = reg_ctx_sp.get();
474 
475  data.PutHex32(GPRRegSet); // Flavor
476  data.PutHex32(GPRWordCount);
477  PrintRegisterValue(reg_ctx, "eax", nullptr, 4, data);
478  PrintRegisterValue(reg_ctx, "ebx", nullptr, 4, data);
479  PrintRegisterValue(reg_ctx, "ecx", nullptr, 4, data);
480  PrintRegisterValue(reg_ctx, "edx", nullptr, 4, data);
481  PrintRegisterValue(reg_ctx, "edi", nullptr, 4, data);
482  PrintRegisterValue(reg_ctx, "esi", nullptr, 4, data);
483  PrintRegisterValue(reg_ctx, "ebp", nullptr, 4, data);
484  PrintRegisterValue(reg_ctx, "esp", nullptr, 4, data);
485  PrintRegisterValue(reg_ctx, "ss", nullptr, 4, data);
486  PrintRegisterValue(reg_ctx, "eflags", nullptr, 4, data);
487  PrintRegisterValue(reg_ctx, "eip", nullptr, 4, data);
488  PrintRegisterValue(reg_ctx, "cs", nullptr, 4, data);
489  PrintRegisterValue(reg_ctx, "ds", nullptr, 4, data);
490  PrintRegisterValue(reg_ctx, "es", nullptr, 4, data);
491  PrintRegisterValue(reg_ctx, "fs", nullptr, 4, data);
492  PrintRegisterValue(reg_ctx, "gs", nullptr, 4, data);
493 
494  // Write out the EXC registers
495  data.PutHex32(EXCRegSet);
496  data.PutHex32(EXCWordCount);
497  PrintRegisterValue(reg_ctx, "trapno", nullptr, 4, data);
498  PrintRegisterValue(reg_ctx, "err", nullptr, 4, data);
499  PrintRegisterValue(reg_ctx, "faultvaddr", nullptr, 4, data);
500  return true;
501  }
502  return false;
503  }
504 
505 protected:
  // The six Do{Read,Write}{GPR,FPU,EXC} overrides below do no work: each one
  // ignores its arguments, leaves the register set it is handed untouched and
  // returns 0. (Presumably 0 reports success to the base class -- TODO
  // confirm.)
506  int DoReadGPR(lldb::tid_t tid, int flavor, GPR &gpr) override { return 0; }
507 
508  int DoReadFPU(lldb::tid_t tid, int flavor, FPU &fpu) override { return 0; }
509 
510  int DoReadEXC(lldb::tid_t tid, int flavor, EXC &exc) override { return 0; }
511 
512  int DoWriteGPR(lldb::tid_t tid, int flavor, const GPR &gpr) override {
513  return 0;
514  }
515 
516  int DoWriteFPU(lldb::tid_t tid, int flavor, const FPU &fpu) override {
517  return 0;
518  }
519 
520  int DoWriteEXC(lldb::tid_t tid, int flavor, const EXC &exc) override {
521  return 0;
522  }
523 };
524 
526 public:
528  const DataExtractor &data)
529  : RegisterContextDarwin_arm(thread, 0) {
  // NOTE(review): the first line of this constructor's signature is absent
  // from this listing. The body fills in the register state once, by parsing
  // `data`.
530  SetRegisterDataFrom_LC_THREAD(data);
531  }
532 
  // Intentionally empty: the constructor loads the register values once from
  // the data it is given, and the DoRead* overrides of this class do not fetch
  // anything (they just return -1), so there is no cached state to drop.
533  void InvalidateAllRegisters() override {
534  // Do nothing... registers are always valid...
535  }
536 
  // NOTE(review): the signature line of this function is absent from this
  // listing; presumably this is the body of SetRegisterDataFrom_LC_THREAD,
  // which the constructor calls with `data`.
  //
  // Walks `data` as a sequence of records, each starting with a 32-bit flavor
  // followed by a 32-bit count of 32-bit words, and copies the recognized
  // register sets into gpr / fpu / exc.
538  lldb::offset_t offset = 0;
  // Mark every register set as unread until its record has been parsed.
539  SetError(GPRRegSet, Read, -1);
540  SetError(FPURegSet, Read, -1);
541  SetError(EXCRegSet, Read, -1);
542  bool done = false;
543 
544  while (!done) {
545  int flavor = data.GetU32(&offset);
546  uint32_t count = data.GetU32(&offset);
  // Start of the next record: `count` 32-bit words past the current offset.
  // Each handled case jumps here regardless of how much it consumed.
547  lldb::offset_t next_thread_state = offset + (count * 4);
548  switch (flavor) {
549  case GPRAltRegSet:
550  case GPRRegSet:
551  // On ARM, the CPSR register is also included in the count but it is
552  // not included in gpr.r so loop until (count-1).
  // NOTE(review): `count` is unsigned and comes straight from `data`; a
  // count of 0 makes (count - 1) wrap around, and nothing bounds the index
  // against the size of gpr.r -- confirm the input is validated elsewhere.
553  for (uint32_t i = 0; i < (count - 1); ++i) {
554  gpr.r[i] = data.GetU32(&offset);
555  }
556  // Save cpsr explicitly.
557  gpr.cpsr = data.GetU32(&offset);
558 
559  SetError(GPRRegSet, Read, 0);
560  offset = next_thread_state;
561  break;
562 
563  case FPURegSet: {
  // Copy sizeof(fpu.floats) bytes into fpu.floats, then read fpscr as the
  // 32-bit value that follows. A short read stops parsing.
564  uint8_t *fpu_reg_buf = (uint8_t *)&fpu.floats.s[0];
565  const int fpu_reg_buf_size = sizeof(fpu.floats);
566  if (data.ExtractBytes(offset, fpu_reg_buf_size, eByteOrderLittle,
567  fpu_reg_buf) == fpu_reg_buf_size) {
568  offset += fpu_reg_buf_size;
569  fpu.fpscr = data.GetU32(&offset);
570  SetError(FPURegSet, Read, 0);
571  } else {
572  done = true;
573  }
574  }
575  offset = next_thread_state;
576  break;
577 
578  case EXCRegSet:
  // Only a record of exactly three words is accepted; parsing stops after
  // an EXC record either way.
579  if (count == 3) {
580  exc.exception = data.GetU32(&offset);
581  exc.fsr = data.GetU32(&offset);
582  exc.far = data.GetU32(&offset);
583  SetError(EXCRegSet, Read, 0);
584  }
585  done = true;
586  offset = next_thread_state;
587  break;
588 
589  // Unknown register set flavor, stop trying to parse.
590  default:
591  done = true;
592  }
593  }
594  }
595 
596  static bool Create_LC_THREAD(Thread *thread, Stream &data) {
597  RegisterContextSP reg_ctx_sp(thread->GetRegisterContext());
598  if (reg_ctx_sp) {
599  RegisterContext *reg_ctx = reg_ctx_sp.get();
600 
601  data.PutHex32(GPRRegSet); // Flavor
602  data.PutHex32(GPRWordCount);
603  PrintRegisterValue(reg_ctx, "r0", nullptr, 4, data);
604  PrintRegisterValue(reg_ctx, "r1", nullptr, 4, data);
605  PrintRegisterValue(reg_ctx, "r2", nullptr, 4, data);
606  PrintRegisterValue(reg_ctx, "r3", nullptr, 4, data);
607  PrintRegisterValue(reg_ctx, "r4", nullptr, 4, data);
608  PrintRegisterValue(reg_ctx, "r5", nullptr, 4, data);
609  PrintRegisterValue(reg_ctx, "r6", nullptr, 4, data);
610  PrintRegisterValue(reg_ctx, "r7", nullptr, 4, data);
611  PrintRegisterValue(reg_ctx, "r8", nullptr, 4, data);
612  PrintRegisterValue(reg_ctx, "r9", nullptr, 4, data);
613  PrintRegisterValue(reg_ctx, "r10", nullptr, 4, data);
614  PrintRegisterValue(reg_ctx, "r11", nullptr, 4, data);
615  PrintRegisterValue(reg_ctx, "r12", nullptr, 4, data);
616  PrintRegisterValue(reg_ctx, "sp", nullptr, 4, data);
617  PrintRegisterValue(reg_ctx, "lr", nullptr, 4, data);
618  PrintRegisterValue(reg_ctx, "pc", nullptr, 4, data);
619  PrintRegisterValue(reg_ctx, "cpsr", nullptr, 4, data);
620 
621  // Write out the EXC registers
622  // data.PutHex32 (EXCRegSet);
623  // data.PutHex32 (EXCWordCount);
624  // WriteRegister (reg_ctx, "exception", NULL, 4, data);
625  // WriteRegister (reg_ctx, "fsr", NULL, 4, data);
626  // WriteRegister (reg_ctx, "far", NULL, 4, data);
627  return true;
628  }
629  return false;
630  }
631 
632 protected:
  // None of the overrides below does any work; each ignores its arguments and
  // returns a constant: the four DoRead* overrides and DoWriteDBG return -1,
  // while DoWriteGPR / DoWriteFPU / DoWriteEXC return 0. (Presumably -1
  // reports failure and 0 success to the base class -- TODO confirm.)
633  int DoReadGPR(lldb::tid_t tid, int flavor, GPR &gpr) override { return -1; }
634 
635  int DoReadFPU(lldb::tid_t tid, int flavor, FPU &fpu) override { return -1; }
636 
637  int DoReadEXC(lldb::tid_t tid, int flavor, EXC &exc) override { return -1; }
638 
639  int DoReadDBG(lldb::tid_t tid, int flavor, DBG &dbg) override { return -1; }
640 
641  int DoWriteGPR(lldb::tid_t tid, int flavor, const GPR &gpr) override {
642  return 0;
643  }
644 
645  int DoWriteFPU(lldb::tid_t tid, int flavor, const FPU &fpu) override {
646  return 0;
647  }
648 
649  int DoWriteEXC(lldb::tid_t tid, int flavor, const EXC &exc) override {
650  return 0;
651  }
652 
653  int DoWriteDBG(lldb::tid_t tid, int flavor, const DBG &dbg) override {
654  return -1;
655  }
656 };
657 
659 public:
661  const DataExtractor &data)
662  : RegisterContextDarwin_arm64(thread, 0) {
  // NOTE(review): the first line of this constructor's signature is absent
  // from this listing. The body fills in the register state once, by parsing
  // `data`.
663  SetRegisterDataFrom_LC_THREAD(data);
664  }
665 
  // Intentionally empty: the constructor loads the register values once from
  // the data it is given, and the DoRead* overrides of this class do not fetch
  // anything (they just return -1), so there is no cached state to drop.
666  void InvalidateAllRegisters() override {
667  // Do nothing... registers are always valid...
668  }
669 
  // NOTE(review): the signature line of this function is absent from this
  // listing; presumably this is the body of SetRegisterDataFrom_LC_THREAD,
  // which the constructor calls with `data`.
  //
  // Walks `data` as a sequence of records, each starting with a 32-bit flavor
  // followed by a 32-bit count of 32-bit words, and copies the recognized
  // register sets into gpr / fpu / exc.
671  lldb::offset_t offset = 0;
  // Mark every register set as unread until its record has been parsed.
672  SetError(GPRRegSet, Read, -1);
673  SetError(FPURegSet, Read, -1);
674  SetError(EXCRegSet, Read, -1);
675  bool done = false;
676  while (!done) {
677  int flavor = data.GetU32(&offset);
678  uint32_t count = data.GetU32(&offset);
  // Start of the next record: `count` 32-bit words past the current offset.
  // Each handled case jumps here regardless of how much it consumed.
679  lldb::offset_t next_thread_state = offset + (count * 4);
680  switch (flavor) {
681  case GPRRegSet:
682  // x0-x28 + fp + lr + sp + pc (== 33 64-bit registers) plus cpsr (1
683  // 32-bit register)
  // A record shorter than that is skipped and GPRRegSet stays marked unread.
684  if (count >= (33 * 2) + 1) {
685  for (uint32_t i = 0; i < 29; ++i)
686  gpr.x[i] = data.GetU64(&offset);
687  gpr.fp = data.GetU64(&offset);
688  gpr.lr = data.GetU64(&offset);
689  gpr.sp = data.GetU64(&offset);
690  gpr.pc = data.GetU64(&offset);
691  gpr.cpsr = data.GetU32(&offset);
692  SetError(GPRRegSet, Read, 0);
693  }
694  offset = next_thread_state;
695  break;
696  case FPURegSet: {
  // The record must be exactly sizeof(fpu) bytes; it is copied over the
  // whole fpu object starting at fpu.v[0]. A size mismatch or short read
  // stops parsing.
697  uint8_t *fpu_reg_buf = (uint8_t *)&fpu.v[0];
698  const int fpu_reg_buf_size = sizeof(fpu);
699  if (fpu_reg_buf_size == count * sizeof(uint32_t) &&
700  data.ExtractBytes(offset, fpu_reg_buf_size, eByteOrderLittle,
701  fpu_reg_buf) == fpu_reg_buf_size) {
702  SetError(FPURegSet, Read, 0);
703  } else {
704  done = true;
705  }
706  }
707  offset = next_thread_state;
708  break;
709  case EXCRegSet:
  // Only a record of exactly four words is accepted: one 64-bit value
  // followed by two 32-bit values.
710  if (count == 4) {
711  exc.far = data.GetU64(&offset);
712  exc.esr = data.GetU32(&offset);
713  exc.exception = data.GetU32(&offset);
714  SetError(EXCRegSet, Read, 0);
715  }
716  offset = next_thread_state;
717  break;
718  default:
  // Unrecognized flavor: stop parsing.
719  done = true;
720  break;
721  }
722  }
723  }
724 
725  static bool Create_LC_THREAD(Thread *thread, Stream &data) {
726  RegisterContextSP reg_ctx_sp(thread->GetRegisterContext());
727  if (reg_ctx_sp) {
728  RegisterContext *reg_ctx = reg_ctx_sp.get();
729 
730  data.PutHex32(GPRRegSet); // Flavor
731  data.PutHex32(GPRWordCount);
732  PrintRegisterValue(reg_ctx, "x0", nullptr, 8, data);
733  PrintRegisterValue(reg_ctx, "x1", nullptr, 8, data);
734  PrintRegisterValue(reg_ctx, "x2", nullptr, 8, data);
735  PrintRegisterValue(reg_ctx, "x3", nullptr, 8, data);
736  PrintRegisterValue(reg_ctx, "x4", nullptr, 8, data);
737  PrintRegisterValue(reg_ctx, "x5", nullptr, 8, data);
738  PrintRegisterValue(reg_ctx, "x6", nullptr, 8, data);
739  PrintRegisterValue(reg_ctx, "x7", nullptr, 8, data);
740  PrintRegisterValue(reg_ctx, "x8", nullptr, 8, data);
741  PrintRegisterValue(reg_ctx, "x9", nullptr, 8, data);
742  PrintRegisterValue(reg_ctx, "x10", nullptr, 8, data);
743  PrintRegisterValue(reg_ctx, "x11", nullptr, 8, data);
744  PrintRegisterValue(reg_ctx, "x12", nullptr, 8, data);
745  PrintRegisterValue(reg_ctx, "x13", nullptr, 8, data);
746  PrintRegisterValue(reg_ctx, "x14", nullptr, 8, data);
747  PrintRegisterValue(reg_ctx, "x15", nullptr, 8, data);
748  PrintRegisterValue(reg_ctx, "x16", nullptr, 8, data);
749  PrintRegisterValue(reg_ctx, "x17", nullptr, 8, data);
750  PrintRegisterValue(reg_ctx, "x18", nullptr, 8, data);
751  PrintRegisterValue(reg_ctx, "x19", nullptr, 8, data);
752  PrintRegisterValue(reg_ctx, "x20", nullptr, 8, data);
753  PrintRegisterValue(reg_ctx, "x21", nullptr, 8, data);
754  PrintRegisterValue(reg_ctx, "x22", nullptr, 8, data);
755  PrintRegisterValue(reg_ctx, "x23", nullptr, 8, data);
756  PrintRegisterValue(reg_ctx, "x24", nullptr, 8, data);
757  PrintRegisterValue(reg_ctx, "x25", nullptr, 8, data);
758  PrintRegisterValue(reg_ctx, "x26", nullptr, 8, data);
759  PrintRegisterValue(reg_ctx, "x27", nullptr, 8, data);
760  PrintRegisterValue(reg_ctx, "x28", nullptr, 8, data);
761  PrintRegisterValue(reg_ctx, "fp", nullptr, 8, data);
762  PrintRegisterValue(reg_ctx, "lr", nullptr, 8, data);
763  PrintRegisterValue(reg_ctx, "sp", nullptr, 8, data);
764  PrintRegisterValue(reg_ctx, "pc", nullptr, 8, data);
765  PrintRegisterValue(reg_ctx, "cpsr", nullptr, 4, data);
766  data.PutHex32(0); // uint32_t pad at the end
767 
768  // Write out the EXC registers
769  data.PutHex32(EXCRegSet);
770  data.PutHex32(EXCWordCount);
771  PrintRegisterValue(reg_ctx, "far", NULL, 8, data);
772  PrintRegisterValue(reg_ctx, "esr", NULL, 4, data);
773  PrintRegisterValue(reg_ctx, "exception", NULL, 4, data);
774  return true;
775  }
776  return false;
777  }
778 
779 protected:
  // None of the overrides below does any work; each ignores its arguments and
  // returns a constant: the four DoRead* overrides and DoWriteDBG return -1,
  // while DoWriteGPR / DoWriteFPU / DoWriteEXC return 0. (Presumably -1
  // reports failure and 0 success to the base class -- TODO confirm.)
780  int DoReadGPR(lldb::tid_t tid, int flavor, GPR &gpr) override { return -1; }
781 
782  int DoReadFPU(lldb::tid_t tid, int flavor, FPU &fpu) override { return -1; }
783 
784  int DoReadEXC(lldb::tid_t tid, int flavor, EXC &exc) override { return -1; }
785 
786  int DoReadDBG(lldb::tid_t tid, int flavor, DBG &dbg) override { return -1; }
787 
788  int DoWriteGPR(lldb::tid_t tid, int flavor, const GPR &gpr) override {
789  return 0;
790  }
791 
792  int DoWriteFPU(lldb::tid_t tid, int flavor, const FPU &fpu) override {
793  return 0;
794  }
795 
796  int DoWriteEXC(lldb::tid_t tid, int flavor, const EXC &exc) override {
797  return 0;
798  }
799 
800  int DoWriteDBG(lldb::tid_t tid, int flavor, const DBG &dbg) override {
801  return -1;
802  }
803 };
804 
  // NOTE(review): the signature line is absent from this listing; presumably
  // this is the body of MachHeaderSizeFromMagic(magic), which other functions
  // in this file call with a header's magic value.
  //
  // Maps a Mach-O magic value to the size in bytes of the matching header
  // structure, or 0 when the magic is not one of the four recognized values.
806  switch (magic) {
  // 32-bit header, either byte order.
807  case MH_MAGIC:
808  case MH_CIGAM:
809  return sizeof(struct llvm::MachO::mach_header);
810 
  // 64-bit header, either byte order.
811  case MH_MAGIC_64:
812  case MH_CIGAM_64:
813  return sizeof(struct llvm::MachO::mach_header_64);
  // NOTE(review): this break is unreachable after the return above.
814  break;
815 
816  default:
817  break;
818  }
  // Not a recognized Mach-O magic.
819  return 0;
820 }
821 
822 #define MACHO_NLIST_ARM_SYMBOL_IS_THUMB 0x0008
823 
825 
  // NOTE(review): the signature line is absent from this listing. The body
  // registers this plugin with the PluginManager, handing over its name,
  // description and the CreateInstance, CreateMemoryInstance,
  // GetModuleSpecifications and SaveCore callbacks.
827  PluginManager::RegisterPlugin(
828  GetPluginNameStatic(), GetPluginDescriptionStatic(), CreateInstance,
829  CreateMemoryInstance, GetModuleSpecifications, SaveCore);
830 }
831 
  // NOTE(review): the signature line is absent from this listing. The body
  // unregisters the plugin, identified by its CreateInstance callback.
833  PluginManager::UnregisterPlugin(CreateInstance);
834 }
835 
836 ObjectFile *ObjectFileMachO::CreateInstance(const lldb::ModuleSP &module_sp,
837  DataBufferSP &data_sp,
838  lldb::offset_t data_offset,
839  const FileSpec *file,
840  lldb::offset_t file_offset,
841  lldb::offset_t length) {
842  if (!data_sp) {
843  data_sp = MapFileData(*file, length, file_offset);
844  if (!data_sp)
845  return nullptr;
846  data_offset = 0;
847  }
848 
849  if (!ObjectFileMachO::MagicBytesMatch(data_sp, data_offset, length))
850  return nullptr;
851 
852  // Update the data to contain the entire file if it doesn't already
853  if (data_sp->GetByteSize() < length) {
854  data_sp = MapFileData(*file, length, file_offset);
855  if (!data_sp)
856  return nullptr;
857  data_offset = 0;
858  }
859  auto objfile_up = std::make_unique<ObjectFileMachO>(
860  module_sp, data_sp, data_offset, file, file_offset, length);
861  if (!objfile_up || !objfile_up->ParseHeader())
862  return nullptr;
863 
864  return objfile_up.release();
865 }
866 
868  const lldb::ModuleSP &module_sp, DataBufferSP &data_sp,
869  const ProcessSP &process_sp, lldb::addr_t header_addr) {
  // NOTE(review): the first line of this signature is absent from this
  // listing. Builds an ObjectFileMachO from header bytes in `data_sp` that
  // belong to an image at `header_addr` in `process_sp`, returning it only
  // when the magic matches and the header parses; otherwise returns nullptr.
  //
  // NOTE(review): `data_sp` is dereferenced below without a null check --
  // confirm callers always pass a valid buffer.
870  if (ObjectFileMachO::MagicBytesMatch(data_sp, 0, data_sp->GetByteSize())) {
871  std::unique_ptr<ObjectFile> objfile_up(
872  new ObjectFileMachO(module_sp, data_sp, process_sp, header_addr));
  // Ownership passes to the caller only when the header parses; otherwise
  // the unique_ptr destroys the object on the way out.
873  if (objfile_up.get() && objfile_up->ParseHeader())
874  return objfile_up.release();
875  }
876  return nullptr;
877 }
878 
880  const lldb_private::FileSpec &file, lldb::DataBufferSP &data_sp,
881  lldb::offset_t data_offset, lldb::offset_t file_offset,
883  const size_t initial_count = specs.GetSize();
  // NOTE(review): the first and last lines of this signature are absent from
  // this listing (embedded numbering jumps 881 -> 883); `length` and `specs`
  // used below are presumably declared there.
  //
  // Appends to `specs` the module specifications found for a Mach-O image in
  // `data_sp` and returns how many were added.
884 
  // The magic is checked at offset 0 of the buffer rather than at
  // `data_offset`.
885  if (ObjectFileMachO::MagicBytesMatch(data_sp, 0, data_sp->GetByteSize())) {
886  DataExtractor data;
887  data.SetData(data_sp);
888  llvm::MachO::mach_header header;
889  if (ParseHeader(data, &data_offset, header)) {
890  size_t header_and_load_cmds =
891  header.sizeofcmds + MachHeaderSizeFromMagic(header.magic);
  // The buffer does not extend past the header and load commands: map that
  // many bytes from the file and restart just past the header.
892  if (header_and_load_cmds >= data_sp->GetByteSize()) {
893  data_sp = MapFileData(file, header_and_load_cmds, file_offset);
  // NOTE(review): the new buffer is handed to SetData before the null check
  // a few lines down -- confirm SetData tolerates a null buffer.
894  data.SetData(data_sp);
895  data_offset = MachHeaderSizeFromMagic(header.magic);
896  }
897  if (data_sp) {
  // Every spec produced starts from this file, object offset and size.
898  ModuleSpec base_spec;
899  base_spec.GetFileSpec() = file;
900  base_spec.SetObjectOffset(file_offset);
901  base_spec.SetObjectSize(length);
902  GetAllArchSpecs(header, data, data_offset, base_spec, specs);
903  }
904  }
905  }
  // Number of specs added by this call.
906  return specs.GetSize() - initial_count;
907 }
908 
  // NOTE(review): the signature line of each of the eight accessors below is
  // absent from this listing. Each body returns a ConstString held in a
  // function-local static, so the string is constructed on the first call and
  // reused afterwards.

  // "__TEXT"
910  static ConstString g_segment_name_TEXT("__TEXT");
911  return g_segment_name_TEXT;
912 }
913 
  // "__DATA"
915  static ConstString g_segment_name_DATA("__DATA");
916  return g_segment_name_DATA;
917 }
918 
  // "__DATA_DIRTY"
920  static ConstString g_segment_name("__DATA_DIRTY");
921  return g_segment_name;
922 }
923 
  // "__DATA_CONST"
925  static ConstString g_segment_name("__DATA_CONST");
926  return g_segment_name;
927 }
928 
  // "__OBJC"
930  static ConstString g_segment_name_OBJC("__OBJC");
931  return g_segment_name_OBJC;
932 }
933 
  // "__LINKEDIT"
935  static ConstString g_section_name_LINKEDIT("__LINKEDIT");
936  return g_section_name_LINKEDIT;
937 }
938 
  // "__DWARF"
940  static ConstString g_section_name("__DWARF");
941  return g_section_name;
942 }
943 
  // "__eh_frame"
945  static ConstString g_section_name_eh_frame("__eh_frame");
946  return g_section_name_eh_frame;
947 }
948 
949 bool ObjectFileMachO::MagicBytesMatch(DataBufferSP &data_sp,
950  lldb::addr_t data_offset,
951  lldb::addr_t data_length) {
952  DataExtractor data;
953  data.SetData(data_sp, data_offset, data_length);
954  lldb::offset_t offset = 0;
955  uint32_t magic = data.GetU32(&offset);
956  return MachHeaderSizeFromMagic(magic) != 0;
957 }
958 
// Construct an object file backed by file data: forwards the module, file,
// offsets, length and data buffer to the ObjectFile base class, starts with
// empty segment/section/thread-context bookkeeping, and zeroes the Mach-O
// header and dysymtab structures.
959 ObjectFileMachO::ObjectFileMachO(const lldb::ModuleSP &module_sp,
960  DataBufferSP &data_sp,
961  lldb::offset_t data_offset,
962  const FileSpec *file,
963  lldb::offset_t file_offset,
964  lldb::offset_t length)
965  : ObjectFile(module_sp, file, file_offset, length, data_sp, data_offset),
966  m_mach_segments(), m_mach_sections(), m_entry_point_address(),
967  m_thread_context_offsets(), m_thread_context_offsets_valid(false),
968  m_reexported_dylibs(), m_allow_assembly_emulation_unwind_plans(true) {
  // Start from an all-zero header and dysymtab; nothing in this constructor
  // fills them in.
969  ::memset(&m_header, 0, sizeof(m_header));
970  ::memset(&m_dysymtab, 0, sizeof(m_dysymtab));
971 }
972 
// Construct an object file for an image in a process's memory: forwards the
// module, process, header address and header data buffer to the ObjectFile
// base class, starts with empty segment/section/thread-context bookkeeping,
// and zeroes the Mach-O header and dysymtab structures.
973 ObjectFileMachO::ObjectFileMachO(const lldb::ModuleSP &module_sp,
974  lldb::DataBufferSP &header_data_sp,
975  const lldb::ProcessSP &process_sp,
976  lldb::addr_t header_addr)
977  : ObjectFile(module_sp, process_sp, header_addr, header_data_sp),
978  m_mach_segments(), m_mach_sections(), m_entry_point_address(),
979  m_thread_context_offsets(), m_thread_context_offsets_valid(false),
980  m_reexported_dylibs(), m_allow_assembly_emulation_unwind_plans(true) {
  // Start from an all-zero header and dysymtab; nothing in this constructor
  // fills them in.
981  ::memset(&m_header, 0, sizeof(m_header));
982  ::memset(&m_dysymtab, 0, sizeof(m_dysymtab));
983 }
984 
986  lldb::offset_t *data_offset_ptr,
987  llvm::MachO::mach_header &header) {
  // NOTE(review): several lines of this function are absent from this listing
  // (the first signature line and the byte-order setup in each case below:
  // embedded numbering jumps 987 -> 989, 994 -> 996, 1000 -> 1002,
  // 1007 -> 1010 and 1015 -> 1018). Only what is visible is described.
  //
  // Reads a Mach-O header from `data` at *data_offset_ptr into `header`,
  // advancing the offset past it. Returns true for a recognized magic; on an
  // unrecognized magic `header` is zeroed and false is returned.
989  // Leave magic in the original byte order
990  header.magic = data.GetU32(data_offset_ptr);
991  bool can_parse = false;
992  bool is_64_bit = false;
  // The magic selects the address size used by `data` (4 or 8 bytes) and
  // whether the header is the 64-bit variant.
993  switch (header.magic) {
994  case MH_MAGIC:
996  data.SetAddressByteSize(4);
997  can_parse = true;
998  break;
999 
1000  case MH_MAGIC_64:
1002  data.SetAddressByteSize(8);
1003  can_parse = true;
1004  is_64_bit = true;
1005  break;
1006 
1007  case MH_CIGAM:
1010  : eByteOrderBig);
1011  data.SetAddressByteSize(4);
1012  can_parse = true;
1013  break;
1014 
1015  case MH_CIGAM_64:
1018  : eByteOrderBig);
1019  data.SetAddressByteSize(8);
1020  is_64_bit = true;
1021  can_parse = true;
1022  break;
1023 
1024  default:
1025  break;
1026  }
1027 
1028  if (can_parse) {
  // Read six consecutive 32-bit values into the header, starting at cputype
  // (presumably cputype through flags -- TODO confirm against the
  // mach_header layout).
1029  data.GetU32(data_offset_ptr, &header.cputype, 6);
  // The 64-bit header is 4 bytes longer (presumably its trailing reserved
  // field -- TODO confirm); skip them.
1030  if (is_64_bit)
1031  *data_offset_ptr += 4;
1032  return true;
1033  } else {
1034  memset(&header, 0, sizeof(header));
1035  }
1036  return false;
1037 }
1038 
1040  ModuleSP module_sp(GetModule());
  // Parse the Mach header out of m_data into m_header, pick an architecture
  // that the owning module accepts, and make sure m_data covers the header
  // plus all load commands. Returns false when there is no module, the magic
  // is not recognized, or no candidate architecture is compatible.
1041  if (!module_sp)
1042  return false;
1043 
1044  std::lock_guard<std::recursive_mutex> guard(module_sp->GetMutex());
1045  bool can_parse = false;
1046  lldb::offset_t offset = 0;
1048  // Leave magic in the original byte order
1049  m_header.magic = m_data.GetU32(&offset);
1050  switch (m_header.magic) {
1051  case MH_MAGIC:
1054  can_parse = true;
1055  break;
1056 
1057  case MH_MAGIC_64:
1060  can_parse = true;
1061  break;
1062 
1063  case MH_CIGAM:
1066  : eByteOrderBig);
1068  can_parse = true;
1069  break;
1070 
1071  case MH_CIGAM_64:
1074  : eByteOrderBig);
1076  can_parse = true;
1077  break;
1078 
1079  default:
1080  break;
1081  }
1082 
1083  if (can_parse) {
  // Fill the six consecutive 32-bit fields that begin at m_header.cputype.
1084  m_data.GetU32(&offset, &m_header.cputype, 6);
1085 
1086  ModuleSpecList all_specs;
1087  ModuleSpec base_spec;
1089  base_spec, all_specs);
1090 
1091  for (unsigned i = 0, e = all_specs.GetSize(); i != e; ++i) {
1092  ArchSpec mach_arch =
1094 
1095  // Check if the module has a required architecture
1096  const ArchSpec &module_arch = module_sp->GetArchitecture();
1097  if (module_arch.IsValid() && !module_arch.IsCompatibleMatch(mach_arch))
1098  continue;
1099 
1100  if (SetModulesArchitecture(mach_arch)) {
  // The buffer may only hold the header so far; grow it so that it also
  // covers every load command (sizeofcmds bytes after the header).
1101  const size_t header_and_lc_size =
1102  m_header.sizeofcmds + MachHeaderSizeFromMagic(m_header.magic);
1103  if (m_data.GetByteSize() < header_and_lc_size) {
1104  DataBufferSP data_sp;
1105  ProcessSP process_sp(m_process_wp.lock());
1106  if (process_sp) {
1107  data_sp = ReadMemory(process_sp, m_memory_addr, header_and_lc_size);
1108  } else {
1109  // Read in only the header and load command data from the file on disk
1110  data_sp = MapFileData(m_file, header_and_lc_size, m_file_offset);
  // NOTE(review): data_sp is dereferenced here without a null check,
  // although it is tested for null a few lines below - confirm that
  // MapFileData can never return an empty pointer.
1111  if (data_sp->GetByteSize() != header_and_lc_size)
1112  continue;
1113  }
1114  if (data_sp)
1115  m_data.SetData(data_sp);
1116  }
1117  }
  // The first architecture that passes the compatibility check ends the
  // search, whether or not SetModulesArchitecture() succeeded.
1118  return true;
1119  }
1120  // None found.
1121  return false;
1122  } else {
1123  memset(&m_header, 0, sizeof(struct llvm::MachO::mach_header));
1124  }
1125  return false;
1126 }
1127 
1129  return m_data.GetByteOrder(); // byte order currently set on m_data
1130 }
1131 
1133  return m_header.filetype == MH_EXECUTE; // true only for MH_EXECUTE files
1134 }
1135 
1137  return m_header.filetype == MH_DYLINKER; // true only for MH_DYLINKER files
1138 }
1139 
1141  return m_header.flags & MH_DYLIB_IN_CACHE; // header flag bit test
1142 }
1143 
1145  return m_data.GetAddressByteSize(); // address size currently set on m_data
1146 }
1147 
1149  Symtab *symtab = GetSymtab();
  // Classify file_addr using the symbol that contains it. When that symbol
  // has an address with a section, the section type decides the class;
  // otherwise the symbol's own type is used. AddressClass::eUnknown is
  // returned when there is no symbol table, no containing symbol, or no
  // mapping for the type.
1150  if (!symtab)
1151  return AddressClass::eUnknown;
1152 
1153  Symbol *symbol = symtab->FindSymbolContainingFileAddress(file_addr);
1154  if (symbol) {
1155  if (symbol->ValueIsAddress()) {
1156  SectionSP section_sp(symbol->GetAddressRef().GetSection());
1157  if (section_sp) {
1158  const lldb::SectionType section_type = section_sp->GetType();
1159  switch (section_type) {
1160  case eSectionTypeInvalid:
1161  return AddressClass::eUnknown;
1162 
1163  case eSectionTypeCode:
1164  if (m_header.cputype == llvm::MachO::CPU_TYPE_ARM) {
1165  // For ARM we have a bit in the n_desc field of the symbol that
1166  // tells us ARM/Thumb which is bit 0x0008.
1167  if (symbol->GetFlags() & MACHO_NLIST_ARM_SYMBOL_IS_THUMB)
1168  return AddressClass::eCodeAlternateISA;
1169  }
1170  return AddressClass::eCode;
1171 
1172  case eSectionTypeContainer:
1173  return AddressClass::eUnknown;
1174 
1175  case eSectionTypeData:
1179  case eSectionTypeData4:
1180  case eSectionTypeData8:
1181  case eSectionTypeData16:
1183  case eSectionTypeZeroFill:
1186  case eSectionTypeGoSymtab:
1187  return AddressClass::eData;
1188 
1189  case eSectionTypeDebug:
1224  return AddressClass::eDebug;
1225 
1226  case eSectionTypeEHFrame:
1227  case eSectionTypeARMexidx:
1228  case eSectionTypeARMextab:
1230  return AddressClass::eRuntime;
1231 
1237  case eSectionTypeOther:
1238  return AddressClass::eUnknown;
1239  }
1240  }
1241  }
1242 
  // No section-based answer: fall back on the symbol type.
1243  const SymbolType symbol_type = symbol->GetType();
1244  switch (symbol_type) {
1245  case eSymbolTypeAny:
1246  return AddressClass::eUnknown;
1247  case eSymbolTypeAbsolute:
1248  return AddressClass::eUnknown;
1249 
1250  case eSymbolTypeCode:
1251  case eSymbolTypeTrampoline:
1252  case eSymbolTypeResolver:
1253  if (m_header.cputype == llvm::MachO::CPU_TYPE_ARM) {
1254  // For ARM we have a bit in the n_desc field of the symbol that tells
1255  // us ARM/Thumb which is bit 0x0008.
1256  if (symbol->GetFlags() & MACHO_NLIST_ARM_SYMBOL_IS_THUMB)
1257  return AddressClass::eCodeAlternateISA;
1258  }
1259  return AddressClass::eCode;
1260 
1261  case eSymbolTypeData:
1262  return AddressClass::eData;
1263  case eSymbolTypeRuntime:
1264  return AddressClass::eRuntime;
1265  case eSymbolTypeException:
1266  return AddressClass::eRuntime;
1267  case eSymbolTypeSourceFile:
1268  return AddressClass::eDebug;
1269  case eSymbolTypeHeaderFile:
1270  return AddressClass::eDebug;
1271  case eSymbolTypeObjectFile:
1272  return AddressClass::eDebug;
1274  return AddressClass::eDebug;
1275  case eSymbolTypeBlock:
1276  return AddressClass::eDebug;
1277  case eSymbolTypeLocal:
1278  return AddressClass::eData;
1279  case eSymbolTypeParam:
1280  return AddressClass::eData;
1281  case eSymbolTypeVariable:
1282  return AddressClass::eData;
1284  return AddressClass::eDebug;
1285  case eSymbolTypeLineEntry:
1286  return AddressClass::eDebug;
1287  case eSymbolTypeLineHeader:
1288  return AddressClass::eDebug;
1289  case eSymbolTypeScopeBegin:
1290  return AddressClass::eDebug;
1291  case eSymbolTypeScopeEnd:
1292  return AddressClass::eDebug;
1293  case eSymbolTypeAdditional:
1294  return AddressClass::eUnknown;
1295  case eSymbolTypeCompiler:
1296  return AddressClass::eDebug;
1298  return AddressClass::eDebug;
1299  case eSymbolTypeUndefined:
1300  return AddressClass::eUnknown;
1301  case eSymbolTypeObjCClass:
1302  return AddressClass::eRuntime;
1304  return AddressClass::eRuntime;
1305  case eSymbolTypeObjCIVar:
1306  return AddressClass::eRuntime;
1307  case eSymbolTypeReExported:
1308  return AddressClass::eRuntime;
1309  }
1310  }
1311  return AddressClass::eUnknown;
1312 }
1313 
1315  if (m_dysymtab.cmd == 0) {
  // m_dysymtab has not been filled in yet: walk the load commands looking
  // for LC_DYSYMTAB and cache it. A zero `cmd` doubles as the "not loaded"
  // marker, which is why a failed read clears the whole struct again.
1316  ModuleSP module_sp(GetModule());
1317  if (module_sp) {
1319  for (uint32_t i = 0; i < m_header.ncmds; ++i) {
1320  const lldb::offset_t load_cmd_offset = offset;
1321 
1322  llvm::MachO::load_command lc;
  // Every load command starts with two 32-bit words: cmd and cmdsize.
1323  if (m_data.GetU32(&offset, &lc.cmd, 2) == nullptr)
1324  break;
1325  if (lc.cmd == LC_DYSYMTAB) {
1326  m_dysymtab.cmd = lc.cmd;
1327  m_dysymtab.cmdsize = lc.cmdsize;
  // Read the rest of the struct as 32-bit words; the "- 2" accounts for
  // cmd and cmdsize, which were already consumed above.
1328  if (m_data.GetU32(&offset, &m_dysymtab.ilocalsym,
1329  (sizeof(m_dysymtab) / sizeof(uint32_t)) - 2) ==
1330  nullptr) {
1331  // Clear m_dysymtab if we were unable to read all items from the
1332  // load command
1333  ::memset(&m_dysymtab, 0, sizeof(m_dysymtab));
1334  }
1335  }
  // Advance by the command's declared size, not by what was read.
1336  offset = load_cmd_offset + lc.cmdsize;
1337  }
1338  }
1339  }
  // With an LC_DYSYMTAB present, report true when it lists at most one local
  // symbol; without one, report false.
1340  if (m_dysymtab.cmd)
1341  return m_dysymtab.nlocalsym <= 1;
1342  return false;
1343 }
1344 
1346  EncryptedFileRanges result;
  // Collect the file ranges described by LC_ENCRYPTION_INFO[_64] load
  // commands whose cryptid is non-zero. Commands with a zero cryptid are
  // skipped, so an empty result means no such range was found.
1348 
1349  llvm::MachO::encryption_info_command encryption_cmd;
1350  for (uint32_t i = 0; i < m_header.ncmds; ++i) {
1351  const lldb::offset_t load_cmd_offset = offset;
  // Read only cmd and cmdsize first; the remaining fields are read below
  // once the command type is known.
1352  if (m_data.GetU32(&offset, &encryption_cmd, 2) == nullptr)
1353  break;
1354 
1355  // LC_ENCRYPTION_INFO and LC_ENCRYPTION_INFO_64 have the same sizes for the
1356  // 3 fields we care about, so treat them the same.
1357  if (encryption_cmd.cmd == LC_ENCRYPTION_INFO ||
1358  encryption_cmd.cmd == LC_ENCRYPTION_INFO_64) {
1359  if (m_data.GetU32(&offset, &encryption_cmd.cryptoff, 3)) {
1360  if (encryption_cmd.cryptid != 0) {
1362  entry.SetRangeBase(encryption_cmd.cryptoff);
1363  entry.SetByteSize(encryption_cmd.cryptsize);
1364  result.Append(entry);
1365  }
1366  }
1367  }
  // Advance by the command's declared size, not by what was read.
1368  offset = load_cmd_offset + encryption_cmd.cmdsize;
1369  }
1370 
1371  return result;
1372 }
1373 
1375  llvm::MachO::segment_command_64 &seg_cmd, uint32_t cmd_idx) {
  // Adjust seg_cmd in place so its file offset/size stay within m_length.
  // cmd_idx is only used to identify the load command in warnings. Nothing is
  // done when the object has no length or the segment has no file contents.
1376  if (m_length == 0 || seg_cmd.filesize == 0)
1377  return;
1378 
1379  if (IsSharedCacheBinary() && !IsInMemory()) {
1380  // In shared cache images, the load commands are relative to the
1381  // shared cache file, and not the specific image we are
1382  // examining. Let's fix this up so that it looks like a normal
1383  // image.
1384  if (strncmp(seg_cmd.segname, "__TEXT", sizeof(seg_cmd.segname)) == 0)
1385  m_text_address = seg_cmd.vmaddr;
1386  if (strncmp(seg_cmd.segname, "__LINKEDIT", sizeof(seg_cmd.segname)) == 0)
1387  m_linkedit_original_offset = seg_cmd.fileoff;
1388 
  // Rebase the file offset on the __TEXT address recorded above.
1389  seg_cmd.fileoff = seg_cmd.vmaddr - m_text_address;
1390  }
1391 
1392  if (seg_cmd.fileoff > m_length) {
1393  // We have a load command that says it starts past the end of the file.
1394  // This is likely a corrupt file. We don't have any way to return an error
1395  // condition here (this method was likely invoked from something like
1396  // ObjectFile::GetSectionList()), so we just null out the section contents,
1397  // and report a warning on the module. The most common case here is core
1398  // file debugging with a truncated file.
1399  const char *lc_segment_name =
1400  seg_cmd.cmd == LC_SEGMENT_64 ? "LC_SEGMENT_64" : "LC_SEGMENT";
1401  GetModule()->ReportWarning(
1402  "load command %u %s has a fileoff (0x%" PRIx64
1403  ") that extends beyond the end of the file (0x%" PRIx64
1404  "), ignoring this section",
1405  cmd_idx, lc_segment_name, seg_cmd.fileoff, m_length);
1406 
1407  seg_cmd.fileoff = 0;
1408  seg_cmd.filesize = 0;
1409  }
1410 
  // NOTE(review): the sum below is not checked for wrap-around; a very large
  // filesize in a corrupt file could overflow - confirm whether that matters.
1411  if (seg_cmd.fileoff + seg_cmd.filesize > m_length) {
1412  // We have a load command whose contents extend past the end of the file.
1413  // This is likely a corrupt file. We don't have any way to return an error
1414  // condition here (this method was likely invoked from something like
1415  // ObjectFile::GetSectionList()), so we shrink the segment's file size to
1416  // what is actually available and report a warning on the module. The most
1417  // common case here is core file debugging with a truncated file.
1418  const char *lc_segment_name =
1419  seg_cmd.cmd == LC_SEGMENT_64 ? "LC_SEGMENT_64" : "LC_SEGMENT";
1420  GetModule()->ReportWarning(
1421  "load command %u %s has a fileoff + filesize (0x%" PRIx64
1422  ") that extends beyond the end of the file (0x%" PRIx64
1423  "), the segment will be truncated to match",
1424  cmd_idx, lc_segment_name, seg_cmd.fileoff + seg_cmd.filesize, m_length);
1425 
1426  // Truncate the length
1427  seg_cmd.filesize = m_length - seg_cmd.fileoff;
1428  }
1429 }
1430 
1431 static uint32_t
1432 GetSegmentPermissions(const llvm::MachO::segment_command_64 &seg_cmd) {
1433  uint32_t result = 0;
1434  if (seg_cmd.initprot & VM_PROT_READ)
1435  result |= ePermissionsReadable;
1436  if (seg_cmd.initprot & VM_PROT_WRITE)
1437  result |= ePermissionsWritable;
1438  if (seg_cmd.initprot & VM_PROT_EXECUTE)
1439  result |= ePermissionsExecutable;
1440  return result;
1441 }
1442 
1444  ConstString section_name) {
  // Map a Mach-O section (its flags word plus its name) to an LLDB section
  // type. Precedence: instruction attributes in `flags` win first, then
  // well-known section names, and finally the Mach-O section type stored in
  // the SECTION_TYPE bits of `flags`.
1445 
1446  if (flags & (S_ATTR_PURE_INSTRUCTIONS | S_ATTR_SOME_INSTRUCTIONS))
1447  return eSectionTypeCode;
1448 
1449  uint32_t mach_sect_type = flags & SECTION_TYPE;
1450  static ConstString g_sect_name_objc_data("__objc_data");
1451  static ConstString g_sect_name_objc_msgrefs("__objc_msgrefs");
1452  static ConstString g_sect_name_objc_selrefs("__objc_selrefs");
1453  static ConstString g_sect_name_objc_classrefs("__objc_classrefs");
1454  static ConstString g_sect_name_objc_superrefs("__objc_superrefs");
1455  static ConstString g_sect_name_objc_const("__objc_const");
1456  static ConstString g_sect_name_objc_classlist("__objc_classlist");
1457  static ConstString g_sect_name_cfstring("__cfstring");
1458 
1459  static ConstString g_sect_name_dwarf_debug_abbrev("__debug_abbrev");
1460  static ConstString g_sect_name_dwarf_debug_aranges("__debug_aranges");
1461  static ConstString g_sect_name_dwarf_debug_frame("__debug_frame");
1462  static ConstString g_sect_name_dwarf_debug_info("__debug_info");
1463  static ConstString g_sect_name_dwarf_debug_line("__debug_line");
1464  static ConstString g_sect_name_dwarf_debug_loc("__debug_loc");
1465  static ConstString g_sect_name_dwarf_debug_loclists("__debug_loclists");
1466  static ConstString g_sect_name_dwarf_debug_macinfo("__debug_macinfo");
1467  static ConstString g_sect_name_dwarf_debug_names("__debug_names");
1468  static ConstString g_sect_name_dwarf_debug_pubnames("__debug_pubnames");
1469  static ConstString g_sect_name_dwarf_debug_pubtypes("__debug_pubtypes");
1470  static ConstString g_sect_name_dwarf_debug_ranges("__debug_ranges");
1471  static ConstString g_sect_name_dwarf_debug_str("__debug_str");
1472  static ConstString g_sect_name_dwarf_debug_types("__debug_types");
1473  static ConstString g_sect_name_dwarf_apple_names("__apple_names");
1474  static ConstString g_sect_name_dwarf_apple_types("__apple_types");
  // NOTE(review): "__apple_namespac" is presumably cut short on purpose to
  // fit the fixed-size section name field - confirm before "fixing" it.
1475  static ConstString g_sect_name_dwarf_apple_namespaces("__apple_namespac");
1476  static ConstString g_sect_name_dwarf_apple_objc("__apple_objc");
1477  static ConstString g_sect_name_eh_frame("__eh_frame");
1478  static ConstString g_sect_name_compact_unwind("__unwind_info");
1479  static ConstString g_sect_name_text("__text");
1480  static ConstString g_sect_name_data("__data");
1481  static ConstString g_sect_name_go_symtab("__gosymtab");
1482 
1483  if (section_name == g_sect_name_dwarf_debug_abbrev)
1485  if (section_name == g_sect_name_dwarf_debug_aranges)
1487  if (section_name == g_sect_name_dwarf_debug_frame)
1489  if (section_name == g_sect_name_dwarf_debug_info)
1491  if (section_name == g_sect_name_dwarf_debug_line)
1493  if (section_name == g_sect_name_dwarf_debug_loc)
1495  if (section_name == g_sect_name_dwarf_debug_loclists)
1497  if (section_name == g_sect_name_dwarf_debug_macinfo)
1499  if (section_name == g_sect_name_dwarf_debug_names)
1501  if (section_name == g_sect_name_dwarf_debug_pubnames)
1503  if (section_name == g_sect_name_dwarf_debug_pubtypes)
1505  if (section_name == g_sect_name_dwarf_debug_ranges)
1507  if (section_name == g_sect_name_dwarf_debug_str)
1509  if (section_name == g_sect_name_dwarf_debug_types)
1511  if (section_name == g_sect_name_dwarf_apple_names)
1513  if (section_name == g_sect_name_dwarf_apple_types)
1515  if (section_name == g_sect_name_dwarf_apple_namespaces)
1517  if (section_name == g_sect_name_dwarf_apple_objc)
1519  if (section_name == g_sect_name_objc_selrefs)
1521  if (section_name == g_sect_name_objc_msgrefs)
1523  if (section_name == g_sect_name_eh_frame)
1524  return eSectionTypeEHFrame;
1525  if (section_name == g_sect_name_compact_unwind)
1527  if (section_name == g_sect_name_cfstring)
1529  if (section_name == g_sect_name_go_symtab)
1530  return eSectionTypeGoSymtab;
1531  if (section_name == g_sect_name_objc_data ||
1532  section_name == g_sect_name_objc_classrefs ||
1533  section_name == g_sect_name_objc_superrefs ||
1534  section_name == g_sect_name_objc_const ||
1535  section_name == g_sect_name_objc_classlist) {
1536  return eSectionTypeDataPointers;
1537  }
1538 
  // No name matched: decide from the Mach-O section type bits.
1539  switch (mach_sect_type) {
1540  // TODO: categorize sections by other flags for regular sections
1541  case S_REGULAR:
1542  if (section_name == g_sect_name_text)
1543  return eSectionTypeCode;
1544  if (section_name == g_sect_name_data)
1545  return eSectionTypeData;
1546  return eSectionTypeOther;
1547  case S_ZEROFILL:
1548  return eSectionTypeZeroFill;
1549  case S_CSTRING_LITERALS: // section with only literal C strings
1550  return eSectionTypeDataCString;
1551  case S_4BYTE_LITERALS: // section with only 4 byte literals
1552  return eSectionTypeData4;
1553  case S_8BYTE_LITERALS: // section with only 8 byte literals
1554  return eSectionTypeData8;
1555  case S_LITERAL_POINTERS: // section with only pointers to literals
1556  return eSectionTypeDataPointers;
1557  case S_NON_LAZY_SYMBOL_POINTERS: // section with only non-lazy symbol pointers
1558  return eSectionTypeDataPointers;
1559  case S_LAZY_SYMBOL_POINTERS: // section with only lazy symbol pointers
1560  return eSectionTypeDataPointers;
1561  case S_SYMBOL_STUBS: // section with only symbol stubs, byte size of stub in
1562  // the reserved2 field
1563  return eSectionTypeCode;
1564  case S_MOD_INIT_FUNC_POINTERS: // section with only function pointers for
1565  // initialization
1566  return eSectionTypeDataPointers;
1567  case S_MOD_TERM_FUNC_POINTERS: // section with only function pointers for
1568  // termination
1569  return eSectionTypeDataPointers;
1570  case S_COALESCED:
1571  return eSectionTypeOther;
1572  case S_GB_ZEROFILL:
1573  return eSectionTypeZeroFill;
1574  case S_INTERPOSING: // section with only pairs of function pointers for
1575  // interposing
1576  return eSectionTypeCode;
1577  case S_16BYTE_LITERALS: // section with only 16 byte literals
1578  return eSectionTypeData16;
1579  case S_DTRACE_DOF:
1580  return eSectionTypeDebug;
1581  case S_LAZY_DYLIB_SYMBOL_POINTERS:
1582  return eSectionTypeDataPointers;
1583  default:
1584  return eSectionTypeOther;
1585  }
1586 }
1587 
1593  bool FileAddressesChanged = false;
  // ^ Set while processing segments when an existing section's file address
  //   is overwritten; checked once all load commands have been processed so
  //   the module can be told that section file addresses changed.
1594 
1598 };
1599 
1601  const llvm::MachO::load_command &load_cmd_, lldb::offset_t offset,
1602  uint32_t cmd_idx, SegmentParsingContext &context) {
  // Parse one LC_SEGMENT / LC_SEGMENT_64 load command. `load_cmd_` holds the
  // already-read cmd/cmdsize pair and `offset` points just past it in m_data.
  // The segment and its sections are recorded in m_mach_segments and
  // m_mach_sections, and Section objects are created for them in
  // m_sections_up (and in context.UnifiedList where appropriate).
1603  llvm::MachO::segment_command_64 load_cmd;
  // Only the leading cmd/cmdsize part is copied here; the remaining fields
  // are filled in from m_data below.
1604  memcpy(&load_cmd, &load_cmd_, sizeof(load_cmd_));
1605 
1606  if (!m_data.GetU8(&offset, (uint8_t *)load_cmd.segname, 16))
1607  return;
1608 
1609  ModuleSP module_sp = GetModule();
1610  const bool is_core = GetType() == eTypeCoreFile;
1611  const bool is_dsym = (m_header.filetype == MH_DSYM);
1612  bool add_section = true;
1613  bool add_to_unified = true;
1614  ConstString const_segname(
1615  load_cmd.segname, strnlen(load_cmd.segname, sizeof(load_cmd.segname)));
1616 
1617  SectionSP unified_section_sp(
1618  context.UnifiedList.FindSectionByName(const_segname));
1619  if (is_dsym && unified_section_sp) {
1620  if (const_segname == GetSegmentNameLINKEDIT()) {
1621  // We need to keep the __LINKEDIT segment private to this object file
1622  // only
1623  add_to_unified = false;
1624  } else {
1625  // This is the dSYM file and this section has already been created by the
1626  // object file, no need to create it.
1627  add_section = false;
1628  }
1629  }
  // GetAddress() reads using the address byte size set on m_data, so the
  // same code handles both the 32-bit and the 64-bit segment layout.
1630  load_cmd.vmaddr = m_data.GetAddress(&offset);
1631  load_cmd.vmsize = m_data.GetAddress(&offset);
1632  load_cmd.fileoff = m_data.GetAddress(&offset);
1633  load_cmd.filesize = m_data.GetAddress(&offset);
  // Read the four trailing 32-bit fields that begin at maxprot.
1634  if (!m_data.GetU32(&offset, &load_cmd.maxprot, 4))
1635  return;
1636 
1637  SanitizeSegmentCommand(load_cmd, cmd_idx);
1638 
1639  const uint32_t segment_permissions = GetSegmentPermissions(load_cmd);
1640  const bool segment_is_encrypted =
1641  (load_cmd.flags & SG_PROTECTED_VERSION_1) != 0;
1642 
1643  // Keep a list of mach segments around in case we need to get at data that
1644  // isn't stored in the abstracted Sections.
1645  m_mach_segments.push_back(load_cmd);
1646 
1647  // Use a segment ID of the segment index shifted left by 8 so they never
1648  // conflict with any of the sections.
1649  SectionSP segment_sp;
1650  if (add_section && (const_segname || is_core)) {
1651  segment_sp = std::make_shared<Section>(
1652  module_sp, // Module to which this section belongs
1653  this, // Object file to which this section belongs
1654  ++context.NextSegmentIdx
1655  << 8, // Section ID is the 1 based segment index
1656  // shifted left by 8 bits so as not to collide with any of the 256
1657  // section IDs that are possible
1658  const_segname, // Name of this section
1659  eSectionTypeContainer, // This section is a container of other
1660  // sections.
1661  load_cmd.vmaddr, // File VM address == addresses as they are
1662  // found in the object file
1663  load_cmd.vmsize, // VM size in bytes of this section
1664  load_cmd.fileoff, // Offset to the data for this section in
1665  // the file
1666  load_cmd.filesize, // Size in bytes of this section as found
1667  // in the file
1668  0, // Segments have no alignment information
1669  load_cmd.flags); // Flags for this section
1670 
1671  segment_sp->SetIsEncrypted(segment_is_encrypted);
1672  m_sections_up->AddSection(segment_sp);
1673  segment_sp->SetPermissions(segment_permissions);
1674  if (add_to_unified)
1675  context.UnifiedList.AddSection(segment_sp);
1676  } else if (unified_section_sp) {
1677  // If this is a dSYM and the file addresses in the dSYM differ from the
1678  // file addresses in the ObjectFile, we must use the file base address for
1679  // the Section from the dSYM for the DWARF to resolve correctly.
1680  // This only happens with binaries in the shared cache in practice;
1681  // normally a mismatch like this would give a binary & dSYM that do not
1682  // match UUIDs. When a binary is included in the shared cache, its
1683  // segments are rearranged to optimize the shared cache, so its file
1684  // addresses will differ from what the ObjectFile had originally,
1685  // and what the dSYM has.
1686  if (is_dsym && unified_section_sp->GetFileAddress() != load_cmd.vmaddr) {
1688  if (log) {
1689  log->Printf(
1690  "Installing dSYM's %s segment file address over ObjectFile's "
1691  "so symbol table/debug info resolves correctly for %s",
1692  const_segname.AsCString(),
1693  module_sp->GetFileSpec().GetFilename().AsCString());
1694  }
1695 
1696  // Make sure we've parsed the symbol table from the ObjectFile before
1697  // we go around changing its Sections.
1698  module_sp->GetObjectFile()->GetSymtab();
1699  // eh_frame would present the same problems but we parse that on a per-
1700  // function basis as-needed so it's more difficult to remove its use of
1701  // the Sections. Realistically, the environments where this code path
1702  // will be taken will not have eh_frame sections.
1703 
1704  unified_section_sp->SetFileAddress(load_cmd.vmaddr);
1705 
1706  // Notify the module that the section addresses have been changed once
1707  // we're done so any file-address caches can be updated.
1708  context.FileAddressesChanged = true;
1709  }
1710  m_sections_up->AddSection(unified_section_sp);
1711  }
1712 
1713  llvm::MachO::section_64 sect64;
1714  ::memset(&sect64, 0, sizeof(sect64));
1715  // Push a section into our mach sections for the section at index zero
1716  // (NO_SECT) if we don't have any mach sections yet...
1717  if (m_mach_sections.empty())
1718  m_mach_sections.push_back(sect64);
1719  uint32_t segment_sect_idx;
1720  const lldb::user_id_t first_segment_sectID = context.NextSectionIdx + 1;
1721 
  // Number of 32-bit words to read starting at sect64.offset: one fewer for
  // sections of an LC_SEGMENT than for sections of an LC_SEGMENT_64.
1722  const uint32_t num_u32s = load_cmd.cmd == LC_SEGMENT ? 7 : 8;
1723  for (segment_sect_idx = 0; segment_sect_idx < load_cmd.nsects;
1724  ++segment_sect_idx) {
1725  if (m_data.GetU8(&offset, (uint8_t *)sect64.sectname,
1726  sizeof(sect64.sectname)) == nullptr)
1727  break;
1728  if (m_data.GetU8(&offset, (uint8_t *)sect64.segname,
1729  sizeof(sect64.segname)) == nullptr)
1730  break;
1731  sect64.addr = m_data.GetAddress(&offset);
1732  sect64.size = m_data.GetAddress(&offset);
1733 
1734  if (m_data.GetU32(&offset, &sect64.offset, num_u32s) == nullptr)
1735  break;
1736 
  // Same rebasing as is applied to segments of on-disk shared cache images.
1737  if (IsSharedCacheBinary() && !IsInMemory()) {
1738  sect64.offset = sect64.addr - m_text_address;
1739  }
1740 
1741  // Keep a list of mach sections around in case we need to get at data that
1742  // isn't stored in the abstracted Sections.
1743  m_mach_sections.push_back(sect64);
1744 
1745  if (add_section) {
1746  ConstString section_name(
1747  sect64.sectname, strnlen(sect64.sectname, sizeof(sect64.sectname)));
1748  if (!const_segname) {
1749  // We have a segment with no name so we need to conjure up segments
1750  // that correspond to the section's segname if there isn't already such
1751  // a section. If there is such a section, we resize the section so that
1752  // it spans all sections. We also mark these sections as fake so
1753  // address matches don't hit if they land in the gaps between the child
1754  // sections.
1755  const_segname.SetTrimmedCStringWithLength(sect64.segname,
1756  sizeof(sect64.segname));
1757  segment_sp = context.UnifiedList.FindSectionByName(const_segname);
1758  if (segment_sp.get()) {
1759  Section *segment = segment_sp.get();
1760  // Grow the section size as needed.
1761  const lldb::addr_t sect64_min_addr = sect64.addr;
1762  const lldb::addr_t sect64_max_addr = sect64_min_addr + sect64.size;
1763  const lldb::addr_t curr_seg_byte_size = segment->GetByteSize();
1764  const lldb::addr_t curr_seg_min_addr = segment->GetFileAddress();
1765  const lldb::addr_t curr_seg_max_addr =
1766  curr_seg_min_addr + curr_seg_byte_size;
1767  if (sect64_min_addr >= curr_seg_min_addr) {
1768  const lldb::addr_t new_seg_byte_size =
1769  sect64_max_addr - curr_seg_min_addr;
1770  // Only grow the section size if needed
1771  if (new_seg_byte_size > curr_seg_byte_size)
1772  segment->SetByteSize(new_seg_byte_size);
1773  } else {
1774  // We need to change the base address of the segment and adjust the
1775  // child section offsets for all existing children.
1776  const lldb::addr_t slide_amount =
1777  sect64_min_addr - curr_seg_min_addr;
1778  segment->Slide(slide_amount, false);
1779  segment->GetChildren().Slide(-slide_amount, false);
1780  segment->SetByteSize(curr_seg_max_addr - sect64_min_addr);
1781  }
1782 
1783  // Grow the segment's file offset/size range as needed.
1784  if (sect64.offset) {
1785  const lldb::addr_t segment_min_file_offset =
1786  segment->GetFileOffset();
1787  const lldb::addr_t segment_max_file_offset =
1788  segment_min_file_offset + segment->GetFileSize();
1789 
1790  const lldb::addr_t section_min_file_offset = sect64.offset;
1791  const lldb::addr_t section_max_file_offset =
1792  section_min_file_offset + sect64.size;
1793  const lldb::addr_t new_file_offset =
1794  std::min(section_min_file_offset, segment_min_file_offset);
1795  const lldb::addr_t new_file_size =
1796  std::max(section_max_file_offset, segment_max_file_offset) -
1797  new_file_offset;
1798  segment->SetFileOffset(new_file_offset);
1799  segment->SetFileSize(new_file_size);
1800  }
1801  } else {
1802  // Create a fake section for the section's named segment
1803  segment_sp = std::make_shared<Section>(
1804  segment_sp, // Parent section
1805  module_sp, // Module to which this section belongs
1806  this, // Object file to which this section belongs
1807  ++context.NextSegmentIdx
1808  << 8, // Section ID is the 1 based segment index
1809  // shifted left by 8 bits so as not to
1810  // collide with any of the 256 section IDs
1811  // that are possible
1812  const_segname, // Name of this section
1813  eSectionTypeContainer, // This section is a container of
1814  // other sections.
1815  sect64.addr, // File VM address == addresses as they are
1816  // found in the object file
1817  sect64.size, // VM size in bytes of this section
1818  sect64.offset, // Offset to the data for this section in
1819  // the file
1820  sect64.offset ? sect64.size : 0, // Size in bytes of
1821  // this section as
1822  // found in the file
1823  sect64.align,
1824  load_cmd.flags); // Flags for this section
1825  segment_sp->SetIsFake(true);
1826  segment_sp->SetPermissions(segment_permissions);
1827  m_sections_up->AddSection(segment_sp);
1828  if (add_to_unified)
1829  context.UnifiedList.AddSection(segment_sp);
1830  segment_sp->SetIsEncrypted(segment_is_encrypted);
1831  }
1832  }
1833  assert(segment_sp.get());
1834 
1835  lldb::SectionType sect_type = GetSectionType(sect64.flags, section_name);
1836 
  // The child's file address is stored relative to its parent segment.
1837  SectionSP section_sp(new Section(
1838  segment_sp, module_sp, this, ++context.NextSectionIdx, section_name,
1839  sect_type, sect64.addr - segment_sp->GetFileAddress(), sect64.size,
1840  sect64.offset, sect64.offset == 0 ? 0 : sect64.size, sect64.align,
1841  sect64.flags));
1842  // Set the section to be encrypted to match the segment
1843 
1844  bool section_is_encrypted = false;
1845  if (!segment_is_encrypted && load_cmd.filesize != 0)
1846  section_is_encrypted = context.EncryptedRanges.FindEntryThatContains(
1847  sect64.offset) != nullptr;
1848 
1849  section_sp->SetIsEncrypted(segment_is_encrypted || section_is_encrypted);
1850  section_sp->SetPermissions(segment_permissions);
1851  segment_sp->GetChildren().AddSection(section_sp);
1852 
  // A fake segment was looked up from this section's segname; reset so the
  // next section triggers its own lookup.
1853  if (segment_sp->IsFake()) {
1854  segment_sp.reset();
1855  const_segname.Clear();
1856  }
1857  }
1858  }
  // For dSYM files, give zero-sized sections of this segment a size: the
  // distance to the next section when there is one, else the segment's
  // vmsize.
1859  if (segment_sp && is_dsym) {
1860  if (first_segment_sectID <= context.NextSectionIdx) {
1861  lldb::user_id_t sect_uid;
1862  for (sect_uid = first_segment_sectID; sect_uid <= context.NextSectionIdx;
1863  ++sect_uid) {
1864  SectionSP curr_section_sp(
1865  segment_sp->GetChildren().FindSectionByID(sect_uid));
1866  SectionSP next_section_sp;
1867  if (sect_uid + 1 <= context.NextSectionIdx)
1868  next_section_sp =
1869  segment_sp->GetChildren().FindSectionByID(sect_uid + 1);
1870 
1871  if (curr_section_sp.get()) {
1872  if (curr_section_sp->GetByteSize() == 0) {
1873  if (next_section_sp.get() != nullptr)
1874  curr_section_sp->SetByteSize(next_section_sp->GetFileAddress() -
1875  curr_section_sp->GetFileAddress());
1876  else
1877  curr_section_sp->SetByteSize(load_cmd.vmsize);
1878  }
1879  }
1880  }
1881  }
1882  }
1883 }
1884 
1886  const llvm::MachO::load_command &load_cmd, lldb::offset_t offset) {
  // Cache an LC_DYSYMTAB load command in m_dysymtab. `load_cmd` carries the
  // cmd/cmdsize pair that was already read and `offset` points just past it;
  // the remaining fields are read as 32-bit words (hence the "- 2").
  // NOTE(review): the result of GetU32 is not checked here, so a short read
  // leaves cmd/cmdsize set - confirm that is intended.
1887  m_dysymtab.cmd = load_cmd.cmd;
1888  m_dysymtab.cmdsize = load_cmd.cmdsize;
1889  m_data.GetU32(&offset, &m_dysymtab.ilocalsym,
1890  (sizeof(m_dysymtab) / sizeof(uint32_t)) - 2);
1891 }
1892 
1893 void ObjectFileMachO::CreateSections(SectionList &unified_section_list) {
  // Build this object file's section list by walking the load commands once.
  // Segment commands create sections (also added to unified_section_list
  // where appropriate) and LC_DYSYMTAB is cached along the way. This is a
  // no-op when the section list already exists.
1894  if (m_sections_up)
1895  return;
1896 
1897  m_sections_up = std::make_unique<SectionList>();
1898 
1900  // bool dump_sections = false;
1901  ModuleSP module_sp(GetModule());
1902 
  // Load commands start right after the Mach header.
1903  offset = MachHeaderSizeFromMagic(m_header.magic);
1904 
1905  SegmentParsingContext context(GetEncryptedFileRanges(), unified_section_list);
1906  llvm::MachO::load_command load_cmd;
1907  for (uint32_t i = 0; i < m_header.ncmds; ++i) {
1908  const lldb::offset_t load_cmd_offset = offset;
1909  if (m_data.GetU32(&offset, &load_cmd, 2) == nullptr)
1910  break;
1911 
1912  if (load_cmd.cmd == LC_SEGMENT || load_cmd.cmd == LC_SEGMENT_64)
1913  ProcessSegmentCommand(load_cmd, offset, i, context);
1914  else if (load_cmd.cmd == LC_DYSYMTAB)
1915  ProcessDysymtabCommand(load_cmd, offset);
1916 
  // Advance by the command's declared size, not by what was read.
1917  offset = load_cmd_offset + load_cmd.cmdsize;
1918  }
1919 
1920  if (context.FileAddressesChanged && module_sp)
1921  module_sp->SectionFileAddressesChanged();
1922 }
1923 
1925 public:
1927  : m_section_list(section_list), m_section_infos() {
1928  // Get the number of sections down to a depth of 1 to include all segments
1929  // and their sections, but no other sections that may be added for debug
1930  // map or
1931  m_section_infos.resize(section_list->GetNumSections(1));
1932  }
1933 
// Map a symbol's section number to its Section, caching the lookup.
//
// n_sect is used both as the index into m_section_infos and as the section
// ID handed to FindSectionByID(). n_sect == 0 yields an empty SectionSP.
// For an in-range n_sect the cached section is returned when file_addr lies
// inside the section's cached file-address range, or when that range is
// zero-sized and file_addr equals its base address.
1934  SectionSP GetSection(uint8_t n_sect, addr_t file_addr) {
1935  if (n_sect == 0)
1936  return SectionSP();
1937  if (n_sect < m_section_infos.size()) {
      // Nothing cached yet for this n_sect: resolve the section and record
      // its file address and byte size. A failed lookup leaves section_sp
      // empty, so a later call with the same n_sect repeats the lookup (and
      // the log message below).
1938  if (!m_section_infos[n_sect].section_sp) {
1939  SectionSP section_sp(m_section_list->FindSectionByID(n_sect));
1940  m_section_infos[n_sect].section_sp = section_sp;
1941  if (section_sp) {
1942  m_section_infos[n_sect].vm_range.SetBaseAddress(
1943  section_sp->GetFileAddress());
1944  m_section_infos[n_sect].vm_range.SetByteSize(
1945  section_sp->GetByteSize());
1946  } else {
      // Name the file in the error using the object file that owns the
      // first section in the list, falling back to "<unknown>".
1947  std::string filename = "<unknown>";
1948  SectionSP first_section_sp(m_section_list->GetSectionAtIndex(0));
1949  if (first_section_sp)
1950  filename = first_section_sp->GetObjectFile()->GetFileSpec().GetPath();
1951 
1952  Host::SystemLog(Host::eSystemLogError,
1953  "error: unable to find section %d for a symbol in "
1954  "%s, corrupt file?\n",
1955  n_sect, filename.c_str());
1956  }
1957  }
1958  if (m_section_infos[n_sect].vm_range.Contains(file_addr)) {
1959  // Symbol is in section.
1960  return m_section_infos[n_sect].section_sp;
1961  } else if (m_section_infos[n_sect].vm_range.GetByteSize() == 0 &&
1962  m_section_infos[n_sect].vm_range.GetBaseAddress() ==
1963  file_addr) {
1964  // Symbol is in section with zero size, but has the same start address
1965  // as the section. This can happen with linker symbols (symbols that
1966  // start with the letter 'l' or 'L').
1967  return m_section_infos[n_sect].section_sp;
1968  }
1969  }
1971  }
1972 
1973 protected:
// Per-section cache slot used by GetSection(): the resolved section, which
// stays empty until a lookup for that section number succeeds.
1974  struct SectionInfo {
1976 
1978  SectionSP section_sp;
1979  };
1981  std::vector<SectionInfo> m_section_infos;
1982 };
1983 
// Extra flag bit (bit 63) that may be stored in TrieEntry::flags alongside
// the EXPORT_SYMBOL_FLAGS_* values.
1984 #define TRIE_SYMBOL_IS_THUMB (1ULL << 63)
// One decoded terminal entry of the export trie, as filled in by
// ParseTrieEntries().
1985 struct TrieEntry {
      // Debug helper: prints address, flags and other as 16-digit hex, then
      // the quoted name, followed by the import name when one is set.
1986  void Dump() const {
1987  printf("0x%16.16llx 0x%16.16llx 0x%16.16llx \"%s\"",
1988  static_cast<unsigned long long>(address),
1989  static_cast<unsigned long long>(flags),
1990  static_cast<unsigned long long>(other), name.GetCString());
1991  if (import_name)
1992  printf(" -> \"%s\"\n", import_name.GetCString());
1993  else
1994  printf("\n");
1995  }
1998  uint64_t flags =
1999  0; // EXPORT_SYMBOL_FLAGS_REEXPORT, EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER,
2000  // TRIE_SYMBOL_IS_THUMB
      // ParseTrieEntries() stores the dylib ordinal here for re-exports, the
      // value read after the address for stub-and-resolver entries, and 0
      // otherwise.
2001  uint64_t other = 0;
2003 };
2004 
2008 
2010 
// Debug helper: prints the caller-supplied index and this node's offset,
// then lets the contained entry print itself via entry.Dump().
2011  void Dump(uint32_t idx) const {
2012  printf("[%3u] 0x%16.16llx: ", idx,
2013  static_cast<unsigned long long>(nodeOffset));
2014  entry.Dump();
2015  }
2016 
// Orders entries by ascending nodeOffset only; the entry contents do not
// take part in the comparison.
2017  bool operator<(const TrieEntryWithOffset &other) const {
2018  return (nodeOffset < other.nodeOffset);
2019  }
2020 };
2021 
2023  const bool is_arm, addr_t text_seg_base_addr,
2024  std::vector<llvm::StringRef> &nameSlices,
2025  std::set<lldb::addr_t> &resolver_addresses,
2026  std::vector<TrieEntryWithOffset> &reexports,
2027  std::vector<TrieEntryWithOffset> &ext_symbols) {
      // Recursively walks the export-trie node found at `offset` in `data`.
      //  - nameSlices holds the edge labels on the path to this node; their
      //    concatenation is the symbol name used for a terminal entry.
      //  - Re-export entries with a non-empty import name are appended to
      //    `reexports`; entries whose flags are 0 are appended to
      //    `ext_symbols`. Other terminal entries are not recorded.
      //  - The resolver address of every stub-and-resolver entry is inserted
      //    into `resolver_addresses`.
      // Returns false when a child edge label cannot be read, here or in a
      // descendant node; an invalid `offset` returns true and adds nothing.
      // NOTE(review): recursion depth is bounded only by the trie data; no
      // visited-offset tracking is visible here, so a child offset pointing
      // back at an ancestor would recurse without limit - confirm.
2028  if (!data.ValidOffset(offset))
2029  return true;
2030 
2031  // Terminal node -- end of a branch, possibly add this to
2032  // the symbol table or resolver table.
      // terminalSize is the byte length of this node's terminal payload; the
      // child table starts immediately after that payload.
2033  const uint64_t terminalSize = data.GetULEB128(&offset);
2034  lldb::offset_t children_offset = offset + terminalSize;
2035  if (terminalSize != 0) {
2036  TrieEntryWithOffset e(offset);
2037  e.entry.flags = data.GetULEB128(&offset);
2038  const char *import_name = nullptr;
2039  if (e.entry.flags & EXPORT_SYMBOL_FLAGS_REEXPORT) {
2040  e.entry.address = 0;
2041  e.entry.other = data.GetULEB128(&offset); // dylib ordinal
2042  import_name = data.GetCStr(&offset);
2043  } else {
      // The stored address is rebased by text_seg_base_addr whenever the
      // caller supplied a valid base.
2044  e.entry.address = data.GetULEB128(&offset);
2045  if (text_seg_base_addr != LLDB_INVALID_ADDRESS)
2046  e.entry.address += text_seg_base_addr;
2047  if (e.entry.flags & EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER) {
      // `other` keeps the raw value; the rebased (and, on ARM, masked)
      // copy is what gets recorded as a resolver address.
2048  e.entry.other = data.GetULEB128(&offset);
2049  uint64_t resolver_addr = e.entry.other;
2050  if (text_seg_base_addr != LLDB_INVALID_ADDRESS)
2051  resolver_addr += text_seg_base_addr;
2052  if (is_arm)
2053  resolver_addr &= THUMB_ADDRESS_BIT_MASK;
2054  resolver_addresses.insert(resolver_addr);
2055  } else
2056  e.entry.other = 0;
2057  }
2058  bool add_this_entry = false;
2059  if (Flags(e.entry.flags).Test(EXPORT_SYMBOL_FLAGS_REEXPORT) &&
2060  import_name && import_name[0]) {
2061  // add symbols that are reexport symbols with a valid import name.
2062  add_this_entry = true;
2063  } else if (e.entry.flags == 0 &&
2064  (import_name == nullptr || import_name[0] == '\0')) {
2065  // add externally visible symbols, in case the nlist record has
2066  // been stripped/omitted.
2067  add_this_entry = true;
2068  }
2069  if (add_this_entry) {
      // Rebuild the full symbol name from the labels collected on the way
      // down to this node.
2070  std::string name;
2071  if (!nameSlices.empty()) {
2072  for (auto name_slice : nameSlices)
2073  name.append(name_slice.data(), name_slice.size());
2074  }
2075  if (name.size() > 1) {
2076  // Skip the leading '_'
2077  e.entry.name.SetCStringWithLength(name.c_str() + 1, name.size() - 1);
2078  }
      // import_name is only non-null for re-exports, and those reach this
      // point only when the import name is non-empty, so skipping one
      // character stays inside the string.
2079  if (import_name) {
2080  // Skip the leading '_'
2081  e.entry.import_name.SetCString(import_name + 1);
2082  }
2083  if (Flags(e.entry.flags).Test(EXPORT_SYMBOL_FLAGS_REEXPORT)) {
2084  reexports.push_back(e);
2085  } else {
2086  if (is_arm && (e.entry.address & 1)) {
2089  }
2090  ext_symbols.push_back(e);
2091  }
2092  }
2093  }
2094 
      // Child table: a one-byte count, then for each child a C-string edge
      // label followed by the ULEB128 offset of the child node. The label is
      // pushed onto nameSlices only for the duration of the recursive call.
2095  const uint8_t childrenCount = data.GetU8(&children_offset);
2096  for (uint8_t i = 0; i < childrenCount; ++i) {
2097  const char *cstr = data.GetCStr(&children_offset);
2098  if (cstr)
2099  nameSlices.push_back(llvm::StringRef(cstr));
2100  else
2101  return false; // Corrupt data
2102  lldb::offset_t childNodeOffset = data.GetULEB128(&children_offset);
      // A child offset of 0 is not followed.
2103  if (childNodeOffset) {
2104  if (!ParseTrieEntries(data, childNodeOffset, is_arm, text_seg_base_addr,
2105  nameSlices, resolver_addresses, reexports,
2106  ext_symbols)) {
2107  return false;
2108  }
2109  }
2110  nameSlices.pop_back();
2111  }
2112  return true;
2113 }
2114 
// Classify a symbol by the section that contains it.
//
// symbol_section is dereferenced unconditionally, so it must be non-null.
//  - Under text_section_sp: eSymbolTypeCode, or eSymbolTypeData when
//    IsClear() reports the three instruction attributes as clear.
//  - Under any of the three data segments: eSymbolTypeData, except that a
//    section name starting with "__objc" gives eSymbolTypeRuntime (or an ObjC
//    class / metaclass / ivar type when symbol_name carries the matching
//    "OBJC_..._$_" prefix), and a section name starting with
//    "__gcc_except_tab" gives eSymbolTypeException.
//  - Otherwise, a section name starting with "__IMPORT" gives
//    eSymbolTypeTrampoline.
// When an ObjC prefix matches, symbol_name is advanced past the prefix and
// demangled_is_synthesized is set to true; both are otherwise left untouched.
2115 static SymbolType GetSymbolType(const char *&symbol_name,
2116  bool &demangled_is_synthesized,
2117  const SectionSP &text_section_sp,
2118  const SectionSP &data_section_sp,
2119  const SectionSP &data_dirty_section_sp,
2120  const SectionSP &data_const_section_sp,
2121  const SectionSP &symbol_section) {
2123 
2124  const char *symbol_sect_name = symbol_section->GetName().AsCString();
2125  if (symbol_section->IsDescendant(text_section_sp.get())) {
2126  if (symbol_section->IsClear(S_ATTR_PURE_INSTRUCTIONS |
2127  S_ATTR_SELF_MODIFYING_CODE |
2128  S_ATTR_SOME_INSTRUCTIONS))
2129  type = eSymbolTypeData;
2130  else
2131  type = eSymbolTypeCode;
2132  } else if (symbol_section->IsDescendant(data_section_sp.get()) ||
2133  symbol_section->IsDescendant(data_dirty_section_sp.get()) ||
2134  symbol_section->IsDescendant(data_const_section_sp.get())) {
      // strstr(name, prefix) == name is used here and below as a
      // "name starts with prefix" test.
2135  if (symbol_sect_name &&
2136  ::strstr(symbol_sect_name, "__objc") == symbol_sect_name) {
2137  type = eSymbolTypeRuntime;
2138 
2139  if (symbol_name) {
2140  llvm::StringRef symbol_name_ref(symbol_name);
2141  if (symbol_name_ref.startswith("OBJC_")) {
      // These prefixes have no leading underscore, unlike the
      // "_OBJC_..." variants declared in ParseSymtab().
2142  static const llvm::StringRef g_objc_v2_prefix_class("OBJC_CLASS_$_");
2143  static const llvm::StringRef g_objc_v2_prefix_metaclass(
2144  "OBJC_METACLASS_$_");
2145  static const llvm::StringRef g_objc_v2_prefix_ivar("OBJC_IVAR_$_");
2146  if (symbol_name_ref.startswith(g_objc_v2_prefix_class)) {
2147  symbol_name = symbol_name + g_objc_v2_prefix_class.size();
2148  type = eSymbolTypeObjCClass;
2149  demangled_is_synthesized = true;
2150  } else if (symbol_name_ref.startswith(g_objc_v2_prefix_metaclass)) {
2151  symbol_name = symbol_name + g_objc_v2_prefix_metaclass.size();
2152  type = eSymbolTypeObjCMetaClass;
2153  demangled_is_synthesized = true;
2154  } else if (symbol_name_ref.startswith(g_objc_v2_prefix_ivar)) {
2155  symbol_name = symbol_name + g_objc_v2_prefix_ivar.size();
2156  type = eSymbolTypeObjCIVar;
2157  demangled_is_synthesized = true;
2158  }
2159  }
2160  }
2161  } else if (symbol_sect_name &&
2162  ::strstr(symbol_sect_name, "__gcc_except_tab") ==
2163  symbol_sect_name) {
2164  type = eSymbolTypeException;
2165  } else {
2166  type = eSymbolTypeData;
2167  }
2168  } else if (symbol_sect_name &&
2169  ::strstr(symbol_sect_name, "__IMPORT") == symbol_sect_name) {
2170  type = eSymbolTypeTrampoline;
2171  }
2172  return type;
2173 }
2174 
2175 // Read the UUID out of a dyld_shared_cache file on-disk.
//
// Only the first sizeof(lldb_copy_dyld_cache_header_v1) bytes of the file are
// mapped. The UUID is taken from the header's `uuid` field, and only when the
// first six bytes of the file are "dyld_v". If the file cannot be mapped or
// that check fails, the default-constructed UUID is returned. A successfully
// read UUID is also written to the log when logging is enabled.
2177  const ByteOrder byte_order,
2178  const uint32_t addr_byte_size) {
2179  UUID dsc_uuid;
2180  DataBufferSP DscData = MapFileData(
2181  dyld_shared_cache, sizeof(struct lldb_copy_dyld_cache_header_v1), 0);
2182  if (!DscData)
2183  return dsc_uuid;
2184  DataExtractor dsc_header_data(DscData, byte_order, addr_byte_size);
2185 
      // Copy the first six bytes into a NUL-terminated buffer so they can be
      // compared with strcmp().
      // NOTE(review): the GetData() result is handed straight to memcpy();
      // confirm it cannot be null when the mapped buffer is shorter than six
      // bytes.
2186  char version_str[7];
2187  lldb::offset_t offset = 0;
2188  memcpy(version_str, dsc_header_data.GetData(&offset, 6), 6);
2189  version_str[6] = '\0';
2190  if (strcmp(version_str, "dyld_v") == 0) {
2191  offset = offsetof(struct lldb_copy_dyld_cache_header_v1, uuid);
2192  dsc_uuid = UUID::fromOptionalData(
2193  dsc_header_data.GetData(&offset, sizeof(uuid_t)), sizeof(uuid_t));
2194  }
2196  if (log && dsc_uuid.IsValid()) {
2197  LLDB_LOGF(log, "Shared cache %s has UUID %s",
2198  dyld_shared_cache.GetPath().c_str(),
2199  dsc_uuid.GetAsString().c_str());
2200  }
2201  return dsc_uuid;
2202 }
2203 
// Decode one nlist record from nlist_data at nlist_data_offset.
//
// Returns an empty Optional, without reading anything, when fewer than
// nlist_byte_size bytes are available at the offset. Otherwise the fields are
// read in on-disk order: n_strx (u32), n_type (u8), n_sect (u8), n_desc (u16)
// and n_value, which is read with GetAddress_unchecked() (presumably sized by
// the extractor's address byte size, so both the 32-bit and 64-bit layouts
// end up in an nlist_64 - confirm). The getters receive the offset by
// pointer, so the caller's offset presumably ends up just past the record.
2204 static llvm::Optional<struct nlist_64>
2205 ParseNList(DataExtractor &nlist_data, lldb::offset_t &nlist_data_offset,
2206  size_t nlist_byte_size) {
2207  struct nlist_64 nlist;
      // One up-front bounds check for the whole record is what makes the
      // unchecked getters below acceptable.
2208  if (!nlist_data.ValidOffsetForDataOfSize(nlist_data_offset, nlist_byte_size))
2209  return {};
2210  nlist.n_strx = nlist_data.GetU32_unchecked(&nlist_data_offset);
2211  nlist.n_type = nlist_data.GetU8_unchecked(&nlist_data_offset);
2212  nlist.n_sect = nlist_data.GetU8_unchecked(&nlist_data_offset);
2213  nlist.n_desc = nlist_data.GetU16_unchecked(&nlist_data_offset);
2214  nlist.n_value = nlist_data.GetAddress_unchecked(&nlist_data_offset);
2215  return nlist;
2216 }
2217 
// Named true/false constants; presumably passed where a boolean "is this a
// debug symbol" argument is expected - no use is visible in this part of the
// file, so confirm against the callers.
2218 enum { DebugSymbols = true, NonDebugSymbols = false };
2219 
2221  LLDB_SCOPED_TIMERF("ObjectFileMachO::ParseSymtab () module = %s",
2222  m_file.GetFilename().AsCString(""));
2223  ModuleSP module_sp(GetModule());
2224  if (!module_sp)
2225  return;
2226 
2227  Progress progress(llvm::formatv("Parsing symbol table for {0}",
2228  m_file.GetFilename().AsCString("<Unknown>")));
2229 
2230  llvm::MachO::symtab_command symtab_load_command = {0, 0, 0, 0, 0, 0};
2231  llvm::MachO::linkedit_data_command function_starts_load_command = {0, 0, 0, 0};
2232  llvm::MachO::linkedit_data_command exports_trie_load_command = {0, 0, 0, 0};
2233  llvm::MachO::dyld_info_command dyld_info = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2234  // The data element of type bool indicates that this entry is thumb
2235  // code.
2236  typedef AddressDataArray<lldb::addr_t, bool, 100> FunctionStarts;
2237 
2238  // Record the address of every function/data that we add to the symtab.
2239  // We add symbols to the table in the order of most information (nlist
2240  // records) to least (function starts), and avoid duplicating symbols
2241  // via this set.
2242  llvm::DenseSet<addr_t> symbols_added;
2243 
2244  // We are using a llvm::DenseSet for "symbols_added" so we must be sure we
2245  // do not add the tombstone or empty keys to the set.
2246  auto add_symbol_addr = [&symbols_added](lldb::addr_t file_addr) {
2247  // Don't add the tombstone or empty keys.
2248  if (file_addr == UINT64_MAX || file_addr == UINT64_MAX - 1)
2249  return;
2250  symbols_added.insert(file_addr);
2251  };
2252  FunctionStarts function_starts;
2254  uint32_t i;
2255  FileSpecList dylib_files;
2257  llvm::StringRef g_objc_v2_prefix_class("_OBJC_CLASS_$_");
2258  llvm::StringRef g_objc_v2_prefix_metaclass("_OBJC_METACLASS_$_");
2259  llvm::StringRef g_objc_v2_prefix_ivar("_OBJC_IVAR_$_");
2260 
2261  for (i = 0; i < m_header.ncmds; ++i) {
2262  const lldb::offset_t cmd_offset = offset;
2263  // Read in the load command and load command size
2264  llvm::MachO::load_command lc;
2265  if (m_data.GetU32(&offset, &lc, 2) == nullptr)
2266  break;
2267  // Watch for the symbol table load command
2268  switch (lc.cmd) {
2269  case LC_SYMTAB:
2270  symtab_load_command.cmd = lc.cmd;
2271  symtab_load_command.cmdsize = lc.cmdsize;
2272  // Read in the rest of the symtab load command
2273  if (m_data.GetU32(&offset, &symtab_load_command.symoff, 4) ==
2274  nullptr) // fill in symoff, nsyms, stroff, strsize fields
2275  return;
2276  break;
2277 
2278  case LC_DYLD_INFO:
2279  case LC_DYLD_INFO_ONLY:
2280  if (m_data.GetU32(&offset, &dyld_info.rebase_off, 10)) {
2281  dyld_info.cmd = lc.cmd;
2282  dyld_info.cmdsize = lc.cmdsize;
2283  } else {
2284  memset(&dyld_info, 0, sizeof(dyld_info));
2285  }
2286  break;
2287 
2288  case LC_LOAD_DYLIB:
2289  case LC_LOAD_WEAK_DYLIB:
2290  case LC_REEXPORT_DYLIB:
2291  case LC_LOADFVMLIB:
2292  case LC_LOAD_UPWARD_DYLIB: {
2293  uint32_t name_offset = cmd_offset + m_data.GetU32(&offset);
2294  const char *path = m_data.PeekCStr(name_offset);
2295  if (path) {
2296  FileSpec file_spec(path);
2297  // Strip the path if there is @rpath, @executable, etc so we just use
2298  // the basename
2299  if (path[0] == '@')
2300  file_spec.GetDirectory().Clear();
2301 
2302  if (lc.cmd == LC_REEXPORT_DYLIB) {
2303  m_reexported_dylibs.AppendIfUnique(file_spec);
2304  }
2305 
2306  dylib_files.Append(file_spec);
2307  }
2308  } break;
2309 
2310  case LC_DYLD_EXPORTS_TRIE:
2311  exports_trie_load_command.cmd = lc.cmd;
2312  exports_trie_load_command.cmdsize = lc.cmdsize;
2313  if (m_data.GetU32(&offset, &exports_trie_load_command.dataoff, 2) ==
2314  nullptr) // fill in offset and size fields
2315  memset(&exports_trie_load_command, 0,
2316  sizeof(exports_trie_load_command));
2317  break;
2318  case LC_FUNCTION_STARTS:
2319  function_starts_load_command.cmd = lc.cmd;
2320  function_starts_load_command.cmdsize = lc.cmdsize;
2321  if (m_data.GetU32(&offset, &function_starts_load_command.dataoff, 2) ==
2322  nullptr) // fill in data offset and size fields
2323  memset(&function_starts_load_command, 0,
2324  sizeof(function_starts_load_command));
2325  break;
2326 
2327  default:
2328  break;
2329  }
2330  offset = cmd_offset + lc.cmdsize;
2331  }
2332 
2333  if (!symtab_load_command.cmd)
2334  return;
2335 
2336  SectionList *section_list = GetSectionList();
2337  if (section_list == nullptr)
2338  return;
2339 
2340  const uint32_t addr_byte_size = m_data.GetAddressByteSize();
2341  const ByteOrder byte_order = m_data.GetByteOrder();
2342  bool bit_width_32 = addr_byte_size == 4;
2343  const size_t nlist_byte_size =
2344  bit_width_32 ? sizeof(struct nlist) : sizeof(struct nlist_64);
2345 
2346  DataExtractor nlist_data(nullptr, 0, byte_order, addr_byte_size);
2347  DataExtractor strtab_data(nullptr, 0, byte_order, addr_byte_size);
2348  DataExtractor function_starts_data(nullptr, 0, byte_order, addr_byte_size);
2349  DataExtractor indirect_symbol_index_data(nullptr, 0, byte_order,
2350  addr_byte_size);
2351  DataExtractor dyld_trie_data(nullptr, 0, byte_order, addr_byte_size);
2352 
2353  const addr_t nlist_data_byte_size =
2354  symtab_load_command.nsyms * nlist_byte_size;
2355  const addr_t strtab_data_byte_size = symtab_load_command.strsize;
2356  addr_t strtab_addr = LLDB_INVALID_ADDRESS;
2357 
2358  ProcessSP process_sp(m_process_wp.lock());
2359  Process *process = process_sp.get();
2360 
2361  uint32_t memory_module_load_level = eMemoryModuleLoadLevelComplete;
2362  bool is_shared_cache_image = IsSharedCacheBinary();
2363  bool is_local_shared_cache_image = is_shared_cache_image && !IsInMemory();
2364  SectionSP linkedit_section_sp(
2365  section_list->FindSectionByName(GetSegmentNameLINKEDIT()));
2366 
2367  if (process && m_header.filetype != llvm::MachO::MH_OBJECT &&
2368  !is_local_shared_cache_image) {
2369  Target &target = process->GetTarget();
2370 
2371  memory_module_load_level = target.GetMemoryModuleLoadLevel();
2372 
2373  // Reading mach file from memory in a process or core file...
2374 
2375  if (linkedit_section_sp) {
2376  addr_t linkedit_load_addr =
2377  linkedit_section_sp->GetLoadBaseAddress(&target);
2378  if (linkedit_load_addr == LLDB_INVALID_ADDRESS) {
2379  // We might be trying to access the symbol table before the
2380  // __LINKEDIT's load address has been set in the target. We can't
2381  // fail to read the symbol table, so calculate the right address
2382  // manually
2383  linkedit_load_addr = CalculateSectionLoadAddressForMemoryImage(
2384  m_memory_addr, GetMachHeaderSection(), linkedit_section_sp.get());
2385  }
2386 
2387  const addr_t linkedit_file_offset = linkedit_section_sp->GetFileOffset();
2388  const addr_t symoff_addr = linkedit_load_addr +
2389  symtab_load_command.symoff -
2390  linkedit_file_offset;
2391  strtab_addr = linkedit_load_addr + symtab_load_command.stroff -
2392  linkedit_file_offset;
2393 
2394  // Always load dyld - the dynamic linker - from memory if we didn't
2395  // find a binary anywhere else. lldb will not register
2396  // dylib/framework/bundle loads/unloads if we don't have the dyld
2397  // symbols, we force dyld to load from memory despite the user's
2398  // target.memory-module-load-level setting.
2399  if (memory_module_load_level == eMemoryModuleLoadLevelComplete ||
2400  m_header.filetype == llvm::MachO::MH_DYLINKER) {
2401  DataBufferSP nlist_data_sp(
2402  ReadMemory(process_sp, symoff_addr, nlist_data_byte_size));
2403  if (nlist_data_sp)
2404  nlist_data.SetData(nlist_data_sp, 0, nlist_data_sp->GetByteSize());
2405  if (m_dysymtab.nindirectsyms != 0) {
2406  const addr_t indirect_syms_addr = linkedit_load_addr +
2407  m_dysymtab.indirectsymoff -
2408  linkedit_file_offset;
2409  DataBufferSP indirect_syms_data_sp(ReadMemory(
2410  process_sp, indirect_syms_addr, m_dysymtab.nindirectsyms * 4));
2411  if (indirect_syms_data_sp)
2412  indirect_symbol_index_data.SetData(
2413  indirect_syms_data_sp, 0,
2414  indirect_syms_data_sp->GetByteSize());
2415  // If this binary is outside the shared cache,
2416  // cache the string table.
2417  // Binaries in the shared cache all share a giant string table,
2418  // and we can't share the string tables across multiple
2419  // ObjectFileMachO's, so we'd end up re-reading this mega-strtab
2420  // for every binary in the shared cache - it would be a big perf
2421  // problem. For binaries outside the shared cache, it's faster to
2422  // read the entire strtab at once instead of piece-by-piece as we
2423  // process the nlist records.
2424  if (!is_shared_cache_image) {
2425  DataBufferSP strtab_data_sp(
2426  ReadMemory(process_sp, strtab_addr, strtab_data_byte_size));
2427  if (strtab_data_sp) {
2428  strtab_data.SetData(strtab_data_sp, 0,
2429  strtab_data_sp->GetByteSize());
2430  }
2431  }
2432  }
2433  if (memory_module_load_level >= eMemoryModuleLoadLevelPartial) {
2434  if (function_starts_load_command.cmd) {
2435  const addr_t func_start_addr =
2436  linkedit_load_addr + function_starts_load_command.dataoff -
2437  linkedit_file_offset;
2438  DataBufferSP func_start_data_sp(
2439  ReadMemory(process_sp, func_start_addr,
2440  function_starts_load_command.datasize));
2441  if (func_start_data_sp)
2442  function_starts_data.SetData(func_start_data_sp, 0,
2443  func_start_data_sp->GetByteSize());
2444  }
2445  }
2446  }
2447  }
2448  } else {
2449  if (is_local_shared_cache_image) {
2450  // The load commands in shared cache images are relative to the
2451  // beginning of the shared cache, not the library image. The
2452  // data we get handed when creating the ObjectFileMachO starts
2453  // at the beginning of a specific library and spans to the end
2454  // of the cache to be able to reach the shared LINKEDIT
2455  // segments. We need to convert the load command offsets to be
2456  // relative to the beginning of our specific image.
2457  lldb::addr_t linkedit_offset = linkedit_section_sp->GetFileOffset();
2458  lldb::offset_t linkedit_slide =
2459  linkedit_offset - m_linkedit_original_offset;
2460  symtab_load_command.symoff += linkedit_slide;
2461  symtab_load_command.stroff += linkedit_slide;
2462  dyld_info.export_off += linkedit_slide;
2463  m_dysymtab.indirectsymoff += linkedit_slide;
2464  function_starts_load_command.dataoff += linkedit_slide;
2465  exports_trie_load_command.dataoff += linkedit_slide;
2466  }
2467 
2468  nlist_data.SetData(m_data, symtab_load_command.symoff,
2469  nlist_data_byte_size);
2470  strtab_data.SetData(m_data, symtab_load_command.stroff,
2471  strtab_data_byte_size);
2472 
2473  // We shouldn't have exports data from both the LC_DYLD_INFO command
2474  // AND the LC_DYLD_EXPORTS_TRIE command in the same binary:
2475  lldbassert(!((dyld_info.export_size > 0)
2476  && (exports_trie_load_command.datasize > 0)));
2477  if (dyld_info.export_size > 0) {
2478  dyld_trie_data.SetData(m_data, dyld_info.export_off,
2479  dyld_info.export_size);
2480  } else if (exports_trie_load_command.datasize > 0) {
2481  dyld_trie_data.SetData(m_data, exports_trie_load_command.dataoff,
2482  exports_trie_load_command.datasize);
2483  }
2484 
2485  if (m_dysymtab.nindirectsyms != 0) {
2486  indirect_symbol_index_data.SetData(m_data, m_dysymtab.indirectsymoff,
2487  m_dysymtab.nindirectsyms * 4);
2488  }
2489  if (function_starts_load_command.cmd) {
2490  function_starts_data.SetData(m_data, function_starts_load_command.dataoff,
2491  function_starts_load_command.datasize);
2492  }
2493  }
2494 
2495  const bool have_strtab_data = strtab_data.GetByteSize() > 0;
2496 
2497  ConstString g_segment_name_TEXT = GetSegmentNameTEXT();
2498  ConstString g_segment_name_DATA = GetSegmentNameDATA();
2499  ConstString g_segment_name_DATA_DIRTY = GetSegmentNameDATA_DIRTY();
2500  ConstString g_segment_name_DATA_CONST = GetSegmentNameDATA_CONST();
2501  ConstString g_segment_name_OBJC = GetSegmentNameOBJC();
2502  ConstString g_section_name_eh_frame = GetSectionNameEHFrame();
2503  SectionSP text_section_sp(
2504  section_list->FindSectionByName(g_segment_name_TEXT));
2505  SectionSP data_section_sp(
2506  section_list->FindSectionByName(g_segment_name_DATA));
2507  SectionSP data_dirty_section_sp(
2508  section_list->FindSectionByName(g_segment_name_DATA_DIRTY));
2509  SectionSP data_const_section_sp(
2510  section_list->FindSectionByName(g_segment_name_DATA_CONST));
2511  SectionSP objc_section_sp(
2512  section_list->FindSectionByName(g_segment_name_OBJC));
2513  SectionSP eh_frame_section_sp;
2514  if (text_section_sp.get())
2515  eh_frame_section_sp = text_section_sp->GetChildren().FindSectionByName(
2516  g_section_name_eh_frame);
2517  else
2518  eh_frame_section_sp =
2519  section_list->FindSectionByName(g_section_name_eh_frame);
2520 
2521  const bool is_arm = (m_header.cputype == llvm::MachO::CPU_TYPE_ARM);
2522  const bool always_thumb = GetArchitecture().IsAlwaysThumbInstructions();
2523 
2524  // lldb works best if it knows the start address of all functions in a
2525  // module. Linker symbols or debug info are normally the best source of
2526  // information for start addr / size but they may be stripped in a released
2527  // binary. Two additional sources of information exist in Mach-O binaries:
2528  // LC_FUNCTION_STARTS - a list of ULEB128 encoded offsets of each
2529  // function's start address in the
2530  // binary, relative to the text section.
2531  // eh_frame - the eh_frame FDEs have the start addr & size of
2532  // each function
2533  // LC_FUNCTION_STARTS is the fastest source to read in, and is present on
2534  // all modern binaries.
2535  // Binaries built to run on older releases may need to use eh_frame
2536  // information.
2537 
2538  if (text_section_sp && function_starts_data.GetByteSize()) {
2539  FunctionStarts::Entry function_start_entry;
2540  function_start_entry.data = false;
2541  lldb::offset_t function_start_offset = 0;
2542  function_start_entry.addr = text_section_sp->GetFileAddress();
2543  uint64_t delta;
2544  while ((delta = function_starts_data.GetULEB128(&function_start_offset)) >
2545  0) {
2546  // Now append the current entry
2547  function_start_entry.addr += delta;
2548  if (is_arm) {
2549  if (function_start_entry.addr & 1) {
2550  function_start_entry.addr &= THUMB_ADDRESS_BIT_MASK;
2551  function_start_entry.data = true;
2552  } else if (always_thumb) {
2553  function_start_entry.data = true;
2554  }
2555  }
2556  function_starts.Append(function_start_entry);
2557  }
2558  } else {
2559  // If m_type is eTypeDebugInfo, then this is a dSYM - it will have the
2560  // load command claiming an eh_frame but it doesn't actually have the
2561  // eh_frame content. And if we have a dSYM, we don't need to do any of
2562  // this fill-in-the-missing-symbols works anyway - the debug info should
2563  // give us all the functions in the module.
2564  if (text_section_sp.get() && eh_frame_section_sp.get() &&
2565  m_type != eTypeDebugInfo) {
2566  DWARFCallFrameInfo eh_frame(*this, eh_frame_section_sp,
2567  DWARFCallFrameInfo::EH);
2569  eh_frame.GetFunctionAddressAndSizeVector(functions);
2570  addr_t text_base_addr = text_section_sp->GetFileAddress();
2571  size_t count = functions.GetSize();
2572  for (size_t i = 0; i < count; ++i) {
2574  functions.GetEntryAtIndex(i);
2575  if (func) {
2576  FunctionStarts::Entry function_start_entry;
2577  function_start_entry.addr = func->base - text_base_addr;
2578  if (is_arm) {
2579  if (function_start_entry.addr & 1) {
2580  function_start_entry.addr &= THUMB_ADDRESS_BIT_MASK;
2581  function_start_entry.data = true;
2582  } else if (always_thumb) {
2583  function_start_entry.data = true;
2584  }
2585  }
2586  function_starts.Append(function_start_entry);
2587  }
2588  }
2589  }
2590  }
2591 
2592  const size_t function_starts_count = function_starts.GetSize();
2593 
2594  // For user process binaries (executables, dylibs, frameworks, bundles), if
2595  // we don't have LC_FUNCTION_STARTS/eh_frame section in this binary, we're
2596  // going to assume the binary has been stripped. Don't allow assembly
2597  // language instruction emulation because we don't know proper function
2598  // start boundaries.
2599  //
2600  // For all other types of binaries (kernels, stand-alone bare board
2601  // binaries, kexts), they may not have LC_FUNCTION_STARTS / eh_frame
2602  // sections - we should not make any assumptions about them based on that.
2603  if (function_starts_count == 0 && CalculateStrata() == eStrataUser) {
2605  Log *unwind_or_symbol_log(lldb_private::GetLogIfAnyCategoriesSet(
2607 
2608  if (unwind_or_symbol_log)
2609  module_sp->LogMessage(
2610  unwind_or_symbol_log,
2611  "no LC_FUNCTION_STARTS, will not allow assembly profiled unwinds");
2612  }
2613 
2614  const user_id_t TEXT_eh_frame_sectID = eh_frame_section_sp.get()
2615  ? eh_frame_section_sp->GetID()
2616  : static_cast<user_id_t>(NO_SECT);
2617 
2618  lldb::offset_t nlist_data_offset = 0;
2619 
2620  uint32_t N_SO_index = UINT32_MAX;
2621 
2622  MachSymtabSectionInfo section_info(section_list);
2623  std::vector<uint32_t> N_FUN_indexes;
2624  std::vector<uint32_t> N_NSYM_indexes;
2625  std::vector<uint32_t> N_INCL_indexes;
2626  std::vector<uint32_t> N_BRAC_indexes;
2627  std::vector<uint32_t> N_COMM_indexes;
2628  typedef std::multimap<uint64_t, uint32_t> ValueToSymbolIndexMap;
2629  typedef llvm::DenseMap<uint32_t, uint32_t> NListIndexToSymbolIndexMap;
2630  typedef llvm::DenseMap<const char *, uint32_t> ConstNameToSymbolIndexMap;
2631  ValueToSymbolIndexMap N_FUN_addr_to_sym_idx;
2632  ValueToSymbolIndexMap N_STSYM_addr_to_sym_idx;
2633  ConstNameToSymbolIndexMap N_GSYM_name_to_sym_idx;
2634  // Any symbols that get merged into another will get an entry in this map
2635  // so we know which symbol index a given nlist index ended up as.
2636  NListIndexToSymbolIndexMap m_nlist_idx_to_sym_idx;
2637  uint32_t nlist_idx = 0;
2638  Symbol *symbol_ptr = nullptr;
2639 
2640  uint32_t sym_idx = 0;
2641  Symbol *sym = nullptr;
2642  size_t num_syms = 0;
2643  std::string memory_symbol_name;
2644  uint32_t unmapped_local_symbols_found = 0;
2645 
2646  std::vector<TrieEntryWithOffset> reexport_trie_entries;
2647  std::vector<TrieEntryWithOffset> external_sym_trie_entries;
2648  std::set<lldb::addr_t> resolver_addresses;
2649 
2650  if (dyld_trie_data.GetByteSize() > 0) {
2651  ConstString text_segment_name("__TEXT");
2652  SectionSP text_segment_sp =
2653  GetSectionList()->FindSectionByName(text_segment_name);
2654  lldb::addr_t text_segment_file_addr = LLDB_INVALID_ADDRESS;
2655  if (text_segment_sp)
2656  text_segment_file_addr = text_segment_sp->GetFileAddress();
2657  std::vector<llvm::StringRef> nameSlices;
2658  ParseTrieEntries(dyld_trie_data, 0, is_arm, text_segment_file_addr,
2659  nameSlices, resolver_addresses, reexport_trie_entries,
2660  external_sym_trie_entries);
2661  }
2662 
2663  typedef std::set<ConstString> IndirectSymbols;
2664  IndirectSymbols indirect_symbol_names;
2665 
2666 #if TARGET_OS_IPHONE
2667 
2668  // Some recent builds of the dyld_shared_cache (hereafter: DSC) have been
2669  // optimized by moving LOCAL symbols out of the memory mapped portion of
2670  // the DSC. The symbol information has all been retained, but it isn't
2671  // available in the normal nlist data. However, there *are* duplicate
2672  // entries of *some*
2673  // LOCAL symbols in the normal nlist data. To handle this situation
2674  // correctly, we must first attempt
2675  // to parse any DSC unmapped symbol information. If we find any, we set a
2676  // flag that tells the normal nlist parser to ignore all LOCAL symbols.
2677 
2678  if (IsSharedCacheBinary()) {
2679  // Before we can start mapping the DSC, we need to make certain the
2680  // target process is actually using the cache we can find.
2681 
2682  // Next we need to determine the correct path for the dyld shared cache.
2683 
2684  ArchSpec header_arch = GetArchitecture();
2685  char dsc_path[PATH_MAX];
2686  char dsc_path_development[PATH_MAX];
2687 
2688  snprintf(
2689  dsc_path, sizeof(dsc_path), "%s%s%s",
2690  "/System/Library/Caches/com.apple.dyld/", /* IPHONE_DYLD_SHARED_CACHE_DIR
2691  */
2692  "dyld_shared_cache_", /* DYLD_SHARED_CACHE_BASE_NAME */
2693  header_arch.GetArchitectureName());
2694 
2695  snprintf(
2696  dsc_path_development, sizeof(dsc_path), "%s%s%s%s",
2697  "/System/Library/Caches/com.apple.dyld/", /* IPHONE_DYLD_SHARED_CACHE_DIR
2698  */
2699  "dyld_shared_cache_", /* DYLD_SHARED_CACHE_BASE_NAME */
2700  header_arch.GetArchitectureName(), ".development");
2701 
2702  FileSpec dsc_nondevelopment_filespec(dsc_path);
2703  FileSpec dsc_development_filespec(dsc_path_development);
2704  FileSpec dsc_filespec;
2705 
2706  UUID dsc_uuid;
2707  UUID process_shared_cache_uuid;
2708  addr_t process_shared_cache_base_addr;
2709 
2710  if (process) {
2711  GetProcessSharedCacheUUID(process, process_shared_cache_base_addr,
2712  process_shared_cache_uuid);
2713  }
2714 
2715  // First see if we can find an exact match for the inferior process
2716  // shared cache UUID in the development or non-development shared caches
2717  // on disk.
2718  if (process_shared_cache_uuid.IsValid()) {
2719  if (FileSystem::Instance().Exists(dsc_development_filespec)) {
2720  UUID dsc_development_uuid = GetSharedCacheUUID(
2721  dsc_development_filespec, byte_order, addr_byte_size);
2722  if (dsc_development_uuid.IsValid() &&
2723  dsc_development_uuid == process_shared_cache_uuid) {
2724  dsc_filespec = dsc_development_filespec;
2725  dsc_uuid = dsc_development_uuid;
2726  }
2727  }
2728  if (!dsc_uuid.IsValid() &&
2729  FileSystem::Instance().Exists(dsc_nondevelopment_filespec)) {
2730  UUID dsc_nondevelopment_uuid = GetSharedCacheUUID(
2731  dsc_nondevelopment_filespec, byte_order, addr_byte_size);
2732  if (dsc_nondevelopment_uuid.IsValid() &&
2733  dsc_nondevelopment_uuid == process_shared_cache_uuid) {
2734  dsc_filespec = dsc_nondevelopment_filespec;
2735  dsc_uuid = dsc_nondevelopment_uuid;
2736  }
2737  }
2738  }
2739 
2740  // Failing a UUID match, prefer the development dyld_shared cache if both
2741  // are present.
2742  if (!FileSystem::Instance().Exists(dsc_filespec)) {
2743  if (FileSystem::Instance().Exists(dsc_development_filespec)) {
2744  dsc_filespec = dsc_development_filespec;
2745  } else {
2746  dsc_filespec = dsc_nondevelopment_filespec;
2747  }
2748  }
2749 
2750  /* The dyld_cache_header has a pointer to the
2751  dyld_cache_local_symbols_info structure (localSymbolsOffset).
2752  The dyld_cache_local_symbols_info structure gives us three things:
2753  1. The start and count of the nlist records in the dyld_shared_cache
2754  file
2755  2. The start and size of the strings for these nlist records
2756  3. The start and count of dyld_cache_local_symbols_entry entries
2757 
2758  There is one dyld_cache_local_symbols_entry per dylib/framework in the
2759  dyld shared cache.
2760  The "dylibOffset" field is the Mach-O header of this dylib/framework in
2761  the dyld shared cache.
2762  The dyld_cache_local_symbols_entry also lists the start of this
2763  dylib/framework's nlist records
2764  and the count of how many nlist records there are for this
2765  dylib/framework.
2766  */
2767 
2768  // Process the dyld shared cache header to find the unmapped symbols
2769 
2770  DataBufferSP dsc_data_sp = MapFileData(
2771  dsc_filespec, sizeof(struct lldb_copy_dyld_cache_header_v1), 0);
2772  if (!dsc_uuid.IsValid()) {
2773  dsc_uuid = GetSharedCacheUUID(dsc_filespec, byte_order, addr_byte_size);
2774  }
2775  if (dsc_data_sp) {
2776  DataExtractor dsc_header_data(dsc_data_sp, byte_order, addr_byte_size);
2777 
2778  bool uuid_match = true;
2779  if (dsc_uuid.IsValid() && process) {
2780  if (process_shared_cache_uuid.IsValid() &&
2781  dsc_uuid != process_shared_cache_uuid) {
2782  // The on-disk dyld_shared_cache file is not the same as the one in
2783  // this process' memory, don't use it.
2784  uuid_match = false;
2785  ModuleSP module_sp(GetModule());
2786  if (module_sp)
2787  module_sp->ReportWarning("process shared cache does not match "
2788  "on-disk dyld_shared_cache file, some "
2789  "symbol names will be missing.");
2790  }
2791  }
2792 
2793  offset = offsetof(struct lldb_copy_dyld_cache_header_v1, mappingOffset);
2794 
2795  uint32_t mappingOffset = dsc_header_data.GetU32(&offset);
2796 
2797  // If the mappingOffset points to a location inside the header, we've
2798  // opened an old dyld shared cache, and should not proceed further.
2799  if (uuid_match &&
2800  mappingOffset >= sizeof(struct lldb_copy_dyld_cache_header_v1)) {
2801 
2802  DataBufferSP dsc_mapping_info_data_sp = MapFileData(
2803  dsc_filespec, sizeof(struct lldb_copy_dyld_cache_mapping_info),
2804  mappingOffset);
2805 
2806  DataExtractor dsc_mapping_info_data(dsc_mapping_info_data_sp,
2807  byte_order, addr_byte_size);
2808  offset = 0;
2809 
2810  // The File addresses (from the in-memory Mach-O load commands) for
2811  // the shared libraries in the shared library cache need to be
2812  // adjusted by an offset to match up with the dylibOffset identifying
2813  // field in the dyld_cache_local_symbols_entry's. This offset is
2814  // recorded in mapping_offset_value.
2815  const uint64_t mapping_offset_value =
2816  dsc_mapping_info_data.GetU64(&offset);
2817 
2818  offset =
2819  offsetof(struct lldb_copy_dyld_cache_header_v1, localSymbolsOffset);
2820  uint64_t localSymbolsOffset = dsc_header_data.GetU64(&offset);
2821  uint64_t localSymbolsSize = dsc_header_data.GetU64(&offset);
2822 
2823  if (localSymbolsOffset && localSymbolsSize) {
2824  // Map the local symbols
2825  DataBufferSP dsc_local_symbols_data_sp =
2826  MapFileData(dsc_filespec, localSymbolsSize, localSymbolsOffset);
2827 
2828  if (dsc_local_symbols_data_sp) {
2829  DataExtractor dsc_local_symbols_data(dsc_local_symbols_data_sp,
2830  byte_order, addr_byte_size);
2831 
2832  offset = 0;
2833 
2834  typedef llvm::DenseMap<ConstString, uint16_t> UndefinedNameToDescMap;
2835  typedef llvm::DenseMap<uint32_t, ConstString> SymbolIndexToName;
2836  UndefinedNameToDescMap undefined_name_to_desc;
2837  SymbolIndexToName reexport_shlib_needs_fixup;
2838 
2839  // Read the local_symbols_infos struct in one shot
2840  struct lldb_copy_dyld_cache_local_symbols_info local_symbols_info;
2841  dsc_local_symbols_data.GetU32(&offset,
2842  &local_symbols_info.nlistOffset, 6);
2843 
2844  SectionSP text_section_sp(
2845  section_list->FindSectionByName(GetSegmentNameTEXT()));
2846 
2847  uint32_t header_file_offset =
2848  (text_section_sp->GetFileAddress() - mapping_offset_value);
2849 
2850  offset = local_symbols_info.entriesOffset;
2851  for (uint32_t entry_index = 0;
2852  entry_index < local_symbols_info.entriesCount; entry_index++) {
2854  local_symbols_entry;
2855  local_symbols_entry.dylibOffset =
2856  dsc_local_symbols_data.GetU32(&offset);
2857  local_symbols_entry.nlistStartIndex =
2858  dsc_local_symbols_data.GetU32(&offset);
2859  local_symbols_entry.nlistCount =
2860  dsc_local_symbols_data.GetU32(&offset);
2861 
2862  if (header_file_offset == local_symbols_entry.dylibOffset) {
2863  unmapped_local_symbols_found = local_symbols_entry.nlistCount;
2864 
2865  // The normal nlist code cannot correctly size the Symbols
2866  // array, we need to allocate it here.
2867  sym = symtab.Resize(
2868  symtab_load_command.nsyms + m_dysymtab.nindirectsyms +
2869  unmapped_local_symbols_found - m_dysymtab.nlocalsym);
2870  num_syms = symtab.GetNumSymbols();
2871 
2872  nlist_data_offset =
2873  local_symbols_info.nlistOffset +
2874  (nlist_byte_size * local_symbols_entry.nlistStartIndex);
2875  uint32_t string_table_offset = local_symbols_info.stringsOffset;
2876 
2877  for (uint32_t nlist_index = 0;
2878  nlist_index < local_symbols_entry.nlistCount;
2879  nlist_index++) {
2880  /////////////////////////////
2881  {
2882  llvm::Optional<struct nlist_64> nlist_maybe =
2883  ParseNList(dsc_local_symbols_data, nlist_data_offset,
2884  nlist_byte_size);
2885  if (!nlist_maybe)
2886  break;
2887  struct nlist_64 nlist = *nlist_maybe;
2888 
2890  const char *symbol_name = dsc_local_symbols_data.PeekCStr(
2891  string_table_offset + nlist.n_strx);
2892 
2893  if (symbol_name == NULL) {
2894  // No symbol should be NULL, even the symbols with no
2895  // string values should have an offset zero which
2896  // points to an empty C-string
2897  Host::SystemLog(
2898  Host::eSystemLogError,
2899  "error: DSC unmapped local symbol[%u] has invalid "
2900  "string table offset 0x%x in %s, ignoring symbol\n",
2901  entry_index, nlist.n_strx,
2902  module_sp->GetFileSpec().GetPath().c_str());
2903  continue;
2904  }
2905  if (symbol_name[0] == '\0')
2906  symbol_name = NULL;
2907 
2908  const char *symbol_name_non_abi_mangled = NULL;
2909 
2910  SectionSP symbol_section;
2911  uint32_t symbol_byte_size = 0;
2912  bool add_nlist = true;
2913  bool is_debug = ((nlist.n_type & N_STAB) != 0);
2914  bool demangled_is_synthesized = false;
2915  bool is_gsym = false;
2916  bool set_value = true;
2917 
2918  assert(sym_idx < num_syms);
2919 
2920  sym[sym_idx].SetDebug(is_debug);
2921 
2922  if (is_debug) {
2923  switch (nlist.n_type) {
2924  case N_GSYM:
2925  // global symbol: name,,NO_SECT,type,0
2926  // Sometimes the N_GSYM value contains the address.
2927 
2928  // FIXME: In the .o files, we have a GSYM and a debug
2929  // symbol for all the ObjC data. They
2930  // have the same address, but we want to ensure that
2931  // we always find only the real symbol, 'cause we
2932  // don't currently correctly attribute the
2933  // GSYM one to the ObjCClass/Ivar/MetaClass
2934  // symbol type. This is a temporary hack to make
2935  // sure the ObjectiveC symbols get treated correctly.
2936  // To do this right, we should coalesce all the GSYM
2937  // & global symbols that have the same address.
2938 
2939  is_gsym = true;
2940  sym[sym_idx].SetExternal(true);
2941 
2942  if (symbol_name && symbol_name[0] == '_' &&
2943  symbol_name[1] == 'O') {
2944  llvm::StringRef symbol_name_ref(symbol_name);
2945  if (symbol_name_ref.startswith(
2946  g_objc_v2_prefix_class)) {
2947  symbol_name_non_abi_mangled = symbol_name + 1;
2948  symbol_name =
2949  symbol_name + g_objc_v2_prefix_class.size();
2950  type = eSymbolTypeObjCClass;
2951  demangled_is_synthesized = true;
2952 
2953  } else if (symbol_name_ref.startswith(
2954  g_objc_v2_prefix_metaclass)) {
2955  symbol_name_non_abi_mangled = symbol_name + 1;
2956  symbol_name =
2957  symbol_name + g_objc_v2_prefix_metaclass.size();
2958  type = eSymbolTypeObjCMetaClass;
2959  demangled_is_synthesized = true;
2960  } else if (symbol_name_ref.startswith(
2961  g_objc_v2_prefix_ivar)) {
2962  symbol_name_non_abi_mangled = symbol_name + 1;
2963  symbol_name =
2964  symbol_name + g_objc_v2_prefix_ivar.size();
2965  type = eSymbolTypeObjCIVar;
2966  demangled_is_synthesized = true;
2967  }
2968  } else {
2969  if (nlist.n_value != 0)
2970  symbol_section = section_info.GetSection(
2971  nlist.n_sect, nlist.n_value);
2972  type = eSymbolTypeData;
2973  }
2974  break;
2975 
2976  case N_FNAME:
2977  // procedure name (f77 kludge): name,,NO_SECT,0,0
2978  type = eSymbolTypeCompiler;
2979  break;
2980 
2981  case N_FUN:
2982  // procedure: name,,n_sect,linenumber,address
2983  if (symbol_name) {
2984  type = eSymbolTypeCode;
2985  symbol_section = section_info.GetSection(
2986  nlist.n_sect, nlist.n_value);
2987 
2988  N_FUN_addr_to_sym_idx.insert(
2989  std::make_pair(nlist.n_value, sym_idx));
2990  // We use the current number of symbols in the
2991  // symbol table in lieu of using nlist_idx in case
2992  // we ever start trimming entries out
2993  N_FUN_indexes.push_back(sym_idx);
2994  } else {
2995  type = eSymbolTypeCompiler;
2996 
2997  if (!N_FUN_indexes.empty()) {
2998  // Copy the size of the function into the
2999  // original
3000  // STAB entry so we don't have
3001  // to hunt for it later
3002  symtab.SymbolAtIndex(N_FUN_indexes.back())
3003  ->SetByteSize(nlist.n_value);
3004  N_FUN_indexes.pop_back();
3005  // We don't really need the end function STAB as
3006  // it contains the size which we already placed
3007  // with the original symbol, so don't add it if
3008  // we want a minimal symbol table
3009  add_nlist = false;
3010  }
3011  }
3012  break;
3013 
3014  case N_STSYM:
3015  // static symbol: name,,n_sect,type,address
3016  N_STSYM_addr_to_sym_idx.insert(
3017  std::make_pair(nlist.n_value, sym_idx));
3018  symbol_section = section_info.GetSection(nlist.n_sect,
3019  nlist.n_value);
3020  if (symbol_name && symbol_name[0]) {
3021  type = ObjectFile::GetSymbolTypeFromName(
3022  symbol_name + 1, eSymbolTypeData);
3023  }
3024  break;
3025 
3026  case N_LCSYM:
3027  // .lcomm symbol: name,,n_sect,type,address
3028  symbol_section = section_info.GetSection(nlist.n_sect,
3029  nlist.n_value);
3030  type = eSymbolTypeCommonBlock;
3031  break;
3032 
3033  case N_BNSYM:
3034  // We use the current number of symbols in the symbol
3035  // table in lieu of using nlist_idx in case we ever
3036  // start trimming entries out. Skip these if we want
3037  // minimal symbol tables
3038  add_nlist = false;
3039  break;
3040 
3041  case N_ENSYM:
3042  // Set the size of the N_BNSYM to the terminating
3043  // index of this N_ENSYM so that we can always skip
3044  // the entire symbol if we need to navigate more
3045  // quickly at the source level when parsing STABS
3046  // Skip these if we want minimal symbol tables
3047  add_nlist = false;
3048  break;
3049 
3050  case N_OPT:
3051  // emitted with gcc2_compiled and in gcc source
3052  type = eSymbolTypeCompiler;
3053  break;
3054 
3055  case N_RSYM:
3056  // register sym: name,,NO_SECT,type,register
3057  type = eSymbolTypeVariable;
3058  break;
3059 
3060  case N_SLINE:
3061  // src line: 0,,n_sect,linenumber,address
3062  symbol_section = section_info.GetSection(nlist.n_sect,
3063  nlist.n_value);
3064  type = eSymbolTypeLineEntry;
3065  break;
3066 
3067  case N_SSYM:
3068  // structure elt: name,,NO_SECT,type,struct_offset
3069  type = eSymbolTypeVariableType;
3070  break;
3071 
3072  case N_SO:
3073  // source file name
3074  type = eSymbolTypeSourceFile;
3075  if (symbol_name == NULL) {
3076  add_nlist = false;
3077  if (N_SO_index != UINT32_MAX) {
3078  // Set the size of the N_SO to the terminating
3079  // index of this N_SO so that we can always skip
3080  // the entire N_SO if we need to navigate more
3081  // quickly at the source level when parsing STABS
3082  symbol_ptr = symtab.SymbolAtIndex(N_SO_index);
3083  symbol_ptr->SetByteSize(sym_idx);
3084  symbol_ptr->SetSizeIsSibling(true);
3085  }
3086  N_NSYM_indexes.clear();
3087  N_INCL_indexes.clear();
3088  N_BRAC_indexes.clear();
3089  N_COMM_indexes.clear();
3090  N_FUN_indexes.clear();
3091  N_SO_index = UINT32_MAX;
3092  } else {
3093  // We use the current number of symbols in the
3094  // symbol table in lieu of using nlist_idx in case
3095  // we ever start trimming entries out
3096  const bool N_SO_has_full_path = symbol_name[0] == '/';
3097  if (N_SO_has_full_path) {
3098  if ((N_SO_index == sym_idx - 1) &&
3099  ((sym_idx - 1) < num_syms)) {
3100  // We have two consecutive N_SO entries where
3101  // the first contains a directory and the
3102  // second contains a full path.
3103  sym[sym_idx - 1].GetMangled().SetValue(
3104  ConstString(symbol_name), false);
3105  m_nlist_idx_to_sym_idx[nlist_idx] = sym_idx - 1;
3106  add_nlist = false;
3107  } else {
3108  // This is the first entry in a N_SO that
3109  // contains a directory or
3110  // a full path to the source file
3111  N_SO_index = sym_idx;
3112  }
3113  } else if ((N_SO_index == sym_idx - 1) &&
3114  ((sym_idx - 1) < num_syms)) {
3115  // This is usually the second N_SO entry that
3116  // contains just the filename, so here we combine
3117  // it with the first one if we are minimizing the
3118  // symbol table
3119  const char *so_path = sym[sym_idx - 1]
3120  .GetMangled()
3121  .GetDemangledName()
3122  .AsCString();
3123  if (so_path && so_path[0]) {
3124  std::string full_so_path(so_path);
3125  const size_t double_slash_pos =
3126  full_so_path.find("//");
3127  if (double_slash_pos != std::string::npos) {
3128  // The linker has been generating bad N_SO
3129  // entries with doubled up paths
3130  // in the format "%s%s" where the first
3131  // string is the DW_AT_comp_dir, and the
3132  // second is the directory for the source
3133  // file so you end up with a path that looks
3134  // like "/tmp/src//tmp/src/"
3135  FileSpec so_dir(so_path);
3136  if (!FileSystem::Instance().Exists(so_dir)) {
3137  so_dir.SetFile(
3138  &full_so_path[double_slash_pos + 1],
3139  FileSpec::Style::native);
3140  if (FileSystem::Instance().Exists(so_dir)) {
3141  // Trim off the incorrect path
3142  full_so_path.erase(0, double_slash_pos + 1);
3143  }
3144  }
3145  }
3146  if (*full_so_path.rbegin() != '/')
3147  full_so_path += '/';
3148  full_so_path += symbol_name;
3149  sym[sym_idx - 1].GetMangled().SetValue(
3150  ConstString(full_so_path.c_str()), false);
3151  add_nlist = false;
3152  m_nlist_idx_to_sym_idx[nlist_idx] = sym_idx - 1;
3153  }
3154  } else {
3155  // This could be a relative path to a N_SO
3156  N_SO_index = sym_idx;
3157  }
3158  }
3159  break;
3160 
3161  case N_OSO:
3162  // object file name: name,,0,0,st_mtime
3163  type = eSymbolTypeObjectFile;
3164  break;
3165 
3166  case N_LSYM:
3167  // local sym: name,,NO_SECT,type,offset
3168  type = eSymbolTypeLocal;
3169  break;
3170 
3171  // INCL scopes
3172  case N_BINCL:
3173  // include file beginning: name,,NO_SECT,0,sum We use
3174  // the current number of symbols in the symbol table
3175  // in lieu of using nlist_idx in case we ever start
3176  // trimming entries out
3177  N_INCL_indexes.push_back(sym_idx);
3178  type = eSymbolTypeScopeBegin;
3179  break;
3180 
3181  case N_EINCL:
3182  // include file end: name,,NO_SECT,0,0
3183  // Set the size of the N_BINCL to the terminating
3184  // index of this N_EINCL so that we can always skip
3185  // the entire symbol if we need to navigate more
3186  // quickly at the source level when parsing STABS
3187  if (!N_INCL_indexes.empty()) {
3188  symbol_ptr =
3189  symtab.SymbolAtIndex(N_INCL_indexes.back());
3190  symbol_ptr->SetByteSize(sym_idx + 1);
3191  symbol_ptr->SetSizeIsSibling(true);
3192  N_INCL_indexes.pop_back();
3193  }
3194  type = eSymbolTypeScopeEnd;
3195  break;
3196 
3197  case N_SOL:
3198  // #included file name: name,,n_sect,0,address
3199  type = eSymbolTypeHeaderFile;
3200 
3201  // We currently don't use the header files on darwin
3202  add_nlist = false;
3203  break;
3204 
3205  case N_PARAMS:
3206  // compiler parameters: name,,NO_SECT,0,0
3207  type = eSymbolTypeCompiler;
3208  break;
3209 
3210  case N_VERSION:
3211  // compiler version: name,,NO_SECT,0,0
3212  type = eSymbolTypeCompiler;
3213  break;
3214 
3215  case N_OLEVEL:
3216  // compiler -O level: name,,NO_SECT,0,0
3217  type = eSymbolTypeCompiler;
3218  break;
3219 
3220  case N_PSYM:
3221  // parameter: name,,NO_SECT,type,offset
3222  type = eSymbolTypeVariable;
3223  break;
3224 
3225  case N_ENTRY:
3226  // alternate entry: name,,n_sect,linenumber,address
3227  symbol_section = section_info.GetSection(nlist.n_sect,
3228  nlist.n_value);
3229  type = eSymbolTypeLineEntry;
3230  break;
3231 
3232  // Left and Right Braces
3233  case N_LBRAC:
3234  // left bracket: 0,,NO_SECT,nesting level,address We
3235  // use the current number of symbols in the symbol
3236  // table in lieu of using nlist_idx in case we ever
3237  // start trimming entries out
3238  symbol_section = section_info.GetSection(nlist.n_sect,
3239  nlist.n_value);
3240  N_BRAC_indexes.push_back(sym_idx);
3241  type = eSymbolTypeScopeBegin;
3242  break;
3243 
3244  case N_RBRAC:
3245  // right bracket: 0,,NO_SECT,nesting level,address
3246  // Set the size of the N_LBRAC to the terminating
3247  // index of this N_RBRAC so that we can always skip
3248  // the entire symbol if we need to navigate more
3249  // quickly at the source level when parsing STABS
3250  symbol_section = section_info.GetSection(nlist.n_sect,
3251  nlist.n_value);
3252  if (!N_BRAC_indexes.empty()) {
3253  symbol_ptr =
3254  symtab.SymbolAtIndex(N_BRAC_indexes.back());
3255  symbol_ptr->SetByteSize(sym_idx + 1);
3256  symbol_ptr->SetSizeIsSibling(true);
3257  N_BRAC_indexes.pop_back();
3258  }
3259  type = eSymbolTypeScopeEnd;
3260  break;
3261 
3262  case N_EXCL:
3263  // deleted include file: name,,NO_SECT,0,sum
3264  type = eSymbolTypeHeaderFile;
3265  break;
3266 
3267  // COMM scopes
3268  case N_BCOMM:
3269  // begin common: name,,NO_SECT,0,0
3270  // We use the current number of symbols in the symbol
3271  // table in lieu of using nlist_idx in case we ever
3272  // start trimming entries out
3273  type = eSymbolTypeScopeBegin;
3274  N_COMM_indexes.push_back(sym_idx);
3275  break;
3276 
3277  case N_ECOML:
3278  // end common (local name): 0,,n_sect,0,address
3279  symbol_section = section_info.GetSection(nlist.n_sect,
3280  nlist.n_value);
3281  // Fall through
3282 
3283  case N_ECOMM:
3284  // end common: name,,n_sect,0,0
3285  // Set the size of the N_BCOMM to the terminating
3286  // index of this N_ECOMM/N_ECOML so that we can
3287  // always skip the entire symbol if we need to
3288  // navigate more quickly at the source level when
3289  // parsing STABS
3290  if (!N_COMM_indexes.empty()) {
3291  symbol_ptr =
3292  symtab.SymbolAtIndex(N_COMM_indexes.back());
3293  symbol_ptr->SetByteSize(sym_idx + 1);
3294  symbol_ptr->SetSizeIsSibling(true);
3295  N_COMM_indexes.pop_back();
3296  }
3297  type = eSymbolTypeScopeEnd;
3298  break;
3299 
3300  case N_LENG:
3301  // second stab entry with length information
3302  type = eSymbolTypeAdditional;
3303  break;
3304 
3305  default:
3306  break;
3307  }
3308  } else {
3309  // uint8_t n_pext = N_PEXT & nlist.n_type;
3310  uint8_t n_type = N_TYPE & nlist.n_type;
3311  sym[sym_idx].SetExternal((N_EXT & nlist.n_type) != 0);
3312 
3313  switch (n_type) {
3314  case N_INDR: {
3315  const char *reexport_name_cstr =
3316  strtab_data.PeekCStr(nlist.n_value);
3317  if (reexport_name_cstr && reexport_name_cstr[0]) {
3318  type = eSymbolTypeReExported;
3319  ConstString reexport_name(
3320  reexport_name_cstr +
3321  ((reexport_name_cstr[0] == '_') ? 1 : 0));
3322  sym[sym_idx].SetReExportedSymbolName(reexport_name);
3323  set_value = false;
3324  reexport_shlib_needs_fixup[sym_idx] = reexport_name;
3325  indirect_symbol_names.insert(ConstString(
3326  symbol_name + ((symbol_name[0] == '_') ? 1 : 0)));
3327  } else
3328  type = eSymbolTypeUndefined;
3329  } break;
3330 
3331  case N_UNDF:
3332  if (symbol_name && symbol_name[0]) {
3333  ConstString undefined_name(
3334  symbol_name + ((symbol_name[0] == '_') ? 1 : 0));
3335  undefined_name_to_desc[undefined_name] = nlist.n_desc;
3336  }
3337  // Fall through
3338  case N_PBUD:
3339  type = eSymbolTypeUndefined;
3340  break;
3341 
3342  case N_ABS:
3343  type = eSymbolTypeAbsolute;
3344  break;
3345 
3346  case N_SECT: {
3347  symbol_section = section_info.GetSection(nlist.n_sect,
3348  nlist.n_value);
3349 
3350  if (symbol_section == NULL) {
3351  // TODO: warn about this?
3352  add_nlist = false;
3353  break;
3354  }
3355 
3356  if (TEXT_eh_frame_sectID == nlist.n_sect) {
3357  type = eSymbolTypeException;
3358  } else {
3359  uint32_t section_type =
3360  symbol_section->Get() & SECTION_TYPE;
3361 
3362  switch (section_type) {
3363  case S_CSTRING_LITERALS:
3364  type = eSymbolTypeData;
3365  break; // section with only literal C strings
3366  case S_4BYTE_LITERALS:
3367  type = eSymbolTypeData;
3368  break; // section with only 4 byte literals
3369  case S_8BYTE_LITERALS:
3370  type = eSymbolTypeData;
3371  break; // section with only 8 byte literals
3372  case S_LITERAL_POINTERS:
3373  type = eSymbolTypeTrampoline;
3374  break; // section with only pointers to literals
3375  case S_NON_LAZY_SYMBOL_POINTERS:
3376  type = eSymbolTypeTrampoline;
3377  break; // section with only non-lazy symbol
3378  // pointers
3379  case S_LAZY_SYMBOL_POINTERS:
3380  type = eSymbolTypeTrampoline;
3381  break; // section with only lazy symbol pointers
3382  case S_SYMBOL_STUBS:
3383  type = eSymbolTypeTrampoline;
3384  break; // section with only symbol stubs, byte
3385  // size of stub in the reserved2 field
3386  case S_MOD_INIT_FUNC_POINTERS:
3387  type = eSymbolTypeCode;
3388  break; // section with only function pointers for
3389  // initialization
3390  case S_MOD_TERM_FUNC_POINTERS:
3391  type = eSymbolTypeCode;
3392  break; // section with only function pointers for
3393  // termination
3394  case S_INTERPOSING:
3395  type = eSymbolTypeTrampoline;
3396  break; // section with only pairs of function
3397  // pointers for interposing
3398  case S_16BYTE_LITERALS:
3399  type = eSymbolTypeData;
3400  break; // section with only 16 byte literals
3401  case S_DTRACE_DOF:
3403  break;
3404  case S_LAZY_DYLIB_SYMBOL_POINTERS:
3405  type = eSymbolTypeTrampoline;
3406  break;
3407  default:
3408  switch (symbol_section->GetType()) {
3410  type = eSymbolTypeCode;
3411  break;
3412  case eSectionTypeData:
3413  case eSectionTypeDataCString: // Inlined C string
3414  // data
3415  case eSectionTypeDataCStringPointers: // Pointers
3416  // to C
3417  // string
3418  // data
3419  case eSectionTypeDataSymbolAddress: // Address of
3420  // a symbol in
3421  // the symbol
3422  // table
3423  case eSectionTypeData4:
3424  case eSectionTypeData8:
3425  case eSectionTypeData16:
3426  type = eSymbolTypeData;
3427  break;
3428  default:
3429  break;
3430  }
3431  break;
3432  }
3433 
3434  if (type == eSymbolTypeInvalid) {
3435  const char *symbol_sect_name =
3436  symbol_section->GetName().AsCString();
3437  if (symbol_section->IsDescendant(
3438  text_section_sp.get())) {
3439  if (symbol_section->IsClear(
3440  S_ATTR_PURE_INSTRUCTIONS |
3441  S_ATTR_SELF_MODIFYING_CODE |
3442  S_ATTR_SOME_INSTRUCTIONS))
3443  type = eSymbolTypeData;
3444  else
3445  type = eSymbolTypeCode;
3446  } else if (symbol_section->IsDescendant(
3447  data_section_sp.get()) ||
3448  symbol_section->IsDescendant(
3449  data_dirty_section_sp.get()) ||
3450  symbol_section->IsDescendant(
3451  data_const_section_sp.get())) {
3452  if (symbol_sect_name &&
3453  ::strstr(symbol_sect_name, "__objc") ==
3454  symbol_sect_name) {
3455  type = eSymbolTypeRuntime;
3456 
3457  if (symbol_name) {
3458  llvm::StringRef symbol_name_ref(symbol_name);
3459  if (symbol_name_ref.startswith("_OBJC_")) {
3460  llvm::StringRef
3461  g_objc_v2_prefix_class(
3462  "_OBJC_CLASS_$_");
3463  llvm::StringRef
3464  g_objc_v2_prefix_metaclass(
3465  "_OBJC_METACLASS_$_");
3466  llvm::StringRef
3467  g_objc_v2_prefix_ivar("_OBJC_IVAR_$_");
3468  if (symbol_name_ref.startswith(
3469  g_objc_v2_prefix_class)) {
3470  symbol_name_non_abi_mangled =
3471  symbol_name + 1;
3472  symbol_name =
3473  symbol_name +
3474  g_objc_v2_prefix_class.size();
3475  type = eSymbolTypeObjCClass;
3476  demangled_is_synthesized = true;
3477  } else if (
3478  symbol_name_ref.startswith(
3479  g_objc_v2_prefix_metaclass)) {
3480  symbol_name_non_abi_mangled =
3481  symbol_name + 1;
3482  symbol_name =
3483  symbol_name +
3484  g_objc_v2_prefix_metaclass.size();
3485  type = eSymbolTypeObjCMetaClass;
3486  demangled_is_synthesized = true;
3487  } else if (symbol_name_ref.startswith(
3488  g_objc_v2_prefix_ivar)) {
3489  symbol_name_non_abi_mangled =
3490  symbol_name + 1;
3491  symbol_name =
3492  symbol_name +
3493  g_objc_v2_prefix_ivar.size();
3494  type = eSymbolTypeObjCIVar;
3495  demangled_is_synthesized = true;
3496  }
3497  }
3498  }
3499  } else if (symbol_sect_name &&
3500  ::strstr(symbol_sect_name,
3501  "__gcc_except_tab") ==
3502  symbol_sect_name) {
3503  type = eSymbolTypeException;
3504  } else {
3505  type = eSymbolTypeData;
3506  }
3507  } else if (symbol_sect_name &&
3508  ::strstr(symbol_sect_name, "__IMPORT") ==
3509  symbol_sect_name) {
3510  type = eSymbolTypeTrampoline;
3511  } else if (symbol_section->IsDescendant(
3512  objc_section_sp.get())) {
3513  type = eSymbolTypeRuntime;
3514  if (symbol_name && symbol_name[0] == '.') {
3515  llvm::StringRef symbol_name_ref(symbol_name);
3516  llvm::StringRef
3517  g_objc_v1_prefix_class(".objc_class_name_");
3518  if (symbol_name_ref.startswith(
3519  g_objc_v1_prefix_class)) {
3520  symbol_name_non_abi_mangled = symbol_name;
3521  symbol_name = symbol_name +
3522  g_objc_v1_prefix_class.size();
3523  type = eSymbolTypeObjCClass;
3524  demangled_is_synthesized = true;
3525  }
3526  }
3527  }
3528  }
3529  }
3530  } break;
3531  }
3532  }
3533 
3534  if (add_nlist) {
3535  uint64_t symbol_value = nlist.n_value;
3536  if (symbol_name_non_abi_mangled) {
3537  sym[sym_idx].GetMangled().SetMangledName(
3538  ConstString(symbol_name_non_abi_mangled));
3539  sym[sym_idx].GetMangled().SetDemangledName(
3540  ConstString(symbol_name));
3541  } else {
3542  bool symbol_name_is_mangled = false;
3543 
3544  if (symbol_name && symbol_name[0] == '_') {
3545  symbol_name_is_mangled = symbol_name[1] == '_';
3546  symbol_name++; // Skip the leading underscore
3547  }
3548 
3549  if (symbol_name) {
3550  ConstString const_symbol_name(symbol_name);
3551  sym[sym_idx].GetMangled().SetValue(
3552  const_symbol_name, symbol_name_is_mangled);
3553  if (is_gsym && is_debug) {
3554  const char *gsym_name =
3555  sym[sym_idx]
3556  .GetMangled()
3557  .GetName(Mangled::ePreferMangled)
3558  .GetCString();
3559  if (gsym_name)
3560  N_GSYM_name_to_sym_idx[gsym_name] = sym_idx;
3561  }
3562  }
3563  }
3564  if (symbol_section) {
3565  const addr_t section_file_addr =
3566  symbol_section->GetFileAddress();
3567  if (symbol_byte_size == 0 &&
3568  function_starts_count > 0) {
3569  addr_t symbol_lookup_file_addr = nlist.n_value;
3570  // Do an exact address match for non-ARM addresses,
3571  // else get the closest since the symbol might be a
3572  // thumb symbol which has an address with bit zero
3573  // set
3574  FunctionStarts::Entry *func_start_entry =
3575  function_starts.FindEntry(symbol_lookup_file_addr,
3576  !is_arm);
3577  if (is_arm && func_start_entry) {
3578  // Verify that the function start address is the
3579  // symbol address (ARM) or the symbol address + 1
3580  // (thumb)
3581  if (func_start_entry->addr !=
3582  symbol_lookup_file_addr &&
3583  func_start_entry->addr !=
3584  (symbol_lookup_file_addr + 1)) {
3585  // Not the right entry, NULL it out...
3586  func_start_entry = NULL;
3587  }
3588  }
3589  if (func_start_entry) {
3590  func_start_entry->data = true;
3591 
3592  addr_t symbol_file_addr = func_start_entry->addr;
3593  uint32_t symbol_flags = 0;
3594  if (is_arm) {
3595  if (symbol_file_addr & 1)
3596  symbol_flags = MACHO_NLIST_ARM_SYMBOL_IS_THUMB;
3597  symbol_file_addr &= THUMB_ADDRESS_BIT_MASK;
3598  }
3599 
3600  const FunctionStarts::Entry *next_func_start_entry =
3601  function_starts.FindNextEntry(func_start_entry);
3602  const addr_t section_end_file_addr =
3603  section_file_addr +
3604  symbol_section->GetByteSize();
3605  if (next_func_start_entry) {
3606  addr_t next_symbol_file_addr =
3607  next_func_start_entry->addr;
3608  // Be sure to clear the Thumb address bit when
3609  // we calculate the size from the current and
3610  // next address
3611  if (is_arm)
3612  next_symbol_file_addr &= THUMB_ADDRESS_BIT_MASK;
3613  symbol_byte_size = std::min<lldb::addr_t>(
3614  next_symbol_file_addr - symbol_file_addr,
3615  section_end_file_addr - symbol_file_addr);
3616  } else {
3617  symbol_byte_size =
3618  section_end_file_addr - symbol_file_addr;
3619  }
3620  }
3621  }
3622  symbol_value -= section_file_addr;
3623  }
3624 
3625  if (is_debug == false) {
3626  if (type == eSymbolTypeCode) {
3627  // See if we can find a N_FUN entry for any code
3628  // symbols. If we do find a match, and the name
3629  // matches, then we can merge the two into just the
3630  // function symbol to avoid duplicate entries in
3631  // the symbol table
3632  auto range =
3633  N_FUN_addr_to_sym_idx.equal_range(nlist.n_value);
3634  if (range.first != range.second) {
3635  bool found_it = false;
3636  for (auto pos = range.first; pos != range.second;
3637  ++pos) {
3638  if (sym[sym_idx].GetMangled().GetName(
3639  Mangled::ePreferMangled) ==
3640  sym[pos->second].GetMangled().GetName(
3641  Mangled::ePreferMangled)) {
3642  m_nlist_idx_to_sym_idx[nlist_idx] = pos->second;
3643  // We just need the flags from the linker
3644  // symbol, so put these flags
3645  // into the N_FUN flags to avoid duplicate
3646  // symbols in the symbol table
3647  sym[pos->second].SetExternal(
3648  sym[sym_idx].IsExternal());
3649  sym[pos->second].SetFlags(nlist.n_type << 16 |
3650  nlist.n_desc);
3651  if (resolver_addresses.find(nlist.n_value) !=
3652  resolver_addresses.end())
3653  sym[pos->second].SetType(eSymbolTypeResolver);
3654  sym[sym_idx].Clear();
3655  found_it = true;
3656  break;
3657  }
3658  }
3659  if (found_it)
3660  continue;
3661  } else {
3662  if (resolver_addresses.find(nlist.n_value) !=
3663  resolver_addresses.end())
3664  type = eSymbolTypeResolver;
3665  }
3666  } else if (type == eSymbolTypeData ||
3667  type == eSymbolTypeObjCClass ||
3668  type == eSymbolTypeObjCMetaClass ||
3669  type == eSymbolTypeObjCIVar) {
3670  // See if we can find a N_STSYM entry for any data
3671  // symbols. If we do find a match, and the name
3672  // matches, then we can merge the two into just the
3673  // Static symbol to avoid duplicate entries in the
3674  // symbol table
3675  auto range = N_STSYM_addr_to_sym_idx.equal_range(
3676  nlist.n_value);
3677  if (range.first != range.second) {
3678  bool found_it = false;
3679  for (auto pos = range.first; pos != range.second;
3680  ++pos) {
3681  if (sym[sym_idx].GetMangled().GetName(
3682  Mangled::ePreferMangled) ==
3683  sym[pos->second].GetMangled().GetName(
3684  Mangled::ePreferMangled)) {
3685  m_nlist_idx_to_sym_idx[nlist_idx] = pos->second;
3686  // We just need the flags from the linker
3687  // symbol, so put these flags
3688  // into the N_STSYM flags to avoid duplicate
3689  // symbols in the symbol table
3690  sym[pos->second].SetExternal(
3691  sym[sym_idx].IsExternal());
3692  sym[pos->second].SetFlags(nlist.n_type << 16 |
3693  nlist.n_desc);
3694  sym[sym_idx].Clear();
3695  found_it = true;
3696  break;
3697  }
3698  }
3699  if (found_it)
3700  continue;
3701  } else {
3702  const char *gsym_name =
3703  sym[sym_idx]
3704  .GetMangled()
3705  .GetName(Mangled::ePreferMangled)
3706  .GetCString();
3707  if (gsym_name) {
3708  // Combine N_GSYM stab entries with the non
3709  // stab symbol
3710  ConstNameToSymbolIndexMap::const_iterator pos =
3711  N_GSYM_name_to_sym_idx.find(gsym_name);
3712  if (pos != N_GSYM_name_to_sym_idx.end()) {
3713  const uint32_t GSYM_sym_idx = pos->second;
3714  m_nlist_idx_to_sym_idx[nlist_idx] =
3715  GSYM_sym_idx;
3716  // Copy the address, because often the N_GSYM
3717  // address has an invalid address of zero
3718  // when the global is a common symbol
3719  sym[GSYM_sym_idx].GetAddressRef().SetSection(
3720  symbol_section);
3721  sym[GSYM_sym_idx].GetAddressRef().SetOffset(
3722  symbol_value);
3723  add_symbol_addr(sym[GSYM_sym_idx]
3724  .GetAddress()
3725  .GetFileAddress());
3726  // We just need the flags from the linker
3727  // symbol, so put these flags
3728  // into the N_GSYM flags to avoid duplicate
3729  // symbols in the symbol table
3730  sym[GSYM_sym_idx].SetFlags(nlist.n_type << 16 |
3731  nlist.n_desc);
3732  sym[sym_idx].Clear();
3733  continue;
3734  }
3735  }
3736  }
3737  }
3738  }
3739 
3740  sym[sym_idx].SetID(nlist_idx);
3741  sym[sym_idx].SetType(type);
3742  if (set_value) {
3743  sym[sym_idx].GetAddressRef().SetSection(symbol_section);
3744  sym[sym_idx].GetAddressRef().SetOffset(symbol_value);
3745  add_symbol_addr(
3746  sym[sym_idx].GetAddress().GetFileAddress());
3747  }
3748  sym[sym_idx].SetFlags(nlist.n_type << 16 | nlist.n_desc);
3749 
3750  if (symbol_byte_size > 0)
3751  sym[sym_idx].SetByteSize(symbol_byte_size);
3752 
3753  if (demangled_is_synthesized)
3754  sym[sym_idx].SetDemangledNameIsSynthesized(true);
3755  ++sym_idx;
3756  } else {
3757  sym[sym_idx].Clear();
3758  }
3759  }
3760  /////////////////////////////
3761  }
3762  break; // No more entries to consider
3763  }
3764  }
3765 
3766  for (const auto &pos : reexport_shlib_needs_fixup) {
3767  const auto undef_pos = undefined_name_to_desc.find(pos.second);
3768  if (undef_pos != undefined_name_to_desc.end()) {
3769  const uint8_t dylib_ordinal =
3770  llvm::MachO::GET_LIBRARY_ORDINAL(undef_pos->second);
3771  if (dylib_ordinal > 0 && dylib_ordinal < dylib_files.GetSize())
3772  sym[pos.first].SetReExportedSymbolSharedLibrary(
3773  dylib_files.GetFileSpecAtIndex(dylib_ordinal - 1));
3774  }
3775  }
3776  }
3777  }
3778  }
3779  }
3780  }
3781 
3782  // Must reset this in case it was mutated above!
3783  nlist_data_offset = 0;
3784 #endif
3785 
3786  if (nlist_data.GetByteSize() > 0) {
3787 
3788  // If the sym array was not created while parsing the DSC unmapped
3789  // symbols, create it now.
3790  if (sym == nullptr) {
3791  sym =
3792  symtab.Resize(symtab_load_command.nsyms + m_dysymtab.nindirectsyms);
3793  num_syms = symtab.GetNumSymbols();
3794  }
3795 
3796  if (unmapped_local_symbols_found) {
3797  assert(m_dysymtab.ilocalsym == 0);
3798  nlist_data_offset += (m_dysymtab.nlocalsym * nlist_byte_size);
3799  nlist_idx = m_dysymtab.nlocalsym;
3800  } else {
3801  nlist_idx = 0;
3802  }
3803 
3804  typedef llvm::DenseMap<ConstString, uint16_t> UndefinedNameToDescMap;
3805  typedef llvm::DenseMap<uint32_t, ConstString> SymbolIndexToName;
3806  UndefinedNameToDescMap undefined_name_to_desc;
3807  SymbolIndexToName reexport_shlib_needs_fixup;
3808 
3809  // Symtab parsing is a huge mess. Everything is entangled and the code
3810  // requires access to a ridiculous amount of variables. LLDB depends
3811  // heavily on the proper merging of symbols and to get that right we need
3812  // to make sure we have parsed all the debug symbols first. Therefore we
3813  // invoke the lambda twice, once to parse only the debug symbols and then
3814  // once more to parse the remaining symbols.
3815  auto ParseSymbolLambda = [&](struct nlist_64 &nlist, uint32_t nlist_idx,
3816  bool debug_only) {
3817  const bool is_debug = ((nlist.n_type & N_STAB) != 0);
3818  if (is_debug != debug_only)
3819  return true;
3820 
3821  const char *symbol_name_non_abi_mangled = nullptr;
3822  const char *symbol_name = nullptr;
3823 
3824  if (have_strtab_data) {
3825  symbol_name = strtab_data.PeekCStr(nlist.n_strx);
3826 
3827  if (symbol_name == nullptr) {
3828  // No symbol should be NULL, even the symbols with no string values
3829  // should have an offset zero which points to an empty C-string
3830  Host::SystemLog(Host::eSystemLogError,
3831  "error: symbol[%u] has invalid string table offset "
3832  "0x%x in %s, ignoring symbol\n",
3833  nlist_idx, nlist.n_strx,
3834  module_sp->GetFileSpec().GetPath().c_str());
3835  return true;
3836  }
3837  if (symbol_name[0] == '\0')
3838  symbol_name = nullptr;
3839  } else {
3840  const addr_t str_addr = strtab_addr + nlist.n_strx;
3841  Status str_error;
3842  if (process->ReadCStringFromMemory(str_addr, memory_symbol_name,
3843  str_error))
3844  symbol_name = memory_symbol_name.c_str();
3845  }
3846 
3848  SectionSP symbol_section;
3849  lldb::addr_t symbol_byte_size = 0;
3850  bool add_nlist = true;
3851  bool is_gsym = false;
3852  bool demangled_is_synthesized = false;
3853  bool set_value = true;
3854 
3855  assert(sym_idx < num_syms);
3856  sym[sym_idx].SetDebug(is_debug);
3857 
3858  if (is_debug) {
3859  switch (nlist.n_type) {
3860  case N_GSYM:
3861  // global symbol: name,,NO_SECT,type,0
3862  // Sometimes the N_GSYM value contains the address.
3863 
3864  // FIXME: In the .o files, we have a GSYM and a debug symbol for all
3865  // the ObjC data. They
3866  // have the same address, but we want to ensure that we always find
3867  // only the real symbol, 'cause we don't currently correctly
3868  // attribute the GSYM one to the ObjCClass/Ivar/MetaClass symbol
3869  // type. This is a temporary hack to make sure the ObjectiveC
3870  // symbols get treated correctly. To do this right, we should
3871  // coalesce all the GSYM & global symbols that have the same
3872  // address.
3873  is_gsym = true;
3874  sym[sym_idx].SetExternal(true);
3875 
3876  if (symbol_name && symbol_name[0] == '_' && symbol_name[1] == 'O') {
3877  llvm::StringRef symbol_name_ref(symbol_name);
3878  if (symbol_name_ref.startswith(g_objc_v2_prefix_class)) {
3879  symbol_name_non_abi_mangled = symbol_name + 1;
3880  symbol_name = symbol_name + g_objc_v2_prefix_class.size();
3881  type = eSymbolTypeObjCClass;
3882  demangled_is_synthesized = true;
3883 
3884  } else if (symbol_name_ref.startswith(g_objc_v2_prefix_metaclass)) {
3885  symbol_name_non_abi_mangled = symbol_name + 1;
3886  symbol_name = symbol_name + g_objc_v2_prefix_metaclass.size();
3887  type = eSymbolTypeObjCMetaClass;
3888  demangled_is_synthesized = true;
3889  } else if (symbol_name_ref.startswith(g_objc_v2_prefix_ivar)) {
3890  symbol_name_non_abi_mangled = symbol_name + 1;
3891  symbol_name = symbol_name + g_objc_v2_prefix_ivar.size();
3892  type = eSymbolTypeObjCIVar;
3893  demangled_is_synthesized = true;
3894  }
3895  } else {
3896  if (nlist.n_value != 0)
3897  symbol_section =
3898  section_info.GetSection(nlist.n_sect, nlist.n_value);
3899  type = eSymbolTypeData;
3900  }
3901  break;
3902 
3903  case N_FNAME:
3904  // procedure name (f77 kludge): name,,NO_SECT,0,0
3905  type = eSymbolTypeCompiler;
3906  break;
3907 
3908  case N_FUN:
3909  // procedure: name,,n_sect,linenumber,address
3910  if (symbol_name) {
3911  type = eSymbolTypeCode;
3912  symbol_section =
3913  section_info.GetSection(nlist.n_sect, nlist.n_value);
3914 
3915  N_FUN_addr_to_sym_idx.insert(
3916  std::make_pair(nlist.n_value, sym_idx));
3917  // We use the current number of symbols in the symbol table in
3918  // lieu of using nlist_idx in case we ever start trimming entries
3919  // out
3920  N_FUN_indexes.push_back(sym_idx);
3921  } else {
3922  type = eSymbolTypeCompiler;
3923 
3924  if (!N_FUN_indexes.empty()) {
3925  // Copy the size of the function into the original STAB entry
3926  // so we don't have to hunt for it later
3927  symtab.SymbolAtIndex(N_FUN_indexes.back())
3928  ->SetByteSize(nlist.n_value);
3929  N_FUN_indexes.pop_back();
3930  // We don't really need the end function STAB as it contains
3931  // the size which we already placed with the original symbol,
3932  // so don't add it if we want a minimal symbol table
3933  add_nlist = false;
3934  }
3935  }
3936  break;
3937 
3938  case N_STSYM:
3939  // static symbol: name,,n_sect,type,address
3940  N_STSYM_addr_to_sym_idx.insert(
3941  std::make_pair(nlist.n_value, sym_idx));
3942  symbol_section = section_info.GetSection(nlist.n_sect, nlist.n_value);
3943  if (symbol_name && symbol_name[0]) {
3944  type = ObjectFile::GetSymbolTypeFromName(symbol_name + 1,
3945  eSymbolTypeData);
3946  }
3947  break;
3948 
3949  case N_LCSYM:
3950  // .lcomm symbol: name,,n_sect,type,address
3951  symbol_section = section_info.GetSection(nlist.n_sect, nlist.n_value);
3952  type = eSymbolTypeCommonBlock;
3953  break;
3954 
3955  case N_BNSYM:
3956  // We use the current number of symbols in the symbol table in lieu
3957  // of using nlist_idx in case we ever start trimming entries out
3958  // Skip these if we want minimal symbol tables
3959  add_nlist = false;
3960  break;
3961 
3962  case N_ENSYM:
3963  // Set the size of the N_BNSYM to the terminating index of this
3964  // N_ENSYM so that we can always skip the entire symbol if we need
3965  // to navigate more quickly at the source level when parsing STABS
3966  // Skip these if we want minimal symbol tables
3967  add_nlist = false;
3968  break;
3969 
3970  case N_OPT:
3971  // emitted with gcc2_compiled and in gcc source
3972  type = eSymbolTypeCompiler;
3973  break;
3974 
3975  case N_RSYM:
3976  // register sym: name,,NO_SECT,type,register
3977  type = eSymbolTypeVariable;
3978  break;
3979 
3980  case N_SLINE:
3981  // src line: 0,,n_sect,linenumber,address
3982  symbol_section = section_info.GetSection(nlist.n_sect, nlist.n_value);
3983  type = eSymbolTypeLineEntry;
3984  break;
3985 
3986  case N_SSYM:
3987  // structure elt: name,,NO_SECT,type,struct_offset
3988  type = eSymbolTypeVariableType;
3989  break;
3990 
3991  case N_SO:
3992  // source file name
3993  type = eSymbolTypeSourceFile;
3994  if (symbol_name == nullptr) {
3995  add_nlist = false;
3996  if (N_SO_index != UINT32_MAX) {
3997  // Set the size of the N_SO to the terminating index of this
3998  // N_SO so that we can always skip the entire N_SO if we need
3999  // to navigate more quickly at the source level when parsing
4000  // STABS
4001  symbol_ptr = symtab.SymbolAtIndex(N_SO_index);
4002  symbol_ptr->SetByteSize(sym_idx);
4003  symbol_ptr->SetSizeIsSibling(true);
4004  }
4005  N_NSYM_indexes.clear();
4006  N_INCL_indexes.clear();
4007  N_BRAC_indexes.clear();
4008  N_COMM_indexes.clear();
4009  N_FUN_indexes.clear();
4010  N_SO_index = UINT32_MAX;
4011  } else {
4012  // We use the current number of symbols in the symbol table in
4013  // lieu of using nlist_idx in case we ever start trimming entries
4014  // out
4015  const bool N_SO_has_full_path = symbol_name[0] == '/';
4016  if (N_SO_has_full_path) {
4017  if ((N_SO_index == sym_idx - 1) && ((sym_idx - 1) < num_syms)) {
4018  // We have two consecutive N_SO entries where the first
4019  // contains a directory and the second contains a full path.
4020  sym[sym_idx - 1].GetMangled().SetValue(ConstString(symbol_name),
4021  false);
4022  m_nlist_idx_to_sym_idx[nlist_idx] = sym_idx - 1;
4023  add_nlist = false;
4024  } else {
4025  // This is the first entry in a N_SO that contains a
4026  // directory or a full path to the source file
4027  N_SO_index = sym_idx;
4028  }
4029  } else if ((N_SO_index == sym_idx - 1) &&
4030  ((sym_idx - 1) < num_syms)) {
4031  // This is usually the second N_SO entry that contains just the
4032  // filename, so here we combine it with the first one if we are
4033  // minimizing the symbol table
4034  const char *so_path =
4035  sym[sym_idx - 1].GetMangled().GetDemangledName().AsCString();
4036  if (so_path && so_path[0]) {
4037  std::string full_so_path(so_path);
4038  const size_t double_slash_pos = full_so_path.find("//");
4039  if (double_slash_pos != std::string::npos) {
4040  // The linker has been generating bad N_SO entries with
4041  // doubled up paths in the format "%s%s" where the first
4042  // string in the DW_AT_comp_dir, and the second is the
4043  // directory for the source file so you end up with a path
4044  // that looks like "/tmp/src//tmp/src/"
4045  FileSpec so_dir(so_path);
4046  if (!FileSystem::Instance().Exists(so_dir)) {
4047  so_dir.SetFile(&full_so_path[double_slash_pos + 1],
4048  FileSpec::Style::native);
4049  if (FileSystem::Instance().Exists(so_dir)) {
4050  // Trim off the incorrect path
4051  full_so_path.erase(0, double_slash_pos + 1);
4052  }
4053  }
4054  }
4055  if (*full_so_path.rbegin() != '/')
4056  full_so_path += '/';
4057  full_so_path += symbol_name;
4058  sym[sym_idx - 1].GetMangled().SetValue(
4059  ConstString(full_so_path.c_str()), false);
4060  add_nlist = false;
4061  m_nlist_idx_to_sym_idx[nlist_idx] = sym_idx - 1;
4062  }
4063  } else {
4064  // This could be a relative path to a N_SO
4065  N_SO_index = sym_idx;
4066  }
4067  }
4068  break;
4069 
4070  case N_OSO:
4071  // object file name: name,,0,0,st_mtime
4072  type = eSymbolTypeObjectFile;
4073  break;
4074 
4075  case N_LSYM:
4076  // local sym: name,,NO_SECT,type,offset
4077  type = eSymbolTypeLocal;
4078  break;
4079 
4080  // INCL scopes
4081  case N_BINCL:
4082  // include file beginning: name,,NO_SECT,0,sum We use the current
4083  // number of symbols in the symbol table in lieu of using nlist_idx
4084  // in case we ever start trimming entries out
4085  N_INCL_indexes.push_back(sym_idx);
4086  type = eSymbolTypeScopeBegin;
4087  break;
4088 
4089  case N_EINCL:
4090  // include file end: name,,NO_SECT,0,0
4091  // Set the size of the N_BINCL to the terminating index of this
4092  // N_EINCL so that we can always skip the entire symbol if we need
4093  // to navigate more quickly at the source level when parsing STABS
4094  if (!N_INCL_indexes.empty()) {
4095  symbol_ptr = symtab.SymbolAtIndex(N_INCL_indexes.back());
4096  symbol_ptr->SetByteSize(sym_idx + 1);
4097  symbol_ptr->SetSizeIsSibling(true);
4098  N_INCL_indexes.pop_back();
4099  }
4100  type = eSymbolTypeScopeEnd;
4101  break;
4102 
4103  case N_SOL:
4104  // #included file name: name,,n_sect,0,address
4105  type = eSymbolTypeHeaderFile;
4106 
4107  // We currently don't use the header files on darwin
4108  add_nlist = false;
4109  break;
4110 
4111  case N_PARAMS:
4112  // compiler parameters: name,,NO_SECT,0,0
4113  type = eSymbolTypeCompiler;
4114  break;
4115 
4116  case N_VERSION:
4117  // compiler version: name,,NO_SECT,0,0
4118  type = eSymbolTypeCompiler;
4119  break;
4120 
4121  case N_OLEVEL:
4122  // compiler -O level: name,,NO_SECT,0,0
4123  type = eSymbolTypeCompiler;
4124  break;
4125 
4126  case N_PSYM:
4127  // parameter: name,,NO_SECT,type,offset
4128  type = eSymbolTypeVariable;
4129  break;
4130 
4131  case N_ENTRY:
4132  // alternate entry: name,,n_sect,linenumber,address
4133  symbol_section = section_info.GetSection(nlist.n_sect, nlist.n_value);
4134  type = eSymbolTypeLineEntry;
4135  break;
4136 
4137  // Left and Right Braces
4138  case N_LBRAC:
4139  // left bracket: 0,,NO_SECT,nesting level,address We use the
4140  // current number of symbols in the symbol table in lieu of using
4141  // nlist_idx in case we ever start trimming entries out
4142  symbol_section = section_info.GetSection(nlist.n_sect, nlist.n_value);
4143  N_BRAC_indexes.push_back(sym_idx);
4144  type = eSymbolTypeScopeBegin;
4145  break;
4146 
4147  case N_RBRAC:
4148  // right bracket: 0,,NO_SECT,nesting level,address Set the size of
4149  // the N_LBRAC to the terminating index of this N_RBRAC so that we
4150  // can always skip the entire symbol if we need to navigate more
4151  // quickly at the source level when parsing STABS
4152  symbol_section = section_info.GetSection(nlist.n_sect, nlist.n_value);
4153  if (!N_BRAC_indexes.empty()) {
4154  symbol_ptr = symtab.SymbolAtIndex(N_BRAC_indexes.back());
4155  symbol_ptr->SetByteSize(sym_idx + 1);
4156  symbol_ptr->SetSizeIsSibling(true);
4157  N_BRAC_indexes.pop_back();
4158  }
4159  type = eSymbolTypeScopeEnd;
4160  break;
4161 
4162  case N_EXCL:
4163  // deleted include file: name,,NO_SECT,0,sum
4164  type = eSymbolTypeHeaderFile;
4165  break;
4166 
4167  // COMM scopes
4168  case N_BCOMM:
4169  // begin common: name,,NO_SECT,0,0
4170  // We use the current number of symbols in the symbol table in lieu
4171  // of using nlist_idx in case we ever start trimming entries out
4172  type = eSymbolTypeScopeBegin;
4173  N_COMM_indexes.push_back(sym_idx);
4174  break;
4175 
4176  case N_ECOML:
4177  // end common (local name): 0,,n_sect,0,address
4178  symbol_section = section_info.GetSection(nlist.n_sect, nlist.n_value);
4179  LLVM_FALLTHROUGH;
4180 
4181  case N_ECOMM:
4182  // end common: name,,n_sect,0,0
4183  // Set the size of the N_BCOMM to the terminating index of this
4184  // N_ECOMM/N_ECOML so that we can always skip the entire symbol if
4185  // we need to navigate more quickly at the source level when
4186  // parsing STABS
4187  if (!N_COMM_indexes.empty()) {
4188  symbol_ptr = symtab.SymbolAtIndex(N_COMM_indexes.back());
4189  symbol_ptr->SetByteSize(sym_idx + 1);
4190  symbol_ptr->SetSizeIsSibling(true);
4191  N_COMM_indexes.pop_back();
4192  }
4193  type = eSymbolTypeScopeEnd;
4194  break;
4195 
4196  case N_LENG:
4197  // second stab entry with length information
4198  type = eSymbolTypeAdditional;
4199  break;
4200 
4201  default:
4202  break;
4203  }
4204  } else {
4205  uint8_t n_type = N_TYPE & nlist.n_type;
4206  sym[sym_idx].SetExternal((N_EXT & nlist.n_type) != 0);
4207 
4208  switch (n_type) {
4209  case N_INDR: {
4210  const char *reexport_name_cstr = strtab_data.PeekCStr(nlist.n_value);
4211  if (reexport_name_cstr && reexport_name_cstr[0]) {
4212  type = eSymbolTypeReExported;
4213  ConstString reexport_name(reexport_name_cstr +
4214  ((reexport_name_cstr[0] == '_') ? 1 : 0));
4215  sym[sym_idx].SetReExportedSymbolName(reexport_name);
4216  set_value = false;
4217  reexport_shlib_needs_fixup[sym_idx] = reexport_name;
4218  indirect_symbol_names.insert(
4219  ConstString(symbol_name + ((symbol_name[0] == '_') ? 1 : 0)));
4220  } else
4221  type = eSymbolTypeUndefined;
4222  } break;
4223 
4224  case N_UNDF:
4225  if (symbol_name && symbol_name[0]) {
4226  ConstString undefined_name(symbol_name +
4227  ((symbol_name[0] == '_') ? 1 : 0));
4228  undefined_name_to_desc[undefined_name] = nlist.n_desc;
4229  }
4230  LLVM_FALLTHROUGH;
4231 
4232  case N_PBUD:
4233  type = eSymbolTypeUndefined;
4234  break;
4235 
4236  case N_ABS:
4237  type = eSymbolTypeAbsolute;
4238  break;
4239 
4240  case N_SECT: {
4241  symbol_section = section_info.GetSection(nlist.n_sect, nlist.n_value);
4242 
4243  if (!symbol_section) {
4244  // TODO: warn about this?
4245  add_nlist = false;
4246  break;
4247  }
4248 
4249  if (TEXT_eh_frame_sectID == nlist.n_sect) {
4250  type = eSymbolTypeException;
4251  } else {
4252  uint32_t section_type = symbol_section->Get() & SECTION_TYPE;
4253 
4254  switch (section_type) {
4255  case S_CSTRING_LITERALS:
4256  type = eSymbolTypeData;
4257  break; // section with only literal C strings
4258  case S_4BYTE_LITERALS:
4259  type = eSymbolTypeData;
4260  break; // section with only 4 byte literals
4261  case S_8BYTE_LITERALS:
4262  type = eSymbolTypeData;
4263  break; // section with only 8 byte literals
4264  case S_LITERAL_POINTERS:
4265  type = eSymbolTypeTrampoline;
4266  break; // section with only pointers to literals
4267  case S_NON_LAZY_SYMBOL_POINTERS:
4268  type = eSymbolTypeTrampoline;
4269  break; // section with only non-lazy symbol pointers
4270  case S_LAZY_SYMBOL_POINTERS:
4271  type = eSymbolTypeTrampoline;
4272  break; // section with only lazy symbol pointers
4273  case S_SYMBOL_STUBS:
4274  type = eSymbolTypeTrampoline;
4275  break; // section with only symbol stubs, byte size of stub in
4276  // the reserved2 field
4277  case S_MOD_INIT_FUNC_POINTERS:
4278  type = eSymbolTypeCode;
4279  break; // section with only function pointers for initialization
4280  case S_MOD_TERM_FUNC_POINTERS:
4281  type = eSymbolTypeCode;
4282  break; // section with only function pointers for termination
4283  case S_INTERPOSING:
4284  type = eSymbolTypeTrampoline;
4285  break; // section with only pairs of function pointers for
4286  // interposing
4287  case S_16BYTE_LITERALS:
4288  type = eSymbolTypeData;
4289  break; // section with only 16 byte literals
4290  case S_DTRACE_DOF:
4292  break;
4293  case S_LAZY_DYLIB_SYMBOL_POINTERS:
4294  type = eSymbolTypeTrampoline;
4295  break;
4296  default:
4297  switch (symbol_section->GetType()) {
4299  type = eSymbolTypeCode;
4300  break;
4301  case eSectionTypeData:
4302  case eSectionTypeDataCString: // Inlined C string data
4303  case eSectionTypeDataCStringPointers: // Pointers to C string
4304  // data
4305  case eSectionTypeDataSymbolAddress: // Address of a symbol in
4306  // the symbol table
4307  case eSectionTypeData4:
4308  case eSectionTypeData8:
4309  case eSectionTypeData16:
4310  type = eSymbolTypeData;
4311  break;
4312  default:
4313  break;
4314  }
4315  break;
4316  }
4317 
4318  if (type == eSymbolTypeInvalid) {
4319  const char *symbol_sect_name =
4320  symbol_section->GetName().AsCString();
4321  if (symbol_section->IsDescendant(text_section_sp.get())) {
4322  if (symbol_section->IsClear(S_ATTR_PURE_INSTRUCTIONS |
4323  S_ATTR_SELF_MODIFYING_CODE |
4324  S_ATTR_SOME_INSTRUCTIONS))
4325  type = eSymbolTypeData;
4326  else
4327  type = eSymbolTypeCode;
4328  } else if (symbol_section->IsDescendant(data_section_sp.get()) ||
4329  symbol_section->IsDescendant(
4330  data_dirty_section_sp.get()) ||
4331  symbol_section->IsDescendant(
4332  data_const_section_sp.get())) {
4333  if (symbol_sect_name &&
4334  ::strstr(symbol_sect_name, "__objc") == symbol_sect_name) {
4335  type = eSymbolTypeRuntime;
4336 
4337  if (symbol_name) {
4338  llvm::StringRef symbol_name_ref(symbol_name);
4339  if (symbol_name_ref.startswith("_OBJC_")) {
4340  llvm::StringRef g_objc_v2_prefix_class(
4341  "_OBJC_CLASS_$_");
4342  llvm::StringRef g_objc_v2_prefix_metaclass(
4343  "_OBJC_METACLASS_$_");
4344  llvm::StringRef g_objc_v2_prefix_ivar(
4345  "_OBJC_IVAR_$_");
4346  if (symbol_name_ref.startswith(g_objc_v2_prefix_class)) {
4347  symbol_name_non_abi_mangled = symbol_name + 1;
4348  symbol_name =
4349  symbol_name + g_objc_v2_prefix_class.size();
4350  type = eSymbolTypeObjCClass;
4351  demangled_is_synthesized = true;
4352  } else if (symbol_name_ref.startswith(
4353  g_objc_v2_prefix_metaclass)) {
4354  symbol_name_non_abi_mangled = symbol_name + 1;
4355  symbol_name =
4356  symbol_name + g_objc_v2_prefix_metaclass.size();
4357  type = eSymbolTypeObjCMetaClass;
4358  demangled_is_synthesized = true;
4359  } else if (symbol_name_ref.startswith(
4360  g_objc_v2_prefix_ivar)) {
4361  symbol_name_non_abi_mangled = symbol_name + 1;
4362  symbol_name =
4363  symbol_name + g_objc_v2_prefix_ivar.size();
4364  type = eSymbolTypeObjCIVar;
4365  demangled_is_synthesized = true;
4366  }
4367  }
4368  }
4369  } else if (symbol_sect_name &&
4370  ::strstr(symbol_sect_name, "__gcc_except_tab") ==
4371  symbol_sect_name) {
4372  type = eSymbolTypeException;
4373  } else {
4374  type = eSymbolTypeData;
4375  }
4376  } else if (symbol_sect_name &&
4377  ::strstr(symbol_sect_name, "__IMPORT") ==
4378  symbol_sect_name) {
4379  type = eSymbolTypeTrampoline;
4380  } else if (symbol_section->IsDescendant(objc_section_sp.get())) {
4381  type = eSymbolTypeRuntime;
4382  if (symbol_name && symbol_name[0] == '.') {
4383  llvm::StringRef symbol_name_ref(symbol_name);
4384  llvm::StringRef g_objc_v1_prefix_class(
4385  ".objc_class_name_");
4386  if (symbol_name_ref.startswith(g_objc_v1_prefix_class)) {
4387  symbol_name_non_abi_mangled = symbol_name;
4388  symbol_name = symbol_name + g_objc_v1_prefix_class.size();
4389  type = eSymbolTypeObjCClass;
4390  demangled_is_synthesized = true;
4391  }
4392  }
4393  }
4394  }
4395  }
4396  } break;
4397  }
4398  }
4399 
4400  if (!add_nlist) {
4401  sym[sym_idx].Clear();
4402  return true;
4403  }
4404 
4405  uint64_t symbol_value = nlist.n_value;
4406 
4407  if (symbol_name_non_abi_mangled) {
4408  sym[sym_idx].GetMangled().SetMangledName(
4409  ConstString(symbol_name_non_abi_mangled));
4410  sym[sym_idx].GetMangled().SetDemangledName(ConstString(symbol_name));
4411  } else {
4412  bool symbol_name_is_mangled = false;
4413 
4414  if (symbol_name && symbol_name[0] == '_') {
4415  symbol_name_is_mangled = symbol_name[1] == '_';
4416  symbol_name++; // Skip the leading underscore
4417  }
4418 
4419  if (symbol_name) {
4420  ConstString const_symbol_name(symbol_name);
4421  sym[sym_idx].GetMangled().SetValue(const_symbol_name,
4422  symbol_name_is_mangled);
4423  }
4424  }
4425 
4426  if (is_gsym) {
4427  const char *gsym_name = sym[sym_idx]
4428  .GetMangled()
4429  .GetName(Mangled::ePreferMangled)
4430  .GetCString();
4431  if (gsym_name)
4432  N_GSYM_name_to_sym_idx[gsym_name] = sym_idx;
4433  }
4434 
4435  if (symbol_section) {
4436  const addr_t section_file_addr = symbol_section->GetFileAddress();
4437  if (symbol_byte_size == 0 && function_starts_count > 0) {
4438  addr_t symbol_lookup_file_addr = nlist.n_value;
4439  // Do an exact address match for non-ARM addresses, else get the
4440  // closest since the symbol might be a thumb symbol which has an
4441  // address with bit zero set.
4442  FunctionStarts::Entry *func_start_entry =
4443  function_starts.FindEntry(symbol_lookup_file_addr, !is_arm);
4444  if (is_arm && func_start_entry) {
4445  // Verify that the function start address is the symbol address
4446  // (ARM) or the symbol address + 1 (thumb).
4447  if (func_start_entry->addr != symbol_lookup_file_addr &&
4448  func_start_entry->addr != (symbol_lookup_file_addr + 1)) {
4449  // Not the right entry, NULL it out...
4450  func_start_entry = nullptr;
4451  }
4452  }
4453  if (func_start_entry) {
4454  func_start_entry->data = true;
4455 
4456  addr_t symbol_file_addr = func_start_entry->addr;
4457  if (is_arm)
4458  symbol_file_addr &= THUMB_ADDRESS_BIT_MASK;
4459 
4460  const FunctionStarts::Entry *next_func_start_entry =
4461  function_starts.FindNextEntry(func_start_entry);
4462  const addr_t section_end_file_addr =
4463  section_file_addr + symbol_section->GetByteSize();
4464  if (next_func_start_entry) {
4465  addr_t next_symbol_file_addr = next_func_start_entry->addr;
4466  // Be sure the clear the Thumb address bit when we calculate the
4467  // size from the current and next address
4468  if (is_arm)
4469  next_symbol_file_addr &= THUMB_ADDRESS_BIT_MASK;
4470  symbol_byte_size = std::min<lldb::addr_t>(
4471  next_symbol_file_addr - symbol_file_addr,
4472  section_end_file_addr - symbol_file_addr);
4473  } else {
4474  symbol_byte_size = section_end_file_addr - symbol_file_addr;
4475  }
4476  }
4477  }
4478  symbol_value -= section_file_addr;
4479  }
4480 
4481  if (!is_debug) {
4482  if (type == eSymbolTypeCode) {
4483  // See if we can find a N_FUN entry for any code symbols. If we do
4484  // find a match, and the name matches, then we can merge the two into
4485  // just the function symbol to avoid duplicate entries in the symbol
4486  // table.
4487  std::pair<ValueToSymbolIndexMap::const_iterator,
4488  ValueToSymbolIndexMap::const_iterator>
4489  range;
4490  range = N_FUN_addr_to_sym_idx.equal_range(nlist.n_value);
4491  if (range.first != range.second) {
4492  for (ValueToSymbolIndexMap::const_iterator pos = range.first;
4493  pos != range.second; ++pos) {
4494  if (sym[sym_idx].GetMangled().GetName(Mangled::ePreferMangled) ==
4495  sym[pos->second].GetMangled().GetName(
4496  Mangled::ePreferMangled)) {
4497  m_nlist_idx_to_sym_idx[nlist_idx] = pos->second;
4498  // We just need the flags from the linker symbol, so put these
4499  // flags into the N_FUN flags to avoid duplicate symbols in the
4500  // symbol table.
4501  sym[pos->second].SetExternal(sym[sym_idx].IsExternal());
4502  sym[pos->second].SetFlags(nlist.n_type << 16 | nlist.n_desc);
4503  if (resolver_addresses.find(nlist.n_value) !=
4504  resolver_addresses.end())
4505  sym[pos->second].SetType(eSymbolTypeResolver);
4506  sym[sym_idx].Clear();
4507  return true;
4508  }
4509  }
4510  } else {
4511  if (resolver_addresses.find(nlist.n_value) !=
4512  resolver_addresses.end())
4513  type = eSymbolTypeResolver;
4514  }
4515  } else if (type == eSymbolTypeData || type == eSymbolTypeObjCClass ||
4516  type == eSymbolTypeObjCMetaClass ||
4517  type == eSymbolTypeObjCIVar) {
4518  // See if we can find a N_STSYM entry for any data symbols. If we do
4519  // find a match, and the name matches, then we can merge the two into
4520  // just the Static symbol to avoid duplicate entries in the symbol
4521  // table.
4522  std::pair<ValueToSymbolIndexMap::const_iterator,
4523  ValueToSymbolIndexMap::const_iterator>
4524  range;
4525  range = N_STSYM_addr_to_sym_idx.equal_range(nlist.n_value);
4526  if (range.first != range.second) {
4527  for (ValueToSymbolIndexMap::const_iterator pos = range.first;
4528  pos != range.second; ++pos) {
4529  if (sym[sym_idx].GetMangled().GetName(Mangled::ePreferMangled) ==
4530  sym[pos->second].GetMangled().GetName(
4531  Mangled::ePreferMangled)) {
4532  m_nlist_idx_to_sym_idx[nlist_idx] = pos->second;
4533  // We just need the flags from the linker symbol, so put these
4534  // flags into the N_STSYM flags to avoid duplicate symbols in
4535  // the symbol table.
4536  sym[pos->second].SetExternal(sym[sym_idx].IsExternal());
4537  sym[pos->second].SetFlags(nlist.n_type << 16 | nlist.n_desc);
4538  sym[sym_idx].Clear();
4539  return true;
4540  }
4541  }
4542  } else {
4543  // Combine N_GSYM stab entries with the non stab symbol.
4544  const char *gsym_name = sym[sym_idx]
4545  .GetMangled()
4546  .GetName(Mangled::ePreferMangled)
4547  .GetCString();
4548  if (gsym_name) {
4549  ConstNameToSymbolIndexMap::const_iterator pos =
4550  N_GSYM_name_to_sym_idx.find(gsym_name);
4551  if (pos != N_GSYM_name_to_sym_idx.end()) {
4552  const uint32_t GSYM_sym_idx = pos->second;
4553  m_nlist_idx_to_sym_idx[nlist_idx] = GSYM_sym_idx;
4554  // Copy the address, because often the N_GSYM address has an
4555  // invalid address of zero when the global is a common symbol.
4556  sym[GSYM_sym_idx].GetAddressRef().SetSection(symbol_section);
4557  sym[GSYM_sym_idx].GetAddressRef().SetOffset(symbol_value);
4558  add_symbol_addr(
4559  sym[GSYM_sym_idx].GetAddress().GetFileAddress());
4560  // We just need the flags from the linker symbol, so put these
4561  // flags into the N_GSYM flags to avoid duplicate symbols in
4562  // the symbol table.
4563  sym[GSYM_sym_idx].SetFlags(nlist.n_type << 16 | nlist.n_desc);
4564  sym[sym_idx].Clear();
4565  return true;
4566  }
4567  }
4568  }
4569  }
4570  }
4571 
4572  sym[sym_idx].SetID(nlist_idx);
4573  sym[sym_idx].SetType(type);
4574  if (set_value) {
4575  sym[sym_idx].GetAddressRef().SetSection(symbol_section);
4576  sym[sym_idx].GetAddressRef().SetOffset(symbol_value);
4577  if (symbol_section)
4578  add_symbol_addr(sym[sym_idx].GetAddress().GetFileAddress());
4579  }
4580  sym[sym_idx].SetFlags(nlist.n_type << 16 | nlist.n_desc);
4581  if (nlist.n_desc & N_WEAK_REF)
4582  sym[sym_idx].SetIsWeak(true);
4583 
4584  if (symbol_byte_size > 0)
4585  sym[sym_idx].SetByteSize(symbol_byte_size);
4586 
4587  if (demangled_is_synthesized)
4588  sym[sym_idx].SetDemangledNameIsSynthesized(true);
4589 
4590  ++sym_idx;
4591  return true;
4592  };
4593 
4594  // First parse all the nlists but don't process them yet. See the next
4595  // comment for an explanation why.
4596  std::vector<struct nlist_64> nlists;
4597  nlists.reserve(symtab_load_command.nsyms);
4598  for (; nlist_idx < symtab_load_command.nsyms; ++nlist_idx) {
4599  if (auto nlist =
4600  ParseNList(nlist_data, nlist_data_offset, nlist_byte_size))
4601  nlists.push_back(*nlist);
4602  else
4603  break;
4604  }
4605 
4606  // Now parse all the debug symbols. This is needed to merge non-debug
4607  // symbols in the next step. Non-debug symbols are always coalesced into
4608  // the debug symbol. Doing this in one step would mean that some symbols
4609  // won't be merged.
4610  nlist_idx = 0;
4611  for (auto &nlist : nlists) {
4612  if (!ParseSymbolLambda(nlist, nlist_idx++, DebugSymbols))
4613  break;
4614  }
4615 
4616  // Finally parse all the non debug symbols.
4617  nlist_idx = 0;
4618  for (auto &nlist : nlists) {
4619  if (!ParseSymbolLambda(nlist, nlist_idx++, NonDebugSymbols))
4620  break;
4621  }
4622 
4623  for (const auto &pos : reexport_shlib_needs_fixup) {
4624  const auto undef_pos = undefined_name_to_desc.find(pos.second);
4625  if (undef_pos != undefined_name_to_desc.end()) {
4626  const uint8_t dylib_ordinal =
4627  llvm::MachO::GET_LIBRARY_ORDINAL(undef_pos->second);
4628  if (dylib_ordinal > 0 && dylib_ordinal < dylib_files.GetSize())
4629  sym[pos.first].SetReExportedSymbolSharedLibrary(
4630  dylib_files.GetFileSpecAtIndex(dylib_ordinal - 1));
4631  }
4632  }
4633  }
4634 
4635  // Count how many trie symbols we'll add to the symbol table
4636  int trie_symbol_table_augment_count = 0;
4637  for (auto &e : external_sym_trie_entries) {
4638  if (symbols_added.find(e.entry.address) == symbols_added.end())
4639  trie_symbol_table_augment_count++;
4640  }
4641 
4642  if (num_syms < sym_idx + trie_symbol_table_augment_count) {
4643  num_syms = sym_idx + trie_symbol_table_augment_count;
4644  sym = symtab.Resize(num_syms);
4645  }
4646  uint32_t synthetic_sym_id = symtab_load_command.nsyms;
4647 
4648  // Add symbols from the trie to the symbol table.
4649  for (auto &e : external_sym_trie_entries) {
4650  if (symbols_added.find(e.entry.address) != symbols_added.end())
4651  continue;
4652 
4653  // Find the section that this trie address is in, use that to annotate
4654  // symbol type as we add the trie address and name to the symbol table.
4655  Address symbol_addr;
4656  if (module_sp->ResolveFileAddress(e.entry.address, symbol_addr)) {
4657  SectionSP symbol_section(symbol_addr.GetSection());
4658  const char *symbol_name = e.entry.name.GetCString();
4659  bool demangled_is_synthesized = false;
4660  SymbolType type =
4661  GetSymbolType(symbol_name, demangled_is_synthesized, text_section_sp,
4662  data_section_sp, data_dirty_section_sp,
4663  data_const_section_sp, symbol_section);
4664 
4665  sym[sym_idx].SetType(type);
4666  if (symbol_section) {
4667  sym[sym_idx].SetID(synthetic_sym_id++);
4668  sym[sym_idx].GetMangled().SetMangledName(ConstString(symbol_name));
4669  if (demangled_is_synthesized)
4670  sym[sym_idx].SetDemangledNameIsSynthesized(true);
4671  sym[sym_idx].SetIsSynthetic(true);
4672  sym[sym_idx].SetExternal(true);
4673  sym[sym_idx].GetAddressRef() = symbol_addr;
4674  add_symbol_addr(symbol_addr.GetFileAddress());
4675  if (e.entry.flags & TRIE_SYMBOL_IS_THUMB)
4677  ++sym_idx;
4678  }
4679  }
4680  }
4681 
4682  if (function_starts_count > 0) {
4683  uint32_t num_synthetic_function_symbols = 0;
4684  for (i = 0; i < function_starts_count; ++i) {
4685  if (symbols_added.find(function_starts.GetEntryRef(i).addr) ==
4686  symbols_added.end())
4687  ++num_synthetic_function_symbols;
4688  }
4689 
4690  if (num_synthetic_function_symbols > 0) {
4691  if (num_syms < sym_idx + num_synthetic_function_symbols) {
4692  num_syms = sym_idx + num_synthetic_function_symbols;
4693  sym = symtab.Resize(num_syms);
4694  }
4695  for (i = 0; i < function_starts_count; ++i) {
4696  const FunctionStarts::Entry *func_start_entry =
4697  function_starts.GetEntryAtIndex(i);
4698  if (symbols_added.find(func_start_entry->addr) == symbols_added.end()) {
4699  addr_t symbol_file_addr = func_start_entry->addr;
4700  uint32_t symbol_flags = 0;
4701  if (func_start_entry->data)
4702  symbol_flags = MACHO_NLIST_ARM_SYMBOL_IS_THUMB;
4703  Address symbol_addr;
4704  if (module_sp->ResolveFileAddress(symbol_file_addr, symbol_addr)) {
4705  SectionSP symbol_section(symbol_addr.GetSection());
4706  uint32_t symbol_byte_size = 0;
4707  if (symbol_section) {
4708  const addr_t section_file_addr = symbol_section->GetFileAddress();
4709  const FunctionStarts::Entry *next_func_start_entry =
4710  function_starts.FindNextEntry(func_start_entry);
4711  const addr_t section_end_file_addr =
4712  section_file_addr + symbol_section->GetByteSize();
4713  if (next_func_start_entry) {
4714  addr_t next_symbol_file_addr = next_func_start_entry->addr;
4715  if (is_arm)
4716  next_symbol_file_addr &= THUMB_ADDRESS_BIT_MASK;
4717  symbol_byte_size = std::min<lldb::addr_t>(
4718  next_symbol_file_addr - symbol_file_addr,
4719  section_end_file_addr - symbol_file_addr);
4720  } else {
4721  symbol_byte_size = section_end_file_addr - symbol_file_addr;
4722  }
4723  sym[sym_idx].SetID(synthetic_sym_id++);
4724  // Don't set the name for any synthetic symbols, the Symbol
4725  // object will generate one if needed when the name is accessed
4726  // via accessors.
4727  sym[sym_idx].GetMangled().SetDemangledName(ConstString());
4728  sym[sym_idx].SetType(eSymbolTypeCode);
4729  sym[sym_idx].SetIsSynthetic(true);
4730  sym[sym_idx].GetAddressRef() = symbol_addr;
4731  add_symbol_addr(symbol_addr.GetFileAddress());
4732  if (symbol_flags)
4733  sym[sym_idx].SetFlags(symbol_flags);
4734  if (symbol_byte_size)
4735  sym[sym_idx].SetByteSize(symbol_byte_size);
4736  ++sym_idx;
4737  }
4738  }
4739  }
4740  }
4741  }
4742  }
4743 
4744  // Trim our symbols down to just what we ended up with after removing any
4745  // symbols.
4746  if (sym_idx < num_syms) {
4747  num_syms = sym_idx;
4748  sym = symtab.Resize(num_syms);
4749  }
4750 
4751  // Now synthesize indirect symbols
4752  if (m_dysymtab.nindirectsyms != 0) {
4753  if (indirect_symbol_index_data.GetByteSize()) {
4754  NListIndexToSymbolIndexMap::const_iterator end_index_pos =
4755  m_nlist_idx_to_sym_idx.end();
4756 
4757  for (uint32_t sect_idx = 1; sect_idx < m_mach_sections.size();
4758  ++sect_idx) {
4759  if ((m_mach_sections[sect_idx].flags & SECTION_TYPE) ==
4760  S_SYMBOL_STUBS) {
4761  uint32_t symbol_stub_byte_size = m_mach_sections[sect_idx].reserved2;
4762  if (symbol_stub_byte_size == 0)
4763  continue;
4764 
4765  const uint32_t num_symbol_stubs =
4766  m_mach_sections[sect_idx].size / symbol_stub_byte_size;
4767 
4768  if (num_symbol_stubs == 0)
4769  continue;
4770 
4771  const uint32_t symbol_stub_index_offset =
4772  m_mach_sections[sect_idx].reserved1;
4773  for (uint32_t stub_idx = 0; stub_idx < num_symbol_stubs; ++stub_idx) {
4774  const uint32_t symbol_stub_index =
4775  symbol_stub_index_offset + stub_idx;
4776  const lldb::addr_t symbol_stub_addr =
4777  m_mach_sections[sect_idx].addr +
4778  (stub_idx * symbol_stub_byte_size);
4779  lldb::offset_t symbol_stub_offset = symbol_stub_index * 4;
4780  if (indirect_symbol_index_data.ValidOffsetForDataOfSize(
4781  symbol_stub_offset, 4)) {
4782  const uint32_t stub_sym_id =
4783  indirect_symbol_index_data.GetU32(&symbol_stub_offset);
4784  if (stub_sym_id & (INDIRECT_SYMBOL_ABS | INDIRECT_SYMBOL_LOCAL))
4785  continue;
4786 
4787  NListIndexToSymbolIndexMap::const_iterator index_pos =
4788  m_nlist_idx_to_sym_idx.find(stub_sym_id);
4789  Symbol *stub_symbol = nullptr;
4790  if (index_pos != end_index_pos) {
4791  // We have a remapping from the original nlist index to a
4792  // current symbol index, so just look this up by index
4793  stub_symbol = symtab.SymbolAtIndex(index_pos->second);
4794  } else {
4795  // We need to lookup a symbol using the original nlist symbol
4796  // index since this index is coming from the S_SYMBOL_STUBS
4797  stub_symbol = symtab.FindSymbolByID(stub_sym_id);
4798  }
4799 
4800  if (stub_symbol) {
4801  Address so_addr(symbol_stub_addr, section_list);
4802 
4803  if (stub_symbol->GetType() == eSymbolTypeUndefined) {
4804  // Change the external symbol into a trampoline that makes
4805  // sense These symbols were N_UNDF N_EXT, and are useless
4806  // to us, so we can re-use them so we don't have to make up
4807  // a synthetic symbol for no good reason.
4808  if (resolver_addresses.find(symbol_stub_addr) ==
4809  resolver_addresses.end())
4810  stub_symbol->SetType(eSymbolTypeTrampoline);
4811  else
4812  stub_symbol->SetType(eSymbolTypeResolver);
4813  stub_symbol->SetExternal(false);
4814  stub_symbol->GetAddressRef() = so_addr;
4815  stub_symbol->SetByteSize(symbol_stub_byte_size);
4816  } else {
4817  // Make a synthetic symbol to describe the trampoline stub
4818  Mangled stub_symbol_mangled_name(stub_symbol->GetMangled());
4819  if (sym_idx >= num_syms) {
4820  sym = symtab.Resize(++num_syms);
4821  stub_symbol = nullptr; // this pointer no longer valid
4822  }
4823  sym[sym_idx].SetID(synthetic_sym_id++);
4824  sym[sym_idx].GetMangled() = stub_symbol_mangled_name;
4825  if (resolver_addresses.find(symbol_stub_addr) ==
4826  resolver_addresses.end())
4827  sym[sym_idx].SetType(eSymbolTypeTrampoline);
4828  else
4829  sym[sym_idx].SetType(eSymbolTypeResolver);
4830  sym[sym_idx].SetIsSynthetic(true);
4831  sym[sym_idx].GetAddressRef() = so_addr;
4832  add_symbol_addr(so_addr.GetFileAddress());
4833  sym[sym_idx].SetByteSize(symbol_stub_byte_size);
4834  ++sym_idx;
4835  }
4836  } else {
4837  if (log)
4838  log->Warning("symbol stub referencing symbol table symbol "
4839  "%u that isn't in our minimal symbol table, "
4840  "fix this!!!",
4841  stub_sym_id);
4842  }
4843  }
4844  }
4845  }
4846  }
4847  }
4848  }
4849 
4850  if (!reexport_trie_entries.empty()) {
4851  for (const auto &e : reexport_trie_entries) {
4852  if (e.entry.import_name) {
4853  // Only add indirect symbols from the Trie entries if we didn't have
4854  // a N_INDR nlist entry for this already
4855  if (indirect_symbol_names.find(e.entry.name) ==
4856  indirect_symbol_names.end()) {
4857  // Make a synthetic symbol to describe re-exported symbol.
4858  if (sym_idx >= num_syms)
4859  sym = symtab.Resize(++num_syms);
4860  sym[sym_idx].SetID(synthetic_sym_id++);
4861  sym[sym_idx].GetMangled() = Mangled(e.entry.name);
4862  sym[sym_idx].SetType(eSymbolTypeReExported);
4863  sym[sym_idx].SetIsSynthetic(true);
4864  sym[sym_idx].SetReExportedSymbolName(e.entry.import_name);
4865  if (e.entry.other > 0 && e.entry.other <= dylib_files.GetSize()) {
4866  sym[sym_idx].SetReExportedSymbolSharedLibrary(
4867  dylib_files.GetFileSpecAtIndex(e.entry.other - 1));
4868  }
4869  ++sym_idx;
4870  }
4871  }
4872  }
4873  }
4874 }
4875 
4877  ModuleSP module_sp(GetModule());
4878  if (module_sp) {
4879  std::lock_guard<std::recursive_mutex> guard(module_sp->GetMutex());
4880  s->Printf("%p: ", static_cast<void *>(this));
4881  s->Indent();
4882  if (m_header.magic == MH_MAGIC_64 || m_header.magic == MH_CIGAM_64)
4883  s->PutCString("ObjectFileMachO64");
4884  else
4885  s->PutCString("ObjectFileMachO32");
4886 
4887  *s << ", file = '" << m_file;
4888  ModuleSpecList all_specs;
4889  ModuleSpec base_spec;
4891  base_spec, all_specs);
4892  for (unsigned i = 0, e = all_specs.GetSize(); i != e; ++i) {
4893  *s << "', triple";
4894  if (e)
4895  s->Printf("[%d]", i);
4896  *s << " = ";
4897  *s << all_specs.GetModuleSpecRefAtIndex(i)
4898  .GetArchitecture()
4899  .GetTriple()
4900  .getTriple();
4901  }
4902  *s << "\n";
4903  SectionList *sections = GetSectionList();
4904  if (sections)
4905  sections->Dump(s->AsRawOstream(), s->GetIndentLevel(), nullptr, true,
4906  UINT32_MAX);
4907 
4908  if (m_symtab_up)
4909  m_symtab_up->Dump(s, nullptr, eSortOrderNone);
4910  }
4911 }
4912 
4913 UUID ObjectFileMachO::GetUUID(const llvm::MachO::mach_header &header,
4914  const lldb_private::DataExtractor &data,
4915  lldb::offset_t lc_offset) {
4916  uint32_t i;
4917  llvm::MachO::uuid_command load_cmd;
4918 
4919  lldb::offset_t offset = lc_offset;
4920  for (i = 0; i < header.ncmds; ++i) {
4921  const lldb::offset_t cmd_offset = offset;
4922  if (data.GetU32(&offset, &load_cmd, 2) == nullptr)
4923  break;
4924 
4925  if (load_cmd.cmd == LC_UUID) {
4926  const uint8_t *uuid_bytes = data.PeekData(offset, 16);
4927 
4928  if (uuid_bytes) {
4929  // OpenCL on Mac OS X uses the same UUID for each of its object files.
4930  // We pretend these object files have no UUID to prevent crashing.
4931 
4932  const uint8_t opencl_uuid[] = {0x8c, 0x8e, 0xb3, 0x9b, 0x3b, 0xa8,
4933  0x4b, 0x16, 0xb6, 0xa4, 0x27, 0x63,
4934  0xbb, 0x14, 0xf0, 0x0d};
4935 
4936  if (!memcmp(uuid_bytes, opencl_uuid, 16))
4937  return UUID();
4938 
4939  return UUID::fromOptionalData(uuid_bytes, 16);
4940  }
4941  return UUID();
4942  }
4943  offset = cmd_offset + load_cmd.cmdsize;
4944  }
4945  return UUID();
4946 }
4947 
4948 static llvm::StringRef GetOSName(uint32_t cmd) {
4949  switch (cmd) {
4950  case llvm::MachO::LC_VERSION_MIN_IPHONEOS:
4951  return llvm::Triple::getOSTypeName(llvm::Triple::IOS);
4952  case llvm::MachO::LC_VERSION_MIN_MACOSX:
4953  return llvm::Triple::getOSTypeName(llvm::Triple::MacOSX);
4954  case llvm::MachO::LC_VERSION_MIN_TVOS:
4955  return llvm::Triple::getOSTypeName(llvm::Triple::TvOS);
4956  case llvm::MachO::LC_VERSION_MIN_WATCHOS:
4957  return llvm::Triple::getOSTypeName(llvm::Triple::WatchOS);
4958  default:
4959  llvm_unreachable("unexpected LC_VERSION load command");
4960  }
4961 }
4962 
4963 namespace {
// Translates a Mach-O platform value into the pieces needed to build a
// triple: the OS type name and, for some platforms, an environment name.
//
// The constructor parameter is called `cmd`, but the values it is compared
// against are the llvm::MachO::PLATFORM_* constants, and GetAllArchSpecs()
// constructs it from build_version_command::platform.
4964 struct OSEnv {
// OS type name for the platform; left default-constructed when the platform
// is not handled by the switch below.
4965  llvm::StringRef os_type;
// Environment name; only assigned for the Mac Catalyst and simulator
// platforms, default-constructed otherwise.
4966  llvm::StringRef environment;
4967  OSEnv(uint32_t cmd) {
4968  switch (cmd) {
// Plain device/desktop platforms: only the OS type is set.
4969  case llvm::MachO::PLATFORM_MACOS:
4970  os_type = llvm::Triple::getOSTypeName(llvm::Triple::MacOSX);
4971  return;
4972  case llvm::MachO::PLATFORM_IOS:
4973  os_type = llvm::Triple::getOSTypeName(llvm::Triple::IOS);
4974  return;
4975  case llvm::MachO::PLATFORM_TVOS:
4976  os_type = llvm::Triple::getOSTypeName(llvm::Triple::TvOS);
4977  return;
4978  case llvm::MachO::PLATFORM_WATCHOS:
4979  os_type = llvm::Triple::getOSTypeName(llvm::Triple::WatchOS);
4980  return;
4981  // TODO: add BridgeOS & DriverKit once in llvm/lib/Support/Triple.cpp
4982  // NEED_BRIDGEOS_TRIPLE
4983  // case llvm::MachO::PLATFORM_BRIDGEOS:
4984  // os_type = llvm::Triple::getOSTypeName(llvm::Triple::BridgeOS);
4985  // return;
4986  // case llvm::MachO::PLATFORM_DRIVERKIT:
4987  // os_type = llvm::Triple::getOSTypeName(llvm::Triple::DriverKit);
4988  // return;
// Mac Catalyst is reported as the IOS OS type with the MacABI environment.
4989  case llvm::MachO::PLATFORM_MACCATALYST:
4990  os_type = llvm::Triple::getOSTypeName(llvm::Triple::IOS);
4991  environment = llvm::Triple::getEnvironmentTypeName(llvm::Triple::MacABI);
4992  return;
// Simulator platforms: the device OS type plus the Simulator environment.
4993  case llvm::MachO::PLATFORM_IOSSIMULATOR:
4994  os_type = llvm::Triple::getOSTypeName(llvm::Triple::IOS);
4995  environment =
4996  llvm::Triple::getEnvironmentTypeName(llvm::Triple::Simulator);
4997  return;
4998  case llvm::MachO::PLATFORM_TVOSSIMULATOR:
4999  os_type = llvm::Triple::getOSTypeName(llvm::Triple::TvOS);
5000  environment =
5001  llvm::Triple::getEnvironmentTypeName(llvm::Triple::Simulator);
5002  return;
5003  case llvm::MachO::PLATFORM_WATCHOSSIMULATOR:
5004  os_type = llvm::Triple::getOSTypeName(llvm::Triple::WatchOS);
5005  environment =
5006  llvm::Triple::getEnvironmentTypeName(llvm::Triple::Simulator);
5007  return;
// Any other platform value: only a log message is emitted and neither member
// is assigned.
// NOTE(review): no declaration of `log` is visible in this block as shown;
// confirm it is declared before the LLDB_LOGF call.
5008  default: {
5011  LLDB_LOGF(log, "unsupported platform in LC_BUILD_VERSION");
5012  }
5013  }
5014  }
5015 };
5016 
// Splits a packed 32-bit version number into its three components:
// bits 16 and up hold the major version, bits 8-15 the minor version and
// bits 0-7 the patch version.
struct MinOS {
  uint32_t major_version;
  uint32_t minor_version;
  uint32_t patch_version;

  MinOS(uint32_t version) {
    major_version = version >> 16;
    minor_version = (version >> 8) & 0xffu;
    patch_version = version & 0xffu;
  }
};
5023 } // namespace
5024 
5025 void ObjectFileMachO::GetAllArchSpecs(const llvm::MachO::mach_header &header,
5026  const lldb_private::DataExtractor &data,
5027  lldb::offset_t lc_offset,
5028  ModuleSpec &base_spec,
5029  lldb_private::ModuleSpecList &all_specs) {
5030  auto &base_arch = base_spec.GetArchitecture();
5031  base_arch.SetArchitecture(eArchTypeMachO, header.cputype, header.cpusubtype);
5032  if (!base_arch.IsValid())
5033  return;
5034 
5035  bool found_any = false;
5036  auto add_triple = [&](const llvm::Triple &triple) {
5037  auto spec = base_spec;
5038  spec.GetArchitecture().GetTriple() = triple;
5039  if (spec.GetArchitecture().IsValid()) {
5040  spec.GetUUID() = ObjectFileMachO::GetUUID(header, data, lc_offset);
5041  all_specs.Append(spec);
5042  found_any = true;
5043  }
5044  };
5045 
5046  // Set OS to an unspecified unknown or a "*" so it can match any OS
5047  llvm::Triple base_triple = base_arch.GetTriple();
5048  base_triple.setOS(llvm::Triple::UnknownOS);
5049  base_triple.setOSName(llvm::StringRef());
5050 
5051  if (header.filetype == MH_PRELOAD) {
5052  if (header.cputype == CPU_TYPE_ARM) {
5053  // If this is a 32-bit arm binary, and it's a standalone binary, force
5054  // the Vendor to Apple so we don't accidentally pick up the generic
5055  // armv7 ABI at runtime. Apple's armv7 ABI always uses r7 for the
5056  // frame pointer register; most other armv7 ABIs use a combination of
5057  // r7 and r11.
5058  base_triple.setVendor(llvm::Triple::Apple);
5059  } else {
5060  // Set vendor to an unspecified unknown or a "*" so it can match any
5061  // vendor This is required for correct behavior of EFI debugging on
5062  // x86_64
5063  base_triple.setVendor(llvm::Triple::UnknownVendor);
5064  base_triple.setVendorName(llvm::StringRef());
5065  }
5066  return add_triple(base_triple);
5067  }
5068 
5069  llvm::MachO::load_command load_cmd;
5070 
5071  // See if there is an LC_VERSION_MIN_* load command that can give
5072  // us the OS type.
5073  lldb::offset_t offset = lc_offset;
5074  for (uint32_t i = 0; i < header.ncmds; ++i) {
5075  const lldb::offset_t cmd_offset = offset;
5076  if (data.GetU32(&offset, &load_cmd, 2) == NULL)
5077  break;
5078 
5079  llvm::MachO::version_min_command version_min;
5080  switch (load_cmd.cmd) {
5081  case llvm::MachO::LC_VERSION_MIN_MACOSX:
5082  case llvm::MachO::LC_VERSION_MIN_IPHONEOS:
5083  case llvm::MachO::LC_VERSION_MIN_TVOS:
5084  case llvm::MachO::LC_VERSION_MIN_WATCHOS: {
5085  if (load_cmd.cmdsize != sizeof(version_min))
5086  break;
5087  if (data.ExtractBytes(cmd_offset, sizeof(version_min),
5088  data.GetByteOrder(), &version_min) == 0)
5089  break;
5090  MinOS min_os(version_min.version);
5091  llvm::SmallString<32> os_name;
5092  llvm::raw_svector_ostream os(os_name);
5093  os << GetOSName(load_cmd.cmd) << min_os.major_version << '.'
5094  << min_os.minor_version << '.' << min_os.patch_version;
5095 
5096  auto triple = base_triple;
5097  triple.setOSName(os.str());
5098 
5099  // Disambiguate legacy simulator platforms.
5100  if (load_cmd.cmd != llvm::MachO::LC_VERSION_MIN_MACOSX &&
5101  (base_triple.getArch() == llvm::Triple::x86_64 ||
5102  base_triple.getArch() == llvm::Triple::x86)) {
5103  // The combination of legacy LC_VERSION_MIN load command and
5104  // x86 architecture always indicates a simulator environment.
5105  // The combination of LC_VERSION_MIN and arm architecture only
5106  // appears for native binaries. Back-deploying simulator
5107  // binaries on Apple Silicon Macs use the modern unambigous
5108  // LC_BUILD_VERSION load commands; no special handling required.
5109  triple.setEnvironment(llvm::Triple::Simulator);
5110  }
5111  add_triple(triple);
5112  break;
5113  }
5114  default:
5115  break;
5116  }
5117 
5118  offset = cmd_offset + load_cmd.cmdsize;
5119  }
5120 
5121  // See if there are LC_BUILD_VERSION load commands that can give
5122  // us the OS type.
5123  offset = lc_offset;
5124  for (uint32_t i = 0; i < header.ncmds; ++i) {
5125  const lldb::offset_t cmd_offset = offset;
5126  if (data.GetU32(&offset, &load_cmd, 2) == NULL)
5127  break;
5128 
5129  do {
5130  if (load_cmd.cmd == llvm::MachO::LC_BUILD_VERSION) {
5131  llvm::MachO::build_version_command build_version;
5132  if (load_cmd.cmdsize < sizeof(build_version)) {
5133  // Malformed load command.
5134  break;
5135  }
5136  if (data.ExtractBytes(cmd_offset, sizeof(build_version),
5137  data.GetByteOrder(), &build_version) == 0)
5138  break;
5139  MinOS min_os(build_version.minos);
5140  OSEnv os_env(build_version.platform);
5141  llvm::SmallString<16> os_name;
5142  llvm::raw_svector_ostream os(os_name);
5143  os << os_env.os_type << min_os.major_version << '.'
5144  << min_os.minor_version << '.' << min_os.patch_version;
5145  auto triple = base_triple;
5146  triple.setOSName(os.str());
5147  os_name.clear();
5148  if (!os_env.environment.empty())
5149  triple.setEnvironmentName(os_env.environment);
5150  add_triple(triple);
5151  }
5152  } while (0);
5153  offset = cmd_offset + load_cmd.cmdsize;
5154  }
5155 
5156  if (!found_any) {
5157  if (header.filetype == MH_KEXT_BUNDLE) {
5158  base_triple.setVendor(llvm::Triple::Apple);
5159  add_triple(base_triple);
5160  } else {
5161  // We didn't find a LC_VERSION_MIN load command and this isn't a KEXT
5162  // so lets not say our Vendor is Apple, leave it as an unspecified
5163  // unknown.
5164  base_triple.setVendor(llvm::Triple::UnknownVendor);
5165  base_triple.setVendorName(llvm::StringRef());
5166  add_triple(base_triple);
5167  }
5168  }
5169 }
5170 
5172  ModuleSP module_sp, const llvm::MachO::mach_header &header,
5173  const lldb_private::DataExtractor &data, lldb::offset_t lc_offset) {
5174  ModuleSpecList all_specs;
5175  ModuleSpec base_spec;
5176  GetAllArchSpecs(header, data, MachHeaderSizeFromMagic(header.magic),
5177  base_spec, all_specs);
5178 
5179  // If the object file offers multiple alternative load commands,
5180  // pick the one that matches the module.
5181  if (module_sp) {
5182  const ArchSpec &module_arch = module_sp->GetArchitecture();
5183  for (unsigned i = 0, e = all_specs.GetSize(); i != e; ++i) {
5184  ArchSpec mach_arch =
5186  if (module_arch.IsCompatibleMatch(mach_arch))
5187  return mach_arch;
5188  }
5189  }
5190 
5191  // Return the first arch we found.
5192  if (all_specs.GetSize() == 0)
5193  return {};
5194  return all_specs.GetModuleSpecRefAtIndex(0).GetArchitecture();
5195 }
5196 
5198  ModuleSP module_sp(GetModule());
5199  if (module_sp) {
5200  std::lock_guard<std::recursive_mutex> guard(module_sp->GetMutex());
5202  return GetUUID(m_header, m_data, offset);
5203  }
5204  return UUID();
5205 }
5206 
5208  uint32_t count = 0;
5209  ModuleSP module_sp(GetModule());
5210  if (module_sp) {
5211  std::lock_guard<std::recursive_mutex> guard(module_sp->GetMutex());
5212  llvm::MachO::load_command load_cmd;
5214  std::vector<std::string> rpath_paths;
5215  std::vector<std::string> rpath_relative_paths;
5216  std::vector<std::string> at_exec_relative_paths;
5217  uint32_t i;
5218  for (i = 0; i < m_header.ncmds; ++i) {
5219  const uint32_t cmd_offset = offset;
5220  if (m_data.GetU32(&offset, &load_cmd, 2) == nullptr)
5221  break;
5222 
5223  switch (load_cmd.cmd) {
5224  case LC_RPATH:
5225  case LC_LOAD_DYLIB:
5226  case LC_LOAD_WEAK_DYLIB:
5227  case LC_REEXPORT_DYLIB:
5228  case LC_LOAD_DYLINKER:
5229  case LC_LOADFVMLIB:
5230  case LC_LOAD_UPWARD_DYLIB: {
5231  uint32_t name_offset = cmd_offset + m_data.GetU32(&offset);
5232  const char *path = m_data.PeekCStr(name_offset);
5233  if (path) {
5234  if (load_cmd.cmd == LC_RPATH)
5235  rpath_paths.push_back(path);
5236  else {
5237  if (path[0] == '@') {
5238  if (strncmp(path, "@rpath", strlen("@rpath")) == 0)
5239  rpath_relative_paths.push_back(path + strlen("@rpath"));
5240  else if (strncmp(path, "@executable_path",
5241  strlen("@executable_path")) == 0)
5242  at_exec_relative_paths.push_back(path +
5243  strlen("@executable_path"));
5244  } else {
5245  FileSpec file_spec(path);
5246  if (files.AppendIfUnique(file_spec))
5247  count++;
5248  }
5249  }
5250  }
5251  } break;
5252 
5253  default:
5254  break;
5255  }
5256  offset = cmd_offset + load_cmd.cmdsize;
5257  }
5258 
5259  FileSpec this_file_spec(m_file);
5260  FileSystem::Instance().Resolve(this_file_spec);
5261 
5262  if (!rpath_paths.empty()) {
5263  // Fixup all LC_RPATH values to be absolute paths
5264  std::string loader_path("@loader_path");
5265  std::string executable_path("@executable_path");
5266  for (auto &rpath : rpath_paths) {
5267  if (llvm::StringRef(rpath).startswith(loader_path)) {
5268  rpath.erase(0, loader_path.size());
5269  rpath.insert(0, this_file_spec.GetDirectory().GetCString());
5270  } else if (llvm::StringRef(rpath).startswith(executable_path)) {
5271  rpath.erase(0, executable_path.size());
5272  rpath.insert(0, this_file_spec.GetDirectory().GetCString());
5273  }
5274  }
5275 
5276  for (const auto &rpath_relative_path : rpath_relative_paths) {
5277  for (const auto &rpath : rpath_paths) {
5278  std::string path = rpath;
5279  path += rpath_relative_path;
5280  // It is OK to resolve this path because we must find a file on disk
5281  // for us to accept it anyway if it is rpath relative.
5282  FileSpec file_spec(path);
5283  FileSystem::Instance().Resolve(file_spec);
5284  if (FileSystem::Instance().Exists(file_spec) &&
5285  files.AppendIfUnique(file_spec)) {
5286  count++;
5287  break;
5288  }
5289  }
5290  }
5291  }
5292 
5293  // We may have @executable_paths but no RPATHS. Figure those out here.
5294  // Only do this if this object file is the executable. We have no way to
5295  // get back to the actual executable otherwise, so we won't get the right
5296  // path.
5297  if (!at_exec_relative_paths.empty() && CalculateType() == eTypeExecutable) {
5298  FileSpec exec_dir = this_file_spec.CopyByRemovingLastPathComponent();
5299  for (const auto &at_exec_relative_path : at_exec_relative_paths) {
5300  FileSpec file_spec =
5301  exec_dir.CopyByAppendingPathComponent(at_exec_relative_path);
5302  if (FileSystem::Instance().Exists(file_spec) &&
5303  files.AppendIfUnique(file_spec))
5304  count++;
5305  }
5306  }
5307  }
5308  return count;
5309 }
5310 
5312  // If the object file is not an executable it can't hold the entry point.
5313  // m_entry_point_address is initialized to an invalid address, so we can just
5314  // return that. If m_entry_point_address is valid it means we've found it
5315  // already, so return the cached value.
5316 
5317  if ((!IsExecutable() && !IsDynamicLoader()) ||
5319  return m_entry_point_address;
5320  }
5321 
5322  // Otherwise, look for the UnixThread or Thread command. The data for the
5323  // Thread command is given in /usr/include/mach-o.h, but it is basically:
5324  //
5325  // uint32_t flavor - this is the flavor argument you would pass to
5326  // thread_get_state
5327  // uint32_t count - this is the count of longs in the thread state data
5328  // struct XXX_thread_state state - this is the structure from
5329  // <machine/thread_status.h> corresponding to the flavor.
5330  // <repeat this trio>
5331  //
5332  // So we just keep reading the various register flavors till we find the GPR
5333  // one, then read the PC out of there.
5334  // FIXME: We will need to have a "RegisterContext data provider" class at some
5335  // point that can get all the registers
5336  // out of data in this form & attach them to a given thread. That should
5337  // underlie the MacOS X User process plugin, and we'll also need it for the
5338  // MacOS X Core File process plugin. When we have that we can also use it
5339  // here.
5340  //
5341  // For now we hard-code the offsets and flavors we need:
5342  //
5343  //
5344 
5345  ModuleSP module_sp(GetModule());
5346  if (module_sp) {
5347  std::lock_guard<std::recursive_mutex> guard(module_sp->GetMutex());
5348  llvm::MachO::load_command load_cmd;
5350  uint32_t i;
5351  lldb::addr_t start_address = LLDB_INVALID_ADDRESS;
5352  bool done = false;
5353 
5354  for (i = 0; i < m_header.ncmds; ++i) {
5355  const lldb::offset_t cmd_offset = offset;
5356  if (m_data.GetU32(&offset, &load_cmd, 2) == nullptr)
5357  break;
5358 
5359  switch (load_cmd.cmd) {
5360  case LC_UNIXTHREAD:
5361  case LC_THREAD: {
5362  while (offset < cmd_offset + load_cmd.cmdsize) {
5363  uint32_t flavor = m_data.GetU32(&offset);
5364  uint32_t count = m_data.GetU32(&offset);
5365  if (count == 0) {
5366  // We've gotten off somehow, log and exit;
5367  return m_entry_point_address;
5368  }
5369 
5370  switch (m_header.cputype) {
5371  case llvm::MachO::CPU_TYPE_ARM:
5372  if (flavor == 1 ||
5373  flavor == 9) // ARM_THREAD_STATE/ARM_THREAD_STATE32
5374  // from mach/arm/thread_status.h
5375  {
5376  offset += 60; // This is the offset of pc in the GPR thread state
5377  // data structure.
5378  start_address = m_data.GetU32(&offset);
5379  done = true;
5380  }
5381  break;
5384  if (flavor == 6) // ARM_THREAD_STATE64 from mach/arm/thread_status.h
5385  {
5386  offset += 256; // This is the offset of pc in the GPR thread state
5387  // data structure.
5388  start_address = m_data.GetU64(&offset);
5389  done = true;
5390  }
5391  break;
5392  case llvm::MachO::CPU_TYPE_I386:
5393  if (flavor ==
5394  1) // x86_THREAD_STATE32 from mach/i386/thread_status.h
5395  {
5396  offset += 40; // This is the offset of eip in the GPR thread state
5397  // data structure.
5398  start_address = m_data.GetU32(&offset);
5399  done = true;
5400  }
5401  break;
5402  case llvm::MachO::CPU_TYPE_X86_64:
5403  if (flavor ==
5404  4) // x86_THREAD_STATE64 from mach/i386/thread_status.h
5405  {
5406  offset += 16 * 8; // This is the offset of rip in the GPR thread
5407  // state data structure.
5408  start_address = m_data.GetU64(&offset);
5409  done = true;
5410  }
5411  break;
5412  default:
5413  return m_entry_point_address;
5414  }
5415  // Haven't found the GPR flavor yet, skip over the data for this
5416  // flavor:
5417  if (done)
5418  break;
5419  offset += count * 4;
5420  }
5421  } break;
5422  case LC_MAIN: {
5423  ConstString text_segment_name("__TEXT");
5424  uint64_t entryoffset = m_data.GetU64(&offset);
5425  SectionSP text_segment_sp =
5426  GetSectionList()->FindSectionByName(text_segment_name);
5427  if (text_segment_sp) {
5428  done = true;
5429  start_address = text_segment_sp->GetFileAddress() + entryoffset;
5430  }
5431  } break;
5432 
5433  default:
5434  break;
5435  }
5436  if (done)
5437  break;
5438 
5439  // Go to the next load command:
5440  offset = cmd_offset + load_cmd.cmdsize;
5441  }
5442 
5443  if (start_address == LLDB_INVALID_ADDRESS && IsDynamicLoader()) {
5444  if (GetSymtab()) {
5445  Symbol *dyld_start_sym = GetSymtab()->FindFirstSymbolWithNameAndType(
5446  ConstString("_dyld_start"), SymbolType::eSymbolTypeCode,
5447  Symtab::eDebugAny, Symtab::eVisibilityAny);
5448  if (dyld_start_sym && dyld_start_sym->GetAddress().IsValid()) {
5449  start_address = dyld_start_sym->GetAddress().GetFileAddress();
5450  }
5451  }
5452  }
5453 
5454  if (start_address != LLDB_INVALID_ADDRESS) {
5455  // We got the start address from the load commands, so now resolve that
5456  // address in the sections of this ObjectFile:
5458  start_address, GetSectionList())) {
5460  }
5461  } else {
5462  // We couldn't read the UnixThread load command - maybe it wasn't there.
5463  // As a fallback look for the "start" symbol in the main executable.
5464 
5465  ModuleSP module_sp(GetModule());
5466 
5467  if (module_sp) {
5468  SymbolContextList contexts;
5469  SymbolContext context;
5470  module_sp->FindSymbolsWithNameAndType(ConstString("start"),
5471  eSymbolTypeCode, contexts);
5472  if (contexts.GetSize()) {
5473  if (contexts.GetContextAtIndex(0, context))
5475  }
5476  }
5477  }
5478  }
5479 
5480  return m_entry_point_address;
5481 }
5482 
5484  lldb_private::Address header_addr;
5485  SectionList *section_list = GetSectionList();
5486  if (section_list) {
5487  SectionSP text_segment_sp(
5488  section_list->FindSectionByName(GetSegmentNameTEXT()));
5489  if (text_segment_sp) {
5490  header_addr.SetSection(text_segment_sp);
5491  header_addr.SetOffset(0);
5492  }
5493  }
5494  return header_addr;
5495 }
5496 
// Scans the Mach-O load commands and appends one file range to
// m_thread_context_offsets for every LC_THREAD command found.
//
// The function header, the declaration and initial value of `offset`, the
// statement that opens the extra scope closed below, and the return statement
// are not visible in this excerpt.
5498  ModuleSP module_sp(GetModule());
5499  if (module_sp) {
// The module's recursive mutex is held for the whole scan.
5500  std::lock_guard<std::recursive_mutex> guard(module_sp->GetMutex());
5504  FileRangeArray::Entry file_range;
5505  llvm::MachO::thread_command thread_cmd;
5506  for (uint32_t i = 0; i < m_header.ncmds; ++i) {
// Remember where this command starts so the next one can be located from its
// recorded size, regardless of how far `offset` moves while parsing.
5507  const uint32_t cmd_offset = offset;
// Read two 32-bit values (the command's cmd and cmdsize fields) into
// thread_cmd; a nullptr result means the data ran out, which ends the scan.
5508  if (m_data.GetU32(&offset, &thread_cmd, 2) == nullptr)
5509  break;
5510 
5511  if (thread_cmd.cmd == LC_THREAD) {
// `offset` is presumably just past the 8 bytes read above, so the recorded
// range covers the rest of the command: cmdsize minus that 8-byte header.
// NOTE(review): cmdsize comes straight from the file; a value below 8 would
// make this subtraction wrap if cmdsize is unsigned - confirm it is validated.
5512  file_range.SetRangeBase(offset);
5513  file_range.SetByteSize(thread_cmd.cmdsize - 8);
5514  m_thread_context_offsets.Append(file_range);
5515  }
// Step to the next load command.
5516  offset = cmd_offset + thread_cmd.cmdsize;
5517  }
5518  }
5519  }
5521 }
5522 
5524  std::string result;
5525  ModuleSP module_sp(GetModule());
5526  if (module_sp) {
5527  std::lock_guard<std::recursive_mutex> guard(module_sp->GetMutex());
5528 
5529  // First, look over the load commands for an LC_NOTE load command with
5530  // data_owner string "kern ver str" & use that if found.
5532  for (uint32_t i = 0; i < m_header.ncmds; ++i) {
5533  const uint32_t cmd_offset = offset;
5534  llvm::MachO::load_command lc;
5535  if (m_data.GetU32(&offset, &lc.cmd, 2) == nullptr)
5536  break;
5537  if (lc.cmd == LC_NOTE) {
5538  char data_owner[17];
5539  m_data.CopyData(offset, 16, data_owner);
5540  data_owner[16] = '\0';
5541  offset += 16;
5542  uint64_t fileoff = m_data.GetU64_unchecked(&offset);
5543  uint64_t size = m_data.GetU64_unchecked(&offset);
5544 
5545  // "kern ver str" has a uint32_t version and then a nul terminated
5546  // c-string.
5547  if (strcmp("kern ver str", data_owner) == 0) {
5548  offset = fileoff;
5549  uint32_t version;
5550  if (m_data.GetU32(&offset, &version, 1) != nullptr) {
5551  if (version == 1) {
5552  uint32_t strsize = size - sizeof(uint32_t);
5553  char *buf = (char *)malloc(strsize);
5554  if (buf) {
5555  m_data.CopyData(offset, strsize, buf);
5556  buf[strsize - 1] = '\0';
5557  result = buf;
5558  if (buf)
5559  free(buf);
5560  return result;
5561  }
5562  }
5563  }
5564  }
5565  }
5566  offset = cmd_offset + lc.cmdsize;
5567  }
5568 
5569  // Second, make a pass over the load commands looking for an obsolete
5570  // LC_IDENT load command.
5571  offset = MachHeaderSizeFromMagic(m_header.magic);
5572  for (uint32_t i = 0; i < m_header.ncmds; ++i) {
5573  const uint32_t cmd_offset = offset;
5574  llvm::MachO::ident_command ident_command;
5575  if (m_data.GetU32(&offset, &ident_command, 2) == nullptr)
5576  break;
5577  if (ident_command.cmd == LC_IDENT && ident_command.cmdsize != 0) {
5578  char *buf = (char *)malloc(ident_command.cmdsize);
5579  if (buf != nullptr && m_data.CopyData(offset, ident_command.cmdsize,
5580  buf) == ident_command.cmdsize) {
5581  buf[ident_command.cmdsize - 1] = '\0';
5582  result = buf;
5583  }
5584  if (buf)
5585  free(buf);
5586  }
5587  offset = cmd_offset + ident_command.cmdsize;
5588  }
5589  }
5590  return result;
5591 }
5592 
5594  addr_t mask = 0;
5595  ModuleSP module_sp(GetModule());
5596  if (module_sp) {
5597  std::lock_guard<std::recursive_mutex> guard(module_sp->GetMutex());
5599  for (uint32_t i = 0; i < m_header.ncmds; ++i) {
5600  const uint32_t cmd_offset = offset;
5601  llvm::MachO::load_command lc;
5602  if (m_data.GetU32(&offset, &lc.cmd, 2) == nullptr)
5603  break;
5604  if (lc.cmd == LC_NOTE) {
5605  char data_owner[17];
5606  m_data.CopyData(offset, 16, data_owner);
5607  data_owner[16] = '\0';
5608  offset += 16;
5609  uint64_t fileoff = m_data.GetU64_unchecked(&offset);
5610 
5611  // "addrable bits" has a uint32_t version and a uint32_t
5612  // number of bits used in addressing.
5613  if (strcmp("addrable bits", data_owner) == 0) {
5614  offset = fileoff;
5615  uint32_t version;
5616  if (m_data.GetU32(&offset, &version, 1) != nullptr) {
5617  if (version == 3) {
5618  uint32_t num_addr_bits = m_data.GetU32_unchecked(&offset);
5619  if (num_addr_bits != 0) {
5620  mask = ~((1ULL << num_addr_bits) - 1);
5621  }
5622  break;
5623  }
5624  }
5625  }
5626  }
5627  offset = cmd_offset + lc.cmdsize;
5628  }
5629  }
5630  return mask;
5631 }
5632 
5634  ObjectFile::BinaryType &type) {
5635  address = LLDB_INVALID_ADDRESS;
5636  uuid.Clear();
5637  ModuleSP module_sp(GetModule());
5638  if (module_sp) {
5639  std::lock_guard<std::recursive_mutex> guard(module_sp->GetMutex());
5641  for (uint32_t i = 0; i < m_header.ncmds; ++i) {
5642  const uint32_t cmd_offset = offset;
5643  llvm::MachO::load_command lc;
5644  if (m_data.GetU32(&offset, &lc.cmd, 2) == nullptr)
5645  break;
5646  if (lc.cmd == LC_NOTE) {
5647  char data_owner[17];
5648  memset(data_owner, 0, sizeof(data_owner));
5649  m_data.CopyData(offset, 16, data_owner);
5650  offset += 16;
5651  uint64_t fileoff = m_data.GetU64_unchecked(&offset);
5652  uint64_t size = m_data.GetU64_unchecked(&offset);
5653 
5654  // "main bin spec" (main binary specification) data payload is
5655  // formatted:
5656  // uint32_t version [currently 1]
5657  // uint32_t type [0 == unspecified, 1 == kernel,
5658  // 2 == user process, 3 == firmware ]
5659  // uint64_t address [ UINT64_MAX if address not specified ]
5660  // uuid_t uuid [ all zero's if uuid not specified ]
5661  // uint32_t log2_pagesize [ process page size in log base
5662  // 2, e.g. 4k pages are 12.
5663  // 0 for unspecified ]
5664  // uint32_t unused [ for alignment ]
5665 
5666  if (strcmp("