LLDB  mainline
ObjectFileMachO.cpp
Go to the documentation of this file.
1 //===-- ObjectFileMachO.cpp -----------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "llvm/ADT/StringRef.h"
10 
15 #include "lldb/Core/Debugger.h"
16 #include "lldb/Core/FileSpecList.h"
17 #include "lldb/Core/Module.h"
18 #include "lldb/Core/ModuleSpec.h"
20 #include "lldb/Core/Progress.h"
21 #include "lldb/Core/Section.h"
22 #include "lldb/Core/StreamFile.h"
23 #include "lldb/Host/Host.h"
24 #include "lldb/Host/SafeMachO.h"
27 #include "lldb/Symbol/ObjectFile.h"
30 #include "lldb/Target/Platform.h"
31 #include "lldb/Target/Process.h"
33 #include "lldb/Target/Target.h"
34 #include "lldb/Target/Thread.h"
35 #include "lldb/Target/ThreadList.h"
36 #include "lldb/Utility/ArchSpec.h"
38 #include "lldb/Utility/FileSpec.h"
39 #include "lldb/Utility/Log.h"
40 #include "lldb/Utility/RangeMap.h"
42 #include "lldb/Utility/Status.h"
44 #include "lldb/Utility/Timer.h"
45 #include "lldb/Utility/UUID.h"
46 
47 #include "llvm/ADT/DenseSet.h"
48 #include "llvm/Support/FormatVariadic.h"
49 #include "llvm/Support/MemoryBuffer.h"
50 
51 #include "ObjectFileMachO.h"
52 
53 #if defined(__APPLE__)
54 #include <TargetConditionals.h>
55 // GetLLDBSharedCacheUUID() needs to call dlsym()
56 #include <dlfcn.h>
57 #endif
58 
59 #ifndef __APPLE__
61 #else
62 #include <uuid/uuid.h>
63 #endif
64 
65 #include <bitset>
66 #include <memory>
67 
68 #if LLVM_SUPPORT_XCODE_SIGNPOSTS
69 // Unfortunately the signpost header pulls in the system MachO header, too.
70 #undef CPU_TYPE_ARM
71 #undef CPU_TYPE_ARM64
72 #undef CPU_TYPE_ARM64_32
73 #undef CPU_TYPE_I386
74 #undef CPU_TYPE_X86_64
75 #undef MH_BINDATLOAD
76 #undef MH_BUNDLE
77 #undef MH_CIGAM
78 #undef MH_CIGAM_64
79 #undef MH_CORE
80 #undef MH_DSYM
81 #undef MH_DYLDLINK
82 #undef MH_DYLIB
83 #undef MH_DYLIB_STUB
84 #undef MH_DYLINKER
85 #undef MH_DYLINKER
86 #undef MH_EXECUTE
87 #undef MH_FVMLIB
88 #undef MH_INCRLINK
89 #undef MH_KEXT_BUNDLE
90 #undef MH_MAGIC
91 #undef MH_MAGIC_64
92 #undef MH_NOUNDEFS
93 #undef MH_OBJECT
94 #undef MH_OBJECT
95 #undef MH_PRELOAD
96 
97 #undef LC_BUILD_VERSION
98 #undef LC_VERSION_MIN_MACOSX
99 #undef LC_VERSION_MIN_IPHONEOS
100 #undef LC_VERSION_MIN_TVOS
101 #undef LC_VERSION_MIN_WATCHOS
102 
103 #undef PLATFORM_MACOS
104 #undef PLATFORM_MACCATALYST
105 #undef PLATFORM_IOS
106 #undef PLATFORM_IOSSIMULATOR
107 #undef PLATFORM_TVOS
108 #undef PLATFORM_TVOSSIMULATOR
109 #undef PLATFORM_WATCHOS
110 #undef PLATFORM_WATCHOSSIMULATOR
111 #endif
112 
113 #define THUMB_ADDRESS_BIT_MASK 0xfffffffffffffffeull
114 using namespace lldb;
115 using namespace lldb_private;
116 using namespace llvm::MachO;
117 
119 
120 // Some structure definitions needed for parsing the dyld shared cache files
121 // found on iOS devices.
122 
124  char magic[16]; // e.g. "dyld_v0 i386", "dyld_v1 armv7", etc.
125  uint32_t mappingOffset; // file offset to first dyld_cache_mapping_info
126  uint32_t mappingCount; // number of dyld_cache_mapping_info entries
129  uint64_t dyldBaseAddress;
132  uint64_t slideInfoOffset;
133  uint64_t slideInfoSize;
136  uint8_t uuid[16]; // v1 and above, also recorded in dyld_all_image_infos v13
137  // and later
138 };
139 
141  uint64_t address;
142  uint64_t size;
143  uint64_t fileOffset;
146 };
147 
155 };
160 };
161 
162 static void PrintRegisterValue(RegisterContext *reg_ctx, const char *name,
163  const char *alt_name, size_t reg_byte_size,
164  Stream &data) {
165  const RegisterInfo *reg_info = reg_ctx->GetRegisterInfoByName(name);
166  if (reg_info == nullptr)
167  reg_info = reg_ctx->GetRegisterInfoByName(alt_name);
168  if (reg_info) {
169  lldb_private::RegisterValue reg_value;
170  if (reg_ctx->ReadRegister(reg_info, reg_value)) {
171  if (reg_info->byte_size >= reg_byte_size)
172  data.Write(reg_value.GetBytes(), reg_byte_size);
173  else {
174  data.Write(reg_value.GetBytes(), reg_info->byte_size);
175  for (size_t i = 0, n = reg_byte_size - reg_info->byte_size; i < n; ++i)
176  data.PutChar(0);
177  }
178  return;
179  }
180  }
181  // Just write zeros if all else fails
182  for (size_t i = 0; i < reg_byte_size; ++i)
183  data.PutChar(0);
184 }
185 
187 public:
189  const DataExtractor &data)
190  : RegisterContextDarwin_x86_64(thread, 0) {
191  SetRegisterDataFrom_LC_THREAD(data);
192  }
193 
194  void InvalidateAllRegisters() override {
195  // Do nothing... registers are always valid...
196  }
197 
199  lldb::offset_t offset = 0;
200  SetError(GPRRegSet, Read, -1);
201  SetError(FPURegSet, Read, -1);
202  SetError(EXCRegSet, Read, -1);
203  bool done = false;
204 
205  while (!done) {
206  int flavor = data.GetU32(&offset);
207  if (flavor == 0)
208  done = true;
209  else {
210  uint32_t i;
211  uint32_t count = data.GetU32(&offset);
212  switch (flavor) {
213  case GPRRegSet:
214  for (i = 0; i < count; ++i)
215  (&gpr.rax)[i] = data.GetU64(&offset);
216  SetError(GPRRegSet, Read, 0);
217  done = true;
218 
219  break;
220  case FPURegSet:
221  // TODO: fill in FPU regs....
222  // SetError (FPURegSet, Read, -1);
223  done = true;
224 
225  break;
226  case EXCRegSet:
227  exc.trapno = data.GetU32(&offset);
228  exc.err = data.GetU32(&offset);
229  exc.faultvaddr = data.GetU64(&offset);
230  SetError(EXCRegSet, Read, 0);
231  done = true;
232  break;
233  case 7:
234  case 8:
235  case 9:
236  // fancy flavors that encapsulate the above flavors...
237  break;
238 
239  default:
240  done = true;
241  break;
242  }
243  }
244  }
245  }
246 
247  static bool Create_LC_THREAD(Thread *thread, Stream &data) {
248  RegisterContextSP reg_ctx_sp(thread->GetRegisterContext());
249  if (reg_ctx_sp) {
250  RegisterContext *reg_ctx = reg_ctx_sp.get();
251 
252  data.PutHex32(GPRRegSet); // Flavor
253  data.PutHex32(GPRWordCount);
254  PrintRegisterValue(reg_ctx, "rax", nullptr, 8, data);
255  PrintRegisterValue(reg_ctx, "rbx", nullptr, 8, data);
256  PrintRegisterValue(reg_ctx, "rcx", nullptr, 8, data);
257  PrintRegisterValue(reg_ctx, "rdx", nullptr, 8, data);
258  PrintRegisterValue(reg_ctx, "rdi", nullptr, 8, data);
259  PrintRegisterValue(reg_ctx, "rsi", nullptr, 8, data);
260  PrintRegisterValue(reg_ctx, "rbp", nullptr, 8, data);
261  PrintRegisterValue(reg_ctx, "rsp", nullptr, 8, data);
262  PrintRegisterValue(reg_ctx, "r8", nullptr, 8, data);
263  PrintRegisterValue(reg_ctx, "r9", nullptr, 8, data);
264  PrintRegisterValue(reg_ctx, "r10", nullptr, 8, data);
265  PrintRegisterValue(reg_ctx, "r11", nullptr, 8, data);
266  PrintRegisterValue(reg_ctx, "r12", nullptr, 8, data);
267  PrintRegisterValue(reg_ctx, "r13", nullptr, 8, data);
268  PrintRegisterValue(reg_ctx, "r14", nullptr, 8, data);
269  PrintRegisterValue(reg_ctx, "r15", nullptr, 8, data);
270  PrintRegisterValue(reg_ctx, "rip", nullptr, 8, data);
271  PrintRegisterValue(reg_ctx, "rflags", nullptr, 8, data);
272  PrintRegisterValue(reg_ctx, "cs", nullptr, 8, data);
273  PrintRegisterValue(reg_ctx, "fs", nullptr, 8, data);
274  PrintRegisterValue(reg_ctx, "gs", nullptr, 8, data);
275 
276  // // Write out the FPU registers
277  // const size_t fpu_byte_size = sizeof(FPU);
278  // size_t bytes_written = 0;
279  // data.PutHex32 (FPURegSet);
280  // data.PutHex32 (fpu_byte_size/sizeof(uint64_t));
281  // bytes_written += data.PutHex32(0); // uint32_t pad[0]
282  // bytes_written += data.PutHex32(0); // uint32_t pad[1]
283  // bytes_written += WriteRegister (reg_ctx, "fcw", "fctrl", 2,
284  // data); // uint16_t fcw; // "fctrl"
285  // bytes_written += WriteRegister (reg_ctx, "fsw" , "fstat", 2,
286  // data); // uint16_t fsw; // "fstat"
287  // bytes_written += WriteRegister (reg_ctx, "ftw" , "ftag", 1,
288  // data); // uint8_t ftw; // "ftag"
289  // bytes_written += data.PutHex8 (0); // uint8_t pad1;
290  // bytes_written += WriteRegister (reg_ctx, "fop" , NULL, 2,
291  // data); // uint16_t fop; // "fop"
292  // bytes_written += WriteRegister (reg_ctx, "fioff", "ip", 4,
293  // data); // uint32_t ip; // "fioff"
294  // bytes_written += WriteRegister (reg_ctx, "fiseg", NULL, 2,
295  // data); // uint16_t cs; // "fiseg"
296  // bytes_written += data.PutHex16 (0); // uint16_t pad2;
297  // bytes_written += WriteRegister (reg_ctx, "dp", "fooff" , 4,
298  // data); // uint32_t dp; // "fooff"
299  // bytes_written += WriteRegister (reg_ctx, "foseg", NULL, 2,
300  // data); // uint16_t ds; // "foseg"
301  // bytes_written += data.PutHex16 (0); // uint16_t pad3;
302  // bytes_written += WriteRegister (reg_ctx, "mxcsr", NULL, 4,
303  // data); // uint32_t mxcsr;
304  // bytes_written += WriteRegister (reg_ctx, "mxcsrmask", NULL,
305  // 4, data);// uint32_t mxcsrmask;
306  // bytes_written += WriteRegister (reg_ctx, "stmm0", NULL,
307  // sizeof(MMSReg), data);
308  // bytes_written += WriteRegister (reg_ctx, "stmm1", NULL,
309  // sizeof(MMSReg), data);
310  // bytes_written += WriteRegister (reg_ctx, "stmm2", NULL,
311  // sizeof(MMSReg), data);
312  // bytes_written += WriteRegister (reg_ctx, "stmm3", NULL,
313  // sizeof(MMSReg), data);
314  // bytes_written += WriteRegister (reg_ctx, "stmm4", NULL,
315  // sizeof(MMSReg), data);
316  // bytes_written += WriteRegister (reg_ctx, "stmm5", NULL,
317  // sizeof(MMSReg), data);
318  // bytes_written += WriteRegister (reg_ctx, "stmm6", NULL,
319  // sizeof(MMSReg), data);
320  // bytes_written += WriteRegister (reg_ctx, "stmm7", NULL,
321  // sizeof(MMSReg), data);
322  // bytes_written += WriteRegister (reg_ctx, "xmm0" , NULL,
323  // sizeof(XMMReg), data);
324  // bytes_written += WriteRegister (reg_ctx, "xmm1" , NULL,
325  // sizeof(XMMReg), data);
326  // bytes_written += WriteRegister (reg_ctx, "xmm2" , NULL,
327  // sizeof(XMMReg), data);
328  // bytes_written += WriteRegister (reg_ctx, "xmm3" , NULL,
329  // sizeof(XMMReg), data);
330  // bytes_written += WriteRegister (reg_ctx, "xmm4" , NULL,
331  // sizeof(XMMReg), data);
332  // bytes_written += WriteRegister (reg_ctx, "xmm5" , NULL,
333  // sizeof(XMMReg), data);
334  // bytes_written += WriteRegister (reg_ctx, "xmm6" , NULL,
335  // sizeof(XMMReg), data);
336  // bytes_written += WriteRegister (reg_ctx, "xmm7" , NULL,
337  // sizeof(XMMReg), data);
338  // bytes_written += WriteRegister (reg_ctx, "xmm8" , NULL,
339  // sizeof(XMMReg), data);
340  // bytes_written += WriteRegister (reg_ctx, "xmm9" , NULL,
341  // sizeof(XMMReg), data);
342  // bytes_written += WriteRegister (reg_ctx, "xmm10", NULL,
343  // sizeof(XMMReg), data);
344  // bytes_written += WriteRegister (reg_ctx, "xmm11", NULL,
345  // sizeof(XMMReg), data);
346  // bytes_written += WriteRegister (reg_ctx, "xmm12", NULL,
347  // sizeof(XMMReg), data);
348  // bytes_written += WriteRegister (reg_ctx, "xmm13", NULL,
349  // sizeof(XMMReg), data);
350  // bytes_written += WriteRegister (reg_ctx, "xmm14", NULL,
351  // sizeof(XMMReg), data);
352  // bytes_written += WriteRegister (reg_ctx, "xmm15", NULL,
353  // sizeof(XMMReg), data);
354  //
355  // // Fill rest with zeros
356  // for (size_t i=0, n = fpu_byte_size - bytes_written; i<n; ++
357  // i)
358  // data.PutChar(0);
359 
360  // Write out the EXC registers
361  data.PutHex32(EXCRegSet);
362  data.PutHex32(EXCWordCount);
363  PrintRegisterValue(reg_ctx, "trapno", nullptr, 4, data);
364  PrintRegisterValue(reg_ctx, "err", nullptr, 4, data);
365  PrintRegisterValue(reg_ctx, "faultvaddr", nullptr, 8, data);
366  return true;
367  }
368  return false;
369  }
370 
371 protected:
372  int DoReadGPR(lldb::tid_t tid, int flavor, GPR &gpr) override { return 0; }
373 
374  int DoReadFPU(lldb::tid_t tid, int flavor, FPU &fpu) override { return 0; }
375 
376  int DoReadEXC(lldb::tid_t tid, int flavor, EXC &exc) override { return 0; }
377 
378  int DoWriteGPR(lldb::tid_t tid, int flavor, const GPR &gpr) override {
379  return 0;
380  }
381 
382  int DoWriteFPU(lldb::tid_t tid, int flavor, const FPU &fpu) override {
383  return 0;
384  }
385 
386  int DoWriteEXC(lldb::tid_t tid, int flavor, const EXC &exc) override {
387  return 0;
388  }
389 };
390 
392 public:
394  const DataExtractor &data)
395  : RegisterContextDarwin_i386(thread, 0) {
396  SetRegisterDataFrom_LC_THREAD(data);
397  }
398 
399  void InvalidateAllRegisters() override {
400  // Do nothing... registers are always valid...
401  }
402 
404  lldb::offset_t offset = 0;
405  SetError(GPRRegSet, Read, -1);
406  SetError(FPURegSet, Read, -1);
407  SetError(EXCRegSet, Read, -1);
408  bool done = false;
409 
410  while (!done) {
411  int flavor = data.GetU32(&offset);
412  if (flavor == 0)
413  done = true;
414  else {
415  uint32_t i;
416  uint32_t count = data.GetU32(&offset);
417  switch (flavor) {
418  case GPRRegSet:
419  for (i = 0; i < count; ++i)
420  (&gpr.eax)[i] = data.GetU32(&offset);
421  SetError(GPRRegSet, Read, 0);
422  done = true;
423 
424  break;
425  case FPURegSet:
426  // TODO: fill in FPU regs....
427  // SetError (FPURegSet, Read, -1);
428  done = true;
429 
430  break;
431  case EXCRegSet:
432  exc.trapno = data.GetU32(&offset);
433  exc.err = data.GetU32(&offset);
434  exc.faultvaddr = data.GetU32(&offset);
435  SetError(EXCRegSet, Read, 0);
436  done = true;
437  break;
438  case 7:
439  case 8:
440  case 9:
441  // fancy flavors that encapsulate the above flavors...
442  break;
443 
444  default:
445  done = true;
446  break;
447  }
448  }
449  }
450  }
451 
452  static bool Create_LC_THREAD(Thread *thread, Stream &data) {
453  RegisterContextSP reg_ctx_sp(thread->GetRegisterContext());
454  if (reg_ctx_sp) {
455  RegisterContext *reg_ctx = reg_ctx_sp.get();
456 
457  data.PutHex32(GPRRegSet); // Flavor
458  data.PutHex32(GPRWordCount);
459  PrintRegisterValue(reg_ctx, "eax", nullptr, 4, data);
460  PrintRegisterValue(reg_ctx, "ebx", nullptr, 4, data);
461  PrintRegisterValue(reg_ctx, "ecx", nullptr, 4, data);
462  PrintRegisterValue(reg_ctx, "edx", nullptr, 4, data);
463  PrintRegisterValue(reg_ctx, "edi", nullptr, 4, data);
464  PrintRegisterValue(reg_ctx, "esi", nullptr, 4, data);
465  PrintRegisterValue(reg_ctx, "ebp", nullptr, 4, data);
466  PrintRegisterValue(reg_ctx, "esp", nullptr, 4, data);
467  PrintRegisterValue(reg_ctx, "ss", nullptr, 4, data);
468  PrintRegisterValue(reg_ctx, "eflags", nullptr, 4, data);
469  PrintRegisterValue(reg_ctx, "eip", nullptr, 4, data);
470  PrintRegisterValue(reg_ctx, "cs", nullptr, 4, data);
471  PrintRegisterValue(reg_ctx, "ds", nullptr, 4, data);
472  PrintRegisterValue(reg_ctx, "es", nullptr, 4, data);
473  PrintRegisterValue(reg_ctx, "fs", nullptr, 4, data);
474  PrintRegisterValue(reg_ctx, "gs", nullptr, 4, data);
475 
476  // Write out the EXC registers
477  data.PutHex32(EXCRegSet);
478  data.PutHex32(EXCWordCount);
479  PrintRegisterValue(reg_ctx, "trapno", nullptr, 4, data);
480  PrintRegisterValue(reg_ctx, "err", nullptr, 4, data);
481  PrintRegisterValue(reg_ctx, "faultvaddr", nullptr, 4, data);
482  return true;
483  }
484  return false;
485  }
486 
487 protected:
488  int DoReadGPR(lldb::tid_t tid, int flavor, GPR &gpr) override { return 0; }
489 
490  int DoReadFPU(lldb::tid_t tid, int flavor, FPU &fpu) override { return 0; }
491 
492  int DoReadEXC(lldb::tid_t tid, int flavor, EXC &exc) override { return 0; }
493 
494  int DoWriteGPR(lldb::tid_t tid, int flavor, const GPR &gpr) override {
495  return 0;
496  }
497 
498  int DoWriteFPU(lldb::tid_t tid, int flavor, const FPU &fpu) override {
499  return 0;
500  }
501 
502  int DoWriteEXC(lldb::tid_t tid, int flavor, const EXC &exc) override {
503  return 0;
504  }
505 };
506 
508 public:
510  const DataExtractor &data)
511  : RegisterContextDarwin_arm(thread, 0) {
512  SetRegisterDataFrom_LC_THREAD(data);
513  }
514 
515  void InvalidateAllRegisters() override {
516  // Do nothing... registers are always valid...
517  }
518 
520  lldb::offset_t offset = 0;
521  SetError(GPRRegSet, Read, -1);
522  SetError(FPURegSet, Read, -1);
523  SetError(EXCRegSet, Read, -1);
524  bool done = false;
525 
526  while (!done) {
527  int flavor = data.GetU32(&offset);
528  uint32_t count = data.GetU32(&offset);
529  lldb::offset_t next_thread_state = offset + (count * 4);
530  switch (flavor) {
531  case GPRAltRegSet:
532  case GPRRegSet:
533  // On ARM, the CPSR register is also included in the count but it is
534  // not included in gpr.r so loop until (count-1).
535  for (uint32_t i = 0; i < (count - 1); ++i) {
536  gpr.r[i] = data.GetU32(&offset);
537  }
538  // Save cpsr explicitly.
539  gpr.cpsr = data.GetU32(&offset);
540 
541  SetError(GPRRegSet, Read, 0);
542  offset = next_thread_state;
543  break;
544 
545  case FPURegSet: {
546  uint8_t *fpu_reg_buf = (uint8_t *)&fpu.floats.s[0];
547  const int fpu_reg_buf_size = sizeof(fpu.floats);
548  if (data.ExtractBytes(offset, fpu_reg_buf_size, eByteOrderLittle,
549  fpu_reg_buf) == fpu_reg_buf_size) {
550  offset += fpu_reg_buf_size;
551  fpu.fpscr = data.GetU32(&offset);
552  SetError(FPURegSet, Read, 0);
553  } else {
554  done = true;
555  }
556  }
557  offset = next_thread_state;
558  break;
559 
560  case EXCRegSet:
561  if (count == 3) {
562  exc.exception = data.GetU32(&offset);
563  exc.fsr = data.GetU32(&offset);
564  exc.far = data.GetU32(&offset);
565  SetError(EXCRegSet, Read, 0);
566  }
567  done = true;
568  offset = next_thread_state;
569  break;
570 
571  // Unknown register set flavor, stop trying to parse.
572  default:
573  done = true;
574  }
575  }
576  }
577 
578  static bool Create_LC_THREAD(Thread *thread, Stream &data) {
579  RegisterContextSP reg_ctx_sp(thread->GetRegisterContext());
580  if (reg_ctx_sp) {
581  RegisterContext *reg_ctx = reg_ctx_sp.get();
582 
583  data.PutHex32(GPRRegSet); // Flavor
584  data.PutHex32(GPRWordCount);
585  PrintRegisterValue(reg_ctx, "r0", nullptr, 4, data);
586  PrintRegisterValue(reg_ctx, "r1", nullptr, 4, data);
587  PrintRegisterValue(reg_ctx, "r2", nullptr, 4, data);
588  PrintRegisterValue(reg_ctx, "r3", nullptr, 4, data);
589  PrintRegisterValue(reg_ctx, "r4", nullptr, 4, data);
590  PrintRegisterValue(reg_ctx, "r5", nullptr, 4, data);
591  PrintRegisterValue(reg_ctx, "r6", nullptr, 4, data);
592  PrintRegisterValue(reg_ctx, "r7", nullptr, 4, data);
593  PrintRegisterValue(reg_ctx, "r8", nullptr, 4, data);
594  PrintRegisterValue(reg_ctx, "r9", nullptr, 4, data);
595  PrintRegisterValue(reg_ctx, "r10", nullptr, 4, data);
596  PrintRegisterValue(reg_ctx, "r11", nullptr, 4, data);
597  PrintRegisterValue(reg_ctx, "r12", nullptr, 4, data);
598  PrintRegisterValue(reg_ctx, "sp", nullptr, 4, data);
599  PrintRegisterValue(reg_ctx, "lr", nullptr, 4, data);
600  PrintRegisterValue(reg_ctx, "pc", nullptr, 4, data);
601  PrintRegisterValue(reg_ctx, "cpsr", nullptr, 4, data);
602 
603  // Write out the EXC registers
604  // data.PutHex32 (EXCRegSet);
605  // data.PutHex32 (EXCWordCount);
606  // WriteRegister (reg_ctx, "exception", NULL, 4, data);
607  // WriteRegister (reg_ctx, "fsr", NULL, 4, data);
608  // WriteRegister (reg_ctx, "far", NULL, 4, data);
609  return true;
610  }
611  return false;
612  }
613 
614 protected:
615  int DoReadGPR(lldb::tid_t tid, int flavor, GPR &gpr) override { return -1; }
616 
617  int DoReadFPU(lldb::tid_t tid, int flavor, FPU &fpu) override { return -1; }
618 
619  int DoReadEXC(lldb::tid_t tid, int flavor, EXC &exc) override { return -1; }
620 
621  int DoReadDBG(lldb::tid_t tid, int flavor, DBG &dbg) override { return -1; }
622 
623  int DoWriteGPR(lldb::tid_t tid, int flavor, const GPR &gpr) override {
624  return 0;
625  }
626 
627  int DoWriteFPU(lldb::tid_t tid, int flavor, const FPU &fpu) override {
628  return 0;
629  }
630 
631  int DoWriteEXC(lldb::tid_t tid, int flavor, const EXC &exc) override {
632  return 0;
633  }
634 
635  int DoWriteDBG(lldb::tid_t tid, int flavor, const DBG &dbg) override {
636  return -1;
637  }
638 };
639 
641 public:
643  const DataExtractor &data)
644  : RegisterContextDarwin_arm64(thread, 0) {
645  SetRegisterDataFrom_LC_THREAD(data);
646  }
647 
648  void InvalidateAllRegisters() override {
649  // Do nothing... registers are always valid...
650  }
651 
653  lldb::offset_t offset = 0;
654  SetError(GPRRegSet, Read, -1);
655  SetError(FPURegSet, Read, -1);
656  SetError(EXCRegSet, Read, -1);
657  bool done = false;
658  while (!done) {
659  int flavor = data.GetU32(&offset);
660  uint32_t count = data.GetU32(&offset);
661  lldb::offset_t next_thread_state = offset + (count * 4);
662  switch (flavor) {
663  case GPRRegSet:
664  // x0-x28 + fp + lr + sp + pc (== 33 64-bit registers) plus cpsr (1
665  // 32-bit register)
666  if (count >= (33 * 2) + 1) {
667  for (uint32_t i = 0; i < 29; ++i)
668  gpr.x[i] = data.GetU64(&offset);
669  gpr.fp = data.GetU64(&offset);
670  gpr.lr = data.GetU64(&offset);
671  gpr.sp = data.GetU64(&offset);
672  gpr.pc = data.GetU64(&offset);
673  gpr.cpsr = data.GetU32(&offset);
674  SetError(GPRRegSet, Read, 0);
675  }
676  offset = next_thread_state;
677  break;
678  case FPURegSet: {
679  uint8_t *fpu_reg_buf = (uint8_t *)&fpu.v[0];
680  const int fpu_reg_buf_size = sizeof(fpu);
681  if (fpu_reg_buf_size == count * sizeof(uint32_t) &&
682  data.ExtractBytes(offset, fpu_reg_buf_size, eByteOrderLittle,
683  fpu_reg_buf) == fpu_reg_buf_size) {
684  SetError(FPURegSet, Read, 0);
685  } else {
686  done = true;
687  }
688  }
689  offset = next_thread_state;
690  break;
691  case EXCRegSet:
692  if (count == 4) {
693  exc.far = data.GetU64(&offset);
694  exc.esr = data.GetU32(&offset);
695  exc.exception = data.GetU32(&offset);
696  SetError(EXCRegSet, Read, 0);
697  }
698  offset = next_thread_state;
699  break;
700  default:
701  done = true;
702  break;
703  }
704  }
705  }
706 
707  static bool Create_LC_THREAD(Thread *thread, Stream &data) {
708  RegisterContextSP reg_ctx_sp(thread->GetRegisterContext());
709  if (reg_ctx_sp) {
710  RegisterContext *reg_ctx = reg_ctx_sp.get();
711 
712  data.PutHex32(GPRRegSet); // Flavor
713  data.PutHex32(GPRWordCount);
714  PrintRegisterValue(reg_ctx, "x0", nullptr, 8, data);
715  PrintRegisterValue(reg_ctx, "x1", nullptr, 8, data);
716  PrintRegisterValue(reg_ctx, "x2", nullptr, 8, data);
717  PrintRegisterValue(reg_ctx, "x3", nullptr, 8, data);
718  PrintRegisterValue(reg_ctx, "x4", nullptr, 8, data);
719  PrintRegisterValue(reg_ctx, "x5", nullptr, 8, data);
720  PrintRegisterValue(reg_ctx, "x6", nullptr, 8, data);
721  PrintRegisterValue(reg_ctx, "x7", nullptr, 8, data);
722  PrintRegisterValue(reg_ctx, "x8", nullptr, 8, data);
723  PrintRegisterValue(reg_ctx, "x9", nullptr, 8, data);
724  PrintRegisterValue(reg_ctx, "x10", nullptr, 8, data);
725  PrintRegisterValue(reg_ctx, "x11", nullptr, 8, data);
726  PrintRegisterValue(reg_ctx, "x12", nullptr, 8, data);
727  PrintRegisterValue(reg_ctx, "x13", nullptr, 8, data);
728  PrintRegisterValue(reg_ctx, "x14", nullptr, 8, data);
729  PrintRegisterValue(reg_ctx, "x15", nullptr, 8, data);
730  PrintRegisterValue(reg_ctx, "x16", nullptr, 8, data);
731  PrintRegisterValue(reg_ctx, "x17", nullptr, 8, data);
732  PrintRegisterValue(reg_ctx, "x18", nullptr, 8, data);
733  PrintRegisterValue(reg_ctx, "x19", nullptr, 8, data);
734  PrintRegisterValue(reg_ctx, "x20", nullptr, 8, data);
735  PrintRegisterValue(reg_ctx, "x21", nullptr, 8, data);
736  PrintRegisterValue(reg_ctx, "x22", nullptr, 8, data);
737  PrintRegisterValue(reg_ctx, "x23", nullptr, 8, data);
738  PrintRegisterValue(reg_ctx, "x24", nullptr, 8, data);
739  PrintRegisterValue(reg_ctx, "x25", nullptr, 8, data);
740  PrintRegisterValue(reg_ctx, "x26", nullptr, 8, data);
741  PrintRegisterValue(reg_ctx, "x27", nullptr, 8, data);
742  PrintRegisterValue(reg_ctx, "x28", nullptr, 8, data);
743  PrintRegisterValue(reg_ctx, "fp", nullptr, 8, data);
744  PrintRegisterValue(reg_ctx, "lr", nullptr, 8, data);
745  PrintRegisterValue(reg_ctx, "sp", nullptr, 8, data);
746  PrintRegisterValue(reg_ctx, "pc", nullptr, 8, data);
747  PrintRegisterValue(reg_ctx, "cpsr", nullptr, 4, data);
748  data.PutHex32(0); // uint32_t pad at the end
749 
750  // Write out the EXC registers
751  data.PutHex32(EXCRegSet);
752  data.PutHex32(EXCWordCount);
753  PrintRegisterValue(reg_ctx, "far", NULL, 8, data);
754  PrintRegisterValue(reg_ctx, "esr", NULL, 4, data);
755  PrintRegisterValue(reg_ctx, "exception", NULL, 4, data);
756  return true;
757  }
758  return false;
759  }
760 
761 protected:
762  int DoReadGPR(lldb::tid_t tid, int flavor, GPR &gpr) override { return -1; }
763 
764  int DoReadFPU(lldb::tid_t tid, int flavor, FPU &fpu) override { return -1; }
765 
766  int DoReadEXC(lldb::tid_t tid, int flavor, EXC &exc) override { return -1; }
767 
768  int DoReadDBG(lldb::tid_t tid, int flavor, DBG &dbg) override { return -1; }
769 
770  int DoWriteGPR(lldb::tid_t tid, int flavor, const GPR &gpr) override {
771  return 0;
772  }
773 
774  int DoWriteFPU(lldb::tid_t tid, int flavor, const FPU &fpu) override {
775  return 0;
776  }
777 
778  int DoWriteEXC(lldb::tid_t tid, int flavor, const EXC &exc) override {
779  return 0;
780  }
781 
782  int DoWriteDBG(lldb::tid_t tid, int flavor, const DBG &dbg) override {
783  return -1;
784  }
785 };
786 
788  switch (magic) {
789  case MH_MAGIC:
790  case MH_CIGAM:
791  return sizeof(struct llvm::MachO::mach_header);
792 
793  case MH_MAGIC_64:
794  case MH_CIGAM_64:
795  return sizeof(struct llvm::MachO::mach_header_64);
796  break;
797 
798  default:
799  break;
800  }
801  return 0;
802 }
803 
804 #define MACHO_NLIST_ARM_SYMBOL_IS_THUMB 0x0008
805 
807 
809  PluginManager::RegisterPlugin(
810  GetPluginNameStatic(), GetPluginDescriptionStatic(), CreateInstance,
811  CreateMemoryInstance, GetModuleSpecifications, SaveCore);
812 }
813 
815  PluginManager::UnregisterPlugin(CreateInstance);
816 }
817 
819  static ConstString g_name("mach-o");
820  return g_name;
821 }
822 
824  return "Mach-o object file reader (32 and 64 bit)";
825 }
826 
827 ObjectFile *ObjectFileMachO::CreateInstance(const lldb::ModuleSP &module_sp,
828  DataBufferSP &data_sp,
829  lldb::offset_t data_offset,
830  const FileSpec *file,
831  lldb::offset_t file_offset,
832  lldb::offset_t length) {
833  if (!data_sp) {
834  data_sp = MapFileData(*file, length, file_offset);
835  if (!data_sp)
836  return nullptr;
837  data_offset = 0;
838  }
839 
840  if (!ObjectFileMachO::MagicBytesMatch(data_sp, data_offset, length))
841  return nullptr;
842 
843  // Update the data to contain the entire file if it doesn't already
844  if (data_sp->GetByteSize() < length) {
845  data_sp = MapFileData(*file, length, file_offset);
846  if (!data_sp)
847  return nullptr;
848  data_offset = 0;
849  }
850  auto objfile_up = std::make_unique<ObjectFileMachO>(
851  module_sp, data_sp, data_offset, file, file_offset, length);
852  if (!objfile_up || !objfile_up->ParseHeader())
853  return nullptr;
854 
855  return objfile_up.release();
856 }
857 
859  const lldb::ModuleSP &module_sp, DataBufferSP &data_sp,
860  const ProcessSP &process_sp, lldb::addr_t header_addr) {
861  if (ObjectFileMachO::MagicBytesMatch(data_sp, 0, data_sp->GetByteSize())) {
862  std::unique_ptr<ObjectFile> objfile_up(
863  new ObjectFileMachO(module_sp, data_sp, process_sp, header_addr));
864  if (objfile_up.get() && objfile_up->ParseHeader())
865  return objfile_up.release();
866  }
867  return nullptr;
868 }
869 
871  const lldb_private::FileSpec &file, lldb::DataBufferSP &data_sp,
872  lldb::offset_t data_offset, lldb::offset_t file_offset,
874  const size_t initial_count = specs.GetSize();
875 
876  if (ObjectFileMachO::MagicBytesMatch(data_sp, 0, data_sp->GetByteSize())) {
877  DataExtractor data;
878  data.SetData(data_sp);
879  llvm::MachO::mach_header header;
880  if (ParseHeader(data, &data_offset, header)) {
881  size_t header_and_load_cmds =
882  header.sizeofcmds + MachHeaderSizeFromMagic(header.magic);
883  if (header_and_load_cmds >= data_sp->GetByteSize()) {
884  data_sp = MapFileData(file, header_and_load_cmds, file_offset);
885  data.SetData(data_sp);
886  data_offset = MachHeaderSizeFromMagic(header.magic);
887  }
888  if (data_sp) {
889  ModuleSpec base_spec;
890  base_spec.GetFileSpec() = file;
891  base_spec.SetObjectOffset(file_offset);
892  base_spec.SetObjectSize(length);
893  GetAllArchSpecs(header, data, data_offset, base_spec, specs);
894  }
895  }
896  }
897  return specs.GetSize() - initial_count;
898 }
899 
901  static ConstString g_segment_name_TEXT("__TEXT");
902  return g_segment_name_TEXT;
903 }
904 
906  static ConstString g_segment_name_DATA("__DATA");
907  return g_segment_name_DATA;
908 }
909 
911  static ConstString g_segment_name("__DATA_DIRTY");
912  return g_segment_name;
913 }
914 
916  static ConstString g_segment_name("__DATA_CONST");
917  return g_segment_name;
918 }
919 
921  static ConstString g_segment_name_OBJC("__OBJC");
922  return g_segment_name_OBJC;
923 }
924 
926  static ConstString g_section_name_LINKEDIT("__LINKEDIT");
927  return g_section_name_LINKEDIT;
928 }
929 
931  static ConstString g_section_name("__DWARF");
932  return g_section_name;
933 }
934 
936  static ConstString g_section_name_eh_frame("__eh_frame");
937  return g_section_name_eh_frame;
938 }
939 
940 bool ObjectFileMachO::MagicBytesMatch(DataBufferSP &data_sp,
941  lldb::addr_t data_offset,
942  lldb::addr_t data_length) {
943  DataExtractor data;
944  data.SetData(data_sp, data_offset, data_length);
945  lldb::offset_t offset = 0;
946  uint32_t magic = data.GetU32(&offset);
947  return MachHeaderSizeFromMagic(magic) != 0;
948 }
949 
950 ObjectFileMachO::ObjectFileMachO(const lldb::ModuleSP &module_sp,
951  DataBufferSP &data_sp,
952  lldb::offset_t data_offset,
953  const FileSpec *file,
954  lldb::offset_t file_offset,
955  lldb::offset_t length)
956  : ObjectFile(module_sp, file, file_offset, length, data_sp, data_offset),
957  m_mach_segments(), m_mach_sections(), m_entry_point_address(),
958  m_thread_context_offsets(), m_thread_context_offsets_valid(false),
959  m_reexported_dylibs(), m_allow_assembly_emulation_unwind_plans(true) {
960  ::memset(&m_header, 0, sizeof(m_header));
961  ::memset(&m_dysymtab, 0, sizeof(m_dysymtab));
962 }
963 
964 ObjectFileMachO::ObjectFileMachO(const lldb::ModuleSP &module_sp,
965  lldb::DataBufferSP &header_data_sp,
966  const lldb::ProcessSP &process_sp,
967  lldb::addr_t header_addr)
968  : ObjectFile(module_sp, process_sp, header_addr, header_data_sp),
969  m_mach_segments(), m_mach_sections(), m_entry_point_address(),
970  m_thread_context_offsets(), m_thread_context_offsets_valid(false),
971  m_reexported_dylibs(), m_allow_assembly_emulation_unwind_plans(true) {
972  ::memset(&m_header, 0, sizeof(m_header));
973  ::memset(&m_dysymtab, 0, sizeof(m_dysymtab));
974 }
975 
977  lldb::offset_t *data_offset_ptr,
978  llvm::MachO::mach_header &header) {
980  // Leave magic in the original byte order
981  header.magic = data.GetU32(data_offset_ptr);
982  bool can_parse = false;
983  bool is_64_bit = false;
984  switch (header.magic) {
985  case MH_MAGIC:
987  data.SetAddressByteSize(4);
988  can_parse = true;
989  break;
990 
991  case MH_MAGIC_64:
993  data.SetAddressByteSize(8);
994  can_parse = true;
995  is_64_bit = true;
996  break;
997 
998  case MH_CIGAM:
1001  : eByteOrderBig);
1002  data.SetAddressByteSize(4);
1003  can_parse = true;
1004  break;
1005 
1006  case MH_CIGAM_64:
1009  : eByteOrderBig);
1010  data.SetAddressByteSize(8);
1011  is_64_bit = true;
1012  can_parse = true;
1013  break;
1014 
1015  default:
1016  break;
1017  }
1018 
1019  if (can_parse) {
1020  data.GetU32(data_offset_ptr, &header.cputype, 6);
1021  if (is_64_bit)
1022  *data_offset_ptr += 4;
1023  return true;
1024  } else {
1025  memset(&header, 0, sizeof(header));
1026  }
1027  return false;
1028 }
1029 
      // Parses the Mach-O header out of m_data into m_header while holding the
      // module mutex. When the magic is recognised, the architectures found in
      // the header are compared with the module's architecture, and m_data is
      // re-read (from process memory or from the file) if it does not yet
      // cover the header plus all load commands.
      // Returns false when there is no module, the magic is unknown (m_header
      // is zeroed in that case), or no architecture spec is compatible.
      // NOTE(review): the signature and a few statements of this function are
      // not visible in this listing.
1031  ModuleSP module_sp(GetModule());
1032  if (!module_sp)
1033  return false;
1034 
1035  std::lock_guard<std::recursive_mutex> guard(module_sp->GetMutex());
1036  bool can_parse = false;
1037  lldb::offset_t offset = 0;
1039  // Leave magic in the original byte order
1040  m_header.magic = m_data.GetU32(&offset);
1041  switch (m_header.magic) {
1042  case MH_MAGIC:
1045  can_parse = true;
1046  break;
1047 
1048  case MH_MAGIC_64:
1051  can_parse = true;
1052  break;
1053 
1054  case MH_CIGAM:
1057  : eByteOrderBig);
1059  can_parse = true;
1060  break;
1061 
1062  case MH_CIGAM_64:
1065  : eByteOrderBig);
1067  can_parse = true;
1068  break;
1069 
1070  default:
1071  break;
1072  }
1073 
1074  if (can_parse) {
      // Read the six 32-bit header fields that follow the magic.
1075  m_data.GetU32(&offset, &m_header.cputype, 6);
1076 
1077  ModuleSpecList all_specs;
1078  ModuleSpec base_spec;
1080  base_spec, all_specs);
1081 
1082  for (unsigned i = 0, e = all_specs.GetSize(); i != e; ++i) {
1083  ArchSpec mach_arch =
1085 
1086  // Check if the module has a required architecture
1087  const ArchSpec &module_arch = module_sp->GetArchitecture();
1088  if (module_arch.IsValid() && !module_arch.IsCompatibleMatch(mach_arch))
1089  continue;
1090 
1091  if (SetModulesArchitecture(mach_arch)) {
      // Make sure m_data spans the header and every load command.
1092  const size_t header_and_lc_size =
1093  m_header.sizeofcmds + MachHeaderSizeFromMagic(m_header.magic);
1094  if (m_data.GetByteSize() < header_and_lc_size) {
1095  DataBufferSP data_sp;
1096  ProcessSP process_sp(m_process_wp.lock());
1097  if (process_sp) {
1098  data_sp = ReadMemory(process_sp, m_memory_addr, header_and_lc_size);
1099  } else {
1100  // Read in only the header and load command data from the file on disk
      // NOTE(review): data_sp is dereferenced on the next-but-one line
      // without a null check - confirm MapFileData cannot return an empty
      // pointer here.
1101  data_sp = MapFileData(m_file, header_and_lc_size, m_file_offset);
1102  if (data_sp->GetByteSize() != header_and_lc_size)
1103  continue;
1104  }
1105  if (data_sp)
1106  m_data.SetData(data_sp);
1107  }
1108  }
      // The first architecture spec that passes the compatibility check ends
      // the search, whether or not SetModulesArchitecture() succeeded.
1109  return true;
1110  }
1111  // None found.
1112  return false;
1113  } else {
1114  memset(&m_header, 0, sizeof(struct llvm::MachO::mach_header));
1115  }
1116  return false;
1117 }
1118 
      // Reports the byte order currently configured on the m_data extractor.
1120  return m_data.GetByteOrder();
1121 }
1122 
      // True when the parsed header's filetype is MH_EXECUTE.
1124  return m_header.filetype == MH_EXECUTE;
1125 }
1126 
      // True when the parsed header's filetype is MH_DYLINKER.
1128  return m_header.filetype == MH_DYLINKER;
1129 }
1130 
      // Non-zero when the MH_DYLIB_IN_CACHE bit is set in the header flags.
1132  return m_header.flags & MH_DYLIB_IN_CACHE;
1133 }
1134 
      // Reports the address byte size currently configured on m_data.
1136  return m_data.GetAddressByteSize();
1137 }
1138 
      // Classifies file_addr as code, data, debug, runtime or unknown.
      // The symbol containing the address is looked up in the symbol table.
      // If the symbol's value is an address inside a section, the section type
      // decides the class; otherwise (or when that lookup yields no section)
      // the symbol type decides. AddressClass::eUnknown is returned when there
      // is no symbol table or no containing symbol.
      // NOTE(review): the signature and a number of case labels are not
      // visible in this listing.
1140  Symtab *symtab = GetSymtab();
1141  if (!symtab)
1142  return AddressClass::eUnknown;
1143 
1144  Symbol *symbol = symtab->FindSymbolContainingFileAddress(file_addr);
1145  if (symbol) {
1146  if (symbol->ValueIsAddress()) {
1147  SectionSP section_sp(symbol->GetAddressRef().GetSection());
1148  if (section_sp) {
1149  const lldb::SectionType section_type = section_sp->GetType();
1150  switch (section_type) {
1151  case eSectionTypeInvalid:
1152  return AddressClass::eUnknown;
1153 
1154  case eSectionTypeCode:
1155  if (m_header.cputype == llvm::MachO::CPU_TYPE_ARM) {
1156  // For ARM we have a bit in the n_desc field of the symbol that
1157  // tells us ARM/Thumb which is bit 0x0008.
1158  if (symbol->GetFlags() & MACHO_NLIST_ARM_SYMBOL_IS_THUMB)
1159  return AddressClass::eCodeAlternateISA;
1160  }
1161  return AddressClass::eCode;
1162 
1163  case eSectionTypeContainer:
1164  return AddressClass::eUnknown;
1165 
1166  case eSectionTypeData:
1170  case eSectionTypeData4:
1171  case eSectionTypeData8:
1172  case eSectionTypeData16:
1174  case eSectionTypeZeroFill:
1177  case eSectionTypeGoSymtab:
1178  return AddressClass::eData;
1179 
1180  case eSectionTypeDebug:
1215  return AddressClass::eDebug;
1216 
1217  case eSectionTypeEHFrame:
1218  case eSectionTypeARMexidx:
1219  case eSectionTypeARMextab:
1221  return AddressClass::eRuntime;
1222 
1228  case eSectionTypeOther:
1229  return AddressClass::eUnknown;
1230  }
1231  }
1232  }
1233 
      // Fall back to the symbol's own type.
1234  const SymbolType symbol_type = symbol->GetType();
1235  switch (symbol_type) {
1236  case eSymbolTypeAny:
1237  return AddressClass::eUnknown;
1238  case eSymbolTypeAbsolute:
1239  return AddressClass::eUnknown;
1240 
1241  case eSymbolTypeCode:
1242  case eSymbolTypeTrampoline:
1243  case eSymbolTypeResolver:
1244  if (m_header.cputype == llvm::MachO::CPU_TYPE_ARM) {
1245  // For ARM we have a bit in the n_desc field of the symbol that tells
1246  // us ARM/Thumb which is bit 0x0008.
1247  if (symbol->GetFlags() & MACHO_NLIST_ARM_SYMBOL_IS_THUMB)
1248  return AddressClass::eCodeAlternateISA;
1249  }
1250  return AddressClass::eCode;
1251 
1252  case eSymbolTypeData:
1253  return AddressClass::eData;
1254  case eSymbolTypeRuntime:
1255  return AddressClass::eRuntime;
1256  case eSymbolTypeException:
1257  return AddressClass::eRuntime;
1258  case eSymbolTypeSourceFile:
1259  return AddressClass::eDebug;
1260  case eSymbolTypeHeaderFile:
1261  return AddressClass::eDebug;
1262  case eSymbolTypeObjectFile:
1263  return AddressClass::eDebug;
1265  return AddressClass::eDebug;
1266  case eSymbolTypeBlock:
1267  return AddressClass::eDebug;
1268  case eSymbolTypeLocal:
1269  return AddressClass::eData;
1270  case eSymbolTypeParam:
1271  return AddressClass::eData;
1272  case eSymbolTypeVariable:
1273  return AddressClass::eData;
1275  return AddressClass::eDebug;
1276  case eSymbolTypeLineEntry:
1277  return AddressClass::eDebug;
1278  case eSymbolTypeLineHeader:
1279  return AddressClass::eDebug;
1280  case eSymbolTypeScopeBegin:
1281  return AddressClass::eDebug;
1282  case eSymbolTypeScopeEnd:
1283  return AddressClass::eDebug;
1284  case eSymbolTypeAdditional:
1285  return AddressClass::eUnknown;
1286  case eSymbolTypeCompiler:
1287  return AddressClass::eDebug;
1289  return AddressClass::eDebug;
1290  case eSymbolTypeUndefined:
1291  return AddressClass::eUnknown;
1292  case eSymbolTypeObjCClass:
1293  return AddressClass::eRuntime;
1295  return AddressClass::eRuntime;
1296  case eSymbolTypeObjCIVar:
1297  return AddressClass::eRuntime;
1298  case eSymbolTypeReExported:
1299  return AddressClass::eRuntime;
1300  }
1301  }
1302  return AddressClass::eUnknown;
1303 }
1304 
      // Returns the symbol table, building it on first use. Construction
      // happens under the module mutex, and the new table's own mutex is held
      // while ParseSymtab() fills it and Finalize() is called. If there is no
      // module, whatever m_symtab_up currently holds (possibly null) is
      // returned.
1306  ModuleSP module_sp(GetModule());
1307  if (module_sp) {
1308  std::lock_guard<std::recursive_mutex> guard(module_sp->GetMutex());
1309  if (m_symtab_up == nullptr) {
1310  m_symtab_up = std::make_unique<Symtab>(this);
1311  std::lock_guard<std::recursive_mutex> symtab_guard(
1312  m_symtab_up->GetMutex());
1313  ParseSymtab();
1314  m_symtab_up->Finalize();
1315  }
1316  }
1317  return m_symtab_up.get();
1318 }
1319 
      // Decides whether the binary looks stripped from its LC_DYSYMTAB load
      // command: true when that command reports at most one local symbol.
      // m_dysymtab is filled lazily; a zero cmd means it has not been read.
      // Returns false when no LC_DYSYMTAB could be read.
      // NOTE(review): the signature and the declaration of `offset` are not
      // visible in this listing.
1321  if (m_dysymtab.cmd == 0) {
1322  ModuleSP module_sp(GetModule());
1323  if (module_sp) {
1325  for (uint32_t i = 0; i < m_header.ncmds; ++i) {
1326  const lldb::offset_t load_cmd_offset = offset;
1327 
1328  llvm::MachO::load_command lc;
      // Read cmd and cmdsize; stop scanning if the data runs out.
1329  if (m_data.GetU32(&offset, &lc.cmd, 2) == nullptr)
1330  break;
1331  if (lc.cmd == LC_DYSYMTAB) {
1332  m_dysymtab.cmd = lc.cmd;
1333  m_dysymtab.cmdsize = lc.cmdsize;
      // Read every remaining 32-bit field of the command (all of m_dysymtab
      // except the two words copied above).
1334  if (m_data.GetU32(&offset, &m_dysymtab.ilocalsym,
1335  (sizeof(m_dysymtab) / sizeof(uint32_t)) - 2) ==
1336  nullptr) {
1337  // Clear m_dysymtab if we were unable to read all items from the
1338  // load command
1339  ::memset(&m_dysymtab, 0, sizeof(m_dysymtab));
1340  }
1341  }
      // Advance to the next load command using the recorded size.
1342  offset = load_cmd_offset + lc.cmdsize;
1343  }
1344  }
1345  }
1346  if (m_dysymtab.cmd)
1347  return m_dysymtab.nlocalsym <= 1;
1348  return false;
1349 }
1350 
      // Walks the load commands and collects one range (cryptoff, cryptsize)
      // for every LC_ENCRYPTION_INFO / LC_ENCRYPTION_INFO_64 command whose
      // cryptid is non-zero. Returns the collected ranges, which may be empty.
      // NOTE(review): the signature and the declarations of `offset` and
      // `entry` are not visible in this listing.
1352  EncryptedFileRanges result;
1354 
1355  llvm::MachO::encryption_info_command encryption_cmd;
1356  for (uint32_t i = 0; i < m_header.ncmds; ++i) {
1357  const lldb::offset_t load_cmd_offset = offset;
      // Read cmd and cmdsize; stop scanning if the data runs out.
1358  if (m_data.GetU32(&offset, &encryption_cmd, 2) == nullptr)
1359  break;
1360 
1361  // LC_ENCRYPTION_INFO and LC_ENCRYPTION_INFO_64 have the same sizes for the
1362  // 3 fields we care about, so treat them the same.
1363  if (encryption_cmd.cmd == LC_ENCRYPTION_INFO ||
1364  encryption_cmd.cmd == LC_ENCRYPTION_INFO_64) {
1365  if (m_data.GetU32(&offset, &encryption_cmd.cryptoff, 3)) {
1366  if (encryption_cmd.cryptid != 0) {
1368  entry.SetRangeBase(encryption_cmd.cryptoff);
1369  entry.SetByteSize(encryption_cmd.cryptsize);
1370  result.Append(entry);
1371  }
1372  }
1373  }
1374  offset = load_cmd_offset + encryption_cmd.cmdsize;
1375  }
1376 
1377  return result;
1378 }
1379 
1381  llvm::MachO::segment_command_64 &seg_cmd, uint32_t cmd_idx) {
      // Clamps the file offset/size of seg_cmd (modified in place) so that it
      // never describes bytes beyond m_length. cmd_idx is only used in the
      // warnings. Nothing is done when m_length or seg_cmd.filesize is zero.
1382  if (m_length == 0 || seg_cmd.filesize == 0)
1383  return;
1384 
1385  if (IsSharedCacheBinary() && !IsInMemory()) {
1386  // In shared cache images, the load commands are relative to the
1387  // shared cache file, and not the specific image we are
1388  // examining. Let's fix this up so that it looks like a normal
1389  // image.
1390  if (strncmp(seg_cmd.segname, "__TEXT", sizeof(seg_cmd.segname)) == 0)
1391  m_text_address = seg_cmd.vmaddr;
1392  if (strncmp(seg_cmd.segname, "__LINKEDIT", sizeof(seg_cmd.segname)) == 0)
1393  m_linkedit_original_offset = seg_cmd.fileoff;
1394 
      // Express the file offset relative to the recorded __TEXT address.
1395  seg_cmd.fileoff = seg_cmd.vmaddr - m_text_address;
1396  }
1397 
1398  if (seg_cmd.fileoff > m_length) {
1399  // We have a load command whose data starts past the end of the file.
1400  // This is likely a corrupt file. We don't have any way to return an error
1401  // condition here (this method was likely invoked from something like
1402  // ObjectFile::GetSectionList()), so we just null out the section contents,
1403  // and report a warning through the module. The most common case here is
1404  // core file debugging with a truncated file.
1405  const char *lc_segment_name =
1406  seg_cmd.cmd == LC_SEGMENT_64 ? "LC_SEGMENT_64" : "LC_SEGMENT";
1407  GetModule()->ReportWarning(
1408  "load command %u %s has a fileoff (0x%" PRIx64
1409  ") that extends beyond the end of the file (0x%" PRIx64
1410  "), ignoring this section",
1411  cmd_idx, lc_segment_name, seg_cmd.fileoff, m_length);
1412 
1413  seg_cmd.fileoff = 0;
1414  seg_cmd.filesize = 0;
1415  }
1416 
      // NOTE(review): fileoff + filesize is computed without an overflow
      // check - confirm a wrapped sum cannot slip past this test for hostile
      // load commands.
1417  if (seg_cmd.fileoff + seg_cmd.filesize > m_length) {
1418  // We have a load command whose data starts inside the file but runs past
1419  // its end. This is likely a corrupt file. We don't have any way to return
1420  // an error condition here (this method was likely invoked from something
1421  // like ObjectFile::GetSectionList()), so we shorten the segment to the
1422  // bytes that are actually present and report a warning through the
1423  // module. The most common case here is core file debugging with a
      // truncated file.
1424  const char *lc_segment_name =
1425  seg_cmd.cmd == LC_SEGMENT_64 ? "LC_SEGMENT_64" : "LC_SEGMENT";
1426  GetModule()->ReportWarning(
1427  "load command %u %s has a fileoff + filesize (0x%" PRIx64
1428  ") that extends beyond the end of the file (0x%" PRIx64
1429  "), the segment will be truncated to match",
1430  cmd_idx, lc_segment_name, seg_cmd.fileoff + seg_cmd.filesize, m_length);
1431 
1432  // Truncate the length
1433  seg_cmd.filesize = m_length - seg_cmd.fileoff;
1434  }
1435 }
1436 
1437 static uint32_t
1438 GetSegmentPermissions(const llvm::MachO::segment_command_64 &seg_cmd) {
1439  uint32_t result = 0;
1440  if (seg_cmd.initprot & VM_PROT_READ)
1441  result |= ePermissionsReadable;
1442  if (seg_cmd.initprot & VM_PROT_WRITE)
1443  result |= ePermissionsWritable;
1444  if (seg_cmd.initprot & VM_PROT_EXECUTE)
1445  result |= ePermissionsExecutable;
1446  return result;
1447 }
1448 
1450  ConstString section_name) {
      // Maps a Mach-O section (its flags word and its name) to an
      // lldb::SectionType. Order of precedence:
      //   1. instruction attribute bits in flags -> code;
      //   2. well-known section names;
      //   3. the Mach-O section type held in (flags & SECTION_TYPE).
      // NOTE(review): the first line of the signature and most of the
      // `return` statements paired with the name comparisons below are not
      // visible in this listing.
1451 
1452  if (flags & (S_ATTR_PURE_INSTRUCTIONS | S_ATTR_SOME_INSTRUCTIONS))
1453  return eSectionTypeCode;
1454 
1455  uint32_t mach_sect_type = flags & SECTION_TYPE;
      // Function-local statics so each name is turned into a ConstString only
      // once.
1456  static ConstString g_sect_name_objc_data("__objc_data");
1457  static ConstString g_sect_name_objc_msgrefs("__objc_msgrefs");
1458  static ConstString g_sect_name_objc_selrefs("__objc_selrefs");
1459  static ConstString g_sect_name_objc_classrefs("__objc_classrefs");
1460  static ConstString g_sect_name_objc_superrefs("__objc_superrefs");
1461  static ConstString g_sect_name_objc_const("__objc_const");
1462  static ConstString g_sect_name_objc_classlist("__objc_classlist");
1463  static ConstString g_sect_name_cfstring("__cfstring");
1464 
1465  static ConstString g_sect_name_dwarf_debug_abbrev("__debug_abbrev");
1466  static ConstString g_sect_name_dwarf_debug_aranges("__debug_aranges");
1467  static ConstString g_sect_name_dwarf_debug_frame("__debug_frame");
1468  static ConstString g_sect_name_dwarf_debug_info("__debug_info");
1469  static ConstString g_sect_name_dwarf_debug_line("__debug_line");
1470  static ConstString g_sect_name_dwarf_debug_loc("__debug_loc");
1471  static ConstString g_sect_name_dwarf_debug_loclists("__debug_loclists");
1472  static ConstString g_sect_name_dwarf_debug_macinfo("__debug_macinfo");
1473  static ConstString g_sect_name_dwarf_debug_names("__debug_names");
1474  static ConstString g_sect_name_dwarf_debug_pubnames("__debug_pubnames");
1475  static ConstString g_sect_name_dwarf_debug_pubtypes("__debug_pubtypes");
1476  static ConstString g_sect_name_dwarf_debug_ranges("__debug_ranges");
1477  static ConstString g_sect_name_dwarf_debug_str("__debug_str");
1478  static ConstString g_sect_name_dwarf_debug_types("__debug_types");
1479  static ConstString g_sect_name_dwarf_apple_names("__apple_names");
1480  static ConstString g_sect_name_dwarf_apple_types("__apple_types");
      // The literal below is exactly 16 characters long, the same length the
      // segment-parsing code in this file reads for a section name, so the
      // missing trailing "es" looks deliberate - TODO confirm.
1481  static ConstString g_sect_name_dwarf_apple_namespaces("__apple_namespac");
1482  static ConstString g_sect_name_dwarf_apple_objc("__apple_objc");
1483  static ConstString g_sect_name_eh_frame("__eh_frame");
1484  static ConstString g_sect_name_compact_unwind("__unwind_info");
1485  static ConstString g_sect_name_text("__text");
1486  static ConstString g_sect_name_data("__data");
1487  static ConstString g_sect_name_go_symtab("__gosymtab");
1488 
1489  if (section_name == g_sect_name_dwarf_debug_abbrev)
1491  if (section_name == g_sect_name_dwarf_debug_aranges)
1493  if (section_name == g_sect_name_dwarf_debug_frame)
1495  if (section_name == g_sect_name_dwarf_debug_info)
1497  if (section_name == g_sect_name_dwarf_debug_line)
1499  if (section_name == g_sect_name_dwarf_debug_loc)
1501  if (section_name == g_sect_name_dwarf_debug_loclists)
1503  if (section_name == g_sect_name_dwarf_debug_macinfo)
1505  if (section_name == g_sect_name_dwarf_debug_names)
1507  if (section_name == g_sect_name_dwarf_debug_pubnames)
1509  if (section_name == g_sect_name_dwarf_debug_pubtypes)
1511  if (section_name == g_sect_name_dwarf_debug_ranges)
1513  if (section_name == g_sect_name_dwarf_debug_str)
1515  if (section_name == g_sect_name_dwarf_debug_types)
1517  if (section_name == g_sect_name_dwarf_apple_names)
1519  if (section_name == g_sect_name_dwarf_apple_types)
1521  if (section_name == g_sect_name_dwarf_apple_namespaces)
1523  if (section_name == g_sect_name_dwarf_apple_objc)
1525  if (section_name == g_sect_name_objc_selrefs)
1527  if (section_name == g_sect_name_objc_msgrefs)
1529  if (section_name == g_sect_name_eh_frame)
1530  return eSectionTypeEHFrame;
1531  if (section_name == g_sect_name_compact_unwind)
1533  if (section_name == g_sect_name_cfstring)
1535  if (section_name == g_sect_name_go_symtab)
1536  return eSectionTypeGoSymtab;
1537  if (section_name == g_sect_name_objc_data ||
1538  section_name == g_sect_name_objc_classrefs ||
1539  section_name == g_sect_name_objc_superrefs ||
1540  section_name == g_sect_name_objc_const ||
1541  section_name == g_sect_name_objc_classlist) {
1542  return eSectionTypeDataPointers;
1543  }
1544 
      // No name matched: classify by the Mach-O section type.
1545  switch (mach_sect_type) {
1546  // TODO: categorize sections by other flags for regular sections
1547  case S_REGULAR:
1548  if (section_name == g_sect_name_text)
1549  return eSectionTypeCode;
1550  if (section_name == g_sect_name_data)
1551  return eSectionTypeData;
1552  return eSectionTypeOther;
1553  case S_ZEROFILL:
1554  return eSectionTypeZeroFill;
1555  case S_CSTRING_LITERALS: // section with only literal C strings
1556  return eSectionTypeDataCString;
1557  case S_4BYTE_LITERALS: // section with only 4 byte literals
1558  return eSectionTypeData4;
1559  case S_8BYTE_LITERALS: // section with only 8 byte literals
1560  return eSectionTypeData8;
1561  case S_LITERAL_POINTERS: // section with only pointers to literals
1562  return eSectionTypeDataPointers;
1563  case S_NON_LAZY_SYMBOL_POINTERS: // section with only non-lazy symbol pointers
1564  return eSectionTypeDataPointers;
1565  case S_LAZY_SYMBOL_POINTERS: // section with only lazy symbol pointers
1566  return eSectionTypeDataPointers;
1567  case S_SYMBOL_STUBS: // section with only symbol stubs, byte size of stub in
1568  // the reserved2 field
1569  return eSectionTypeCode;
1570  case S_MOD_INIT_FUNC_POINTERS: // section with only function pointers for
1571  // initialization
1572  return eSectionTypeDataPointers;
1573  case S_MOD_TERM_FUNC_POINTERS: // section with only function pointers for
1574  // termination
1575  return eSectionTypeDataPointers;
1576  case S_COALESCED:
1577  return eSectionTypeOther;
1578  case S_GB_ZEROFILL:
1579  return eSectionTypeZeroFill;
1580  case S_INTERPOSING: // section with only pairs of function pointers for
1581  // interposing
1582  return eSectionTypeCode;
1583  case S_16BYTE_LITERALS: // section with only 16 byte literals
1584  return eSectionTypeData16;
1585  case S_DTRACE_DOF:
1586  return eSectionTypeDebug;
1587  case S_LAZY_DYLIB_SYMBOL_POINTERS:
1588  return eSectionTypeDataPointers;
1589  default:
1590  return eSectionTypeOther;
1591  }
1592 }
1593 
      // Set while processing a segment command when a dSYM's segment file
      // address is installed over the one already in the unified section
      // list; the section-creation code checks it afterwards to notify the
      // module.
1599  bool FileAddressesChanged = false;
1600 
1604 };
1605 
1607  const llvm::MachO::load_command &load_cmd_, lldb::offset_t offset,
1608  uint32_t cmd_idx, SegmentParsingContext &context) {
      // Parses one LC_SEGMENT / LC_SEGMENT_64 load command starting at `offset`
      // in m_data (just past the cmd/cmdsize words already held in load_cmd_).
      // It records the raw segment and its sections in m_mach_segments /
      // m_mach_sections, and creates the corresponding Section objects in
      // m_sections_up and, unless told otherwise, in context.UnifiedList.
      // cmd_idx is passed on for diagnostics.
      // NOTE(review): the first line of the signature and the declaration of
      // `log` are not visible in this listing.
1609  llvm::MachO::segment_command_64 load_cmd;
      // Only the leading load_command-sized prefix is copied; the remaining
      // fields are read from m_data below.
1610  memcpy(&load_cmd, &load_cmd_, sizeof(load_cmd_));
1611 
1612  if (!m_data.GetU8(&offset, (uint8_t *)load_cmd.segname, 16))
1613  return;
1614 
1615  ModuleSP module_sp = GetModule();
1616  const bool is_core = GetType() == eTypeCoreFile;
1617  const bool is_dsym = (m_header.filetype == MH_DSYM);
1618  bool add_section = true;
1619  bool add_to_unified = true;
1620  ConstString const_segname(
1621  load_cmd.segname, strnlen(load_cmd.segname, sizeof(load_cmd.segname)));
1622 
1623  SectionSP unified_section_sp(
1624  context.UnifiedList.FindSectionByName(const_segname));
1625  if (is_dsym && unified_section_sp) {
1626  if (const_segname == GetSegmentNameLINKEDIT()) {
1627  // We need to keep the __LINKEDIT segment private to this object file
1628  // only
1629  add_to_unified = false;
1630  } else {
1631  // This is the dSYM file and this section has already been created by the
1632  // object file, no need to create it.
1633  add_section = false;
1634  }
1635  }
      // These four fields are read with the extractor's current address size.
1636  load_cmd.vmaddr = m_data.GetAddress(&offset);
1637  load_cmd.vmsize = m_data.GetAddress(&offset);
1638  load_cmd.fileoff = m_data.GetAddress(&offset);
1639  load_cmd.filesize = m_data.GetAddress(&offset);
      // Four 32-bit fields follow, starting at maxprot.
1640  if (!m_data.GetU32(&offset, &load_cmd.maxprot, 4))
1641  return;
1642 
1643  SanitizeSegmentCommand(load_cmd, cmd_idx);
1644 
1645  const uint32_t segment_permissions = GetSegmentPermissions(load_cmd);
1646  const bool segment_is_encrypted =
1647  (load_cmd.flags & SG_PROTECTED_VERSION_1) != 0;
1648 
1649  // Keep a list of mach segments around in case we need to get at data that
1650  // isn't stored in the abstracted Sections.
1651  m_mach_segments.push_back(load_cmd);
1652 
1653  // Use a segment ID of the segment index shifted left by 8 so they never
1654  // conflict with any of the sections.
1655  SectionSP segment_sp;
1656  if (add_section && (const_segname || is_core)) {
1657  segment_sp = std::make_shared<Section>(
1658  module_sp, // Module to which this section belongs
1659  this, // Object file to which this section belongs
1660  ++context.NextSegmentIdx
1661  << 8, // Section ID is the 1 based segment index
1662  // shifted left by 8 bits so as not to collide with any of the 256
1663  // section IDs that are possible
1664  const_segname, // Name of this section
1665  eSectionTypeContainer, // This section is a container of other
1666  // sections.
1667  load_cmd.vmaddr, // File VM address == addresses as they are
1668  // found in the object file
1669  load_cmd.vmsize, // VM size in bytes of this section
1670  load_cmd.fileoff, // Offset to the data for this section in
1671  // the file
1672  load_cmd.filesize, // Size in bytes of this section as found
1673  // in the file
1674  0, // Segments have no alignment information
1675  load_cmd.flags); // Flags for this section
1676 
1677  segment_sp->SetIsEncrypted(segment_is_encrypted);
1678  m_sections_up->AddSection(segment_sp);
1679  segment_sp->SetPermissions(segment_permissions);
1680  if (add_to_unified)
1681  context.UnifiedList.AddSection(segment_sp);
1682  } else if (unified_section_sp) {
1683  // If this is a dSYM and the file addresses in the dSYM differ from the
1684  // file addresses in the ObjectFile, we must use the file base address for
1685  // the Section from the dSYM for the DWARF to resolve correctly.
1686  // This only happens with binaries in the shared cache in practice;
1687  // normally a mismatch like this would give a binary & dSYM that do not
1688  // match UUIDs. When a binary is included in the shared cache, its
1689  // segments are rearranged to optimize the shared cache, so its file
1690  // addresses will differ from what the ObjectFile had originally,
1691  // and what the dSYM has.
1692  if (is_dsym && unified_section_sp->GetFileAddress() != load_cmd.vmaddr) {
1694  if (log) {
1695  log->Printf(
1696  "Installing dSYM's %s segment file address over ObjectFile's "
1697  "so symbol table/debug info resolves correctly for %s",
1698  const_segname.AsCString(),
1699  module_sp->GetFileSpec().GetFilename().AsCString());
1700  }
1701 
1702  // Make sure we've parsed the symbol table from the ObjectFile before
1703  // we go around changing its Sections.
1704  module_sp->GetObjectFile()->GetSymtab();
1705  // eh_frame would present the same problems but we parse that on a per-
1706  // function basis as-needed so it's more difficult to remove its use of
1707  // the Sections. Realistically, the environments where this code path
1708  // will be taken will not have eh_frame sections.
1709 
1710  unified_section_sp->SetFileAddress(load_cmd.vmaddr);
1711 
1712  // Notify the module that the section addresses have been changed once
1713  // we're done so any file-address caches can be updated.
1714  context.FileAddressesChanged = true;
1715  }
1716  m_sections_up->AddSection(unified_section_sp);
1717  }
1718 
1719  llvm::MachO::section_64 sect64;
1720  ::memset(&sect64, 0, sizeof(sect64));
1721  // Push a section into our mach sections for the section at index zero
1722  // (NO_SECT) if we don't have any mach sections yet...
1723  if (m_mach_sections.empty())
1724  m_mach_sections.push_back(sect64);
1725  uint32_t segment_sect_idx;
1726  const lldb::user_id_t first_segment_sectID = context.NextSectionIdx + 1;
1727 
      // Number of trailing 32-bit fields in a section header, starting at
      // `offset`: 7 for LC_SEGMENT, 8 otherwise.
1728  const uint32_t num_u32s = load_cmd.cmd == LC_SEGMENT ? 7 : 8;
1729  for (segment_sect_idx = 0; segment_sect_idx < load_cmd.nsects;
1730  ++segment_sect_idx) {
1731  if (m_data.GetU8(&offset, (uint8_t *)sect64.sectname,
1732  sizeof(sect64.sectname)) == nullptr)
1733  break;
1734  if (m_data.GetU8(&offset, (uint8_t *)sect64.segname,
1735  sizeof(sect64.segname)) == nullptr)
1736  break;
1737  sect64.addr = m_data.GetAddress(&offset);
1738  sect64.size = m_data.GetAddress(&offset);
1739 
1740  if (m_data.GetU32(&offset, &sect64.offset, num_u32s) == nullptr)
1741  break;
1742 
      // Same shared-cache fix-up as applied to the segment: make the file
      // offset relative to the recorded __TEXT address.
1743  if (IsSharedCacheBinary() && !IsInMemory()) {
1744  sect64.offset = sect64.addr - m_text_address;
1745  }
1746 
1747  // Keep a list of mach sections around in case we need to get at data that
1748  // isn't stored in the abstracted Sections.
1749  m_mach_sections.push_back(sect64);
1750 
1751  if (add_section) {
1752  ConstString section_name(
1753  sect64.sectname, strnlen(sect64.sectname, sizeof(sect64.sectname)));
1754  if (!const_segname) {
1755  // We have a segment with no name so we need to conjure up segments
1756  // that correspond to the section's segname if there isn't already such
1757  // a section. If there is such a section, we resize the section so that
1758  // it spans all sections. We also mark these sections as fake so
1759  // address matches don't hit if they land in the gaps between the child
1760  // sections.
1761  const_segname.SetTrimmedCStringWithLength(sect64.segname,
1762  sizeof(sect64.segname));
1763  segment_sp = context.UnifiedList.FindSectionByName(const_segname);
1764  if (segment_sp.get()) {
1765  Section *segment = segment_sp.get();
1766  // Grow the section size as needed.
1767  const lldb::addr_t sect64_min_addr = sect64.addr;
1768  const lldb::addr_t sect64_max_addr = sect64_min_addr + sect64.size;
1769  const lldb::addr_t curr_seg_byte_size = segment->GetByteSize();
1770  const lldb::addr_t curr_seg_min_addr = segment->GetFileAddress();
1771  const lldb::addr_t curr_seg_max_addr =
1772  curr_seg_min_addr + curr_seg_byte_size;
1773  if (sect64_min_addr >= curr_seg_min_addr) {
1774  const lldb::addr_t new_seg_byte_size =
1775  sect64_max_addr - curr_seg_min_addr;
1776  // Only grow the section size if needed
1777  if (new_seg_byte_size > curr_seg_byte_size)
1778  segment->SetByteSize(new_seg_byte_size);
1779  } else {
1780  // We need to change the base address of the segment and adjust the
1781  // child section offsets for all existing children.
1782  const lldb::addr_t slide_amount =
1783  sect64_min_addr - curr_seg_min_addr;
1784  segment->Slide(slide_amount, false);
1785  segment->GetChildren().Slide(-slide_amount, false);
1786  segment->SetByteSize(curr_seg_max_addr - sect64_min_addr);
1787  }
1788 
1789  // Grow the file range of the segment as needed.
1790  if (sect64.offset) {
1791  const lldb::addr_t segment_min_file_offset =
1792  segment->GetFileOffset();
1793  const lldb::addr_t segment_max_file_offset =
1794  segment_min_file_offset + segment->GetFileSize();
1795 
1796  const lldb::addr_t section_min_file_offset = sect64.offset;
1797  const lldb::addr_t section_max_file_offset =
1798  section_min_file_offset + sect64.size;
1799  const lldb::addr_t new_file_offset =
1800  std::min(section_min_file_offset, segment_min_file_offset);
1801  const lldb::addr_t new_file_size =
1802  std::max(section_max_file_offset, segment_max_file_offset) -
1803  new_file_offset;
1804  segment->SetFileOffset(new_file_offset);
1805  segment->SetFileSize(new_file_size);
1806  }
1807  } else {
1808  // Create a fake section for the section's named segment
1809  segment_sp = std::make_shared<Section>(
1810  segment_sp, // Parent section
1811  module_sp, // Module to which this section belongs
1812  this, // Object file to which this section belongs
1813  ++context.NextSegmentIdx
1814  << 8, // Section ID is the 1 based segment index
1815  // shifted left by 8 bits so as not to
1816  // collide with any of the 256 section IDs
1817  // that are possible
1818  const_segname, // Name of this section
1819  eSectionTypeContainer, // This section is a container of
1820  // other sections.
1821  sect64.addr, // File VM address == addresses as they are
1822  // found in the object file
1823  sect64.size, // VM size in bytes of this section
1824  sect64.offset, // Offset to the data for this section in
1825  // the file
1826  sect64.offset ? sect64.size : 0, // Size in bytes of
1827  // this section as
1828  // found in the file
1829  sect64.align,
1830  load_cmd.flags); // Flags for this section
1831  segment_sp->SetIsFake(true);
1832  segment_sp->SetPermissions(segment_permissions);
1833  m_sections_up->AddSection(segment_sp);
1834  if (add_to_unified)
1835  context.UnifiedList.AddSection(segment_sp);
1836  segment_sp->SetIsEncrypted(segment_is_encrypted);
1837  }
1838  }
1839  assert(segment_sp.get());
1840 
1841  lldb::SectionType sect_type = GetSectionType(sect64.flags, section_name);
1842 
      // The child section's address is stored relative to its segment; a
      // zero file offset is treated as "no bytes in the file".
1843  SectionSP section_sp(new Section(
1844  segment_sp, module_sp, this, ++context.NextSectionIdx, section_name,
1845  sect_type, sect64.addr - segment_sp->GetFileAddress(), sect64.size,
1846  sect64.offset, sect64.offset == 0 ? 0 : sect64.size, sect64.align,
1847  sect64.flags));
1848  // Set the section to be encrypted to match the segment
1849 
1850  bool section_is_encrypted = false;
1851  if (!segment_is_encrypted && load_cmd.filesize != 0)
1852  section_is_encrypted = context.EncryptedRanges.FindEntryThatContains(
1853  sect64.offset) != nullptr;
1854 
1855  section_sp->SetIsEncrypted(segment_is_encrypted || section_is_encrypted);
1856  section_sp->SetPermissions(segment_permissions);
1857  segment_sp->GetChildren().AddSection(section_sp);
1858 
      // A fake segment is tied to this one section's segname, so forget it
      // and look it up (or create it) afresh for the next section.
1859  if (segment_sp->IsFake()) {
1860  segment_sp.reset();
1861  const_segname.Clear();
1862  }
1863  }
1864  }
      // For dSYM files, give zero-sized sections of this segment a size: the
      // distance to the next section's file address, or the segment's vmsize
      // when there is no next section.
1865  if (segment_sp && is_dsym) {
1866  if (first_segment_sectID <= context.NextSectionIdx) {
1867  lldb::user_id_t sect_uid;
1868  for (sect_uid = first_segment_sectID; sect_uid <= context.NextSectionIdx;
1869  ++sect_uid) {
1870  SectionSP curr_section_sp(
1871  segment_sp->GetChildren().FindSectionByID(sect_uid));
1872  SectionSP next_section_sp;
1873  if (sect_uid + 1 <= context.NextSectionIdx)
1874  next_section_sp =
1875  segment_sp->GetChildren().FindSectionByID(sect_uid + 1);
1876 
1877  if (curr_section_sp.get()) {
1878  if (curr_section_sp->GetByteSize() == 0) {
1879  if (next_section_sp.get() != nullptr)
1880  curr_section_sp->SetByteSize(next_section_sp->GetFileAddress() -
1881  curr_section_sp->GetFileAddress());
1882  else
1883  curr_section_sp->SetByteSize(load_cmd.vmsize);
1884  }
1885  }
1886  }
1887  }
1888  }
1889 }
1890 
1892  const llvm::MachO::load_command &load_cmd, lldb::offset_t offset) {
      // Fills m_dysymtab from an LC_DYSYMTAB load command: cmd and cmdsize come
      // from load_cmd, the remaining 32-bit fields are read from m_data at
      // `offset`.
      // NOTE(review): the result of GetU32 is not checked here, whereas the
      // stripped-binary check in this file clears m_dysymtab when the same
      // read fails - confirm a partially filled m_dysymtab is acceptable.
1893  m_dysymtab.cmd = load_cmd.cmd;
1894  m_dysymtab.cmdsize = load_cmd.cmdsize;
1895  m_data.GetU32(&offset, &m_dysymtab.ilocalsym,
1896  (sizeof(m_dysymtab) / sizeof(uint32_t)) - 2);
1897 }
1898 
// Builds m_sections_up by walking every load command after the Mach-O header.
// Segment commands are handed to ProcessSegmentCommand and LC_DYSYMTAB to
// ProcessDysymtabCommand. Sections are also registered in
// unified_section_list through the parsing context. Does nothing if the
// section list has already been created.
1899 void ObjectFileMachO::CreateSections(SectionList &unified_section_list) {
1900  if (m_sections_up)
1901  return;
1902 
1903  m_sections_up = std::make_unique<SectionList>();
1904 
1906  // bool dump_sections = false;
1907  ModuleSP module_sp(GetModule());
1908 
      // Load commands start right after the header. (`offset` is declared on
      // a line that is not visible in this listing.)
1909  offset = MachHeaderSizeFromMagic(m_header.magic);
1910 
1911  SegmentParsingContext context(GetEncryptedFileRanges(), unified_section_list);
1912  llvm::MachO::load_command load_cmd;
1913  for (uint32_t i = 0; i < m_header.ncmds; ++i) {
1914  const lldb::offset_t load_cmd_offset = offset;
      // Read cmd and cmdsize; stop if the data runs out.
1915  if (m_data.GetU32(&offset, &load_cmd, 2) == nullptr)
1916  break;
1917 
1918  if (load_cmd.cmd == LC_SEGMENT || load_cmd.cmd == LC_SEGMENT_64)
1919  ProcessSegmentCommand(load_cmd, offset, i, context);
1920  else if (load_cmd.cmd == LC_DYSYMTAB)
1921  ProcessDysymtabCommand(load_cmd, offset);
1922 
      // Advance to the next command using the recorded size, regardless of
      // how much the handlers consumed (they take `offset` by value).
1923  offset = load_cmd_offset + load_cmd.cmdsize;
1924  }
1925 
1926  if (context.FileAddressesChanged && module_sp)
1927  module_sp->SectionFileAddressesChanged();
1928 }
1929 
      // Caches, per section index, the Section and its file-address range so
      // symbols can be matched to their section quickly.
      // NOTE(review): the class header, the constructor signature, one return
      // statement and some member declarations are not visible in this
      // listing.
1931 public:
1933  : m_section_list(section_list), m_section_infos() {
1934  // Get the number of sections down to a depth of 1 to include all segments
1935  // and their sections, but no other sections that may be added for debug
1936  // map or similar purposes (NOTE(review): the original sentence was cut
      // off after "debug map or" - confirm the intended ending).
1937  m_section_infos.resize(section_list->GetNumSections(1));
1938  }
1939 
      // Returns the section with ID n_sect if file_addr lies inside it, or if
      // the section is zero-sized and starts exactly at file_addr. n_sect == 0
      // yields an empty SectionSP. The section lookup is cached in
      // m_section_infos on first use; a failed lookup is reported through
      // Host::SystemLog.
1940  SectionSP GetSection(uint8_t n_sect, addr_t file_addr) {
1941  if (n_sect == 0)
1942  return SectionSP();
1943  if (n_sect < m_section_infos.size()) {
1944  if (!m_section_infos[n_sect].section_sp) {
1945  SectionSP section_sp(m_section_list->FindSectionByID(n_sect));
1946  m_section_infos[n_sect].section_sp = section_sp;
1947  if (section_sp) {
1948  m_section_infos[n_sect].vm_range.SetBaseAddress(
1949  section_sp->GetFileAddress());
1950  m_section_infos[n_sect].vm_range.SetByteSize(
1951  section_sp->GetByteSize());
1952  } else {
1953  std::string filename = "<unknown>";
1954  SectionSP first_section_sp(m_section_list->GetSectionAtIndex(0));
1955  if (first_section_sp)
1956  filename = first_section_sp->GetObjectFile()->GetFileSpec().GetPath();
1957 
1958  Host::SystemLog(Host::eSystemLogError,
1959  "error: unable to find section %d for a symbol in "
1960  "%s, corrupt file?\n",
1961  n_sect, filename.c_str());
1962  }
1963  }
1964  if (m_section_infos[n_sect].vm_range.Contains(file_addr)) {
1965  // Symbol is in section.
1966  return m_section_infos[n_sect].section_sp;
1967  } else if (m_section_infos[n_sect].vm_range.GetByteSize() == 0 &&
1968  m_section_infos[n_sect].vm_range.GetBaseAddress() ==
1969  file_addr) {
1970  // Symbol is in section with zero size, but has the same start address
1971  // as the section. This can happen with linker symbols (symbols that
1972  // start with the letter 'l' or 'L').
1973  return m_section_infos[n_sect].section_sp;
1974  }
1975  }
1977  }
1978 
1979 protected:
      // Cached lookup result for one section index.
1980  struct SectionInfo {
1982 
1984  SectionSP section_sp;
1985  };
      // Indexed by section ID (n_sect); sized in the constructor.
1987  std::vector<SectionInfo> m_section_infos;
1988 };
1989 
// Extra flag kept in TrieEntry::flags (it is listed in the comment on that
// member); occupies bit 63 of the 64-bit flags word.
1990 #define TRIE_SYMBOL_IS_THUMB (1ULL << 63)
// One record describing a symbol read from the export trie; the fields are
// filled in by ParseTrieEntries().
// NOTE(review): listing lines 2002, 2003 and 2008 are missing from this
// excerpt; Dump() and ParseTrieEntries() use "name", "address" and
// "import_name" members that are presumably declared on those lines.
1991 struct TrieEntry {
 // Debug helper: prints address, flags and other as 16-digit hex values
 // followed by the quoted name, then the import name when one is set.
1992  void Dump() const {
1993  printf("0x%16.16llx 0x%16.16llx 0x%16.16llx \"%s\"",
1994  static_cast<unsigned long long>(address),
1995  static_cast<unsigned long long>(flags),
1996  static_cast<unsigned long long>(other), name.GetCString());
1997  if (import_name)
1998  printf(" -> \"%s\"\n", import_name.GetCString());
1999  else
2000  printf("\n");
2001  }
2004  uint64_t flags =
2005  0; // EXPORT_SYMBOL_FLAGS_REEXPORT, EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER,
2006  // TRIE_SYMBOL_IS_THUMB
 // As set by ParseTrieEntries(): the dylib ordinal for re-export entries, the
 // extra resolver value for stub-and-resolver entries, and 0 otherwise.
2007  uint64_t other = 0;
2009 };
2010 
2014 
2016 
// Pairs a trie entry ("entry") with a trie offset ("nodeOffset") and orders
// instances by that offset.
// NOTE(review): listing lines 2011-2013 and 2015 are missing from this
// excerpt, so the struct header, the "nodeOffset" and "entry" members and the
// constructor (used elsewhere as "TrieEntryWithOffset e(offset)") are not
// visible here.
 // Debug helper: prints the caller-supplied index and the node offset, then
 // delegates to entry.Dump() for the rest of the line.
2017  void Dump(uint32_t idx) const {
2018  printf("[%3u] 0x%16.16llx: ", idx,
2019  static_cast<unsigned long long>(nodeOffset));
2020  entry.Dump();
2021  }
2022 
 // Ordering considers nodeOffset only; the entry contents do not take part.
2023  bool operator<(const TrieEntryWithOffset &other) const {
2024  return (nodeOffset < other.nodeOffset);
2025  }
2026 };
2027 
// Recursively walks the export trie held in "data", starting at "offset", and
// collects re-exported symbols, externally visible symbols and resolver
// addresses.
// NOTE(review): listing line 2028 (the first line of the signature) is missing
// from this excerpt; the recursive call below shows the function is named
// ParseTrieEntries and takes (data, offset, ...) ahead of the parameters that
// are listed here.
//
// is_arm             - enables the Thumb-bit handling below.
// text_seg_base_addr - added to addresses read from the trie unless it is
//                      LLDB_INVALID_ADDRESS.
// nameSlices         - edge labels from the root to the current node; one is
//                      pushed before and popped after each child visit.
// resolver_addresses - receives the resolver address of every
//                      stub-and-resolver entry, whether or not the entry
//                      itself is kept.
// reexports          - receives kept entries that carry the re-export flag.
// ext_symbols        - receives the other kept entries.
// Returns false when a child's edge label cannot be read or a recursive call
// fails; an invalid starting offset is treated as "nothing to parse" (true).
// NOTE(review): child offsets come straight from the data and no depth or
// revisit guard is visible here -- confirm malformed tries cannot loop.
2029  const bool is_arm, addr_t text_seg_base_addr,
2030  std::vector<llvm::StringRef> &nameSlices,
2031  std::set<lldb::addr_t> &resolver_addresses,
2032  std::vector<TrieEntryWithOffset> &reexports,
2033  std::vector<TrieEntryWithOffset> &ext_symbols) {
2034  if (!data.ValidOffset(offset))
2035  return true;
2036 
2037  // Terminal node -- end of a branch, possibly add this to
2038  // the symbol table or resolver table.
2039  const uint64_t terminalSize = data.GetULEB128(&offset);
 // The child list is taken to start terminalSize bytes after the size field.
2040  lldb::offset_t children_offset = offset + terminalSize;
2041  if (terminalSize != 0) {
 // The entry is tagged with "offset" as it stands after the size field has
 // been read, not with the offset this call was entered with.
2042  TrieEntryWithOffset e(offset);
2043  e.entry.flags = data.GetULEB128(&offset);
2044  const char *import_name = nullptr;
2045  if (e.entry.flags & EXPORT_SYMBOL_FLAGS_REEXPORT) {
2046  e.entry.address = 0;
2047  e.entry.other = data.GetULEB128(&offset); // dylib ordinal
2048  import_name = data.GetCStr(&offset);
2049  } else {
2050  e.entry.address = data.GetULEB128(&offset);
2051  if (text_seg_base_addr != LLDB_INVALID_ADDRESS)
2052  e.entry.address += text_seg_base_addr;
2053  if (e.entry.flags & EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER) {
 // Record the resolver address: the extra value, rebased the same way as
 // the symbol address and masked with THUMB_ADDRESS_BIT_MASK on ARM.
2054  e.entry.other = data.GetULEB128(&offset);
2055  uint64_t resolver_addr = e.entry.other;
2056  if (text_seg_base_addr != LLDB_INVALID_ADDRESS)
2057  resolver_addr += text_seg_base_addr;
2058  if (is_arm)
2059  resolver_addr &= THUMB_ADDRESS_BIT_MASK;
2060  resolver_addresses.insert(resolver_addr);
2061  } else
2062  e.entry.other = 0;
2063  }
 // Only two kinds of entry are kept: re-exports with a non-empty import
 // name, and entries whose flags are exactly 0.
2064  bool add_this_entry = false;
2065  if (Flags(e.entry.flags).Test(EXPORT_SYMBOL_FLAGS_REEXPORT) &&
2066  import_name && import_name[0]) {
2067  // add symbols that are reexport symbols with a valid import name.
2068  add_this_entry = true;
2069  } else if (e.entry.flags == 0 &&
2070  (import_name == nullptr || import_name[0] == '\0')) {
2071  // add externally visible symbols, in case the nlist record has
2072  // been stripped/omitted.
2073  add_this_entry = true;
2074  }
2075  if (add_this_entry) {
 // The symbol name is the concatenation of every edge label on the path
 // from the root to this node.
2076  std::string name;
2077  if (!nameSlices.empty()) {
2078  for (auto name_slice : nameSlices)
2079  name.append(name_slice.data(), name_slice.size());
2080  }
2081  if (name.size() > 1) {
2082  // Skip the leading '_'
2083  e.entry.name.SetCStringWithLength(name.c_str() + 1, name.size() - 1);
2084  }
 // import_name is only non-null for re-exports, and those are only kept
 // when it is non-empty, so stepping over one character stays in bounds.
2085  if (import_name) {
2086  // Skip the leading '_'
2087  e.entry.import_name.SetCString(import_name + 1);
2088  }
2089  if (Flags(e.entry.flags).Test(EXPORT_SYMBOL_FLAGS_REEXPORT)) {
2090  reexports.push_back(e);
2091  } else {
 // NOTE(review): listing lines 2093-2094, the body of this "if", are
 // missing from this excerpt.
2092  if (is_arm && (e.entry.address & 1)) {
2095  }
2096  ext_symbols.push_back(e);
2097  }
2098  }
2099  }
2100 
 // Visit the children: a one-byte count, then for each child an edge label
 // (C string) followed by the ULEB128 offset of the child node. A child
 // offset of 0 is not followed.
2101  const uint8_t childrenCount = data.GetU8(&children_offset);
2102  for (uint8_t i = 0; i < childrenCount; ++i) {
2103  const char *cstr = data.GetCStr(&children_offset);
2104  if (cstr)
2105  nameSlices.push_back(llvm::StringRef(cstr));
2106  else
2107  return false; // Corrupt data
2108  lldb::offset_t childNodeOffset = data.GetULEB128(&children_offset);
2109  if (childNodeOffset) {
2110  if (!ParseTrieEntries(data, childNodeOffset, is_arm, text_seg_base_addr,
2111  nameSlices, resolver_addresses, reexports,
2112  ext_symbols)) {
2113  return false;
2114  }
2115  }
2116  nameSlices.pop_back();
2117  }
2118  return true;
2119 }
2120 
// Classifies a symbol from the section that contains it.
//
// symbol_name              - in/out; inside a data-segment section whose name
//                            starts with "__objc", a name beginning with
//                            OBJC_CLASS_$_, OBJC_METACLASS_$_ or OBJC_IVAR_$_
//                            is advanced past that prefix.
// demangled_is_synthesized - set to true in exactly those three prefix cases;
//                            not written otherwise.
// text_section_sp, data_section_sp, data_dirty_section_sp,
// data_const_section_sp    - sections that symbol_section is tested against
//                            with IsDescendant().
// symbol_section           - section holding the symbol; it is dereferenced
//                            unconditionally, so it must not be null.
// Returns the type selected below; when no branch matches, "type" keeps
// whatever value it was declared with.
// NOTE(review): listing line 2128 is missing from this excerpt; the local
// "type" that is assigned and returned here is presumably declared (with its
// default value) on that line.
2121 static SymbolType GetSymbolType(const char *&symbol_name,
2122  bool &demangled_is_synthesized,
2123  const SectionSP &text_section_sp,
2124  const SectionSP &data_section_sp,
2125  const SectionSP &data_dirty_section_sp,
2126  const SectionSP &data_const_section_sp,
2127  const SectionSP &symbol_section) {
2129 
2130  const char *symbol_sect_name = symbol_section->GetName().AsCString();
 // Text segment: data when IsClear() reports the three instruction-related
 // attributes clear, code otherwise.
2131  if (symbol_section->IsDescendant(text_section_sp.get())) {
2132  if (symbol_section->IsClear(S_ATTR_PURE_INSTRUCTIONS |
2133  S_ATTR_SELF_MODIFYING_CODE |
2134  S_ATTR_SOME_INSTRUCTIONS))
2135  type = eSymbolTypeData;
2136  else
2137  type = eSymbolTypeCode;
2138  } else if (symbol_section->IsDescendant(data_section_sp.get()) ||
2139  symbol_section->IsDescendant(data_dirty_section_sp.get()) ||
2140  symbol_section->IsDescendant(data_const_section_sp.get())) {
 // strstr(...) == symbol_sect_name is a "section name starts with" test.
2141  if (symbol_sect_name &&
2142  ::strstr(symbol_sect_name, "__objc") == symbol_sect_name) {
2143  type = eSymbolTypeRuntime;
2144 
2145  if (symbol_name) {
2146  llvm::StringRef symbol_name_ref(symbol_name);
 // Cheap common-prefix test first, then the three specific prefixes; a
 // match refines the type and strips the prefix from symbol_name.
2147  if (symbol_name_ref.startswith("OBJC_")) {
2148  static const llvm::StringRef g_objc_v2_prefix_class("OBJC_CLASS_$_");
2149  static const llvm::StringRef g_objc_v2_prefix_metaclass(
2150  "OBJC_METACLASS_$_");
2151  static const llvm::StringRef g_objc_v2_prefix_ivar("OBJC_IVAR_$_");
2152  if (symbol_name_ref.startswith(g_objc_v2_prefix_class)) {
2153  symbol_name = symbol_name + g_objc_v2_prefix_class.size();
2154  type = eSymbolTypeObjCClass;
2155  demangled_is_synthesized = true;
2156  } else if (symbol_name_ref.startswith(g_objc_v2_prefix_metaclass)) {
2157  symbol_name = symbol_name + g_objc_v2_prefix_metaclass.size();
2158  type = eSymbolTypeObjCMetaClass;
2159  demangled_is_synthesized = true;
2160  } else if (symbol_name_ref.startswith(g_objc_v2_prefix_ivar)) {
2161  symbol_name = symbol_name + g_objc_v2_prefix_ivar.size();
2162  type = eSymbolTypeObjCIVar;
2163  demangled_is_synthesized = true;
2164  }
2165  }
2166  }
2167  } else if (symbol_sect_name &&
2168  ::strstr(symbol_sect_name, "__gcc_except_tab") ==
2169  symbol_sect_name) {
2170  type = eSymbolTypeException;
2171  } else {
2172  type = eSymbolTypeData;
2173  }
 // Outside the text and data segments, only sections whose name starts with
 // "__IMPORT" are classified (as trampolines).
2174  } else if (symbol_sect_name &&
2175  ::strstr(symbol_sect_name, "__IMPORT") == symbol_sect_name) {
2176  type = eSymbolTypeTrampoline;
2177  }
2178  return type;
2179 }
2180 
2181 // Read the UUID out of a dyld_shared_cache file on-disk.
// NOTE(review): listing line 2182 (the first line of the signature) is missing
// from this excerpt; callers later in the file invoke this as
// GetSharedCacheUUID(<file spec>, byte_order, addr_byte_size), and the body
// uses the first parameter under the name "dyld_shared_cache".
//
// byte_order, addr_byte_size - passed to the DataExtractor that wraps the
//                              mapped header bytes.
// Returns a default-constructed UUID when the file cannot be mapped or its
// first six bytes are not "dyld_v"; otherwise the UUID built from the header's
// uuid field.
2183  const ByteOrder byte_order,
2184  const uint32_t addr_byte_size) {
2185  UUID dsc_uuid;
 // Map only the leading sizeof(lldb_copy_dyld_cache_header_v1) bytes.
2186  DataBufferSP DscData = MapFileData(
2187  dyld_shared_cache, sizeof(struct lldb_copy_dyld_cache_header_v1), 0);
2188  if (!DscData)
2189  return dsc_uuid;
2190  DataExtractor dsc_header_data(DscData, byte_order, addr_byte_size);
2191 
 // Check the first six bytes of the header against "dyld_v" before trusting
 // the rest of it.
 // NOTE(review): the pointer returned by GetData() is handed to memcpy
 // without a null check -- confirm it cannot be null for a short file.
2192  char version_str[7];
2193  lldb::offset_t offset = 0;
2194  memcpy(version_str, dsc_header_data.GetData(&offset, 6), 6);
2195  version_str[6] = '\0';
2196  if (strcmp(version_str, "dyld_v") == 0) {
2197  offset = offsetof(struct lldb_copy_dyld_cache_header_v1, uuid);
2198  dsc_uuid = UUID::fromOptionalData(
2199  dsc_header_data.GetData(&offset, sizeof(uuid_t)), sizeof(uuid_t));
2200  }
 // NOTE(review): listing line 2201 is missing from this excerpt; the "log"
 // pointer tested below is presumably obtained there.
2202  if (log && dsc_uuid.IsValid()) {
2203  LLDB_LOGF(log, "Shared cache %s has UUID %s",
2204  dyld_shared_cache.GetPath().c_str(),
2205  dsc_uuid.GetAsString().c_str());
2206  }
2207  return dsc_uuid;
2208 }
2209 
2210 static llvm::Optional<struct nlist_64>
2211 ParseNList(DataExtractor &nlist_data, lldb::offset_t &nlist_data_offset,
2212  size_t nlist_byte_size) {
2213  struct nlist_64 nlist;
2214  if (!nlist_data.ValidOffsetForDataOfSize(nlist_data_offset, nlist_byte_size))
2215  return {};
2216  nlist.n_strx = nlist_data.GetU32_unchecked(&nlist_data_offset);
2217  nlist.n_type = nlist_data.GetU8_unchecked(&nlist_data_offset);
2218  nlist.n_sect = nlist_data.GetU8_unchecked(&nlist_data_offset);
2219  nlist.n_desc = nlist_data.GetU16_unchecked(&nlist_data_offset);
2220  nlist.n_value = nlist_data.GetAddress_unchecked(&nlist_data_offset);
2221  return nlist;
2222 }
2223 
// Named stand-ins for the boolean values true and false.
// NOTE(review): no use of these enumerators is visible in this excerpt;
// presumably they label a "debug symbols or not" boolean argument -- confirm.
2224 enum { DebugSymbols = true, NonDebugSymbols = false };
2225 
2227  LLDB_SCOPED_TIMERF("ObjectFileMachO::ParseSymtab () module = %s",
2228  m_file.GetFilename().AsCString(""));
2229  ModuleSP module_sp(GetModule());
2230  if (!module_sp)
2231  return 0;
2232 
2233  Progress progress(llvm::formatv("Parsing symbol table for {0}",
2234  m_file.GetFilename().AsCString("<Unknown>")));
2235 
2236  llvm::MachO::symtab_command symtab_load_command = {0, 0, 0, 0, 0, 0};
2237  llvm::MachO::linkedit_data_command function_starts_load_command = {0, 0, 0, 0};
2238  llvm::MachO::linkedit_data_command exports_trie_load_command = {0, 0, 0, 0};
2239  llvm::MachO::dyld_info_command dyld_info = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2240  // The data element of type bool indicates that this entry is thumb
2241  // code.
2242  typedef AddressDataArray<lldb::addr_t, bool, 100> FunctionStarts;
2243 
2244  // Record the address of every function/data that we add to the symtab.
2245  // We add symbols to the table in the order of most information (nlist
2246  // records) to least (function starts), and avoid duplicating symbols
2247  // via this set.
2248  llvm::DenseSet<addr_t> symbols_added;
2249 
2250  // We are using a llvm::DenseSet for "symbols_added" so we must be sure we
2251  // do not add the tombstone or empty keys to the set.
2252  auto add_symbol_addr = [&symbols_added](lldb::addr_t file_addr) {
2253  // Don't add the tombstone or empty keys.
2254  if (file_addr == UINT64_MAX || file_addr == UINT64_MAX - 1)
2255  return;
2256  symbols_added.insert(file_addr);
2257  };
2258  FunctionStarts function_starts;
2260  uint32_t i;
2261  FileSpecList dylib_files;
2263  llvm::StringRef g_objc_v2_prefix_class("_OBJC_CLASS_$_");
2264  llvm::StringRef g_objc_v2_prefix_metaclass("_OBJC_METACLASS_$_");
2265  llvm::StringRef g_objc_v2_prefix_ivar("_OBJC_IVAR_$_");
2266 
2267  for (i = 0; i < m_header.ncmds; ++i) {
2268  const lldb::offset_t cmd_offset = offset;
2269  // Read in the load command and load command size
2270  llvm::MachO::load_command lc;
2271  if (m_data.GetU32(&offset, &lc, 2) == nullptr)
2272  break;
2273  // Watch for the symbol table load command
2274  switch (lc.cmd) {
2275  case LC_SYMTAB:
2276  symtab_load_command.cmd = lc.cmd;
2277  symtab_load_command.cmdsize = lc.cmdsize;
2278  // Read in the rest of the symtab load command
2279  if (m_data.GetU32(&offset, &symtab_load_command.symoff, 4) ==
2280  nullptr) // fill in symoff, nsyms, stroff, strsize fields
2281  return 0;
2282  break;
2283 
2284  case LC_DYLD_INFO:
2285  case LC_DYLD_INFO_ONLY:
2286  if (m_data.GetU32(&offset, &dyld_info.rebase_off, 10)) {
2287  dyld_info.cmd = lc.cmd;
2288  dyld_info.cmdsize = lc.cmdsize;
2289  } else {
2290  memset(&dyld_info, 0, sizeof(dyld_info));
2291  }
2292  break;
2293 
2294  case LC_LOAD_DYLIB:
2295  case LC_LOAD_WEAK_DYLIB:
2296  case LC_REEXPORT_DYLIB:
2297  case LC_LOADFVMLIB:
2298  case LC_LOAD_UPWARD_DYLIB: {
2299  uint32_t name_offset = cmd_offset + m_data.GetU32(&offset);
2300  const char *path = m_data.PeekCStr(name_offset);
2301  if (path) {
2302  FileSpec file_spec(path);
2303  // Strip the path if there is @rpath, @executable, etc so we just use
2304  // the basename
2305  if (path[0] == '@')
2306  file_spec.GetDirectory().Clear();
2307 
2308  if (lc.cmd == LC_REEXPORT_DYLIB) {
2309  m_reexported_dylibs.AppendIfUnique(file_spec);
2310  }
2311 
2312  dylib_files.Append(file_spec);
2313  }
2314  } break;
2315 
2316  case LC_DYLD_EXPORTS_TRIE:
2317  exports_trie_load_command.cmd = lc.cmd;
2318  exports_trie_load_command.cmdsize = lc.cmdsize;
2319  if (m_data.GetU32(&offset, &exports_trie_load_command.dataoff, 2) ==
2320  nullptr) // fill in offset and size fields
2321  memset(&exports_trie_load_command, 0,
2322  sizeof(exports_trie_load_command));
2323  break;
2324  case LC_FUNCTION_STARTS:
2325  function_starts_load_command.cmd = lc.cmd;
2326  function_starts_load_command.cmdsize = lc.cmdsize;
2327  if (m_data.GetU32(&offset, &function_starts_load_command.dataoff, 2) ==
2328  nullptr) // fill in data offset and size fields
2329  memset(&function_starts_load_command, 0,
2330  sizeof(function_starts_load_command));
2331  break;
2332 
2333  default:
2334  break;
2335  }
2336  offset = cmd_offset + lc.cmdsize;
2337  }
2338 
2339  if (!symtab_load_command.cmd)
2340  return 0;
2341 
2342  Symtab *symtab = m_symtab_up.get();
2343  SectionList *section_list = GetSectionList();
2344  if (section_list == nullptr)
2345  return 0;
2346 
2347  const uint32_t addr_byte_size = m_data.GetAddressByteSize();
2348  const ByteOrder byte_order = m_data.GetByteOrder();
2349  bool bit_width_32 = addr_byte_size == 4;
2350  const size_t nlist_byte_size =
2351  bit_width_32 ? sizeof(struct nlist) : sizeof(struct nlist_64);
2352 
2353  DataExtractor nlist_data(nullptr, 0, byte_order, addr_byte_size);
2354  DataExtractor strtab_data(nullptr, 0, byte_order, addr_byte_size);
2355  DataExtractor function_starts_data(nullptr, 0, byte_order, addr_byte_size);
2356  DataExtractor indirect_symbol_index_data(nullptr, 0, byte_order,
2357  addr_byte_size);
2358  DataExtractor dyld_trie_data(nullptr, 0, byte_order, addr_byte_size);
2359 
2360  const addr_t nlist_data_byte_size =
2361  symtab_load_command.nsyms * nlist_byte_size;
2362  const addr_t strtab_data_byte_size = symtab_load_command.strsize;
2363  addr_t strtab_addr = LLDB_INVALID_ADDRESS;
2364 
2365  ProcessSP process_sp(m_process_wp.lock());
2366  Process *process = process_sp.get();
2367 
2368  uint32_t memory_module_load_level = eMemoryModuleLoadLevelComplete;
2369  bool is_shared_cache_image = IsSharedCacheBinary();
2370  bool is_local_shared_cache_image = is_shared_cache_image && !IsInMemory();
2371  SectionSP linkedit_section_sp(
2372  section_list->FindSectionByName(GetSegmentNameLINKEDIT()));
2373 
2374  if (process && m_header.filetype != llvm::MachO::MH_OBJECT &&
2375  !is_local_shared_cache_image) {
2376  Target &target = process->GetTarget();
2377 
2378  memory_module_load_level = target.GetMemoryModuleLoadLevel();
2379 
2380  // Reading mach file from memory in a process or core file...
2381 
2382  if (linkedit_section_sp) {
2383  addr_t linkedit_load_addr =
2384  linkedit_section_sp->GetLoadBaseAddress(&target);
2385  if (linkedit_load_addr == LLDB_INVALID_ADDRESS) {
2386  // We might be trying to access the symbol table before the
2387  // __LINKEDIT's load address has been set in the target. We can't
2388  // fail to read the symbol table, so calculate the right address
2389  // manually
2390  linkedit_load_addr = CalculateSectionLoadAddressForMemoryImage(
2391  m_memory_addr, GetMachHeaderSection(), linkedit_section_sp.get());
2392  }
2393 
2394  const addr_t linkedit_file_offset = linkedit_section_sp->GetFileOffset();
2395  const addr_t symoff_addr = linkedit_load_addr +
2396  symtab_load_command.symoff -
2397  linkedit_file_offset;
2398  strtab_addr = linkedit_load_addr + symtab_load_command.stroff -
2399  linkedit_file_offset;
2400 
2401  // Always load dyld - the dynamic linker - from memory if we didn't
2402  // find a binary anywhere else. lldb will not register
2403  // dylib/framework/bundle loads/unloads if we don't have the dyld
2404  // symbols, we force dyld to load from memory despite the user's
2405  // target.memory-module-load-level setting.
2406  if (memory_module_load_level == eMemoryModuleLoadLevelComplete ||
2407  m_header.filetype == llvm::MachO::MH_DYLINKER) {
2408  DataBufferSP nlist_data_sp(
2409  ReadMemory(process_sp, symoff_addr, nlist_data_byte_size));
2410  if (nlist_data_sp)
2411  nlist_data.SetData(nlist_data_sp, 0, nlist_data_sp->GetByteSize());
2412  if (m_dysymtab.nindirectsyms != 0) {
2413  const addr_t indirect_syms_addr = linkedit_load_addr +
2414  m_dysymtab.indirectsymoff -
2415  linkedit_file_offset;
2416  DataBufferSP indirect_syms_data_sp(ReadMemory(
2417  process_sp, indirect_syms_addr, m_dysymtab.nindirectsyms * 4));
2418  if (indirect_syms_data_sp)
2419  indirect_symbol_index_data.SetData(
2420  indirect_syms_data_sp, 0,
2421  indirect_syms_data_sp->GetByteSize());
2422  // If this binary is outside the shared cache,
2423  // cache the string table.
2424  // Binaries in the shared cache all share a giant string table,
2425  // and we can't share the string tables across multiple
2426  // ObjectFileMachO's, so we'd end up re-reading this mega-strtab
2427  // for every binary in the shared cache - it would be a big perf
2428  // problem. For binaries outside the shared cache, it's faster to
2429  // read the entire strtab at once instead of piece-by-piece as we
2430  // process the nlist records.
2431  if (!is_shared_cache_image) {
2432  DataBufferSP strtab_data_sp(
2433  ReadMemory(process_sp, strtab_addr, strtab_data_byte_size));
2434  if (strtab_data_sp) {
2435  strtab_data.SetData(strtab_data_sp, 0,
2436  strtab_data_sp->GetByteSize());
2437  }
2438  }
2439  }
2440  if (memory_module_load_level >= eMemoryModuleLoadLevelPartial) {
2441  if (function_starts_load_command.cmd) {
2442  const addr_t func_start_addr =
2443  linkedit_load_addr + function_starts_load_command.dataoff -
2444  linkedit_file_offset;
2445  DataBufferSP func_start_data_sp(
2446  ReadMemory(process_sp, func_start_addr,
2447  function_starts_load_command.datasize));
2448  if (func_start_data_sp)
2449  function_starts_data.SetData(func_start_data_sp, 0,
2450  func_start_data_sp->GetByteSize());
2451  }
2452  }
2453  }
2454  }
2455  } else {
2456  if (is_local_shared_cache_image) {
2457  // The load commands in shared cache images are relative to the
2458  // beginning of the shared cache, not the library image. The
2459  // data we get handed when creating the ObjectFileMachO starts
2460  // at the beginning of a specific library and spans to the end
2461  // of the cache to be able to reach the shared LINKEDIT
2462  // segments. We need to convert the load command offsets to be
2463  // relative to the beginning of our specific image.
2464  lldb::addr_t linkedit_offset = linkedit_section_sp->GetFileOffset();
2465  lldb::offset_t linkedit_slide =
2466  linkedit_offset - m_linkedit_original_offset;
2467  symtab_load_command.symoff += linkedit_slide;
2468  symtab_load_command.stroff += linkedit_slide;
2469  dyld_info.export_off += linkedit_slide;
2470  m_dysymtab.indirectsymoff += linkedit_slide;
2471  function_starts_load_command.dataoff += linkedit_slide;
2472  exports_trie_load_command.dataoff += linkedit_slide;
2473  }
2474 
2475  nlist_data.SetData(m_data, symtab_load_command.symoff,
2476  nlist_data_byte_size);
2477  strtab_data.SetData(m_data, symtab_load_command.stroff,
2478  strtab_data_byte_size);
2479 
2480  // We shouldn't have exports data from both the LC_DYLD_INFO command
2481  // AND the LC_DYLD_EXPORTS_TRIE command in the same binary:
2482  lldbassert(!((dyld_info.export_size > 0)
2483  && (exports_trie_load_command.datasize > 0)));
2484  if (dyld_info.export_size > 0) {
2485  dyld_trie_data.SetData(m_data, dyld_info.export_off,
2486  dyld_info.export_size);
2487  } else if (exports_trie_load_command.datasize > 0) {
2488  dyld_trie_data.SetData(m_data, exports_trie_load_command.dataoff,
2489  exports_trie_load_command.datasize);
2490  }
2491 
2492  if (m_dysymtab.nindirectsyms != 0) {
2493  indirect_symbol_index_data.SetData(m_data, m_dysymtab.indirectsymoff,
2494  m_dysymtab.nindirectsyms * 4);
2495  }
2496  if (function_starts_load_command.cmd) {
2497  function_starts_data.SetData(m_data, function_starts_load_command.dataoff,
2498  function_starts_load_command.datasize);
2499  }
2500  }
2501 
2502  const bool have_strtab_data = strtab_data.GetByteSize() > 0;
2503 
2504  ConstString g_segment_name_TEXT = GetSegmentNameTEXT();
2505  ConstString g_segment_name_DATA = GetSegmentNameDATA();
2506  ConstString g_segment_name_DATA_DIRTY = GetSegmentNameDATA_DIRTY();
2507  ConstString g_segment_name_DATA_CONST = GetSegmentNameDATA_CONST();
2508  ConstString g_segment_name_OBJC = GetSegmentNameOBJC();
2509  ConstString g_section_name_eh_frame = GetSectionNameEHFrame();
2510  SectionSP text_section_sp(
2511  section_list->FindSectionByName(g_segment_name_TEXT));
2512  SectionSP data_section_sp(
2513  section_list->FindSectionByName(g_segment_name_DATA));
2514  SectionSP data_dirty_section_sp(
2515  section_list->FindSectionByName(g_segment_name_DATA_DIRTY));
2516  SectionSP data_const_section_sp(
2517  section_list->FindSectionByName(g_segment_name_DATA_CONST));
2518  SectionSP objc_section_sp(
2519  section_list->FindSectionByName(g_segment_name_OBJC));
2520  SectionSP eh_frame_section_sp;
2521  if (text_section_sp.get())
2522  eh_frame_section_sp = text_section_sp->GetChildren().FindSectionByName(
2523  g_section_name_eh_frame);
2524  else
2525  eh_frame_section_sp =
2526  section_list->FindSectionByName(g_section_name_eh_frame);
2527 
2528  const bool is_arm = (m_header.cputype == llvm::MachO::CPU_TYPE_ARM);
2529  const bool always_thumb = GetArchitecture().IsAlwaysThumbInstructions();
2530 
2531  // lldb works best if it knows the start address of all functions in a
2532  // module. Linker symbols or debug info are normally the best source of
2533  // information for start addr / size but they may be stripped in a released
2534  // binary. Two additional sources of information exist in Mach-O binaries:
2535  // LC_FUNCTION_STARTS - a list of ULEB128 encoded offsets of each
2536  // function's start address in the
2537  // binary, relative to the text section.
2538  // eh_frame - the eh_frame FDEs have the start addr & size of
2539  // each function
2540  // LC_FUNCTION_STARTS is the fastest source to read in, and is present on
2541  // all modern binaries.
2542  // Binaries built to run on older releases may need to use eh_frame
2543  // information.
2544 
2545  if (text_section_sp && function_starts_data.GetByteSize()) {
2546  FunctionStarts::Entry function_start_entry;
2547  function_start_entry.data = false;
2548  lldb::offset_t function_start_offset = 0;
2549  function_start_entry.addr = text_section_sp->GetFileAddress();
2550  uint64_t delta;
2551  while ((delta = function_starts_data.GetULEB128(&function_start_offset)) >
2552  0) {
2553  // Now append the current entry
2554  function_start_entry.addr += delta;
2555  if (is_arm) {
2556  if (function_start_entry.addr & 1) {
2557  function_start_entry.addr &= THUMB_ADDRESS_BIT_MASK;
2558  function_start_entry.data = true;
2559  } else if (always_thumb) {
2560  function_start_entry.data = true;
2561  }
2562  }
2563  function_starts.Append(function_start_entry);
2564  }
2565  } else {
2566  // If m_type is eTypeDebugInfo, then this is a dSYM - it will have the
2567  // load command claiming an eh_frame but it doesn't actually have the
2568  // eh_frame content. And if we have a dSYM, we don't need to do any of
2569  // this fill-in-the-missing-symbols work anyway - the debug info should
2570  // give us all the functions in the module.
2571  if (text_section_sp.get() && eh_frame_section_sp.get() &&
2572  m_type != eTypeDebugInfo) {
2573  DWARFCallFrameInfo eh_frame(*this, eh_frame_section_sp,
2574  DWARFCallFrameInfo::EH);
2576  eh_frame.GetFunctionAddressAndSizeVector(functions);
2577  addr_t text_base_addr = text_section_sp->GetFileAddress();
2578  size_t count = functions.GetSize();
2579  for (size_t i = 0; i < count; ++i) {
2581  functions.GetEntryAtIndex(i);
2582  if (func) {
2583  FunctionStarts::Entry function_start_entry;
2584  function_start_entry.addr = func->base - text_base_addr;
2585  if (is_arm) {
2586  if (function_start_entry.addr & 1) {
2587  function_start_entry.addr &= THUMB_ADDRESS_BIT_MASK;
2588  function_start_entry.data = true;
2589  } else if (always_thumb) {
2590  function_start_entry.data = true;
2591  }
2592  }
2593  function_starts.Append(function_start_entry);
2594  }
2595  }
2596  }
2597  }
2598 
2599  const size_t function_starts_count = function_starts.GetSize();
2600 
2601  // For user process binaries (executables, dylibs, frameworks, bundles), if
2602  // we don't have LC_FUNCTION_STARTS/eh_frame section in this binary, we're
2603  // going to assume the binary has been stripped. Don't allow assembly
2604  // language instruction emulation because we don't know proper function
2605  // start boundaries.
2606  //
2607  // For all other types of binaries (kernels, stand-alone bare board
2608  // binaries, kexts), they may not have LC_FUNCTION_STARTS / eh_frame
2609  // sections - we should not make any assumptions about them based on that.
2610  if (function_starts_count == 0 && CalculateStrata() == eStrataUser) {
2612  Log *unwind_or_symbol_log(lldb_private::GetLogIfAnyCategoriesSet(
2614 
2615  if (unwind_or_symbol_log)
2616  module_sp->LogMessage(
2617  unwind_or_symbol_log,
2618  "no LC_FUNCTION_STARTS, will not allow assembly profiled unwinds");
2619  }
2620 
2621  const user_id_t TEXT_eh_frame_sectID = eh_frame_section_sp.get()
2622  ? eh_frame_section_sp->GetID()
2623  : static_cast<user_id_t>(NO_SECT);
2624 
2625  lldb::offset_t nlist_data_offset = 0;
2626 
2627  uint32_t N_SO_index = UINT32_MAX;
2628 
2629  MachSymtabSectionInfo section_info(section_list);
2630  std::vector<uint32_t> N_FUN_indexes;
2631  std::vector<uint32_t> N_NSYM_indexes;
2632  std::vector<uint32_t> N_INCL_indexes;
2633  std::vector<uint32_t> N_BRAC_indexes;
2634  std::vector<uint32_t> N_COMM_indexes;
2635  typedef std::multimap<uint64_t, uint32_t> ValueToSymbolIndexMap;
2636  typedef llvm::DenseMap<uint32_t, uint32_t> NListIndexToSymbolIndexMap;
2637  typedef llvm::DenseMap<const char *, uint32_t> ConstNameToSymbolIndexMap;
2638  ValueToSymbolIndexMap N_FUN_addr_to_sym_idx;
2639  ValueToSymbolIndexMap N_STSYM_addr_to_sym_idx;
2640  ConstNameToSymbolIndexMap N_GSYM_name_to_sym_idx;
2641  // Any symbols that get merged into another will get an entry in this map
2642  // so we know which symbol index each merged nlist index ended up with.
2643  NListIndexToSymbolIndexMap m_nlist_idx_to_sym_idx;
2644  uint32_t nlist_idx = 0;
2645  Symbol *symbol_ptr = nullptr;
2646 
2647  uint32_t sym_idx = 0;
2648  Symbol *sym = nullptr;
2649  size_t num_syms = 0;
2650  std::string memory_symbol_name;
2651  uint32_t unmapped_local_symbols_found = 0;
2652 
2653  std::vector<TrieEntryWithOffset> reexport_trie_entries;
2654  std::vector<TrieEntryWithOffset> external_sym_trie_entries;
2655  std::set<lldb::addr_t> resolver_addresses;
2656 
2657  if (dyld_trie_data.GetByteSize() > 0) {
2658  ConstString text_segment_name("__TEXT");
2659  SectionSP text_segment_sp =
2660  GetSectionList()->FindSectionByName(text_segment_name);
2661  lldb::addr_t text_segment_file_addr = LLDB_INVALID_ADDRESS;
2662  if (text_segment_sp)
2663  text_segment_file_addr = text_segment_sp->GetFileAddress();
2664  std::vector<llvm::StringRef> nameSlices;
2665  ParseTrieEntries(dyld_trie_data, 0, is_arm, text_segment_file_addr,
2666  nameSlices, resolver_addresses, reexport_trie_entries,
2667  external_sym_trie_entries);
2668  }
2669 
2670  typedef std::set<ConstString> IndirectSymbols;
2671  IndirectSymbols indirect_symbol_names;
2672 
2673 #if TARGET_OS_IPHONE
2674 
2675  // Some recent builds of the dyld_shared_cache (hereafter: DSC) have been
2676  // optimized by moving LOCAL symbols out of the memory mapped portion of
2677  // the DSC. The symbol information has all been retained, but it isn't
2678  // available in the normal nlist data. However, there *are* duplicate
2679  // entries of *some*
2680  // LOCAL symbols in the normal nlist data. To handle this situation
2681  // correctly, we must first attempt
2682  // to parse any DSC unmapped symbol information. If we find any, we set a
2683  // flag that tells the normal nlist parser to ignore all LOCAL symbols.
2684 
2685  if (IsSharedCacheBinary()) {
2686  // Before we can start mapping the DSC, we need to make certain the
2687  // target process is actually using the cache we can find.
2688 
2689  // Next we need to determine the correct path for the dyld shared cache.
2690 
2691  ArchSpec header_arch = GetArchitecture();
2692  char dsc_path[PATH_MAX];
2693  char dsc_path_development[PATH_MAX];
2694 
2695  snprintf(
2696  dsc_path, sizeof(dsc_path), "%s%s%s",
2697  "/System/Library/Caches/com.apple.dyld/", /* IPHONE_DYLD_SHARED_CACHE_DIR
2698  */
2699  "dyld_shared_cache_", /* DYLD_SHARED_CACHE_BASE_NAME */
2700  header_arch.GetArchitectureName());
2701 
2702  snprintf(
2703  dsc_path_development, sizeof(dsc_path), "%s%s%s%s",
2704  "/System/Library/Caches/com.apple.dyld/", /* IPHONE_DYLD_SHARED_CACHE_DIR
2705  */
2706  "dyld_shared_cache_", /* DYLD_SHARED_CACHE_BASE_NAME */
2707  header_arch.GetArchitectureName(), ".development");
2708 
2709  FileSpec dsc_nondevelopment_filespec(dsc_path);
2710  FileSpec dsc_development_filespec(dsc_path_development);
2711  FileSpec dsc_filespec;
2712 
2713  UUID dsc_uuid;
2714  UUID process_shared_cache_uuid;
2715  addr_t process_shared_cache_base_addr;
2716 
2717  if (process) {
2718  GetProcessSharedCacheUUID(process, process_shared_cache_base_addr,
2719  process_shared_cache_uuid);
2720  }
2721 
2722  // First see if we can find an exact match for the inferior process
2723  // shared cache UUID in the development or non-development shared caches
2724  // on disk.
2725  if (process_shared_cache_uuid.IsValid()) {
2726  if (FileSystem::Instance().Exists(dsc_development_filespec)) {
2727  UUID dsc_development_uuid = GetSharedCacheUUID(
2728  dsc_development_filespec, byte_order, addr_byte_size);
2729  if (dsc_development_uuid.IsValid() &&
2730  dsc_development_uuid == process_shared_cache_uuid) {
2731  dsc_filespec = dsc_development_filespec;
2732  dsc_uuid = dsc_development_uuid;
2733  }
2734  }
2735  if (!dsc_uuid.IsValid() &&
2736  FileSystem::Instance().Exists(dsc_nondevelopment_filespec)) {
2737  UUID dsc_nondevelopment_uuid = GetSharedCacheUUID(
2738  dsc_nondevelopment_filespec, byte_order, addr_byte_size);
2739  if (dsc_nondevelopment_uuid.IsValid() &&
2740  dsc_nondevelopment_uuid == process_shared_cache_uuid) {
2741  dsc_filespec = dsc_nondevelopment_filespec;
2742  dsc_uuid = dsc_nondevelopment_uuid;
2743  }
2744  }
2745  }
2746 
2747  // Failing a UUID match, prefer the development dyld_shared cache if both
2748  // are present.
2749  if (!FileSystem::Instance().Exists(dsc_filespec)) {
2750  if (FileSystem::Instance().Exists(dsc_development_filespec)) {
2751  dsc_filespec = dsc_development_filespec;
2752  } else {
2753  dsc_filespec = dsc_nondevelopment_filespec;
2754  }
2755  }
2756 
2757  /* The dyld_cache_header has a pointer to the
2758  dyld_cache_local_symbols_info structure (localSymbolsOffset).
2759  The dyld_cache_local_symbols_info structure gives us three things:
2760  1. The start and count of the nlist records in the dyld_shared_cache
2761  file
2762  2. The start and size of the strings for these nlist records
2763  3. The start and count of dyld_cache_local_symbols_entry entries
2764 
2765  There is one dyld_cache_local_symbols_entry per dylib/framework in the
2766  dyld shared cache.
2767  The "dylibOffset" field is the Mach-O header of this dylib/framework in
2768  the dyld shared cache.
2769  The dyld_cache_local_symbols_entry also lists the start of this
2770  dylib/framework's nlist records
2771  and the count of how many nlist records there are for this
2772  dylib/framework.
2773  */
2774 
2775  // Process the dyld shared cache header to find the unmapped symbols
2776 
2777  DataBufferSP dsc_data_sp = MapFileData(
2778  dsc_filespec, sizeof(struct lldb_copy_dyld_cache_header_v1), 0);
2779  if (!dsc_uuid.IsValid()) {
2780  dsc_uuid = GetSharedCacheUUID(dsc_filespec, byte_order, addr_byte_size);
2781  }
2782  if (dsc_data_sp) {
2783  DataExtractor dsc_header_data(dsc_data_sp, byte_order, addr_byte_size);
2784 
2785  bool uuid_match = true;
2786  if (dsc_uuid.IsValid() && process) {
2787  if (process_shared_cache_uuid.IsValid() &&
2788  dsc_uuid != process_shared_cache_uuid) {
2789  // The on-disk dyld_shared_cache file is not the same as the one in
2790  // this process' memory, don't use it.
2791  uuid_match = false;
2792  ModuleSP module_sp(GetModule());
2793  if (module_sp)
2794  module_sp->ReportWarning("process shared cache does not match "
2795  "on-disk dyld_shared_cache file, some "
2796  "symbol names will be missing.");
2797  }
2798  }
2799 
2800  offset = offsetof(struct lldb_copy_dyld_cache_header_v1, mappingOffset);
2801 
2802  uint32_t mappingOffset = dsc_header_data.GetU32(&offset);
2803 
2804  // If the mappingOffset points to a location inside the header, we've
2805  // opened an old dyld shared cache, and should not proceed further.
2806  if (uuid_match &&
2807  mappingOffset >= sizeof(struct lldb_copy_dyld_cache_header_v1)) {
2808 
2809  DataBufferSP dsc_mapping_info_data_sp = MapFileData(
2810  dsc_filespec, sizeof(struct lldb_copy_dyld_cache_mapping_info),
2811  mappingOffset);
2812 
2813  DataExtractor dsc_mapping_info_data(dsc_mapping_info_data_sp,
2814  byte_order, addr_byte_size);
2815  offset = 0;
2816 
2817  // The File addresses (from the in-memory Mach-O load commands) for
2818  // the shared libraries in the shared library cache need to be
2819  // adjusted by an offset to match up with the dylibOffset identifying
2820  // field in the dyld_cache_local_symbol_entry's. This offset is
2821  // recorded in mapping_offset_value.
2822  const uint64_t mapping_offset_value =
2823  dsc_mapping_info_data.GetU64(&offset);
2824 
2825  offset =
2826  offsetof(struct lldb_copy_dyld_cache_header_v1, localSymbolsOffset);
2827  uint64_t localSymbolsOffset = dsc_header_data.GetU64(&offset);
2828  uint64_t localSymbolsSize = dsc_header_data.GetU64(&offset);
2829 
2830  if (localSymbolsOffset && localSymbolsSize) {
2831  // Map the local symbols
2832  DataBufferSP dsc_local_symbols_data_sp =
2833  MapFileData(dsc_filespec, localSymbolsSize, localSymbolsOffset);
2834 
2835  if (dsc_local_symbols_data_sp) {
2836  DataExtractor dsc_local_symbols_data(dsc_local_symbols_data_sp,
2837  byte_order, addr_byte_size);
2838 
2839  offset = 0;
2840 
2841  typedef llvm::DenseMap<ConstString, uint16_t> UndefinedNameToDescMap;
2842  typedef llvm::DenseMap<uint32_t, ConstString> SymbolIndexToName;
2843  UndefinedNameToDescMap undefined_name_to_desc;
2844  SymbolIndexToName reexport_shlib_needs_fixup;
2845 
2846  // Read the local_symbols_infos struct in one shot
2847  struct lldb_copy_dyld_cache_local_symbols_info local_symbols_info;
2848  dsc_local_symbols_data.GetU32(&offset,
2849  &local_symbols_info.nlistOffset, 6);
2850 
2851  SectionSP text_section_sp(
2852  section_list->FindSectionByName(GetSegmentNameTEXT()));
2853 
2854  uint32_t header_file_offset =
2855  (text_section_sp->GetFileAddress() - mapping_offset_value);
2856 
2857  offset = local_symbols_info.entriesOffset;
2858  for (uint32_t entry_index = 0;
2859  entry_index < local_symbols_info.entriesCount; entry_index++) {
2861  local_symbols_entry;
2862  local_symbols_entry.dylibOffset =
2863  dsc_local_symbols_data.GetU32(&offset);
2864  local_symbols_entry.nlistStartIndex =
2865  dsc_local_symbols_data.GetU32(&offset);
2866  local_symbols_entry.nlistCount =
2867  dsc_local_symbols_data.GetU32(&offset);
2868 
2869  if (header_file_offset == local_symbols_entry.dylibOffset) {
2870  unmapped_local_symbols_found = local_symbols_entry.nlistCount;
2871 
2872  // The normal nlist code cannot correctly size the Symbols
2873  // array, we need to allocate it here.
2874  sym = symtab->Resize(
2875  symtab_load_command.nsyms + m_dysymtab.nindirectsyms +
2876  unmapped_local_symbols_found - m_dysymtab.nlocalsym);
2877  num_syms = symtab->GetNumSymbols();
2878 
2879  nlist_data_offset =
2880  local_symbols_info.nlistOffset +
2881  (nlist_byte_size * local_symbols_entry.nlistStartIndex);
2882  uint32_t string_table_offset = local_symbols_info.stringsOffset;
2883 
2884  for (uint32_t nlist_index = 0;
2885  nlist_index < local_symbols_entry.nlistCount;
2886  nlist_index++) {
2887  /////////////////////////////
2888  {
2889  llvm::Optional<struct nlist_64> nlist_maybe =
2890  ParseNList(dsc_local_symbols_data, nlist_data_offset,
2891  nlist_byte_size);
2892  if (!nlist_maybe)
2893  break;
2894  struct nlist_64 nlist = *nlist_maybe;
2895 
2897  const char *symbol_name = dsc_local_symbols_data.PeekCStr(
2898  string_table_offset + nlist.n_strx);
2899 
2900  if (symbol_name == NULL) {
2901  // No symbol should be NULL, even the symbols with no
2902  // string values should have an offset zero which
2903  // points to an empty C-string
2904  Host::SystemLog(
2905  Host::eSystemLogError,
2906  "error: DSC unmapped local symbol[%u] has invalid "
2907  "string table offset 0x%x in %s, ignoring symbol\n",
2908  entry_index, nlist.n_strx,
2909  module_sp->GetFileSpec().GetPath().c_str());
2910  continue;
2911  }
2912  if (symbol_name[0] == '\0')
2913  symbol_name = NULL;
2914 
2915  const char *symbol_name_non_abi_mangled = NULL;
2916 
2917  SectionSP symbol_section;
2918  uint32_t symbol_byte_size = 0;
2919  bool add_nlist = true;
2920  bool is_debug = ((nlist.n_type & N_STAB) != 0);
2921  bool demangled_is_synthesized = false;
2922  bool is_gsym = false;
2923  bool set_value = true;
2924 
2925  assert(sym_idx < num_syms);
2926 
2927  sym[sym_idx].SetDebug(is_debug);
2928 
2929  if (is_debug) {
2930  switch (nlist.n_type) {
2931  case N_GSYM:
2932  // global symbol: name,,NO_SECT,type,0
2933  // Sometimes the N_GSYM value contains the address.
2934 
2935  // FIXME: In the .o files, we have a GSYM and a debug
2936  // symbol for all the ObjC data. They
2937  // have the same address, but we want to ensure that
2938  // we always find only the real symbol, 'cause we
2939  // don't currently correctly attribute the
2940  // GSYM one to the ObjCClass/Ivar/MetaClass
2941  // symbol type. This is a temporary hack to make
2942  // sure the ObjectiveC symbols get treated correctly.
2943  // To do this right, we should coalesce all the GSYM
2944  // & global symbols that have the same address.
2945 
2946  is_gsym = true;
2947  sym[sym_idx].SetExternal(true);
2948 
2949  if (symbol_name && symbol_name[0] == '_' &&
2950  symbol_name[1] == 'O') {
2951  llvm::StringRef symbol_name_ref(symbol_name);
2952  if (symbol_name_ref.startswith(
2953  g_objc_v2_prefix_class)) {
2954  symbol_name_non_abi_mangled = symbol_name + 1;
2955  symbol_name =
2956  symbol_name + g_objc_v2_prefix_class.size();
2957  type = eSymbolTypeObjCClass;
2958  demangled_is_synthesized = true;
2959 
2960  } else if (symbol_name_ref.startswith(
2961  g_objc_v2_prefix_metaclass)) {
2962  symbol_name_non_abi_mangled = symbol_name + 1;
2963  symbol_name =
2964  symbol_name + g_objc_v2_prefix_metaclass.size();
2965  type = eSymbolTypeObjCMetaClass;
2966  demangled_is_synthesized = true;
2967  } else if (symbol_name_ref.startswith(
2968  g_objc_v2_prefix_ivar)) {
2969  symbol_name_non_abi_mangled = symbol_name + 1;
2970  symbol_name =
2971  symbol_name + g_objc_v2_prefix_ivar.size();
2972  type = eSymbolTypeObjCIVar;
2973  demangled_is_synthesized = true;
2974  }
2975  } else {
2976  if (nlist.n_value != 0)
2977  symbol_section = section_info.GetSection(
2978  nlist.n_sect, nlist.n_value);
2979  type = eSymbolTypeData;
2980  }
2981  break;
2982 
2983  case N_FNAME:
2984  // procedure name (f77 kludge): name,,NO_SECT,0,0
2985  type = eSymbolTypeCompiler;
2986  break;
2987 
2988  case N_FUN:
2989  // procedure: name,,n_sect,linenumber,address
2990  if (symbol_name) {
2991  type = eSymbolTypeCode;
2992  symbol_section = section_info.GetSection(
2993  nlist.n_sect, nlist.n_value);
2994 
2995  N_FUN_addr_to_sym_idx.insert(
2996  std::make_pair(nlist.n_value, sym_idx));
2997  // We use the current number of symbols in the
2998  // symbol table in lieu of using nlist_idx in case
2999  // we ever start trimming entries out
3000  N_FUN_indexes.push_back(sym_idx);
3001  } else {
3002  type = eSymbolTypeCompiler;
3003 
3004  if (!N_FUN_indexes.empty()) {
3005  // Copy the size of the function into the
3006  // original
3007  // STAB entry so we don't have
3008  // to hunt for it later
3009  symtab->SymbolAtIndex(N_FUN_indexes.back())
3010  ->SetByteSize(nlist.n_value);
3011  N_FUN_indexes.pop_back();
3012  // We don't really need the end function STAB as
3013  // it contains the size which we already placed
3014  // with the original symbol, so don't add it if
3015  // we want a minimal symbol table
3016  add_nlist = false;
3017  }
3018  }
3019  break;
3020 
3021  case N_STSYM:
3022  // static symbol: name,,n_sect,type,address
3023  N_STSYM_addr_to_sym_idx.insert(
3024  std::make_pair(nlist.n_value, sym_idx));
3025  symbol_section = section_info.GetSection(nlist.n_sect,
3026  nlist.n_value);
3027  if (symbol_name && symbol_name[0]) {
3028  type = ObjectFile::GetSymbolTypeFromName(
3029  symbol_name + 1, eSymbolTypeData);
3030  }
3031  break;
3032 
3033  case N_LCSYM:
3034  // .lcomm symbol: name,,n_sect,type,address
3035  symbol_section = section_info.GetSection(nlist.n_sect,
3036  nlist.n_value);
3037  type = eSymbolTypeCommonBlock;
3038  break;
3039 
3040  case N_BNSYM:
3041  // We use the current number of symbols in the symbol
3042  // table in lieu of using nlist_idx in case we ever
3043  // start trimming entries out. Skip these if we want
3044  // minimal symbol tables
3045  add_nlist = false;
3046  break;
3047 
3048  case N_ENSYM:
3049  // Set the size of the N_BNSYM to the terminating
3050  // index of this N_ENSYM so that we can always skip
3051  // the entire symbol if we need to navigate more
3052  // quickly at the source level when parsing STABS.
3053  // Skip these if we want minimal symbol tables
3054  add_nlist = false;
3055  break;
3056 
3057  case N_OPT:
3058  // emitted with gcc2_compiled and in gcc source
3059  type = eSymbolTypeCompiler;
3060  break;
3061 
3062  case N_RSYM:
3063  // register sym: name,,NO_SECT,type,register
3064  type = eSymbolTypeVariable;
3065  break;
3066 
3067  case N_SLINE:
3068  // src line: 0,,n_sect,linenumber,address
3069  symbol_section = section_info.GetSection(nlist.n_sect,
3070  nlist.n_value);
3071  type = eSymbolTypeLineEntry;
3072  break;
3073 
3074  case N_SSYM:
3075  // structure elt: name,,NO_SECT,type,struct_offset
3076  type = eSymbolTypeVariableType;
3077  break;
3078 
3079  case N_SO:
3080  // source file name
3081  type = eSymbolTypeSourceFile;
3082  if (symbol_name == NULL) {
3083  add_nlist = false;
3084  if (N_SO_index != UINT32_MAX) {
3085  // Set the size of the N_SO to the terminating
3086  // index of this N_SO so that we can always skip
3087  // the entire N_SO if we need to navigate more
3088  // quickly at the source level when parsing STABS
3089  symbol_ptr = symtab->SymbolAtIndex(N_SO_index);
3090  symbol_ptr->SetByteSize(sym_idx);
3091  symbol_ptr->SetSizeIsSibling(true);
3092  }
3093  N_NSYM_indexes.clear();
3094  N_INCL_indexes.clear();
3095  N_BRAC_indexes.clear();
3096  N_COMM_indexes.clear();
3097  N_FUN_indexes.clear();
3098  N_SO_index = UINT32_MAX;
3099  } else {
3100  // We use the current number of symbols in the
3101  // symbol table in lieu of using nlist_idx in case
3102  // we ever start trimming entries out
3103  const bool N_SO_has_full_path = symbol_name[0] == '/';
3104  if (N_SO_has_full_path) {
3105  if ((N_SO_index == sym_idx - 1) &&
3106  ((sym_idx - 1) < num_syms)) {
3107  // We have two consecutive N_SO entries where
3108  // the first contains a directory and the
3109  // second contains a full path.
3110  sym[sym_idx - 1].GetMangled().SetValue(
3111  ConstString(symbol_name), false);
3112  m_nlist_idx_to_sym_idx[nlist_idx] = sym_idx - 1;
3113  add_nlist = false;
3114  } else {
3115  // This is the first entry in a N_SO that
3116  // contains a directory or
3117  // a full path to the source file
3118  N_SO_index = sym_idx;
3119  }
3120  } else if ((N_SO_index == sym_idx - 1) &&
3121  ((sym_idx - 1) < num_syms)) {
3122  // This is usually the second N_SO entry that
3123  // contains just the filename, so here we combine
3124  // it with the first one if we are minimizing the
3125  // symbol table
3126  const char *so_path = sym[sym_idx - 1]
3127  .GetMangled()
3128  .GetDemangledName()
3129  .AsCString();
3130  if (so_path && so_path[0]) {
3131  std::string full_so_path(so_path);
3132  const size_t double_slash_pos =
3133  full_so_path.find("//");
3134  if (double_slash_pos != std::string::npos) {
3135  // The linker has been generating bad N_SO
3136  // entries with doubled up paths
3137  // in the format "%s%s" where the first
3138  // string is the DW_AT_comp_dir, and the
3139  // second is the directory for the source
3140  // file so you end up with a path that looks
3141  // like "/tmp/src//tmp/src/"
3142  FileSpec so_dir(so_path);
3143  if (!FileSystem::Instance().Exists(so_dir)) {
3144  so_dir.SetFile(
3145  &full_so_path[double_slash_pos + 1],
3146  FileSpec::Style::native);
3147  if (FileSystem::Instance().Exists(so_dir)) {
3148  // Trim off the incorrect path
3149  full_so_path.erase(0, double_slash_pos + 1);
3150  }
3151  }
3152  }
3153  if (*full_so_path.rbegin() != '/')
3154  full_so_path += '/';
3155  full_so_path += symbol_name;
3156  sym[sym_idx - 1].GetMangled().SetValue(
3157  ConstString(full_so_path.c_str()), false);
3158  add_nlist = false;
3159  m_nlist_idx_to_sym_idx[nlist_idx] = sym_idx - 1;
3160  }
3161  } else {
3162  // This could be a relative path to a N_SO
3163  N_SO_index = sym_idx;
3164  }
3165  }
3166  break;
3167 
3168  case N_OSO:
3169  // object file name: name,,0,0,st_mtime
3170  type = eSymbolTypeObjectFile;
3171  break;
3172 
3173  case N_LSYM:
3174  // local sym: name,,NO_SECT,type,offset
3175  type = eSymbolTypeLocal;
3176  break;
3177 
3178  // INCL scopes
3179  case N_BINCL:
3180  // include file beginning: name,,NO_SECT,0,sum We use
3181  // the current number of symbols in the symbol table
3182  // in lieu of using nlist_idx in case we ever start
3183  // trimming entries out
3184  N_INCL_indexes.push_back(sym_idx);
3185  type = eSymbolTypeScopeBegin;
3186  break;
3187 
3188  case N_EINCL:
3189  // include file end: name,,NO_SECT,0,0
3190  // Set the size of the N_BINCL to the terminating
3191  // index of this N_EINCL so that we can always skip
3192  // the entire symbol if we need to navigate more
3193  // quickly at the source level when parsing STABS
3194  if (!N_INCL_indexes.empty()) {
3195  symbol_ptr =
3196  symtab->SymbolAtIndex(N_INCL_indexes.back());
3197  symbol_ptr->SetByteSize(sym_idx + 1);
3198  symbol_ptr->SetSizeIsSibling(true);
3199  N_INCL_indexes.pop_back();
3200  }
3201  type = eSymbolTypeScopeEnd;
3202  break;
3203 
3204  case N_SOL:
3205  // #included file name: name,,n_sect,0,address
3206  type = eSymbolTypeHeaderFile;
3207 
3208  // We currently don't use the header files on darwin
3209  add_nlist = false;
3210  break;
3211 
3212  case N_PARAMS:
3213  // compiler parameters: name,,NO_SECT,0,0
3214  type = eSymbolTypeCompiler;
3215  break;
3216 
3217  case N_VERSION:
3218  // compiler version: name,,NO_SECT,0,0
3219  type = eSymbolTypeCompiler;
3220  break;
3221 
3222  case N_OLEVEL:
3223  // compiler -O level: name,,NO_SECT,0,0
3224  type = eSymbolTypeCompiler;
3225  break;
3226 
3227  case N_PSYM:
3228  // parameter: name,,NO_SECT,type,offset
3229  type = eSymbolTypeVariable;
3230  break;
3231 
3232  case N_ENTRY:
3233  // alternate entry: name,,n_sect,linenumber,address
3234  symbol_section = section_info.GetSection(nlist.n_sect,
3235  nlist.n_value);
3236  type = eSymbolTypeLineEntry;
3237  break;
3238 
3239  // Left and Right Braces
3240  case N_LBRAC:
3241  // left bracket: 0,,NO_SECT,nesting level,address We
3242  // use the current number of symbols in the symbol
3243  // table in lieu of using nlist_idx in case we ever
3244  // start trimming entries out
3245  symbol_section = section_info.GetSection(nlist.n_sect,
3246  nlist.n_value);
3247  N_BRAC_indexes.push_back(sym_idx);
3248  type = eSymbolTypeScopeBegin;
3249  break;
3250 
3251  case N_RBRAC:
3252  // right bracket: 0,,NO_SECT,nesting level,address
3253  // Set the size of the N_LBRAC to the terminating
3254  // index of this N_RBRAC so that we can always skip
3255  // the entire symbol if we need to navigate more
3256  // quickly at the source level when parsing STABS
3257  symbol_section = section_info.GetSection(nlist.n_sect,
3258  nlist.n_value);
3259  if (!N_BRAC_indexes.empty()) {
3260  symbol_ptr =
3261  symtab->SymbolAtIndex(N_BRAC_indexes.back());
3262  symbol_ptr->SetByteSize(sym_idx + 1);
3263  symbol_ptr->SetSizeIsSibling(true);
3264  N_BRAC_indexes.pop_back();
3265  }
3266  type = eSymbolTypeScopeEnd;
3267  break;
3268 
3269  case N_EXCL:
3270  // deleted include file: name,,NO_SECT,0,sum
3271  type = eSymbolTypeHeaderFile;
3272  break;
3273 
3274  // COMM scopes
3275  case N_BCOMM:
3276  // begin common: name,,NO_SECT,0,0
3277  // We use the current number of symbols in the symbol
3278  // table in lieu of using nlist_idx in case we ever
3279  // start trimming entries out
3280  type = eSymbolTypeScopeBegin;
3281  N_COMM_indexes.push_back(sym_idx);
3282  break;
3283 
3284  case N_ECOML:
3285  // end common (local name): 0,,n_sect,0,address
3286  symbol_section = section_info.GetSection(nlist.n_sect,
3287  nlist.n_value);
3288  // Fall through
3289 
3290  case N_ECOMM:
3291  // end common: name,,n_sect,0,0
3292  // Set the size of the N_BCOMM to the terminating
3293  // index of this N_ECOMM/N_ECOML so that we can
3294  // always skip the entire symbol if we need to
3295  // navigate more quickly at the source level when
3296  // parsing STABS
3297  if (!N_COMM_indexes.empty()) {
3298  symbol_ptr =
3299  symtab->SymbolAtIndex(N_COMM_indexes.back());
3300  symbol_ptr->SetByteSize(sym_idx + 1);
3301  symbol_ptr->SetSizeIsSibling(true);
3302  N_COMM_indexes.pop_back();
3303  }
3304  type = eSymbolTypeScopeEnd;
3305  break;
3306 
3307  case N_LENG:
3308  // second stab entry with length information
3309  type = eSymbolTypeAdditional;
3310  break;
3311 
3312  default:
3313  break;
3314  }
3315  } else {
3316  // uint8_t n_pext = N_PEXT & nlist.n_type;
3317  uint8_t n_type = N_TYPE & nlist.n_type;
3318  sym[sym_idx].SetExternal((N_EXT & nlist.n_type) != 0);
3319 
3320  switch (n_type) {
3321  case N_INDR: {
3322  const char *reexport_name_cstr =
3323  strtab_data.PeekCStr(nlist.n_value);
3324  if (reexport_name_cstr && reexport_name_cstr[0]) {
3325  type = eSymbolTypeReExported;
3326  ConstString reexport_name(
3327  reexport_name_cstr +
3328  ((reexport_name_cstr[0] == '_') ? 1 : 0));
3329  sym[sym_idx].SetReExportedSymbolName(reexport_name);
3330  set_value = false;
3331  reexport_shlib_needs_fixup[sym_idx] = reexport_name;
3332  indirect_symbol_names.insert(ConstString(
3333  symbol_name + ((symbol_name[0] == '_') ? 1 : 0)));
3334  } else
3335  type = eSymbolTypeUndefined;
3336  } break;
3337 
3338  case N_UNDF:
3339  if (symbol_name && symbol_name[0]) {
3340  ConstString undefined_name(
3341  symbol_name + ((symbol_name[0] == '_') ? 1 : 0));
3342  undefined_name_to_desc[undefined_name] = nlist.n_desc;
3343  }
3344  // Fall through
3345  case N_PBUD:
3346  type = eSymbolTypeUndefined;
3347  break;
3348 
3349  case N_ABS:
3350  type = eSymbolTypeAbsolute;
3351  break;
3352 
3353  case N_SECT: {
3354  symbol_section = section_info.GetSection(nlist.n_sect,
3355  nlist.n_value);
3356 
3357  if (symbol_section == NULL) {
3358  // TODO: warn about this?
3359  add_nlist = false;
3360  break;
3361  }
3362 
3363  if (TEXT_eh_frame_sectID == nlist.n_sect) {
3364  type = eSymbolTypeException;
3365  } else {
3366  uint32_t section_type =
3367  symbol_section->Get() & SECTION_TYPE;
3368 
3369  switch (section_type) {
3370  case S_CSTRING_LITERALS:
3371  type = eSymbolTypeData;
3372  break; // section with only literal C strings
3373  case S_4BYTE_LITERALS:
3374  type = eSymbolTypeData;
3375  break; // section with only 4 byte literals
3376  case S_8BYTE_LITERALS:
3377  type = eSymbolTypeData;
3378  break; // section with only 8 byte literals
3379  case S_LITERAL_POINTERS:
3380  type = eSymbolTypeTrampoline;
3381  break; // section with only pointers to literals
3382  case S_NON_LAZY_SYMBOL_POINTERS:
3383  type = eSymbolTypeTrampoline;
3384  break; // section with only non-lazy symbol
3385  // pointers
3386  case S_LAZY_SYMBOL_POINTERS:
3387  type = eSymbolTypeTrampoline;
3388  break; // section with only lazy symbol pointers
3389  case S_SYMBOL_STUBS:
3390  type = eSymbolTypeTrampoline;
3391  break; // section with only symbol stubs, byte
3392  // size of stub in the reserved2 field
3393  case S_MOD_INIT_FUNC_POINTERS:
3394  type = eSymbolTypeCode;
3395  break; // section with only function pointers for
3396  // initialization
3397  case S_MOD_TERM_FUNC_POINTERS:
3398  type = eSymbolTypeCode;
3399  break; // section with only function pointers for
3400  // termination
3401  case S_INTERPOSING:
3402  type = eSymbolTypeTrampoline;
3403  break; // section with only pairs of function
3404  // pointers for interposing
3405  case S_16BYTE_LITERALS:
3406  type = eSymbolTypeData;
3407  break; // section with only 16 byte literals
3408  case S_DTRACE_DOF:
3410  break;
3411  case S_LAZY_DYLIB_SYMBOL_POINTERS:
3412  type = eSymbolTypeTrampoline;
3413  break;
3414  default:
3415  switch (symbol_section->GetType()) {
3417  type = eSymbolTypeCode;
3418  break;
3419  case eSectionTypeData:
3420  case eSectionTypeDataCString: // Inlined C string
3421  // data
3422  case eSectionTypeDataCStringPointers: // Pointers
3423  // to C
3424  // string
3425  // data
3426  case eSectionTypeDataSymbolAddress: // Address of
3427  // a symbol in
3428  // the symbol
3429  // table
3430  case eSectionTypeData4:
3431  case eSectionTypeData8:
3432  case eSectionTypeData16:
3433  type = eSymbolTypeData;
3434  break;
3435  default:
3436  break;
3437  }
3438  break;
3439  }
3440 
3441  if (type == eSymbolTypeInvalid) {
3442  const char *symbol_sect_name =
3443  symbol_section->GetName().AsCString();
3444  if (symbol_section->IsDescendant(
3445  text_section_sp.get())) {
3446  if (symbol_section->IsClear(
3447  S_ATTR_PURE_INSTRUCTIONS |
3448  S_ATTR_SELF_MODIFYING_CODE |
3449  S_ATTR_SOME_INSTRUCTIONS))
3450  type = eSymbolTypeData;
3451  else
3452  type = eSymbolTypeCode;
3453  } else if (symbol_section->IsDescendant(
3454  data_section_sp.get()) ||
3455  symbol_section->IsDescendant(
3456  data_dirty_section_sp.get()) ||
3457  symbol_section->IsDescendant(
3458  data_const_section_sp.get())) {
3459  if (symbol_sect_name &&
3460  ::strstr(symbol_sect_name, "__objc") ==
3461  symbol_sect_name) {
3462  type = eSymbolTypeRuntime;
3463 
3464  if (symbol_name) {
3465  llvm::StringRef symbol_name_ref(symbol_name);
3466  if (symbol_name_ref.startswith("_OBJC_")) {
3467  llvm::StringRef
3468  g_objc_v2_prefix_class(
3469  "_OBJC_CLASS_$_");
3470  llvm::StringRef
3471  g_objc_v2_prefix_metaclass(
3472  "_OBJC_METACLASS_$_");
3473  llvm::StringRef
3474  g_objc_v2_prefix_ivar("_OBJC_IVAR_$_");
3475  if (symbol_name_ref.startswith(
3476  g_objc_v2_prefix_class)) {
3477  symbol_name_non_abi_mangled =
3478  symbol_name + 1;
3479  symbol_name =
3480  symbol_name +
3481  g_objc_v2_prefix_class.size();
3482  type = eSymbolTypeObjCClass;
3483  demangled_is_synthesized = true;
3484  } else if (
3485  symbol_name_ref.startswith(
3486  g_objc_v2_prefix_metaclass)) {
3487  symbol_name_non_abi_mangled =
3488  symbol_name + 1;
3489  symbol_name =
3490  symbol_name +
3491  g_objc_v2_prefix_metaclass.size();
3492  type = eSymbolTypeObjCMetaClass;
3493  demangled_is_synthesized = true;
3494  } else if (symbol_name_ref.startswith(
3495  g_objc_v2_prefix_ivar)) {
3496  symbol_name_non_abi_mangled =
3497  symbol_name + 1;
3498  symbol_name =
3499  symbol_name +
3500  g_objc_v2_prefix_ivar.size();
3501  type = eSymbolTypeObjCIVar;
3502  demangled_is_synthesized = true;
3503  }
3504  }
3505  }
3506  } else if (symbol_sect_name &&
3507  ::strstr(symbol_sect_name,
3508  "__gcc_except_tab") ==
3509  symbol_sect_name) {
3510  type = eSymbolTypeException;
3511  } else {
3512  type = eSymbolTypeData;
3513  }
3514  } else if (symbol_sect_name &&
3515  ::strstr(symbol_sect_name, "__IMPORT") ==
3516  symbol_sect_name) {
3517  type = eSymbolTypeTrampoline;
3518  } else if (symbol_section->IsDescendant(
3519  objc_section_sp.get())) {
3520  type = eSymbolTypeRuntime;
3521  if (symbol_name && symbol_name[0] == '.') {
3522  llvm::StringRef symbol_name_ref(symbol_name);
3523  llvm::StringRef
3524  g_objc_v1_prefix_class(".objc_class_name_");
3525  if (symbol_name_ref.startswith(
3526  g_objc_v1_prefix_class)) {
3527  symbol_name_non_abi_mangled = symbol_name;
3528  symbol_name = symbol_name +
3529  g_objc_v1_prefix_class.size();
3530  type = eSymbolTypeObjCClass;
3531  demangled_is_synthesized = true;
3532  }
3533  }
3534  }
3535  }
3536  }
3537  } break;
3538  }
3539  }
3540 
3541  if (add_nlist) {
3542  uint64_t symbol_value = nlist.n_value;
3543  if (symbol_name_non_abi_mangled) {
3544  sym[sym_idx].GetMangled().SetMangledName(
3545  ConstString(symbol_name_non_abi_mangled));
3546  sym[sym_idx].GetMangled().SetDemangledName(
3547  ConstString(symbol_name));
3548  } else {
3549  bool symbol_name_is_mangled = false;
3550 
3551  if (symbol_name && symbol_name[0] == '_') {
3552  symbol_name_is_mangled = symbol_name[1] == '_';
3553  symbol_name++; // Skip the leading underscore
3554  }
3555 
3556  if (symbol_name) {
3557  ConstString const_symbol_name(symbol_name);
3558  sym[sym_idx].GetMangled().SetValue(
3559  const_symbol_name, symbol_name_is_mangled);
3560  if (is_gsym && is_debug) {
3561  const char *gsym_name =
3562  sym[sym_idx]
3563  .GetMangled()
3564  .GetName(Mangled::ePreferMangled)
3565  .GetCString();
3566  if (gsym_name)
3567  N_GSYM_name_to_sym_idx[gsym_name] = sym_idx;
3568  }
3569  }
3570  }
3571  if (symbol_section) {
3572  const addr_t section_file_addr =
3573  symbol_section->GetFileAddress();
3574  if (symbol_byte_size == 0 &&
3575  function_starts_count > 0) {
3576  addr_t symbol_lookup_file_addr = nlist.n_value;
3577  // Do an exact address match for non-ARM addresses,
3578  // else get the closest since the symbol might be a
3579  // thumb symbol which has an address with bit zero
3580  // set
3581  FunctionStarts::Entry *func_start_entry =
3582  function_starts.FindEntry(symbol_lookup_file_addr,
3583  !is_arm);
3584  if (is_arm && func_start_entry) {
3585  // Verify that the function start address is the
3586  // symbol address (ARM) or the symbol address + 1
3587  // (thumb)
3588  if (func_start_entry->addr !=
3589  symbol_lookup_file_addr &&
3590  func_start_entry->addr !=
3591  (symbol_lookup_file_addr + 1)) {
3592  // Not the right entry, NULL it out...
3593  func_start_entry = NULL;
3594  }
3595  }
3596  if (func_start_entry) {
3597  func_start_entry->data = true;
3598 
3599  addr_t symbol_file_addr = func_start_entry->addr;
3600  uint32_t symbol_flags = 0;
3601  if (is_arm) {
3602  if (symbol_file_addr & 1)
3603  symbol_flags = MACHO_NLIST_ARM_SYMBOL_IS_THUMB;
3604  symbol_file_addr &= THUMB_ADDRESS_BIT_MASK;
3605  }
3606 
3607  const FunctionStarts::Entry *next_func_start_entry =
3608  function_starts.FindNextEntry(func_start_entry);
3609  const addr_t section_end_file_addr =
3610  section_file_addr +
3611  symbol_section->GetByteSize();
3612  if (next_func_start_entry) {
3613  addr_t next_symbol_file_addr =
3614  next_func_start_entry->addr;
3615  // Be sure to clear the Thumb address bit when
3616  // we calculate the size from the current and
3617  // next address
3618  if (is_arm)
3619  next_symbol_file_addr &= THUMB_ADDRESS_BIT_MASK;
3620  symbol_byte_size = std::min<lldb::addr_t>(
3621  next_symbol_file_addr - symbol_file_addr,
3622  section_end_file_addr - symbol_file_addr);
3623  } else {
3624  symbol_byte_size =
3625  section_end_file_addr - symbol_file_addr;
3626  }
3627  }
3628  }
3629  symbol_value -= section_file_addr;
3630  }
3631 
3632  if (is_debug == false) {
3633  if (type == eSymbolTypeCode) {
3634  // See if we can find a N_FUN entry for any code
3635  // symbols. If we do find a match, and the name
3636  // matches, then we can merge the two into just the
3637  // function symbol to avoid duplicate entries in
3638  // the symbol table
3639  auto range =
3640  N_FUN_addr_to_sym_idx.equal_range(nlist.n_value);
3641  if (range.first != range.second) {
3642  bool found_it = false;
3643  for (auto pos = range.first; pos != range.second;
3644  ++pos) {
3645  if (sym[sym_idx].GetMangled().GetName(
3646  Mangled::ePreferMangled) ==
3647  sym[pos->second].GetMangled().GetName(
3648  Mangled::ePreferMangled)) {
3649  m_nlist_idx_to_sym_idx[nlist_idx] = pos->second;
3650  // We just need the flags from the linker
3651  // symbol, so put these flags
3652  // into the N_FUN flags to avoid duplicate
3653  // symbols in the symbol table
3654  sym[pos->second].SetExternal(
3655  sym[sym_idx].IsExternal());
3656  sym[pos->second].SetFlags(nlist.n_type << 16 |
3657  nlist.n_desc);
3658  if (resolver_addresses.find(nlist.n_value) !=
3659  resolver_addresses.end())
3660  sym[pos->second].SetType(eSymbolTypeResolver);
3661  sym[sym_idx].Clear();
3662  found_it = true;
3663  break;
3664  }
3665  }
3666  if (found_it)
3667  continue;
3668  } else {
3669  if (resolver_addresses.find(nlist.n_value) !=
3670  resolver_addresses.end())
3671  type = eSymbolTypeResolver;
3672  }
3673  } else if (type == eSymbolTypeData ||
3674  type == eSymbolTypeObjCClass ||
3675  type == eSymbolTypeObjCMetaClass ||
3676  type == eSymbolTypeObjCIVar) {
3677  // See if we can find a N_STSYM entry for any data
3678  // symbols. If we do find a match, and the name
3679  // matches, then we can merge the two into just the
3680  // Static symbol to avoid duplicate entries in the
3681  // symbol table
3682  auto range = N_STSYM_addr_to_sym_idx.equal_range(
3683  nlist.n_value);
3684  if (range.first != range.second) {
3685  bool found_it = false;
3686  for (auto pos = range.first; pos != range.second;
3687  ++pos) {
3688  if (sym[sym_idx].GetMangled().GetName(
3689  Mangled::ePreferMangled) ==
3690  sym[pos->second].GetMangled().GetName(
3691  Mangled::ePreferMangled)) {
3692  m_nlist_idx_to_sym_idx[nlist_idx] = pos->second;
3693  // We just need the flags from the linker
3694  // symbol, so put these flags
3695  // into the N_STSYM flags to avoid duplicate
3696  // symbols in the symbol table
3697  sym[pos->second].SetExternal(
3698  sym[sym_idx].IsExternal());
3699  sym[pos->second].SetFlags(nlist.n_type << 16 |
3700  nlist.n_desc);
3701  sym[sym_idx].Clear();
3702  found_it = true;
3703  break;
3704  }
3705  }
3706  if (found_it)
3707  continue;
3708  } else {
3709  const char *gsym_name =
3710  sym[sym_idx]
3711  .GetMangled()
3712  .GetName(Mangled::ePreferMangled)
3713  .GetCString();
3714  if (gsym_name) {
3715  // Combine N_GSYM stab entries with the non
3716  // stab symbol
3717  ConstNameToSymbolIndexMap::const_iterator pos =
3718  N_GSYM_name_to_sym_idx.find(gsym_name);
3719  if (pos != N_GSYM_name_to_sym_idx.end()) {
3720  const uint32_t GSYM_sym_idx = pos->second;
3721  m_nlist_idx_to_sym_idx[nlist_idx] =
3722  GSYM_sym_idx;
3723  // Copy the address, because often the N_GSYM
3724  // address has an invalid address of zero
3725  // when the global is a common symbol
3726  sym[GSYM_sym_idx].GetAddressRef().SetSection(
3727  symbol_section);
3728  sym[GSYM_sym_idx].GetAddressRef().SetOffset(
3729  symbol_value);
3730  add_symbol_addr(sym[GSYM_sym_idx]
3731  .GetAddress()
3732  .GetFileAddress());
3733  // We just need the flags from the linker
3734  // symbol, so put these flags
3735  // into the N_GSYM flags to avoid duplicate
3736  // symbols in the symbol table
3737  sym[GSYM_sym_idx].SetFlags(nlist.n_type << 16 |
3738  nlist.n_desc);
3739  sym[sym_idx].Clear();
3740  continue;
3741  }
3742  }
3743  }
3744  }
3745  }
3746 
3747  sym[sym_idx].SetID(nlist_idx);
3748  sym[sym_idx].SetType(type);
3749  if (set_value) {
3750  sym[sym_idx].GetAddressRef().SetSection(symbol_section);
3751  sym[sym_idx].GetAddressRef().SetOffset(symbol_value);
3752  add_symbol_addr(
3753  sym[sym_idx].GetAddress().GetFileAddress());
3754  }
3755  sym[sym_idx].SetFlags(nlist.n_type << 16 | nlist.n_desc);
3756 
3757  if (symbol_byte_size > 0)
3758  sym[sym_idx].SetByteSize(symbol_byte_size);
3759 
3760  if (demangled_is_synthesized)
3761  sym[sym_idx].SetDemangledNameIsSynthesized(true);
3762  ++sym_idx;
3763  } else {
3764  sym[sym_idx].Clear();
3765  }
3766  }
3767  /////////////////////////////
3768  }
3769  break; // No more entries to consider
3770  }
3771  }
3772 
3773  for (const auto &pos : reexport_shlib_needs_fixup) {
3774  const auto undef_pos = undefined_name_to_desc.find(pos.second);
3775  if (undef_pos != undefined_name_to_desc.end()) {
3776  const uint8_t dylib_ordinal =
3777  llvm::MachO::GET_LIBRARY_ORDINAL(undef_pos->second);
3778  if (dylib_ordinal > 0 && dylib_ordinal < dylib_files.GetSize())
3779  sym[pos.first].SetReExportedSymbolSharedLibrary(
3780  dylib_files.GetFileSpecAtIndex(dylib_ordinal - 1));
3781  }
3782  }
3783  }
3784  }
3785  }
3786  }
3787  }
3788 
3789  // Must reset this in case it was mutated above!
3790  nlist_data_offset = 0;
3791 #endif
3792 
3793  if (nlist_data.GetByteSize() > 0) {
3794 
3795  // If the sym array was not created while parsing the DSC unmapped
3796  // symbols, create it now.
3797  if (sym == nullptr) {
3798  sym =
3799  symtab->Resize(symtab_load_command.nsyms + m_dysymtab.nindirectsyms);
3800  num_syms = symtab->GetNumSymbols();
3801  }
3802 
3803  if (unmapped_local_symbols_found) {
3804  assert(m_dysymtab.ilocalsym == 0);
3805  nlist_data_offset += (m_dysymtab.nlocalsym * nlist_byte_size);
3806  nlist_idx = m_dysymtab.nlocalsym;
3807  } else {
3808  nlist_idx = 0;
3809  }
3810 
3811  typedef llvm::DenseMap<ConstString, uint16_t> UndefinedNameToDescMap;
3812  typedef llvm::DenseMap<uint32_t, ConstString> SymbolIndexToName;
3813  UndefinedNameToDescMap undefined_name_to_desc;
3814  SymbolIndexToName reexport_shlib_needs_fixup;
3815 
3816  // Symtab parsing is a huge mess. Everything is entangled and the code
3817  // requires access to a ridiculous amount of variables. LLDB depends
3818  // heavily on the proper merging of symbols and to get that right we need
3819  // to make sure we have parsed all the debug symbols first. Therefore we
3820  // invoke the lambda twice, once to parse only the debug symbols and then
3821  // once more to parse the remaining symbols.
3822  auto ParseSymbolLambda = [&](struct nlist_64 &nlist, uint32_t nlist_idx,
3823  bool debug_only) {
3824  const bool is_debug = ((nlist.n_type & N_STAB) != 0);
3825  if (is_debug != debug_only)
3826  return true;
3827 
3828  const char *symbol_name_non_abi_mangled = nullptr;
3829  const char *symbol_name = nullptr;
3830 
3831  if (have_strtab_data) {
3832  symbol_name = strtab_data.PeekCStr(nlist.n_strx);
3833 
3834  if (symbol_name == nullptr) {
3835  // No symbol should be NULL, even the symbols with no string values
3836  // should have an offset zero which points to an empty C-string
3837  Host::SystemLog(Host::eSystemLogError,
3838  "error: symbol[%u] has invalid string table offset "
3839  "0x%x in %s, ignoring symbol\n",
3840  nlist_idx, nlist.n_strx,
3841  module_sp->GetFileSpec().GetPath().c_str());
3842  return true;
3843  }
3844  if (symbol_name[0] == '\0')
3845  symbol_name = nullptr;
3846  } else {
3847  const addr_t str_addr = strtab_addr + nlist.n_strx;
3848  Status str_error;
3849  if (process->ReadCStringFromMemory(str_addr, memory_symbol_name,
3850  str_error))
3851  symbol_name = memory_symbol_name.c_str();
3852  }
3853 
3855  SectionSP symbol_section;
3856  lldb::addr_t symbol_byte_size = 0;
3857  bool add_nlist = true;
3858  bool is_gsym = false;
3859  bool demangled_is_synthesized = false;
3860  bool set_value = true;
3861 
3862  assert(sym_idx < num_syms);
3863  sym[sym_idx].SetDebug(is_debug);
3864 
3865  if (is_debug) {
3866  switch (nlist.n_type) {
3867  case N_GSYM:
3868  // global symbol: name,,NO_SECT,type,0
3869  // Sometimes the N_GSYM value contains the address.
3870 
3871  // FIXME: In the .o files, we have a GSYM and a debug symbol for all
3872  // the ObjC data. They
3873  // have the same address, but we want to ensure that we always find
3874  // only the real symbol, 'cause we don't currently correctly
3875  // attribute the GSYM one to the ObjCClass/Ivar/MetaClass symbol
3876  // type. This is a temporary hack to make sure the ObjectiveC
3877  // symbols get treated correctly. To do this right, we should
3878  // coalesce all the GSYM & global symbols that have the same
3879  // address.
3880  is_gsym = true;
3881  sym[sym_idx].SetExternal(true);
3882 
3883  if (symbol_name && symbol_name[0] == '_' && symbol_name[1] == 'O') {
3884  llvm::StringRef symbol_name_ref(symbol_name);
3885  if (symbol_name_ref.startswith(g_objc_v2_prefix_class)) {
3886  symbol_name_non_abi_mangled = symbol_name + 1;
3887  symbol_name = symbol_name + g_objc_v2_prefix_class.size();
3888  type = eSymbolTypeObjCClass;
3889  demangled_is_synthesized = true;
3890 
3891  } else if (symbol_name_ref.startswith(g_objc_v2_prefix_metaclass)) {
3892  symbol_name_non_abi_mangled = symbol_name + 1;
3893  symbol_name = symbol_name + g_objc_v2_prefix_metaclass.size();
3894  type = eSymbolTypeObjCMetaClass;
3895  demangled_is_synthesized = true;
3896  } else if (symbol_name_ref.startswith(g_objc_v2_prefix_ivar)) {
3897  symbol_name_non_abi_mangled = symbol_name + 1;
3898  symbol_name = symbol_name + g_objc_v2_prefix_ivar.size();
3899  type = eSymbolTypeObjCIVar;
3900  demangled_is_synthesized = true;
3901  }
3902  } else {
3903  if (nlist.n_value != 0)
3904  symbol_section =
3905  section_info.GetSection(nlist.n_sect, nlist.n_value);
3906  type = eSymbolTypeData;
3907  }
3908  break;
3909 
3910  case N_FNAME:
3911  // procedure name (f77 kludge): name,,NO_SECT,0,0
3912  type = eSymbolTypeCompiler;
3913  break;
3914 
3915  case N_FUN:
3916  // procedure: name,,n_sect,linenumber,address
3917  if (symbol_name) {
3918  type = eSymbolTypeCode;
3919  symbol_section =
3920  section_info.GetSection(nlist.n_sect, nlist.n_value);
3921 
3922  N_FUN_addr_to_sym_idx.insert(
3923  std::make_pair(nlist.n_value, sym_idx));
3924  // We use the current number of symbols in the symbol table in
3925  // lieu of using nlist_idx in case we ever start trimming entries
3926  // out
3927  N_FUN_indexes.push_back(sym_idx);
3928  } else {
3929  type = eSymbolTypeCompiler;
3930 
3931  if (!N_FUN_indexes.empty()) {
3932  // Copy the size of the function into the original STAB entry
3933  // so we don't have to hunt for it later
3934  symtab->SymbolAtIndex(N_FUN_indexes.back())
3935  ->SetByteSize(nlist.n_value);
3936  N_FUN_indexes.pop_back();
3937  // We don't really need the end function STAB as it contains
3938  // the size which we already placed with the original symbol,
3939  // so don't add it if we want a minimal symbol table
3940  add_nlist = false;
3941  }
3942  }
3943  break;
3944 
3945  case N_STSYM:
3946  // static symbol: name,,n_sect,type,address
3947  N_STSYM_addr_to_sym_idx.insert(
3948  std::make_pair(nlist.n_value, sym_idx));
3949  symbol_section = section_info.GetSection(nlist.n_sect, nlist.n_value);
3950  if (symbol_name && symbol_name[0]) {
3951  type = ObjectFile::GetSymbolTypeFromName(symbol_name + 1,
3952  eSymbolTypeData);
3953  }
3954  break;
3955 
3956  case N_LCSYM:
3957  // .lcomm symbol: name,,n_sect,type,address
3958  symbol_section = section_info.GetSection(nlist.n_sect, nlist.n_value);
3959  type = eSymbolTypeCommonBlock;
3960  break;
3961 
3962  case N_BNSYM:
3963  // We use the current number of symbols in the symbol table in lieu
3964  // of using nlist_idx in case we ever start trimming entries out
3965  // Skip these if we want minimal symbol tables
3966  add_nlist = false;
3967  break;
3968 
3969  case N_ENSYM:
3970  // Set the size of the N_BNSYM to the terminating index of this
3971  // N_ENSYM so that we can always skip the entire symbol if we need
3972  // to navigate more quickly at the source level when parsing STABS
3973  // Skip these if we want minimal symbol tables
3974  add_nlist = false;
3975  break;
3976 
3977  case N_OPT:
3978  // emitted with gcc2_compiled and in gcc source
3979  type = eSymbolTypeCompiler;
3980  break;
3981 
3982  case N_RSYM:
3983  // register sym: name,,NO_SECT,type,register
3984  type = eSymbolTypeVariable;
3985  break;
3986 
3987  case N_SLINE:
3988  // src line: 0,,n_sect,linenumber,address
3989  symbol_section = section_info.GetSection(nlist.n_sect, nlist.n_value);
3990  type = eSymbolTypeLineEntry;
3991  break;
3992 
3993  case N_SSYM:
3994  // structure elt: name,,NO_SECT,type,struct_offset
3995  type = eSymbolTypeVariableType;
3996  break;
3997 
3998  case N_SO:
3999  // source file name
4000  type = eSymbolTypeSourceFile;
4001  if (symbol_name == nullptr) {
4002  add_nlist = false;
4003  if (N_SO_index != UINT32_MAX) {
4004  // Set the size of the N_SO to the terminating index of this
4005  // N_SO so that we can always skip the entire N_SO if we need
4006  // to navigate more quickly at the source level when parsing
4007  // STABS
4008  symbol_ptr = symtab->SymbolAtIndex(N_SO_index);
4009  symbol_ptr->SetByteSize(sym_idx);
4010  symbol_ptr->SetSizeIsSibling(true);
4011  }
4012  N_NSYM_indexes.clear();
4013  N_INCL_indexes.clear();
4014  N_BRAC_indexes.clear();
4015  N_COMM_indexes.clear();
4016  N_FUN_indexes.clear();
4017  N_SO_index = UINT32_MAX;
4018  } else {
4019  // We use the current number of symbols in the symbol table in
4020  // lieu of using nlist_idx in case we ever start trimming entries
4021  // out
4022  const bool N_SO_has_full_path = symbol_name[0] == '/';
4023  if (N_SO_has_full_path) {
4024  if ((N_SO_index == sym_idx - 1) && ((sym_idx - 1) < num_syms)) {
4025  // We have two consecutive N_SO entries where the first
4026  // contains a directory and the second contains a full path.
4027  sym[sym_idx - 1].GetMangled().SetValue(ConstString(symbol_name),
4028  false);
4029  m_nlist_idx_to_sym_idx[nlist_idx] = sym_idx - 1;
4030  add_nlist = false;
4031  } else {
4032  // This is the first entry in a N_SO that contains a
4033  // directory or a full path to the source file
4034  N_SO_index = sym_idx;
4035  }
4036  } else if ((N_SO_index == sym_idx - 1) &&
4037  ((sym_idx - 1) < num_syms)) {
4038  // This is usually the second N_SO entry that contains just the
4039  // filename, so here we combine it with the first one if we are
4040  // minimizing the symbol table
4041  const char *so_path =
4042  sym[sym_idx - 1].GetMangled().GetDemangledName().AsCString();
4043  if (so_path && so_path[0]) {
4044  std::string full_so_path(so_path);
4045  const size_t double_slash_pos = full_so_path.find("//");
4046  if (double_slash_pos != std::string::npos) {
4047  // The linker has been generating bad N_SO entries with
4048  // doubled up paths in the format "%s%s" where the first
4049  // string in the DW_AT_comp_dir, and the second is the
4050  // directory for the source file so you end up with a path
4051  // that looks like "/tmp/src//tmp/src/"
4052  FileSpec so_dir(so_path);
4053  if (!FileSystem::Instance().Exists(so_dir)) {
4054  so_dir.SetFile(&full_so_path[double_slash_pos + 1],
4055  FileSpec::Style::native);
4056  if (FileSystem::Instance().Exists(so_dir)) {
4057  // Trim off the incorrect path
4058  full_so_path.erase(0, double_slash_pos + 1);
4059  }
4060  }
4061  }
4062  if (*full_so_path.rbegin() != '/')
4063  full_so_path += '/';
4064  full_so_path += symbol_name;
4065  sym[sym_idx - 1].GetMangled().SetValue(
4066  ConstString(full_so_path.c_str()), false);
4067  add_nlist = false;
4068  m_nlist_idx_to_sym_idx[nlist_idx] = sym_idx - 1;
4069  }
4070  } else {
4071  // This could be a relative path to a N_SO
4072  N_SO_index = sym_idx;
4073  }
4074  }
4075  break;
4076 
4077  case N_OSO:
4078  // object file name: name,,0,0,st_mtime
4079  type = eSymbolTypeObjectFile;
4080  break;
4081 
4082  case N_LSYM:
4083  // local sym: name,,NO_SECT,type,offset
4084  type = eSymbolTypeLocal;
4085  break;
4086 
4087  // INCL scopes
4088  case N_BINCL:
4089  // include file beginning: name,,NO_SECT,0,sum We use the current
4090  // number of symbols in the symbol table in lieu of using nlist_idx
4091  // in case we ever start trimming entries out
4092  N_INCL_indexes.push_back(sym_idx);
4093  type = eSymbolTypeScopeBegin;
4094  break;
4095 
4096  case N_EINCL:
4097  // include file end: name,,NO_SECT,0,0
4098  // Set the size of the N_BINCL to the terminating index of this
4099  // N_EINCL so that we can always skip the entire symbol if we need
4100  // to navigate more quickly at the source level when parsing STABS
4101  if (!N_INCL_indexes.empty()) {
4102  symbol_ptr = symtab->SymbolAtIndex(N_INCL_indexes.back());
4103  symbol_ptr->SetByteSize(sym_idx + 1);
4104  symbol_ptr->SetSizeIsSibling(true);
4105  N_INCL_indexes.pop_back();
4106  }
4107  type = eSymbolTypeScopeEnd;
4108  break;
4109 
4110  case N_SOL:
4111  // #included file name: name,,n_sect,0,address
4112  type = eSymbolTypeHeaderFile;
4113 
4114  // We currently don't use the header files on darwin
4115  add_nlist = false;
4116  break;
4117 
4118  case N_PARAMS:
4119  // compiler parameters: name,,NO_SECT,0,0
4120  type = eSymbolTypeCompiler;
4121  break;
4122 
4123  case N_VERSION:
4124  // compiler version: name,,NO_SECT,0,0
4125  type = eSymbolTypeCompiler;
4126  break;
4127 
4128  case N_OLEVEL:
4129  // compiler -O level: name,,NO_SECT,0,0
4130  type = eSymbolTypeCompiler;
4131  break;
4132 
4133  case N_PSYM:
4134  // parameter: name,,NO_SECT,type,offset
4135  type = eSymbolTypeVariable;
4136  break;
4137 
4138  case N_ENTRY:
4139  // alternate entry: name,,n_sect,linenumber,address
4140  symbol_section = section_info.GetSection(nlist.n_sect, nlist.n_value);
4141  type = eSymbolTypeLineEntry;
4142  break;
4143 
4144  // Left and Right Braces
4145  case N_LBRAC:
4146  // left bracket: 0,,NO_SECT,nesting level,address We use the
4147  // current number of symbols in the symbol table in lieu of using
4148  // nlist_idx in case we ever start trimming entries out
4149  symbol_section = section_info.GetSection(nlist.n_sect, nlist.n_value);
4150  N_BRAC_indexes.push_back(sym_idx);
4151  type = eSymbolTypeScopeBegin;
4152  break;
4153 
4154  case N_RBRAC:
4155  // right bracket: 0,,NO_SECT,nesting level,address Set the size of
4156  // the N_LBRAC to the terminating index of this N_RBRAC so that we
4157  // can always skip the entire symbol if we need to navigate more
4158  // quickly at the source level when parsing STABS
4159  symbol_section = section_info.GetSection(nlist.n_sect, nlist.n_value);
4160  if (!N_BRAC_indexes.empty()) {
4161  symbol_ptr = symtab->SymbolAtIndex(N_BRAC_indexes.back());
4162  symbol_ptr->SetByteSize(sym_idx + 1);
4163  symbol_ptr->SetSizeIsSibling(true);
4164  N_BRAC_indexes.pop_back();
4165  }
4166  type = eSymbolTypeScopeEnd;
4167  break;
4168 
4169  case N_EXCL:
4170  // deleted include file: name,,NO_SECT,0,sum
4171  type = eSymbolTypeHeaderFile;
4172  break;
4173 
4174  // COMM scopes
4175  case N_BCOMM:
4176  // begin common: name,,NO_SECT,0,0
4177  // We use the current number of symbols in the symbol table in lieu
4178  // of using nlist_idx in case we ever start trimming entries out
4179  type = eSymbolTypeScopeBegin;
4180  N_COMM_indexes.push_back(sym_idx);
4181  break;
4182 
4183  case N_ECOML:
4184  // end common (local name): 0,,n_sect,0,address
4185  symbol_section = section_info.GetSection(nlist.n_sect, nlist.n_value);
4186  LLVM_FALLTHROUGH;
4187 
4188  case N_ECOMM:
4189  // end common: name,,n_sect,0,0
4190  // Set the size of the N_BCOMM to the terminating index of this
4191  // N_ECOMM/N_ECOML so that we can always skip the entire symbol if
4192  // we need to navigate more quickly at the source level when
4193  // parsing STABS
4194  if (!N_COMM_indexes.empty()) {
4195  symbol_ptr = symtab->SymbolAtIndex(N_COMM_indexes.back());
4196  symbol_ptr->SetByteSize(sym_idx + 1);
4197  symbol_ptr->SetSizeIsSibling(true);
4198  N_COMM_indexes.pop_back();
4199  }
4200  type = eSymbolTypeScopeEnd;
4201  break;
4202 
4203  case N_LENG:
4204  // second stab entry with length information
4205  type = eSymbolTypeAdditional;
4206  break;
4207 
4208  default:
4209  break;
4210  }
4211  } else {
4212  uint8_t n_type = N_TYPE & nlist.n_type;
4213  sym[sym_idx].SetExternal((N_EXT & nlist.n_type) != 0);
4214 
4215  switch (n_type) {
4216  case N_INDR: {
4217  const char *reexport_name_cstr = strtab_data.PeekCStr(nlist.n_value);
4218  if (reexport_name_cstr && reexport_name_cstr[0]) {
4219  type = eSymbolTypeReExported;
4220  ConstString reexport_name(reexport_name_cstr +
4221  ((reexport_name_cstr[0] == '_') ? 1 : 0));
4222  sym[sym_idx].SetReExportedSymbolName(reexport_name);
4223  set_value = false;
4224  reexport_shlib_needs_fixup[sym_idx] = reexport_name;
4225  indirect_symbol_names.insert(
4226  ConstString(symbol_name + ((symbol_name[0] == '_') ? 1 : 0)));
4227  } else
4228  type = eSymbolTypeUndefined;
4229  } break;
4230 
4231  case N_UNDF:
4232  if (symbol_name && symbol_name[0]) {
4233  ConstString undefined_name(symbol_name +
4234  ((symbol_name[0] == '_') ? 1 : 0));
4235  undefined_name_to_desc[undefined_name] = nlist.n_desc;
4236  }
4237  LLVM_FALLTHROUGH;
4238 
4239  case N_PBUD:
4240  type = eSymbolTypeUndefined;
4241  break;
4242 
4243  case N_ABS:
4244  type = eSymbolTypeAbsolute;
4245  break;
4246 
4247  case N_SECT: {
4248  symbol_section = section_info.GetSection(nlist.n_sect, nlist.n_value);
4249 
4250  if (!symbol_section) {
4251  // TODO: warn about this?
4252  add_nlist = false;
4253  break;
4254  }
4255 
4256  if (TEXT_eh_frame_sectID == nlist.n_sect) {
4257  type = eSymbolTypeException;
4258  } else {
4259  uint32_t section_type = symbol_section->Get() & SECTION_TYPE;
4260 
4261  switch (section_type) {
4262  case S_CSTRING_LITERALS:
4263  type = eSymbolTypeData;
4264  break; // section with only literal C strings
4265  case S_4BYTE_LITERALS:
4266  type = eSymbolTypeData;
4267  break; // section with only 4 byte literals
4268  case S_8BYTE_LITERALS:
4269  type = eSymbolTypeData;
4270  break; // section with only 8 byte literals
4271  case S_LITERAL_POINTERS:
4272  type = eSymbolTypeTrampoline;
4273  break; // section with only pointers to literals
4274  case S_NON_LAZY_SYMBOL_POINTERS:
4275  type = eSymbolTypeTrampoline;
4276  break; // section with only non-lazy symbol pointers
4277  case S_LAZY_SYMBOL_POINTERS:
4278  type = eSymbolTypeTrampoline;
4279  break; // section with only lazy symbol pointers
4280  case S_SYMBOL_STUBS:
4281  type = eSymbolTypeTrampoline;
4282  break; // section with only symbol stubs, byte size of stub in
4283  // the reserved2 field
4284  case S_MOD_INIT_FUNC_POINTERS:
4285  type = eSymbolTypeCode;
4286  break; // section with only function pointers for initialization
4287  case S_MOD_TERM_FUNC_POINTERS:
4288  type = eSymbolTypeCode;
4289  break; // section with only function pointers for termination
4290  case S_INTERPOSING:
4291  type = eSymbolTypeTrampoline;
4292  break; // section with only pairs of function pointers for
4293  // interposing
4294  case S_16BYTE_LITERALS:
4295  type = eSymbolTypeData;
4296  break; // section with only 16 byte literals
4297  case S_DTRACE_DOF:
4299  break;
4300  case S_LAZY_DYLIB_SYMBOL_POINTERS:
4301  type = eSymbolTypeTrampoline;
4302  break;
4303  default:
4304  switch (symbol_section->GetType()) {
4306  type = eSymbolTypeCode;
4307  break;
4308  case eSectionTypeData:
4309  case eSectionTypeDataCString: // Inlined C string data
4310  case eSectionTypeDataCStringPointers: // Pointers to C string
4311  // data
4312  case eSectionTypeDataSymbolAddress: // Address of a symbol in
4313  // the symbol table
4314  case eSectionTypeData4:
4315  case eSectionTypeData8:
4316  case eSectionTypeData16:
4317  type = eSymbolTypeData;
4318  break;
4319  default:
4320  break;
4321  }
4322  break;
4323  }
4324 
4325  if (type == eSymbolTypeInvalid) {
4326  const char *symbol_sect_name =
4327  symbol_section->GetName().AsCString();
4328  if (symbol_section->IsDescendant(text_section_sp.get())) {
4329  if (symbol_section->IsClear(S_ATTR_PURE_INSTRUCTIONS |
4330  S_ATTR_SELF_MODIFYING_CODE |
4331  S_ATTR_SOME_INSTRUCTIONS))
4332  type = eSymbolTypeData;
4333  else
4334  type = eSymbolTypeCode;
4335  } else if (symbol_section->IsDescendant(data_section_sp.get()) ||
4336  symbol_section->IsDescendant(
4337  data_dirty_section_sp.get()) ||
4338  symbol_section->IsDescendant(
4339  data_const_section_sp.get())) {
4340  if (symbol_sect_name &&
4341  ::strstr(symbol_sect_name, "__objc") == symbol_sect_name) {
4342  type = eSymbolTypeRuntime;
4343 
4344  if (symbol_name) {
4345  llvm::StringRef symbol_name_ref(symbol_name);
4346  if (symbol_name_ref.startswith("_OBJC_")) {
4347  llvm::StringRef g_objc_v2_prefix_class(
4348  "_OBJC_CLASS_$_");
4349  llvm::StringRef g_objc_v2_prefix_metaclass(
4350  "_OBJC_METACLASS_$_");
4351  llvm::StringRef g_objc_v2_prefix_ivar(
4352  "_OBJC_IVAR_$_");
4353  if (symbol_name_ref.startswith(g_objc_v2_prefix_class)) {
4354  symbol_name_non_abi_mangled = symbol_name + 1;
4355  symbol_name =
4356  symbol_name + g_objc_v2_prefix_class.size();
4357  type = eSymbolTypeObjCClass;
4358  demangled_is_synthesized = true;
4359  } else if (symbol_name_ref.startswith(
4360  g_objc_v2_prefix_metaclass)) {
4361  symbol_name_non_abi_mangled = symbol_name + 1;
4362  symbol_name =
4363  symbol_name + g_objc_v2_prefix_metaclass.size();
4364  type = eSymbolTypeObjCMetaClass;
4365  demangled_is_synthesized = true;
4366  } else if (symbol_name_ref.startswith(
4367  g_objc_v2_prefix_ivar)) {
4368  symbol_name_non_abi_mangled = symbol_name + 1;
4369  symbol_name =
4370  symbol_name + g_objc_v2_prefix_ivar.size();
4371  type = eSymbolTypeObjCIVar;
4372  demangled_is_synthesized = true;
4373  }
4374  }
4375  }
4376  } else if (symbol_sect_name &&
4377  ::strstr(symbol_sect_name, "__gcc_except_tab") ==
4378  symbol_sect_name) {
4379  type = eSymbolTypeException;
4380  } else {
4381  type = eSymbolTypeData;
4382  }
4383  } else if (symbol_sect_name &&
4384  ::strstr(symbol_sect_name, "__IMPORT") ==
4385  symbol_sect_name) {
4386  type = eSymbolTypeTrampoline;
4387  } else if (symbol_section->IsDescendant(objc_section_sp.get())) {
4388  type = eSymbolTypeRuntime;
4389  if (symbol_name && symbol_name[0] == '.') {
4390  llvm::StringRef symbol_name_ref(symbol_name);
4391  llvm::StringRef g_objc_v1_prefix_class(
4392  ".objc_class_name_");
4393  if (symbol_name_ref.startswith(g_objc_v1_prefix_class)) {
4394  symbol_name_non_abi_mangled = symbol_name;
4395  symbol_name = symbol_name + g_objc_v1_prefix_class.size();
4396  type = eSymbolTypeObjCClass;
4397  demangled_is_synthesized = true;
4398  }
4399  }
4400  }
4401  }
4402  }
4403  } break;
4404  }
4405  }
4406 
4407  if (!add_nlist) {
4408  sym[sym_idx].Clear();
4409  return true;
4410  }
4411 
4412  uint64_t symbol_value = nlist.n_value;
4413 
4414  if (symbol_name_non_abi_mangled) {
4415  sym[sym_idx].GetMangled().SetMangledName(
4416  ConstString(symbol_name_non_abi_mangled));
4417  sym[sym_idx].GetMangled().SetDemangledName(ConstString(symbol_name));
4418  } else {
4419  bool symbol_name_is_mangled = false;
4420 
4421  if (symbol_name && symbol_name[0] == '_') {
4422  symbol_name_is_mangled = symbol_name[1] == '_';
4423  symbol_name++; // Skip the leading underscore
4424  }
4425 
4426  if (symbol_name) {
4427  ConstString const_symbol_name(symbol_name);
4428  sym[sym_idx].GetMangled().SetValue(const_symbol_name,
4429  symbol_name_is_mangled);
4430  }
4431  }
4432 
4433  if (is_gsym) {
4434  const char *gsym_name = sym[sym_idx]
4435  .GetMangled()
4436  .GetName(Mangled::ePreferMangled)
4437  .GetCString();
4438  if (gsym_name)
4439  N_GSYM_name_to_sym_idx[gsym_name] = sym_idx;
4440  }
4441 
4442  if (symbol_section) {
4443  const addr_t section_file_addr = symbol_section->GetFileAddress();
4444  if (symbol_byte_size == 0 && function_starts_count > 0) {
4445  addr_t symbol_lookup_file_addr = nlist.n_value;
4446  // Do an exact address match for non-ARM addresses, else get the
4447  // closest since the symbol might be a thumb symbol which has an
4448  // address with bit zero set.
4449  FunctionStarts::Entry *func_start_entry =
4450  function_starts.FindEntry(symbol_lookup_file_addr, !is_arm);
4451  if (is_arm && func_start_entry) {
4452  // Verify that the function start address is the symbol address
4453  // (ARM) or the symbol address + 1 (thumb).
4454  if (func_start_entry->addr != symbol_lookup_file_addr &&
4455  func_start_entry->addr != (symbol_lookup_file_addr + 1)) {
4456  // Not the right entry, NULL it out...
4457  func_start_entry = nullptr;
4458  }
4459  }
4460  if (func_start_entry) {
4461  func_start_entry->data = true;
4462 
4463  addr_t symbol_file_addr = func_start_entry->addr;
4464  if (is_arm)
4465  symbol_file_addr &= THUMB_ADDRESS_BIT_MASK;
4466 
4467  const FunctionStarts::Entry *next_func_start_entry =
4468  function_starts.FindNextEntry(func_start_entry);
4469  const addr_t section_end_file_addr =
4470  section_file_addr + symbol_section->GetByteSize();
4471  if (next_func_start_entry) {
4472  addr_t next_symbol_file_addr = next_func_start_entry->addr;
4473  // Be sure the clear the Thumb address bit when we calculate the
4474  // size from the current and next address
4475  if (is_arm)
4476  next_symbol_file_addr &= THUMB_ADDRESS_BIT_MASK;
4477  symbol_byte_size = std::min<lldb::addr_t>(
4478  next_symbol_file_addr - symbol_file_addr,
4479  section_end_file_addr - symbol_file_addr);
4480  } else {
4481  symbol_byte_size = section_end_file_addr - symbol_file_addr;
4482  }
4483  }
4484  }
4485  symbol_value -= section_file_addr;
4486  }
4487 
4488  if (!is_debug) {
4489  if (type == eSymbolTypeCode) {
4490  // See if we can find a N_FUN entry for any code symbols. If we do
4491  // find a match, and the name matches, then we can merge the two into
4492  // just the function symbol to avoid duplicate entries in the symbol
4493  // table.
4494  std::pair<ValueToSymbolIndexMap::const_iterator,
4495  ValueToSymbolIndexMap::const_iterator>
4496  range;
4497  range = N_FUN_addr_to_sym_idx.equal_range(nlist.n_value);
4498  if (range.first != range.second) {
4499  for (ValueToSymbolIndexMap::const_iterator pos = range.first;
4500  pos != range.second; ++pos) {
4501  if (sym[sym_idx].GetMangled().GetName(Mangled::ePreferMangled) ==
4502  sym[pos->second].GetMangled().GetName(
4503  Mangled::ePreferMangled)) {
4504  m_nlist_idx_to_sym_idx[nlist_idx] = pos->second;
4505  // We just need the flags from the linker symbol, so put these
4506  // flags into the N_FUN flags to avoid duplicate symbols in the
4507  // symbol table.
4508  sym[pos->second].SetExternal(sym[sym_idx].IsExternal());
4509  sym[pos->second].SetFlags(nlist.n_type << 16 | nlist.n_desc);
4510  if (resolver_addresses.find(nlist.n_value) !=
4511  resolver_addresses.end())
4512  sym[pos->second].SetType(eSymbolTypeResolver);
4513  sym[sym_idx].Clear();
4514  return true;
4515  }
4516  }
4517  } else {
4518  if (resolver_addresses.find(nlist.n_value) !=
4519  resolver_addresses.end())
4520  type = eSymbolTypeResolver;
4521  }
4522  } else if (type == eSymbolTypeData || type == eSymbolTypeObjCClass ||
4523  type == eSymbolTypeObjCMetaClass ||
4524  type == eSymbolTypeObjCIVar) {
4525  // See if we can find a N_STSYM entry for any data symbols. If we do
4526  // find a match, and the name matches, then we can merge the two into
4527  // just the Static symbol to avoid duplicate entries in the symbol
4528  // table.
4529  std::pair<ValueToSymbolIndexMap::const_iterator,
4530  ValueToSymbolIndexMap::const_iterator>
4531  range;
4532  range = N_STSYM_addr_to_sym_idx.equal_range(nlist.n_value);
4533  if (range.first != range.second) {
4534  for (ValueToSymbolIndexMap::const_iterator pos = range.first;
4535  pos != range.second; ++pos) {
4536  if (sym[sym_idx].GetMangled().GetName(Mangled::ePreferMangled) ==
4537  sym[pos->second].GetMangled().GetName(
4538  Mangled::ePreferMangled)) {
4539  m_nlist_idx_to_sym_idx[nlist_idx] = pos->second;
4540  // We just need the flags from the linker symbol, so put these
4541  // flags into the N_STSYM flags to avoid duplicate symbols in
4542  // the symbol table.
4543  sym[pos->second].SetExternal(sym[sym_idx].IsExternal());
4544  sym[pos->second].SetFlags(nlist.n_type << 16 | nlist.n_desc);
4545  sym[sym_idx].Clear();
4546  return true;
4547  }
4548  }
4549  } else {
4550  // Combine N_GSYM stab entries with the non stab symbol.
4551  const char *gsym_name = sym[sym_idx]
4552  .GetMangled()
4553  .GetName(Mangled::ePreferMangled)
4554  .GetCString();
4555  if (gsym_name) {
4556  ConstNameToSymbolIndexMap::const_iterator pos =
4557  N_GSYM_name_to_sym_idx.find(gsym_name);
4558  if (pos != N_GSYM_name_to_sym_idx.end()) {
4559  const uint32_t GSYM_sym_idx = pos->second;
4560  m_nlist_idx_to_sym_idx[nlist_idx] = GSYM_sym_idx;
4561  // Copy the address, because often the N_GSYM address has an
4562  // invalid address of zero when the global is a common symbol.
4563  sym[GSYM_sym_idx].GetAddressRef().SetSection(symbol_section);
4564  sym[GSYM_sym_idx].GetAddressRef().SetOffset(symbol_value);
4565  add_symbol_addr(
4566  sym[GSYM_sym_idx].GetAddress().GetFileAddress());
4567  // We just need the flags from the linker symbol, so put these
4568  // flags into the N_GSYM flags to avoid duplicate symbols in
4569  // the symbol table.
4570  sym[GSYM_sym_idx].SetFlags(nlist.n_type << 16 | nlist.n_desc);
4571  sym[sym_idx].Clear();
4572  return true;
4573  }
4574  }
4575  }
4576  }
4577  }
4578 
4579  sym[sym_idx].SetID(nlist_idx);
4580  sym[sym_idx].SetType(type);
4581  if (set_value) {
4582  sym[sym_idx].GetAddressRef().SetSection(symbol_section);
4583  sym[sym_idx].GetAddressRef().SetOffset(symbol_value);
4584  if (symbol_section)
4585  add_symbol_addr(sym[sym_idx].GetAddress().GetFileAddress());
4586  }
4587  sym[sym_idx].SetFlags(nlist.n_type << 16 | nlist.n_desc);
4588  if (nlist.n_desc & N_WEAK_REF)
4589  sym[sym_idx].SetIsWeak(true);
4590 
4591  if (symbol_byte_size > 0)
4592  sym[sym_idx].SetByteSize(symbol_byte_size);
4593 
4594  if (demangled_is_synthesized)
4595  sym[sym_idx].SetDemangledNameIsSynthesized(true);
4596 
4597  ++sym_idx;
4598  return true;
4599  };
4600 
4601  // First parse all the nlists but don't process them yet. See the next
4602  // comment for an explanation why.
4603  std::vector<struct nlist_64> nlists;
4604  nlists.reserve(symtab_load_command.nsyms);
// Reading stops at the first entry ParseNList cannot produce, so nlists may
// hold fewer than nsyms entries.
4605  for (; nlist_idx < symtab_load_command.nsyms; ++nlist_idx) {
4606  if (auto nlist =
4607  ParseNList(nlist_data, nlist_data_offset, nlist_byte_size))
4608  nlists.push_back(*nlist);
4609  else
4610  break;
4611  }
4612 
4613  // Now parse all the debug symbols. This is needed to merge non-debug
4614  // symbols in the next step. Non-debug symbols are always coalesced into
4615  // the debug symbol. Doing this in one step would mean that some symbols
4616  // won't be merged.
// NOTE(review): the read loop above starts at the incoming nlist_idx, which
// is m_dysymtab.nlocalsym when unmapped local symbols were found, but both
// passes below restart the index at 0. In that case the index handed to
// ParseSymbolLambda looks offset from the entry's real nlist index --
// confirm whether that is intended.
// DebugSymbols / NonDebugSymbols are defined outside this excerpt;
// presumably they are the debug_only values for each pass -- confirm.
4617  nlist_idx = 0;
4618  for (auto &nlist : nlists) {
4619  if (!ParseSymbolLambda(nlist, nlist_idx++, DebugSymbols))
4620  break;
4621  }
4622 
4623  // Finally parse all the non debug symbols.
4624  nlist_idx = 0;
4625  for (auto &nlist : nlists) {
4626  if (!ParseSymbolLambda(nlist, nlist_idx++, NonDebugSymbols))
4627  break;
4628  }
4629 
// Attach a shared library to each re-exported symbol recorded by the lambda:
// look up the re-exported name among the undefined symbols and use the
// library ordinal stored in that symbol's n_desc.
4630  for (const auto &pos : reexport_shlib_needs_fixup) {
4631  const auto undef_pos = undefined_name_to_desc.find(pos.second);
4632  if (undef_pos != undefined_name_to_desc.end()) {
4633  const uint8_t dylib_ordinal =
4634  llvm::MachO::GET_LIBRARY_ORDINAL(undef_pos->second);
// NOTE(review): the ordinal is used 1-based (index dylib_ordinal - 1), yet
// this bound check rejects dylib_ordinal == GetSize(), i.e. the last entry
// of dylib_files. Confirm whether `<=` was intended.
4635  if (dylib_ordinal > 0 && dylib_ordinal < dylib_files.GetSize())
4636  sym[pos.first].SetReExportedSymbolSharedLibrary(
4637  dylib_files.GetFileSpecAtIndex(dylib_ordinal - 1));
4638  }
4639  }
4640  }
4641 
4642  // Count how many trie symbols we'll add to the symbol table
4643  int trie_symbol_table_augment_count = 0;
4644  for (auto &e : external_sym_trie_entries) {
4645  if (symbols_added.find(e.entry.address) == symbols_added.end())
4646  trie_symbol_table_augment_count++;
4647  }
4648 
4649  if (num_syms < sym_idx + trie_symbol_table_augment_count) {
4650  num_syms = sym_idx + trie_symbol_table_augment_count;
4651  sym = symtab->Resize(num_syms);
4652  }
4653  uint32_t synthetic_sym_id = symtab_load_command.nsyms;
4654 
4655  // Add symbols from the trie to the symbol table.
4656  for (auto &e : external_sym_trie_entries) {
4657  if (symbols_added.find(e.entry.address) != symbols_added.end())
4658  continue;
4659 
4660  // Find the section that this trie address is in, use that to annotate
4661  // symbol type as we add the trie address and name to the symbol table.
4662  Address symbol_addr;
4663  if (module_sp->ResolveFileAddress(e.entry.address, symbol_addr)) {
4664  SectionSP symbol_section(symbol_addr.GetSection());
4665  const char *symbol_name = e.entry.name.GetCString();
4666  bool demangled_is_synthesized = false;
4667  SymbolType type =
4668  GetSymbolType(symbol_name, demangled_is_synthesized, text_section_sp,
4669  data_section_sp, data_dirty_section_sp,
4670  data_const_section_sp, symbol_section);
4671 
4672  sym[sym_idx].SetType(type);
4673  if (symbol_section) {
4674  sym[sym_idx].SetID(synthetic_sym_id++);
4675  sym[sym_idx].GetMangled().SetMangledName(ConstString(symbol_name));
4676  if (demangled_is_synthesized)
4677  sym[sym_idx].SetDemangledNameIsSynthesized(true);
4678  sym[sym_idx].SetIsSynthetic(true);
4679  sym[sym_idx].SetExternal(true);
4680  sym[sym_idx].GetAddressRef() = symbol_addr;
4681  add_symbol_addr(symbol_addr.GetFileAddress());
4682  if (e.entry.flags & TRIE_SYMBOL_IS_THUMB)
4684  ++sym_idx;
4685  }
4686  }
4687  }
4688 
4689  if (function_starts_count > 0) {
4690  uint32_t num_synthetic_function_symbols = 0;
4691  for (i = 0; i < function_starts_count; ++i) {
4692  if (symbols_added.find(function_starts.GetEntryRef(i).addr) ==
4693  symbols_added.end())
4694  ++num_synthetic_function_symbols;
4695  }
4696 
4697  if (num_synthetic_function_symbols > 0) {
4698  if (num_syms < sym_idx + num_synthetic_function_symbols) {
4699  num_syms = sym_idx + num_synthetic_function_symbols;
4700  sym = symtab->Resize(num_syms);
4701  }
4702  for (i = 0; i < function_starts_count; ++i) {
4703  const FunctionStarts::Entry *func_start_entry =
4704  function_starts.GetEntryAtIndex(i);
4705  if (symbols_added.find(func_start_entry->addr) == symbols_added.end()) {
4706  addr_t symbol_file_addr = func_start_entry->addr;
4707  uint32_t symbol_flags = 0;
4708  if (func_start_entry->data)
4709  symbol_flags = MACHO_NLIST_ARM_SYMBOL_IS_THUMB;
4710  Address symbol_addr;
4711  if (module_sp->ResolveFileAddress(symbol_file_addr, symbol_addr)) {
4712  SectionSP symbol_section(symbol_addr.GetSection());
4713  uint32_t symbol_byte_size = 0;
4714  if (symbol_section) {
4715  const addr_t section_file_addr = symbol_section->GetFileAddress();
4716  const FunctionStarts::Entry *next_func_start_entry =
4717  function_starts.FindNextEntry(func_start_entry);
4718  const addr_t section_end_file_addr =
4719  section_file_addr + symbol_section->GetByteSize();
4720  if (next_func_start_entry) {
4721  addr_t next_symbol_file_addr = next_func_start_entry->addr;
4722  if (is_arm)
4723  next_symbol_file_addr &= THUMB_ADDRESS_BIT_MASK;
4724  symbol_byte_size = std::min<lldb::addr_t>(
4725  next_symbol_file_addr - symbol_file_addr,
4726  section_end_file_addr - symbol_file_addr);
4727  } else {
4728  symbol_byte_size = section_end_file_addr - symbol_file_addr;
4729  }
4730  sym[sym_idx].SetID(synthetic_sym_id++);
4731  // Don't set the name for any synthetic symbols, the Symbol
4732  // object will generate one if needed when the name is accessed
4733  // via accessors.
4734  sym[sym_idx].GetMangled().SetDemangledName(ConstString());
4735  sym[sym_idx].SetType(eSymbolTypeCode);
4736  sym[sym_idx].SetIsSynthetic(true);
4737  sym[sym_idx].GetAddressRef() = symbol_addr;
4738  add_symbol_addr(symbol_addr.GetFileAddress());
4739  if (symbol_flags)
4740  sym[sym_idx].SetFlags(symbol_flags);
4741  if (symbol_byte_size)
4742  sym[sym_idx].SetByteSize(symbol_byte_size);
4743  ++sym_idx;
4744  }
4745  }
4746  }
4747  }
4748  }
4749  }
4750 
4751  // Trim our symbols down to just what we ended up with after removing any
4752  // symbols.
4753  if (sym_idx < num_syms) {
4754  num_syms = sym_idx;
4755  sym = symtab->Resize(num_syms);
4756  }
4757 
4758  // Now synthesize indirect symbols
4759  if (m_dysymtab.nindirectsyms != 0) {
4760  if (indirect_symbol_index_data.GetByteSize()) {
4761  NListIndexToSymbolIndexMap::const_iterator end_index_pos =
4762  m_nlist_idx_to_sym_idx.end();
4763 
4764  for (uint32_t sect_idx = 1; sect_idx < m_mach_sections.size();
4765  ++sect_idx) {
4766  if ((m_mach_sections[sect_idx].flags & SECTION_TYPE) ==
4767  S_SYMBOL_STUBS) {
4768  uint32_t symbol_stub_byte_size = m_mach_sections[sect_idx].reserved2;
4769  if (symbol_stub_byte_size == 0)
4770  continue;
4771 
4772  const uint32_t num_symbol_stubs =
4773  m_mach_sections[sect_idx].size / symbol_stub_byte_size;
4774 
4775  if (num_symbol_stubs == 0)
4776  continue;
4777 
4778  const uint32_t symbol_stub_index_offset =
4779  m_mach_sections[sect_idx].reserved1;
4780  for (uint32_t stub_idx = 0; stub_idx < num_symbol_stubs; ++stub_idx) {
4781  const uint32_t symbol_stub_index =
4782  symbol_stub_index_offset + stub_idx;
4783  const lldb::addr_t symbol_stub_addr =
4784  m_mach_sections[sect_idx].addr +
4785  (stub_idx * symbol_stub_byte_size);
4786  lldb::offset_t symbol_stub_offset = symbol_stub_index * 4;
4787  if (indirect_symbol_index_data.ValidOffsetForDataOfSize(
4788  symbol_stub_offset, 4)) {
4789  const uint32_t stub_sym_id =
4790  indirect_symbol_index_data.GetU32(&symbol_stub_offset);
4791  if (stub_sym_id & (INDIRECT_SYMBOL_ABS | INDIRECT_SYMBOL_LOCAL))
4792  continue;
4793 
4794  NListIndexToSymbolIndexMap::const_iterator index_pos =
4795  m_nlist_idx_to_sym_idx.find(stub_sym_id);
4796  Symbol *stub_symbol = nullptr;
4797  if (index_pos != end_index_pos) {
4798  // We have a remapping from the original nlist index to a
4799  // current symbol index, so just look this up by index
4800  stub_symbol = symtab->SymbolAtIndex(index_pos->second);
4801  } else {
4802  // We need to lookup a symbol using the original nlist symbol
4803  // index since this index is coming from the S_SYMBOL_STUBS
4804  stub_symbol = symtab->FindSymbolByID(stub_sym_id);
4805  }
4806 
4807  if (stub_symbol) {
4808  Address so_addr(symbol_stub_addr, section_list);
4809 
4810  if (stub_symbol->GetType() == eSymbolTypeUndefined) {
4811  // Change the external symbol into a trampoline that makes
4812  // sense These symbols were N_UNDF N_EXT, and are useless
4813  // to us, so we can re-use them so we don't have to make up
4814  // a synthetic symbol for no good reason.
4815  if (resolver_addresses.find(symbol_stub_addr) ==
4816  resolver_addresses.end())
4817  stub_symbol->SetType(eSymbolTypeTrampoline);
4818  else
4819  stub_symbol->SetType(eSymbolTypeResolver);
4820  stub_symbol->SetExternal(false);
4821  stub_symbol->GetAddressRef() = so_addr;
4822  stub_symbol->SetByteSize(symbol_stub_byte_size);
4823  } else {
4824  // Make a synthetic symbol to describe the trampoline stub
4825  Mangled stub_symbol_mangled_name(stub_symbol->GetMangled());
4826  if (sym_idx >= num_syms) {
4827  sym = symtab->Resize(++num_syms);
4828  stub_symbol = nullptr; // this pointer no longer valid
4829  }
4830  sym[sym_idx].SetID(synthetic_sym_id++);
4831  sym[sym_idx].GetMangled() = stub_symbol_mangled_name;
4832  if (resolver_addresses.find(symbol_stub_addr) ==
4833  resolver_addresses.end())
4834  sym[sym_idx].SetType(eSymbolTypeTrampoline);
4835  else
4836  sym[sym_idx].SetType(eSymbolTypeResolver);
4837  sym[sym_idx].SetIsSynthetic(true);
4838  sym[sym_idx].GetAddressRef() = so_addr;
4839  add_symbol_addr(so_addr.GetFileAddress());
4840  sym[sym_idx].SetByteSize(symbol_stub_byte_size);
4841  ++sym_idx;
4842  }
4843  } else {
4844  if (log)
4845  log->Warning("symbol stub referencing symbol table symbol "
4846  "%u that isn't in our minimal symbol table, "
4847  "fix this!!!",
4848  stub_sym_id);
4849  }
4850  }
4851  }
4852  }
4853  }
4854  }
4855  }
4856 
4857  if (!reexport_trie_entries.empty()) {
4858  for (const auto &e : reexport_trie_entries) {
4859  if (e.entry.import_name) {
4860  // Only add indirect symbols from the Trie entries if we didn't have
4861  // a N_INDR nlist entry for this already
4862  if (indirect_symbol_names.find(e.entry.name) ==
4863  indirect_symbol_names.end()) {
4864  // Make a synthetic symbol to describe re-exported symbol.
4865  if (sym_idx >= num_syms)
4866  sym = symtab->Resize(++num_syms);
4867  sym[sym_idx].SetID(synthetic_sym_id++);
4868  sym[sym_idx].GetMangled() = Mangled(e.entry.name);
4869  sym[sym_idx].SetType(eSymbolTypeReExported);
4870  sym[sym_idx].SetIsSynthetic(true);
4871  sym[sym_idx].SetReExportedSymbolName(e.entry.import_name);
4872  if (e.entry.other > 0 && e.entry.other <= dylib_files.GetSize()) {
4873  sym[sym_idx].SetReExportedSymbolSharedLibrary(
4874  dylib_files.GetFileSpecAtIndex(e.entry.other - 1));
4875  }
4876  ++sym_idx;
4877  }
4878  }
4879  }
4880  }
4881 
4882  // StreamFile s(stdout, false);
4883  // s.Printf ("Symbol table before CalculateSymbolSizes():\n");
4884  // symtab->Dump(&s, NULL, eSortOrderNone);
4885  // Set symbol byte sizes correctly since mach-o nlist entries don't have
4886  // sizes
4887  symtab->CalculateSymbolSizes();
4888 
4889  // s.Printf ("Symbol table after CalculateSymbolSizes():\n");
4890  // symtab->Dump(&s, NULL, eSortOrderNone);
4891 
4892  return symtab->GetNumSymbols();
4893 }
4894 
// Body of a dump routine that writes a description of this object file to the
// stream `s`. While holding the module mutex it prints, in order: this
// object's address, "ObjectFileMachO64" or "ObjectFileMachO32" depending on the
// header magic, the file, one triple per entry of `all_specs`, the section
// list, and (when present) the symbol table. Nothing is printed when the
// module is gone.
// NOTE(review): the signature line is absent from this listing.
4896  ModuleSP module_sp(GetModule());
4897  if (module_sp) {
4898  std::lock_guard<std::recursive_mutex> guard(module_sp->GetMutex());
4899  s->Printf("%p: ", static_cast<void *>(this));
4900  s->Indent();
4901  if (m_header.magic == MH_MAGIC_64 || m_header.magic == MH_CIGAM_64)
4902  s->PutCString("ObjectFileMachO64");
4903  else
4904  s->PutCString("ObjectFileMachO32");
4905 
4906  *s << ", file = '" << m_file;
4907  ModuleSpecList all_specs;
4908  ModuleSpec base_spec;
// NOTE(review): the beginning of the call that fills `all_specs` is missing
// from this listing; only its trailing arguments remain on the next line.
4910  base_spec, all_specs);
4911  for (unsigned i = 0, e = all_specs.GetSize(); i != e; ++i) {
// The quote opened after "file = " is closed here, i.e. only when at least
// one spec exists.
4912  *s << "', triple";
// `e` is never zero inside this loop, so the index is always printed.
4913  if (e)
4914  s->Printf("[%d]", i);
4915  *s << " = ";
4916  *s << all_specs.GetModuleSpecRefAtIndex(i)
4917  .GetArchitecture()
4918  .GetTriple()
4919  .getTriple();
4920  }
4921  *s << "\n";
4922  SectionList *sections = GetSectionList();
4923  if (sections)
4924  sections->Dump(s->AsRawOstream(), s->GetIndentLevel(), nullptr, true,
4925  UINT32_MAX);
4926 
4927  if (m_symtab_up)
4928  m_symtab_up->Dump(s, nullptr, eSortOrderNone);
4929  }
4930 }
4931 
4932 UUID ObjectFileMachO::GetUUID(const llvm::MachO::mach_header &header,
4933  const lldb_private::DataExtractor &data,
4934  lldb::offset_t lc_offset) {
4935  uint32_t i;
4936  llvm::MachO::uuid_command load_cmd;
4937 
4938  lldb::offset_t offset = lc_offset;
4939  for (i = 0; i < header.ncmds; ++i) {
4940  const lldb::offset_t cmd_offset = offset;
4941  if (data.GetU32(&offset, &load_cmd, 2) == nullptr)
4942  break;
4943 
4944  if (load_cmd.cmd == LC_UUID) {
4945  const uint8_t *uuid_bytes = data.PeekData(offset, 16);
4946 
4947  if (uuid_bytes) {
4948  // OpenCL on Mac OS X uses the same UUID for each of its object files.
4949  // We pretend these object files have no UUID to prevent crashing.
4950 
4951  const uint8_t opencl_uuid[] = {0x8c, 0x8e, 0xb3, 0x9b, 0x3b, 0xa8,
4952  0x4b, 0x16, 0xb6, 0xa4, 0x27, 0x63,
4953  0xbb, 0x14, 0xf0, 0x0d};
4954 
4955  if (!memcmp(uuid_bytes, opencl_uuid, 16))
4956  return UUID();
4957 
4958  return UUID::fromOptionalData(uuid_bytes, 16);
4959  }
4960  return UUID();
4961  }
4962  offset = cmd_offset + load_cmd.cmdsize;
4963  }
4964  return UUID();
4965 }
4966 
4967 static llvm::StringRef GetOSName(uint32_t cmd) {
4968  switch (cmd) {
4969  case llvm::MachO::LC_VERSION_MIN_IPHONEOS:
4970  return llvm::Triple::getOSTypeName(llvm::Triple::IOS);
4971  case llvm::MachO::LC_VERSION_MIN_MACOSX:
4972  return llvm::Triple::getOSTypeName(llvm::Triple::MacOSX);
4973  case llvm::MachO::LC_VERSION_MIN_TVOS:
4974  return llvm::Triple::getOSTypeName(llvm::Triple::TvOS);
4975  case llvm::MachO::LC_VERSION_MIN_WATCHOS:
4976  return llvm::Triple::getOSTypeName(llvm::Triple::WatchOS);
4977  default:
4978  llvm_unreachable("unexpected LC_VERSION load command");
4979  }
4980 }
4981 
4982 namespace {
// Maps a platform value (as passed from an LC_BUILD_VERSION command) onto the
// OS name and, for Mac Catalyst and the simulators, the environment name used
// when building a triple. Both strings are left empty for any platform that is
// not listed in the switch.
4983 struct OSEnv {
4984  llvm::StringRef os_type;
4985  llvm::StringRef environment;
4986  OSEnv(uint32_t cmd) {
4987  switch (cmd) {
4988  case llvm::MachO::PLATFORM_MACOS:
4989  os_type = llvm::Triple::getOSTypeName(llvm::Triple::MacOSX);
4990  return;
4991  case llvm::MachO::PLATFORM_IOS:
4992  os_type = llvm::Triple::getOSTypeName(llvm::Triple::IOS);
4993  return;
4994  case llvm::MachO::PLATFORM_TVOS:
4995  os_type = llvm::Triple::getOSTypeName(llvm::Triple::TvOS);
4996  return;
4997  case llvm::MachO::PLATFORM_WATCHOS:
4998  os_type = llvm::Triple::getOSTypeName(llvm::Triple::WatchOS);
4999  return;
5000  // NEED_BRIDGEOS_TRIPLE case llvm::MachO::PLATFORM_BRIDGEOS:
5001  // NEED_BRIDGEOS_TRIPLE os_type =
5002  // llvm::Triple::getOSTypeName(llvm::Triple::BridgeOS);
5003  // NEED_BRIDGEOS_TRIPLE return;
// Mac Catalyst is reported as iOS with the MacABI environment.
5004  case llvm::MachO::PLATFORM_MACCATALYST:
5005  os_type = llvm::Triple::getOSTypeName(llvm::Triple::IOS);
5006  environment = llvm::Triple::getEnvironmentTypeName(llvm::Triple::MacABI);
5007  return;
// Each simulator platform shares the OS name of its device counterpart and
// adds the Simulator environment.
5008  case llvm::MachO::PLATFORM_IOSSIMULATOR:
5009  os_type = llvm::Triple::getOSTypeName(llvm::Triple::IOS);
5010  environment =
5011  llvm::Triple::getEnvironmentTypeName(llvm::Triple::Simulator);
5012  return;
5013  case llvm::MachO::PLATFORM_TVOSSIMULATOR:
5014  os_type = llvm::Triple::getOSTypeName(llvm::Triple::TvOS);
5015  environment =
5016  llvm::Triple::getEnvironmentTypeName(llvm::Triple::Simulator);
5017  return;
5018  case llvm::MachO::PLATFORM_WATCHOSSIMULATOR:
5019  os_type = llvm::Triple::getOSTypeName(llvm::Triple::WatchOS);
5020  environment =
5021  llvm::Triple::getEnvironmentTypeName(llvm::Triple::Simulator);
5022  return;
// Unknown platform: only a log message is emitted.
// NOTE(review): the declaration of `log` is not present in this listing.
5023  default: {
5026  LLDB_LOGF(log, "unsupported platform in LC_BUILD_VERSION");
5027  }
5028  }
5029  }
5030 };
5031 
/// Splits a packed 32-bit version number into its three components: the upper
/// 16 bits are the major version, the next 8 bits the minor version and the
/// low 8 bits the patch version.
struct MinOS {
  uint32_t major_version, minor_version, patch_version;
  MinOS(uint32_t version) {
    major_version = version >> 16;
    minor_version = (version >> 8) & 0xffu;
    patch_version = version & 0xffu;
  }
};
5038 } // namespace
5039 
5040 void ObjectFileMachO::GetAllArchSpecs(const llvm::MachO::mach_header &header,
5041  const lldb_private::DataExtractor &data,
5042  lldb::offset_t lc_offset,
5043  ModuleSpec &base_spec,
5044  lldb_private::ModuleSpecList &all_specs) {
5045  auto &base_arch = base_spec.GetArchitecture();
5046  base_arch.SetArchitecture(eArchTypeMachO, header.cputype, header.cpusubtype);
5047  if (!base_arch.IsValid())
5048  return;
5049 
5050  bool found_any = false;
5051  auto add_triple = [&](const llvm::Triple &triple) {
5052  auto spec = base_spec;
5053  spec.GetArchitecture().GetTriple() = triple;
5054  if (spec.GetArchitecture().IsValid()) {
5055  spec.GetUUID() = ObjectFileMachO::GetUUID(header, data, lc_offset);
5056  all_specs.Append(spec);
5057  found_any = true;
5058  }
5059  };
5060 
5061  // Set OS to an unspecified unknown or a "*" so it can match any OS
5062  llvm::Triple base_triple = base_arch.GetTriple();
5063  base_triple.setOS(llvm::Triple::UnknownOS);
5064  base_triple.setOSName(llvm::StringRef());
5065 
5066  if (header.filetype == MH_PRELOAD) {
5067  if (header.cputype == CPU_TYPE_ARM) {
5068  // If this is a 32-bit arm binary, and it's a standalone binary, force
5069  // the Vendor to Apple so we don't accidentally pick up the generic
5070  // armv7 ABI at runtime. Apple's armv7 ABI always uses r7 for the
5071  // frame pointer register; most other armv7 ABIs use a combination of
5072  // r7 and r11.
5073  base_triple.setVendor(llvm::Triple::Apple);
5074  } else {
5075  // Set vendor to an unspecified unknown or a "*" so it can match any
5076  // vendor This is required for correct behavior of EFI debugging on
5077  // x86_64
5078  base_triple.setVendor(llvm::Triple::UnknownVendor);
5079  base_triple.setVendorName(llvm::StringRef());
5080  }
5081  return add_triple(base_triple);
5082  }
5083 
5084  llvm::MachO::load_command load_cmd;
5085 
5086  // See if there is an LC_VERSION_MIN_* load command that can give
5087  // us the OS type.
5088  lldb::offset_t offset = lc_offset;
5089  for (uint32_t i = 0; i < header.ncmds; ++i) {
5090  const lldb::offset_t cmd_offset = offset;
5091  if (data.GetU32(&offset, &load_cmd, 2) == NULL)
5092  break;
5093 
5094  llvm::MachO::version_min_command version_min;
5095  switch (load_cmd.cmd) {
5096  case llvm::MachO::LC_VERSION_MIN_MACOSX:
5097  case llvm::MachO::LC_VERSION_MIN_IPHONEOS:
5098  case llvm::MachO::LC_VERSION_MIN_TVOS:
5099  case llvm::MachO::LC_VERSION_MIN_WATCHOS: {
5100  if (load_cmd.cmdsize != sizeof(version_min))
5101  break;
5102  if (data.ExtractBytes(cmd_offset, sizeof(version_min),
5103  data.GetByteOrder(), &version_min) == 0)
5104  break;
5105  MinOS min_os(version_min.version);
5106  llvm::SmallString<32> os_name;
5107  llvm::raw_svector_ostream os(os_name);
5108  os << GetOSName(load_cmd.cmd) << min_os.major_version << '.'
5109  << min_os.minor_version << '.' << min_os.patch_version;
5110 
5111  auto triple = base_triple;
5112  triple.setOSName(os.str());
5113 
5114  // Disambiguate legacy simulator platforms.
5115  if (load_cmd.cmd != llvm::MachO::LC_VERSION_MIN_MACOSX &&
5116  (base_triple.getArch() == llvm::Triple::x86_64 ||
5117  base_triple.getArch() == llvm::Triple::x86)) {
5118  // The combination of legacy LC_VERSION_MIN load command and
5119  // x86 architecture always indicates a simulator environment.
5120  // The combination of LC_VERSION_MIN and arm architecture only
5121  // appears for native binaries. Back-deploying simulator
5122  // binaries on Apple Silicon Macs use the modern unambigous
5123  // LC_BUILD_VERSION load commands; no special handling required.
5124  triple.setEnvironment(llvm::Triple::Simulator);
5125  }
5126  add_triple(triple);
5127  break;
5128  }
5129  default:
5130  break;
5131  }
5132 
5133  offset = cmd_offset + load_cmd.cmdsize;
5134  }
5135 
5136  // See if there are LC_BUILD_VERSION load commands that can give
5137  // us the OS type.
5138  offset = lc_offset;
5139  for (uint32_t i = 0; i < header.ncmds; ++i) {
5140  const lldb::offset_t cmd_offset = offset;
5141  if (data.GetU32(&offset, &load_cmd, 2) == NULL)
5142  break;
5143 
5144  do {
5145  if (load_cmd.cmd == llvm::MachO::LC_BUILD_VERSION) {
5146  llvm::MachO::build_version_command build_version;
5147  if (load_cmd.cmdsize < sizeof(build_version)) {
5148  // Malformed load command.
5149  break;
5150  }
5151  if (data.ExtractBytes(cmd_offset, sizeof(build_version),
5152  data.GetByteOrder(), &build_version) == 0)
5153  break;
5154  MinOS min_os(build_version.minos);
5155  OSEnv os_env(build_version.platform);
5156  llvm::SmallString<16> os_name;
5157  llvm::raw_svector_ostream os(os_name);
5158  os << os_env.os_type << min_os.major_version << '.'
5159  << min_os.minor_version << '.' << min_os.patch_version;
5160  auto triple = base_triple;
5161  triple.setOSName(os.str());
5162  os_name.clear();
5163  if (!os_env.environment.empty())
5164  triple.setEnvironmentName(os_env.environment);
5165  add_triple(triple);
5166  }
5167  } while (0);
5168  offset = cmd_offset + load_cmd.cmdsize;
5169  }
5170 
5171  if (!found_any) {
5172  if (header.filetype == MH_KEXT_BUNDLE) {
5173  base_triple.setVendor(llvm::Triple::Apple);
5174  add_triple(base_triple);
5175  } else {
5176  // We didn't find a LC_VERSION_MIN load command and this isn't a KEXT
5177  // so lets not say our Vendor is Apple, leave it as an unspecified
5178  // unknown.
5179  base_triple.setVendor(llvm::Triple::UnknownVendor);
5180  base_triple.setVendorName(llvm::StringRef());
5181  add_triple(base_triple);
5182  }
5183  }
5184 }
5185 
// Picks a single architecture from the specs produced by GetAllArchSpecs().
// When `module_sp` is set, the first spec whose architecture is a compatible
// match for the module's architecture is returned. Otherwise, or when none
// matches, the first spec's architecture is returned. With no specs at all,
// a default-constructed value is returned.
// NOTE(review): the line carrying the return type and function name is absent
// from this listing. In the visible lines `lc_offset` is never read; the load
// command offset is derived from `header.magic` instead.
5187  ModuleSP module_sp, const llvm::MachO::mach_header &header,
5188  const lldb_private::DataExtractor &data, lldb::offset_t lc_offset) {
5189  ModuleSpecList all_specs;
5190  ModuleSpec base_spec;
5191  GetAllArchSpecs(header, data, MachHeaderSizeFromMagic(header.magic),
5192  base_spec, all_specs);
5193 
5194  // If the object file offers multiple alternative load commands,
5195  // pick the one that matches the module.
5196  if (module_sp) {
5197  const ArchSpec &module_arch = module_sp->GetArchitecture();
5198  for (unsigned i = 0, e = all_specs.GetSize(); i != e; ++i) {
// NOTE(review): the initializer of `mach_arch` is missing from this listing;
// presumably it is the architecture of the i-th spec - confirm.
5199  ArchSpec mach_arch =
5201  if (module_arch.IsCompatibleMatch(mach_arch))
5202  return mach_arch;
5203  }
5204  }
5205 
5206  // Return the first arch we found.
5207  if (all_specs.GetSize() == 0)
5208  return {};
5209  return all_specs.GetModuleSpecRefAtIndex(0).GetArchitecture();
5210 }
5211 
// Returns the UUID read from this object's own header and data by the
// three-argument GetUUID(), while holding the module mutex. Returns an empty
// UUID when the module is gone.
// NOTE(review): the signature line and the declaration of `offset` are absent
// from this listing.
5213  ModuleSP module_sp(GetModule());
5214  if (module_sp) {
5215  std::lock_guard<std::recursive_mutex> guard(module_sp->GetMutex());
5217  return GetUUID(m_header, m_data, offset);
5218  }
5219  return UUID();
5220 }
5221 
// Collects the files named by this image's dylib/dylinker load commands into
// `files` and returns how many entries were newly appended.
//
// Plain paths are appended as-is. "@rpath"-relative paths are expanded against
// every LC_RPATH value and appended only if the resulting file exists.
// "@executable_path"-relative paths are resolved only when this object file is
// itself the executable. Any other '@'-prefixed path is ignored.
// NOTE(review): the signature line and the declaration of `offset` are absent
// from this listing.
5223  uint32_t count = 0;
5224  ModuleSP module_sp(GetModule());
5225  if (module_sp) {
5226  std::lock_guard<std::recursive_mutex> guard(module_sp->GetMutex());
5227  llvm::MachO::load_command load_cmd;
5229  std::vector<std::string> rpath_paths;
5230  std::vector<std::string> rpath_relative_paths;
5231  std::vector<std::string> at_exec_relative_paths;
5232  uint32_t i;
// Pass 1: walk the load commands and sort every path into one of the three
// lists above, or straight into `files`.
5233  for (i = 0; i < m_header.ncmds; ++i) {
5234  const uint32_t cmd_offset = offset;
5235  if (m_data.GetU32(&offset, &load_cmd, 2) == nullptr)
5236  break;
5237 
5238  switch (load_cmd.cmd) {
5239  case LC_RPATH:
5240  case LC_LOAD_DYLIB:
5241  case LC_LOAD_WEAK_DYLIB:
5242  case LC_REEXPORT_DYLIB:
5243  case LC_LOAD_DYLINKER:
5244  case LC_LOADFVMLIB:
5245  case LC_LOAD_UPWARD_DYLIB: {
// The first payload word is the offset of the path string, relative to the
// start of the load command.
5246  uint32_t name_offset = cmd_offset + m_data.GetU32(&offset);
5247  const char *path = m_data.PeekCStr(name_offset);
5248  if (path) {
5249  if (load_cmd.cmd == LC_RPATH)
5250  rpath_paths.push_back(path);
5251  else {
5252  if (path[0] == '@') {
// Only the part after the "@rpath" / "@executable_path" prefix is stored.
5253  if (strncmp(path, "@rpath", strlen("@rpath")) == 0)
5254  rpath_relative_paths.push_back(path + strlen("@rpath"));
5255  else if (strncmp(path, "@executable_path",
5256  strlen("@executable_path")) == 0)
5257  at_exec_relative_paths.push_back(path +
5258  strlen("@executable_path"));
5259  } else {
5260  FileSpec file_spec(path);
5261  if (files.AppendIfUnique(file_spec))
5262  count++;
5263  }
5264  }
5265  }
5266  } break;
5267 
5268  default:
5269  break;
5270  }
5271  offset = cmd_offset + load_cmd.cmdsize;
5272  }
5273 
5274  FileSpec this_file_spec(m_file);
5275  FileSystem::Instance().Resolve(this_file_spec);
5276 
5277  if (!rpath_paths.empty()) {
5278  // Fixup all LC_RPATH values to be absolute paths
5279  std::string loader_path("@loader_path");
5280  std::string executable_path("@executable_path");
// Both prefixes are replaced with the directory of this object file.
5281  for (auto &rpath : rpath_paths) {
5282  if (llvm::StringRef(rpath).startswith(loader_path)) {
5283  rpath.erase(0, loader_path.size());
5284  rpath.insert(0, this_file_spec.GetDirectory().GetCString());
5285  } else if (llvm::StringRef(rpath).startswith(executable_path)) {
5286  rpath.erase(0, executable_path.size());
5287  rpath.insert(0, this_file_spec.GetDirectory().GetCString());
5288  }
5289  }
5290 
// Pass 2: for each "@rpath" path, try the rpaths in order and stop at the
// first candidate that exists and is newly appended to `files`.
5291  for (const auto &rpath_relative_path : rpath_relative_paths) {
5292  for (const auto &rpath : rpath_paths) {
5293  std::string path = rpath;
5294  path += rpath_relative_path;
5295  // It is OK to resolve this path because we must find a file on disk
5296  // for us to accept it anyway if it is rpath relative.
5297  FileSpec file_spec(path);
5298  FileSystem::Instance().Resolve(file_spec);
5299  if (FileSystem::Instance().Exists(file_spec) &&
5300  files.AppendIfUnique(file_spec)) {
5301  count++;
5302  break;
5303  }
5304  }
5305  }
5306  }
5307 
5308  // We may have @executable_paths but no RPATHS. Figure those out here.
5309  // Only do this if this object file is the executable. We have no way to
5310  // get back to the actual executable otherwise, so we won't get the right
5311  // path.
5312  if (!at_exec_relative_paths.empty() && CalculateType() == eTypeExecutable) {
5313  FileSpec exec_dir = this_file_spec.CopyByRemovingLastPathComponent();
5314  for (const auto &at_exec_relative_path : at_exec_relative_paths) {
5315  FileSpec file_spec =
5316  exec_dir.CopyByAppendingPathComponent(at_exec_relative_path);
5317  if (FileSystem::Instance().Exists(file_spec) &&
5318  files.AppendIfUnique(file_spec))
5319  count++;
5320  }
5321  }
5322  }
5323  return count;
5324 }
5325 
// Locates the entry point of this image and returns m_entry_point_address.
//
// The start address is taken from the first LC_UNIXTHREAD/LC_THREAD command
// whose register state has the expected flavor for the CPU type, or from
// LC_MAIN (offset from the "__TEXT" segment). For a dynamic loader with no
// such command the "_dyld_start" symbol is used. If no start address was
// found at all, the code falls back to a symbol named "start".
// NOTE(review): the signature line and several other lines are absent from
// this listing; the gaps are marked below.
5327  // If the object file is not an executable it can't hold the entry point.
5328  // m_entry_point_address is initialized to an invalid address, so we can just
5329  // return that. If m_entry_point_address is valid it means we've found it
5330  // already, so return the cached value.
5331 
// NOTE(review): the second operand of this condition is missing from the
// listing; per the comment above it presumably tests whether the cached
// address is already valid - confirm.
5332  if ((!IsExecutable() && !IsDynamicLoader()) ||
5334  return m_entry_point_address;
5335  }
5336 
5337  // Otherwise, look for the UnixThread or Thread command. The data for the
5338  // Thread command is given in /usr/include/mach-o.h, but it is basically:
5339  //
5340  // uint32_t flavor - this is the flavor argument you would pass to
5341  // thread_get_state
5342  // uint32_t count - this is the count of longs in the thread state data
5343  // struct XXX_thread_state state - this is the structure from
5344  // <machine/thread_status.h> corresponding to the flavor.
5345  // <repeat this trio>
5346  //
5347  // So we just keep reading the various register flavors till we find the GPR
5348  // one, then read the PC out of there.
5349  // FIXME: We will need to have a "RegisterContext data provider" class at some
5350  // point that can get all the registers
5351  // out of data in this form & attach them to a given thread. That should
5352  // underlie the MacOS X User process plugin, and we'll also need it for the
5353  // MacOS X Core File process plugin. When we have that we can also use it
5354  // here.
5355  //
5356  // For now we hard-code the offsets and flavors we need:
5357  //
5358  //
5359 
5360  ModuleSP module_sp(GetModule());
5361  if (module_sp) {
5362  std::lock_guard<std::recursive_mutex> guard(module_sp->GetMutex());
5363  llvm::MachO::load_command load_cmd;
// NOTE(review): the declaration of `offset` is missing from this listing.
5365  uint32_t i;
5366  lldb::addr_t start_address = LLDB_INVALID_ADDRESS;
5367  bool done = false;
5368 
5369  for (i = 0; i < m_header.ncmds; ++i) {
5370  const lldb::offset_t cmd_offset = offset;
5371  if (m_data.GetU32(&offset, &load_cmd, 2) == nullptr)
5372  break;
5373 
5374  switch (load_cmd.cmd) {
5375  case LC_UNIXTHREAD:
5376  case LC_THREAD: {
// Walk the (flavor, count, state) records inside this thread command.
5377  while (offset < cmd_offset + load_cmd.cmdsize) {
5378  uint32_t flavor = m_data.GetU32(&offset);
5379  uint32_t count = m_data.GetU32(&offset);
5380  if (count == 0) {
5381  // We've gotten off somehow, log and exit;
5382  return m_entry_point_address;
5383  }
5384 
5385  switch (m_header.cputype) {
5386  case llvm::MachO::CPU_TYPE_ARM:
5387  if (flavor == 1 ||
5388  flavor == 9) // ARM_THREAD_STATE/ARM_THREAD_STATE32
5389  // from mach/arm/thread_status.h
5390  {
5391  offset += 60; // This is the offset of pc in the GPR thread state
5392  // data structure.
5393  start_address = m_data.GetU32(&offset);
5394  done = true;
5395  }
5396  break;
// NOTE(review): the case label(s) for the following branch are missing from
// this listing; the flavor comment indicates a 64-bit ARM thread state.
5399  if (flavor == 6) // ARM_THREAD_STATE64 from mach/arm/thread_status.h
5400  {
5401  offset += 256; // This is the offset of pc in the GPR thread state
5402  // data structure.
5403  start_address = m_data.GetU64(&offset);
5404  done = true;
5405  }
5406  break;
5407  case llvm::MachO::CPU_TYPE_I386:
5408  if (flavor ==
5409  1) // x86_THREAD_STATE32 from mach/i386/thread_status.h
5410  {
5411  offset += 40; // This is the offset of eip in the GPR thread state
5412  // data structure.
5413  start_address = m_data.GetU32(&offset);
5414  done = true;
5415  }
5416  break;
5417  case llvm::MachO::CPU_TYPE_X86_64:
5418  if (flavor ==
5419  4) // x86_THREAD_STATE64 from mach/i386/thread_status.h
5420  {
5421  offset += 16 * 8; // This is the offset of rip in the GPR thread
5422  // state data structure.
5423  start_address = m_data.GetU64(&offset);
5424  done = true;
5425  }
5426  break;
5427  default:
5428  return m_entry_point_address;
5429  }
5430  // Haven't found the GPR flavor yet, skip over the data for this
5431  // flavor:
5432  if (done)
5433  break;
// `count` is in 32-bit units, hence the factor of four.
5434  offset += count * 4;
5435  }
5436  } break;
5437  case LC_MAIN: {
// The first 64-bit field of LC_MAIN is added to the file address of the
// "__TEXT" segment; without that segment the command is ignored.
5438  ConstString text_segment_name("__TEXT");
5439  uint64_t entryoffset = m_data.GetU64(&offset);
5440  SectionSP text_segment_sp =
5441  GetSectionList()->FindSectionByName(text_segment_name);
5442  if (text_segment_sp) {
5443  done = true;
5444  start_address = text_segment_sp->GetFileAddress() + entryoffset;
5445  }
5446  } break;
5447 
5448  default:
5449  break;
5450  }
5451  if (done)
5452  break;
5453 
5454  // Go to the next load command:
5455  offset = cmd_offset + load_cmd.cmdsize;
5456  }
5457 
// A dynamic loader without a usable thread/main command: use "_dyld_start".
5458  if (start_address == LLDB_INVALID_ADDRESS && IsDynamicLoader()) {
5459  if (GetSymtab()) {
5460  Symbol *dyld_start_sym = GetSymtab()->FindFirstSymbolWithNameAndType(
5461  ConstString("_dyld_start"), SymbolType::eSymbolTypeCode,
5462  Symtab::eDebugAny, Symtab::eVisibilityAny);
5463  if (dyld_start_sym && dyld_start_sym->GetAddress().IsValid()) {
5464  start_address = dyld_start_sym->GetAddress().GetFileAddress();
5465  }
5466  }
5467  }
5468 
5469  if (start_address != LLDB_INVALID_ADDRESS) {
5470  // We got the start address from the load commands, so now resolve that
5471  // address in the sections of this ObjectFile:
// NOTE(review): the start of this condition and the statement inside its
// body are missing from this listing.
5473  start_address, GetSectionList())) {
5475  }
5476  } else {
5477  // We couldn't read the UnixThread load command - maybe it wasn't there.
5478  // As a fallback look for the "start" symbol in the main executable.
5479 
5480  ModuleSP module_sp(GetModule());
5481 
5482  if (module_sp) {
5483  SymbolContextList contexts;
5484  SymbolContext context;
5485  module_sp->FindSymbolsWithNameAndType(ConstString("start"),
5486  eSymbolTypeCode, contexts);
5487  if (contexts.GetSize()) {
// NOTE(review): the statement guarded by this `if` is missing from this
// listing.
5488  if (contexts.GetContextAtIndex(0, context))
5490  }
5491  }
5492  }
5493  }
5494 
5495  return m_entry_point_address;
5496 }
5497 
// Returns an Address at offset 0 of the section named by GetSegmentNameTEXT().
// The returned Address is default-constructed when there is no section list
// or no such section.
// NOTE(review): the signature line is absent from this listing.
5499  lldb_private::Address header_addr;
5500  SectionList *section_list = GetSectionList();
5501  if (section_list) {
5502  SectionSP text_segment_sp(
5503  section_list->FindSectionByName(GetSegmentNameTEXT()));
5504  if (text_segment_sp) {
5505  header_addr.SetSection(text_segment_sp);
5506  header_addr.SetOffset(0);
5507  }
5508  }
5509  return header_addr;
5510 }
5511 
// Walks the Mach-O load commands and, for every LC_THREAD command, appends the
// file range of that command's payload to m_thread_context_offsets. The walk
// is done while holding the owning module's recursive mutex.
//
// NOTE(review): this listing jumps from 5515 to 5519 and from 5534 to 5536,
// and the signature line is absent. The declaration/initialisation of
// `offset`, the opener of the block closed at 5533, and the return statement
// are therefore not visible here -- confirm against the real file before
// changing this function.
5513  ModuleSP module_sp(GetModule());
5514  if (module_sp) {
5515  std::lock_guard<std::recursive_mutex> guard(module_sp->GetMutex());
5519  FileRangeArray::Entry file_range;
5520  llvm::MachO::thread_command thread_cmd;
5521  for (uint32_t i = 0; i < m_header.ncmds; ++i) {
// Remember where this command starts so the next one can be located from
// its cmdsize regardless of how much of the command was consumed.
5522  const uint32_t cmd_offset = offset;
// Read two 32-bit words into the start of thread_cmd (used below as cmd and
// cmdsize); stop scanning at the first failed read.
5523  if (m_data.GetU32(&offset, &thread_cmd, 2) == nullptr)
5524  break;
5525 
5526  if (thread_cmd.cmd == LC_THREAD) {
// The recorded range starts at the current offset -- presumably just past
// the two words read above -- and is 8 bytes (those two words) shorter than
// the whole command.
// NOTE(review): `cmdsize - 8` wraps for a malformed cmdsize < 8 -- TODO
// confirm whether cmdsize is validated elsewhere.
5527  file_range.SetRangeBase(offset);
5528  file_range.SetByteSize(thread_cmd.cmdsize - 8);
5529  m_thread_context_offsets.Append(file_range);
5530  }
// Step to the next load command.
5531  offset = cmd_offset + thread_cmd.cmdsize;
5532  }
5533  }
5534  }
5536 }
5537 
5539  std::string result;
5540  ModuleSP module_sp(GetModule());
5541  if (module_sp) {
5542  std::lock_guard<std::recursive_mutex> guard(module_sp->GetMutex());
5543 
5544  // First, look over the load commands for an LC_NOTE load command with
5545  // data_owner string "kern ver str" & use that if found.
5547  for (uint32_t i = 0; i < m_header.ncmds; ++i) {
5548  const uint32_t cmd_offset = offset;
5549  llvm::MachO::load_command lc;
5550  if (m_data.GetU32(&offset, &lc.cmd, 2) == nullptr)
5551  break;
5552  if (lc.cmd == LC_NOTE) {
5553  char data_owner[17];
5554  m_data.CopyData(offset, 16, data_owner);
5555  data_owner[16] = '\0';
5556  offset += 16;
5557  uint64_t fileoff = m_data.GetU64_unchecked(&offset);
5558  uint64_t size = m_data.GetU64_unchecked(&offset);
5559 
5560  // "kern ver str" has a uint32_t version and then a nul terminated
5561  // c-string.
5562  if (strcmp("kern ver str", data_owner) == 0) {
5563  offset = fileoff;
5564  uint32_t version;
5565  if (m_data.GetU32(&offset, &version, 1) != nullptr) {
5566  if (version == 1) {
5567  uint32_t strsize = size - sizeof(uint32_t);
5568  char *buf = (char *)malloc(strsize);
5569  if (buf) {
5570  m_data.CopyData(offset, strsize, buf);
5571  buf[strsize - 1] = '\0';
5572  result = buf;
5573  if (buf)
5574  free(buf);
5575  return result;
5576  }
5577  }
5578  }
5579  }
5580  }
5581  offset = cmd_offset + lc.cmdsize;
5582  }
5583 
5584  // Second, make a pass over the load commands looking for an obsolete
5585  // LC_IDENT load command.
5586  offset = MachHeaderSizeFromMagic(m_header.magic);
5587  for (uint32_t i = 0; i < m_header.ncmds; ++i) {
5588  const uint32_t cmd_offset = offset;
5589  llvm::MachO::ident_command ident_command;
5590  if (m_data.GetU32(&offset, &ident_command, 2) == nullptr)
5591  break;
5592  if (ident_command.cmd == LC_IDENT && ident_command.cmdsize != 0) {
5593  char *buf = (char *)malloc(ident_command.cmdsize);
5594  if (buf != nullptr && m_data.CopyData(offset, ident_command.cmdsize,
5595  buf) == ident_command.cmdsize) {
5596  buf[ident_command.cmdsize - 1] = '\0';
5597  result = buf;
5598  }
5599  if (buf)
5600  free(buf);
5601  }
5602  offset = cmd_offset + ident_command.cmdsize;
5603  }
5604  }
5605  return result;
5606 }
5607 
5609  addr_t mask = 0;
5610  ModuleSP module_sp(GetModule());
5611  if (module_sp) {
5612  std::lock_guard<std::recursive_mutex> guard(module_sp->GetMutex());
5614  for (uint32_t i = 0; i < m_header.ncmds; ++i) {
5615  const uint32_t cmd_offset = offset;
5616  llvm::MachO::load_command lc;
5617  if (m_data.GetU32(&offset, &lc.cmd, 2) == nullptr)
5618  break;
5619  if (lc.cmd == LC_NOTE) {
5620  char data_owner[17];
5621  m_data.CopyData(offset, 16, data_owner);
5622  data_owner[16] = '\0';
5623  offset += 16;
5624  uint64_t fileoff = m_data.GetU64_unchecked(&offset);
5625 
5626  // "addrable bits" has a uint32_t version and a uint32_t
5627  // number of bits used in addressing.
5628  if (strcmp("addrable bits", data_owner) == 0) {
5629  offset = fileoff;
5630  uint32_t version;
5631  if (m_data.GetU32(&offset, &version, 1) != nullptr) {
5632  if (version == 3) {
5633  uint32_t num_addr_bits = m_data.GetU32_unchecked(&offset);
5634  if (num_addr_bits != 0) {
5635  mask = ~((1ULL << num_addr_bits) - 1);
5636  }
5637  break;
5638  }
5639  }
5640  }
5641  }
5642  offset = cmd_offset + lc.cmdsize;
5643  }
5644  }
5645  return mask;
5646 }
5647 
5649  ObjectFile::BinaryType &type) {
5650  address = LLDB_INVALID_ADDRESS;
5651  uuid.Clear();
5652  ModuleSP module_sp(GetModule());
5653  if (module_sp) {
5654  std::lock_guard<std::recursive_mutex> guard(module_sp->GetMutex());
5656  for (uint32_t i = 0; i < m_header.ncmds; ++i) {
5657  const uint32_t cmd_offset = offset;
5658  llvm::MachO::load_command lc;
5659  if (m_data.GetU32(&offset, &lc.cmd, 2) == nullptr)
5660  break;
5661  if (lc.cmd == LC_NOTE) {