LLDB  mainline
ObjectFileMachO.cpp
Go to the documentation of this file.
1 //===-- ObjectFileMachO.cpp -----------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "llvm/ADT/ScopeExit.h"
10 #include "llvm/ADT/StringRef.h"
11 
16 #include "lldb/Core/Debugger.h"
17 #include "lldb/Core/FileSpecList.h"
18 #include "lldb/Core/Module.h"
19 #include "lldb/Core/ModuleSpec.h"
21 #include "lldb/Core/Progress.h"
22 #include "lldb/Core/Section.h"
23 #include "lldb/Core/StreamFile.h"
24 #include "lldb/Host/Host.h"
27 #include "lldb/Symbol/ObjectFile.h"
30 #include "lldb/Target/Platform.h"
31 #include "lldb/Target/Process.h"
33 #include "lldb/Target/Target.h"
34 #include "lldb/Target/Thread.h"
35 #include "lldb/Target/ThreadList.h"
36 #include "lldb/Utility/ArchSpec.h"
38 #include "lldb/Utility/FileSpec.h"
39 #include "lldb/Utility/LLDBLog.h"
40 #include "lldb/Utility/Log.h"
41 #include "lldb/Utility/RangeMap.h"
43 #include "lldb/Utility/Status.h"
45 #include "lldb/Utility/Timer.h"
46 #include "lldb/Utility/UUID.h"
47 
48 #include "lldb/Host/SafeMachO.h"
49 
50 #include "llvm/ADT/DenseSet.h"
51 #include "llvm/Support/FormatVariadic.h"
52 #include "llvm/Support/MemoryBuffer.h"
53 
54 #include "ObjectFileMachO.h"
55 
56 #if defined(__APPLE__)
57 #include <TargetConditionals.h>
58 // GetLLDBSharedCacheUUID() needs to call dlsym()
59 #include <dlfcn.h>
60 #include <mach/mach_init.h>
61 #include <mach/vm_map.h>
62 #include <lldb/Host/SafeMachO.h>
63 #endif
64 
65 #ifndef __APPLE__
67 #else
68 #include <uuid/uuid.h>
69 #endif
70 
71 #include <bitset>
72 #include <memory>
73 
74 // Unfortunately the signpost header pulls in the system MachO header, too.
75 #ifdef CPU_TYPE_ARM
76 #undef CPU_TYPE_ARM
77 #endif
78 #ifdef CPU_TYPE_ARM64
79 #undef CPU_TYPE_ARM64
80 #endif
81 #ifdef CPU_TYPE_ARM64_32
82 #undef CPU_TYPE_ARM64_32
83 #endif
84 #ifdef CPU_TYPE_I386
85 #undef CPU_TYPE_I386
86 #endif
87 #ifdef CPU_TYPE_X86_64
88 #undef CPU_TYPE_X86_64
89 #endif
90 #ifdef MH_DYLINKER
91 #undef MH_DYLINKER
92 #endif
93 #ifdef MH_OBJECT
94 #undef MH_OBJECT
95 #endif
96 #ifdef LC_VERSION_MIN_MACOSX
97 #undef LC_VERSION_MIN_MACOSX
98 #endif
99 #ifdef LC_VERSION_MIN_IPHONEOS
100 #undef LC_VERSION_MIN_IPHONEOS
101 #endif
102 #ifdef LC_VERSION_MIN_TVOS
103 #undef LC_VERSION_MIN_TVOS
104 #endif
105 #ifdef LC_VERSION_MIN_WATCHOS
106 #undef LC_VERSION_MIN_WATCHOS
107 #endif
108 #ifdef LC_BUILD_VERSION
109 #undef LC_BUILD_VERSION
110 #endif
111 #ifdef PLATFORM_MACOS
112 #undef PLATFORM_MACOS
113 #endif
114 #ifdef PLATFORM_MACCATALYST
115 #undef PLATFORM_MACCATALYST
116 #endif
117 #ifdef PLATFORM_IOS
118 #undef PLATFORM_IOS
119 #endif
120 #ifdef PLATFORM_IOSSIMULATOR
121 #undef PLATFORM_IOSSIMULATOR
122 #endif
123 #ifdef PLATFORM_TVOS
124 #undef PLATFORM_TVOS
125 #endif
126 #ifdef PLATFORM_TVOSSIMULATOR
127 #undef PLATFORM_TVOSSIMULATOR
128 #endif
129 #ifdef PLATFORM_WATCHOS
130 #undef PLATFORM_WATCHOS
131 #endif
132 #ifdef PLATFORM_WATCHOSSIMULATOR
133 #undef PLATFORM_WATCHOSSIMULATOR
134 #endif
135 
136 #define THUMB_ADDRESS_BIT_MASK 0xfffffffffffffffeull
137 using namespace lldb;
138 using namespace lldb_private;
139 using namespace llvm::MachO;
140 
142 
143 // Some structure definitions needed for parsing the dyld shared cache files
144 // found on iOS devices.
145 
147  char magic[16]; // e.g. "dyld_v0 i386", "dyld_v1 armv7", etc.
148  uint32_t mappingOffset; // file offset to first dyld_cache_mapping_info
149  uint32_t mappingCount; // number of dyld_cache_mapping_info entries
152  uint64_t dyldBaseAddress;
155  uint64_t slideInfoOffset;
156  uint64_t slideInfoSize;
159  uint8_t uuid[16]; // v1 and above, also recorded in dyld_all_image_infos v13
160  // and later
161 };
162 
163 static void PrintRegisterValue(RegisterContext *reg_ctx, const char *name,
164  const char *alt_name, size_t reg_byte_size,
165  Stream &data) {
166  const RegisterInfo *reg_info = reg_ctx->GetRegisterInfoByName(name);
167  if (reg_info == nullptr)
168  reg_info = reg_ctx->GetRegisterInfoByName(alt_name);
169  if (reg_info) {
170  lldb_private::RegisterValue reg_value;
171  if (reg_ctx->ReadRegister(reg_info, reg_value)) {
172  if (reg_info->byte_size >= reg_byte_size)
173  data.Write(reg_value.GetBytes(), reg_byte_size);
174  else {
175  data.Write(reg_value.GetBytes(), reg_info->byte_size);
176  for (size_t i = 0, n = reg_byte_size - reg_info->byte_size; i < n; ++i)
177  data.PutChar(0);
178  }
179  return;
180  }
181  }
182  // Just write zeros if all else fails
183  for (size_t i = 0; i < reg_byte_size; ++i)
184  data.PutChar(0);
185 }
186 
188 public:
190  const DataExtractor &data)
191  : RegisterContextDarwin_x86_64(thread, 0) {
192  SetRegisterDataFrom_LC_THREAD(data);
193  }
194 
195  void InvalidateAllRegisters() override {
196  // Do nothing... registers are always valid...
197  }
198 
200  lldb::offset_t offset = 0;
201  SetError(GPRRegSet, Read, -1);
202  SetError(FPURegSet, Read, -1);
203  SetError(EXCRegSet, Read, -1);
204  bool done = false;
205 
206  while (!done) {
207  int flavor = data.GetU32(&offset);
208  if (flavor == 0)
209  done = true;
210  else {
211  uint32_t i;
212  uint32_t count = data.GetU32(&offset);
213  switch (flavor) {
214  case GPRRegSet:
215  for (i = 0; i < count; ++i)
216  (&gpr.rax)[i] = data.GetU64(&offset);
217  SetError(GPRRegSet, Read, 0);
218  done = true;
219 
220  break;
221  case FPURegSet:
222  // TODO: fill in FPU regs....
223  // SetError (FPURegSet, Read, -1);
224  done = true;
225 
226  break;
227  case EXCRegSet:
228  exc.trapno = data.GetU32(&offset);
229  exc.err = data.GetU32(&offset);
230  exc.faultvaddr = data.GetU64(&offset);
231  SetError(EXCRegSet, Read, 0);
232  done = true;
233  break;
234  case 7:
235  case 8:
236  case 9:
237 // fancy flavors that encapsulate the above flavors...
238  break;
239 
240  default:
241  done = true;
242  break;
243  }
244  }
245  }
246  }
247 
248  static bool Create_LC_THREAD(Thread *thread, Stream &data) {
249  RegisterContextSP reg_ctx_sp(thread->GetRegisterContext());
250  if (reg_ctx_sp) {
251  RegisterContext *reg_ctx = reg_ctx_sp.get();
252 
253  data.PutHex32(GPRRegSet); // Flavor
254  data.PutHex32(GPRWordCount);
255  PrintRegisterValue(reg_ctx, "rax", nullptr, 8, data);
256  PrintRegisterValue(reg_ctx, "rbx", nullptr, 8, data);
257  PrintRegisterValue(reg_ctx, "rcx", nullptr, 8, data);
258  PrintRegisterValue(reg_ctx, "rdx", nullptr, 8, data);
259  PrintRegisterValue(reg_ctx, "rdi", nullptr, 8, data);
260  PrintRegisterValue(reg_ctx, "rsi", nullptr, 8, data);
261  PrintRegisterValue(reg_ctx, "rbp", nullptr, 8, data);
262  PrintRegisterValue(reg_ctx, "rsp", nullptr, 8, data);
263  PrintRegisterValue(reg_ctx, "r8", nullptr, 8, data);
264  PrintRegisterValue(reg_ctx, "r9", nullptr, 8, data);
265  PrintRegisterValue(reg_ctx, "r10", nullptr, 8, data);
266  PrintRegisterValue(reg_ctx, "r11", nullptr, 8, data);
267  PrintRegisterValue(reg_ctx, "r12", nullptr, 8, data);
268  PrintRegisterValue(reg_ctx, "r13", nullptr, 8, data);
269  PrintRegisterValue(reg_ctx, "r14", nullptr, 8, data);
270  PrintRegisterValue(reg_ctx, "r15", nullptr, 8, data);
271  PrintRegisterValue(reg_ctx, "rip", nullptr, 8, data);
272  PrintRegisterValue(reg_ctx, "rflags", nullptr, 8, data);
273  PrintRegisterValue(reg_ctx, "cs", nullptr, 8, data);
274  PrintRegisterValue(reg_ctx, "fs", nullptr, 8, data);
275  PrintRegisterValue(reg_ctx, "gs", nullptr, 8, data);
276 
277  // // Write out the FPU registers
278  // const size_t fpu_byte_size = sizeof(FPU);
279  // size_t bytes_written = 0;
280  // data.PutHex32 (FPURegSet);
281  // data.PutHex32 (fpu_byte_size/sizeof(uint64_t));
282  // bytes_written += data.PutHex32(0); // uint32_t pad[0]
283  // bytes_written += data.PutHex32(0); // uint32_t pad[1]
284  // bytes_written += WriteRegister (reg_ctx, "fcw", "fctrl", 2,
285  // data); // uint16_t fcw; // "fctrl"
286  // bytes_written += WriteRegister (reg_ctx, "fsw" , "fstat", 2,
287  // data); // uint16_t fsw; // "fstat"
288  // bytes_written += WriteRegister (reg_ctx, "ftw" , "ftag", 1,
289  // data); // uint8_t ftw; // "ftag"
290  // bytes_written += data.PutHex8 (0); // uint8_t pad1;
291  // bytes_written += WriteRegister (reg_ctx, "fop" , NULL, 2,
292  // data); // uint16_t fop; // "fop"
293  // bytes_written += WriteRegister (reg_ctx, "fioff", "ip", 4,
294  // data); // uint32_t ip; // "fioff"
295  // bytes_written += WriteRegister (reg_ctx, "fiseg", NULL, 2,
296  // data); // uint16_t cs; // "fiseg"
297  // bytes_written += data.PutHex16 (0); // uint16_t pad2;
298  // bytes_written += WriteRegister (reg_ctx, "dp", "fooff" , 4,
299  // data); // uint32_t dp; // "fooff"
300  // bytes_written += WriteRegister (reg_ctx, "foseg", NULL, 2,
301  // data); // uint16_t ds; // "foseg"
302  // bytes_written += data.PutHex16 (0); // uint16_t pad3;
303  // bytes_written += WriteRegister (reg_ctx, "mxcsr", NULL, 4,
304  // data); // uint32_t mxcsr;
305  // bytes_written += WriteRegister (reg_ctx, "mxcsrmask", NULL,
306  // 4, data);// uint32_t mxcsrmask;
307  // bytes_written += WriteRegister (reg_ctx, "stmm0", NULL,
308  // sizeof(MMSReg), data);
309  // bytes_written += WriteRegister (reg_ctx, "stmm1", NULL,
310  // sizeof(MMSReg), data);
311  // bytes_written += WriteRegister (reg_ctx, "stmm2", NULL,
312  // sizeof(MMSReg), data);
313  // bytes_written += WriteRegister (reg_ctx, "stmm3", NULL,
314  // sizeof(MMSReg), data);
315  // bytes_written += WriteRegister (reg_ctx, "stmm4", NULL,
316  // sizeof(MMSReg), data);
317  // bytes_written += WriteRegister (reg_ctx, "stmm5", NULL,
318  // sizeof(MMSReg), data);
319  // bytes_written += WriteRegister (reg_ctx, "stmm6", NULL,
320  // sizeof(MMSReg), data);
321  // bytes_written += WriteRegister (reg_ctx, "stmm7", NULL,
322  // sizeof(MMSReg), data);
323  // bytes_written += WriteRegister (reg_ctx, "xmm0" , NULL,
324  // sizeof(XMMReg), data);
325  // bytes_written += WriteRegister (reg_ctx, "xmm1" , NULL,
326  // sizeof(XMMReg), data);
327  // bytes_written += WriteRegister (reg_ctx, "xmm2" , NULL,
328  // sizeof(XMMReg), data);
329  // bytes_written += WriteRegister (reg_ctx, "xmm3" , NULL,
330  // sizeof(XMMReg), data);
331  // bytes_written += WriteRegister (reg_ctx, "xmm4" , NULL,
332  // sizeof(XMMReg), data);
333  // bytes_written += WriteRegister (reg_ctx, "xmm5" , NULL,
334  // sizeof(XMMReg), data);
335  // bytes_written += WriteRegister (reg_ctx, "xmm6" , NULL,
336  // sizeof(XMMReg), data);
337  // bytes_written += WriteRegister (reg_ctx, "xmm7" , NULL,
338  // sizeof(XMMReg), data);
339  // bytes_written += WriteRegister (reg_ctx, "xmm8" , NULL,
340  // sizeof(XMMReg), data);
341  // bytes_written += WriteRegister (reg_ctx, "xmm9" , NULL,
342  // sizeof(XMMReg), data);
343  // bytes_written += WriteRegister (reg_ctx, "xmm10", NULL,
344  // sizeof(XMMReg), data);
345  // bytes_written += WriteRegister (reg_ctx, "xmm11", NULL,
346  // sizeof(XMMReg), data);
347  // bytes_written += WriteRegister (reg_ctx, "xmm12", NULL,
348  // sizeof(XMMReg), data);
349  // bytes_written += WriteRegister (reg_ctx, "xmm13", NULL,
350  // sizeof(XMMReg), data);
351  // bytes_written += WriteRegister (reg_ctx, "xmm14", NULL,
352  // sizeof(XMMReg), data);
353  // bytes_written += WriteRegister (reg_ctx, "xmm15", NULL,
354  // sizeof(XMMReg), data);
355  //
356  // // Fill rest with zeros
357  // for (size_t i=0, n = fpu_byte_size - bytes_written; i<n; ++
358  // i)
359  // data.PutChar(0);
360 
361  // Write out the EXC registers
362  data.PutHex32(EXCRegSet);
363  data.PutHex32(EXCWordCount);
364  PrintRegisterValue(reg_ctx, "trapno", nullptr, 4, data);
365  PrintRegisterValue(reg_ctx, "err", nullptr, 4, data);
366  PrintRegisterValue(reg_ctx, "faultvaddr", nullptr, 8, data);
367  return true;
368  }
369  return false;
370  }
371 
372 protected:
373  int DoReadGPR(lldb::tid_t tid, int flavor, GPR &gpr) override { return 0; }
374 
375  int DoReadFPU(lldb::tid_t tid, int flavor, FPU &fpu) override { return 0; }
376 
377  int DoReadEXC(lldb::tid_t tid, int flavor, EXC &exc) override { return 0; }
378 
379  int DoWriteGPR(lldb::tid_t tid, int flavor, const GPR &gpr) override {
380  return 0;
381  }
382 
383  int DoWriteFPU(lldb::tid_t tid, int flavor, const FPU &fpu) override {
384  return 0;
385  }
386 
387  int DoWriteEXC(lldb::tid_t tid, int flavor, const EXC &exc) override {
388  return 0;
389  }
390 };
391 
393 public:
395  const DataExtractor &data)
396  : RegisterContextDarwin_i386(thread, 0) {
397  SetRegisterDataFrom_LC_THREAD(data);
398  }
399 
400  void InvalidateAllRegisters() override {
401  // Do nothing... registers are always valid...
402  }
403 
405  lldb::offset_t offset = 0;
406  SetError(GPRRegSet, Read, -1);
407  SetError(FPURegSet, Read, -1);
408  SetError(EXCRegSet, Read, -1);
409  bool done = false;
410 
411  while (!done) {
412  int flavor = data.GetU32(&offset);
413  if (flavor == 0)
414  done = true;
415  else {
416  uint32_t i;
417  uint32_t count = data.GetU32(&offset);
418  switch (flavor) {
419  case GPRRegSet:
420  for (i = 0; i < count; ++i)
421  (&gpr.eax)[i] = data.GetU32(&offset);
422  SetError(GPRRegSet, Read, 0);
423  done = true;
424 
425  break;
426  case FPURegSet:
427  // TODO: fill in FPU regs....
428  // SetError (FPURegSet, Read, -1);
429  done = true;
430 
431  break;
432  case EXCRegSet:
433  exc.trapno = data.GetU32(&offset);
434  exc.err = data.GetU32(&offset);
435  exc.faultvaddr = data.GetU32(&offset);
436  SetError(EXCRegSet, Read, 0);
437  done = true;
438  break;
439  case 7:
440  case 8:
441  case 9:
442 // fancy flavors that encapsulate the above flavors...
443  break;
444 
445  default:
446  done = true;
447  break;
448  }
449  }
450  }
451  }
452 
453  static bool Create_LC_THREAD(Thread *thread, Stream &data) {
454  RegisterContextSP reg_ctx_sp(thread->GetRegisterContext());
455  if (reg_ctx_sp) {
456  RegisterContext *reg_ctx = reg_ctx_sp.get();
457 
458  data.PutHex32(GPRRegSet); // Flavor
459  data.PutHex32(GPRWordCount);
460  PrintRegisterValue(reg_ctx, "eax", nullptr, 4, data);
461  PrintRegisterValue(reg_ctx, "ebx", nullptr, 4, data);
462  PrintRegisterValue(reg_ctx, "ecx", nullptr, 4, data);
463  PrintRegisterValue(reg_ctx, "edx", nullptr, 4, data);
464  PrintRegisterValue(reg_ctx, "edi", nullptr, 4, data);
465  PrintRegisterValue(reg_ctx, "esi", nullptr, 4, data);
466  PrintRegisterValue(reg_ctx, "ebp", nullptr, 4, data);
467  PrintRegisterValue(reg_ctx, "esp", nullptr, 4, data);
468  PrintRegisterValue(reg_ctx, "ss", nullptr, 4, data);
469  PrintRegisterValue(reg_ctx, "eflags", nullptr, 4, data);
470  PrintRegisterValue(reg_ctx, "eip", nullptr, 4, data);
471  PrintRegisterValue(reg_ctx, "cs", nullptr, 4, data);
472  PrintRegisterValue(reg_ctx, "ds", nullptr, 4, data);
473  PrintRegisterValue(reg_ctx, "es", nullptr, 4, data);
474  PrintRegisterValue(reg_ctx, "fs", nullptr, 4, data);
475  PrintRegisterValue(reg_ctx, "gs", nullptr, 4, data);
476 
477  // Write out the EXC registers
478  data.PutHex32(EXCRegSet);
479  data.PutHex32(EXCWordCount);
480  PrintRegisterValue(reg_ctx, "trapno", nullptr, 4, data);
481  PrintRegisterValue(reg_ctx, "err", nullptr, 4, data);
482  PrintRegisterValue(reg_ctx, "faultvaddr", nullptr, 4, data);
483  return true;
484  }
485  return false;
486  }
487 
488 protected:
489  int DoReadGPR(lldb::tid_t tid, int flavor, GPR &gpr) override { return 0; }
490 
491  int DoReadFPU(lldb::tid_t tid, int flavor, FPU &fpu) override { return 0; }
492 
493  int DoReadEXC(lldb::tid_t tid, int flavor, EXC &exc) override { return 0; }
494 
495  int DoWriteGPR(lldb::tid_t tid, int flavor, const GPR &gpr) override {
496  return 0;
497  }
498 
499  int DoWriteFPU(lldb::tid_t tid, int flavor, const FPU &fpu) override {
500  return 0;
501  }
502 
503  int DoWriteEXC(lldb::tid_t tid, int flavor, const EXC &exc) override {
504  return 0;
505  }
506 };
507 
509 public:
511  const DataExtractor &data)
512  : RegisterContextDarwin_arm(thread, 0) {
513  SetRegisterDataFrom_LC_THREAD(data);
514  }
515 
516  void InvalidateAllRegisters() override {
517  // Do nothing... registers are always valid...
518  }
519 
521  lldb::offset_t offset = 0;
522  SetError(GPRRegSet, Read, -1);
523  SetError(FPURegSet, Read, -1);
524  SetError(EXCRegSet, Read, -1);
525  bool done = false;
526 
527  while (!done) {
528  int flavor = data.GetU32(&offset);
529  uint32_t count = data.GetU32(&offset);
530  lldb::offset_t next_thread_state = offset + (count * 4);
531  switch (flavor) {
532  case GPRAltRegSet:
533  case GPRRegSet:
534  // On ARM, the CPSR register is also included in the count but it is
535  // not included in gpr.r so loop until (count-1).
536  for (uint32_t i = 0; i < (count - 1); ++i) {
537  gpr.r[i] = data.GetU32(&offset);
538  }
539  // Save cpsr explicitly.
540  gpr.cpsr = data.GetU32(&offset);
541 
542  SetError(GPRRegSet, Read, 0);
543  offset = next_thread_state;
544  break;
545 
546  case FPURegSet: {
547  uint8_t *fpu_reg_buf = (uint8_t *)&fpu.floats.s[0];
548  const int fpu_reg_buf_size = sizeof(fpu.floats);
549  if (data.ExtractBytes(offset, fpu_reg_buf_size, eByteOrderLittle,
550  fpu_reg_buf) == fpu_reg_buf_size) {
551  offset += fpu_reg_buf_size;
552  fpu.fpscr = data.GetU32(&offset);
553  SetError(FPURegSet, Read, 0);
554  } else {
555  done = true;
556  }
557  }
558  offset = next_thread_state;
559  break;
560 
561  case EXCRegSet:
562  if (count == 3) {
563  exc.exception = data.GetU32(&offset);
564  exc.fsr = data.GetU32(&offset);
565  exc.far = data.GetU32(&offset);
566  SetError(EXCRegSet, Read, 0);
567  }
568  done = true;
569  offset = next_thread_state;
570  break;
571 
572  // Unknown register set flavor, stop trying to parse.
573  default:
574  done = true;
575  }
576  }
577  }
578 
579  static bool Create_LC_THREAD(Thread *thread, Stream &data) {
580  RegisterContextSP reg_ctx_sp(thread->GetRegisterContext());
581  if (reg_ctx_sp) {
582  RegisterContext *reg_ctx = reg_ctx_sp.get();
583 
584  data.PutHex32(GPRRegSet); // Flavor
585  data.PutHex32(GPRWordCount);
586  PrintRegisterValue(reg_ctx, "r0", nullptr, 4, data);
587  PrintRegisterValue(reg_ctx, "r1", nullptr, 4, data);
588  PrintRegisterValue(reg_ctx, "r2", nullptr, 4, data);
589  PrintRegisterValue(reg_ctx, "r3", nullptr, 4, data);
590  PrintRegisterValue(reg_ctx, "r4", nullptr, 4, data);
591  PrintRegisterValue(reg_ctx, "r5", nullptr, 4, data);
592  PrintRegisterValue(reg_ctx, "r6", nullptr, 4, data);
593  PrintRegisterValue(reg_ctx, "r7", nullptr, 4, data);
594  PrintRegisterValue(reg_ctx, "r8", nullptr, 4, data);
595  PrintRegisterValue(reg_ctx, "r9", nullptr, 4, data);
596  PrintRegisterValue(reg_ctx, "r10", nullptr, 4, data);
597  PrintRegisterValue(reg_ctx, "r11", nullptr, 4, data);
598  PrintRegisterValue(reg_ctx, "r12", nullptr, 4, data);
599  PrintRegisterValue(reg_ctx, "sp", nullptr, 4, data);
600  PrintRegisterValue(reg_ctx, "lr", nullptr, 4, data);
601  PrintRegisterValue(reg_ctx, "pc", nullptr, 4, data);
602  PrintRegisterValue(reg_ctx, "cpsr", nullptr, 4, data);
603 
604  // Write out the EXC registers
605  // data.PutHex32 (EXCRegSet);
606  // data.PutHex32 (EXCWordCount);
607  // WriteRegister (reg_ctx, "exception", NULL, 4, data);
608  // WriteRegister (reg_ctx, "fsr", NULL, 4, data);
609  // WriteRegister (reg_ctx, "far", NULL, 4, data);
610  return true;
611  }
612  return false;
613  }
614 
615 protected:
616  int DoReadGPR(lldb::tid_t tid, int flavor, GPR &gpr) override { return -1; }
617 
618  int DoReadFPU(lldb::tid_t tid, int flavor, FPU &fpu) override { return -1; }
619 
620  int DoReadEXC(lldb::tid_t tid, int flavor, EXC &exc) override { return -1; }
621 
622  int DoReadDBG(lldb::tid_t tid, int flavor, DBG &dbg) override { return -1; }
623 
624  int DoWriteGPR(lldb::tid_t tid, int flavor, const GPR &gpr) override {
625  return 0;
626  }
627 
628  int DoWriteFPU(lldb::tid_t tid, int flavor, const FPU &fpu) override {
629  return 0;
630  }
631 
632  int DoWriteEXC(lldb::tid_t tid, int flavor, const EXC &exc) override {
633  return 0;
634  }
635 
636  int DoWriteDBG(lldb::tid_t tid, int flavor, const DBG &dbg) override {
637  return -1;
638  }
639 };
640 
642 public:
644  const DataExtractor &data)
645  : RegisterContextDarwin_arm64(thread, 0) {
646  SetRegisterDataFrom_LC_THREAD(data);
647  }
648 
649  void InvalidateAllRegisters() override {
650  // Do nothing... registers are always valid...
651  }
652 
654  lldb::offset_t offset = 0;
655  SetError(GPRRegSet, Read, -1);
656  SetError(FPURegSet, Read, -1);
657  SetError(EXCRegSet, Read, -1);
658  bool done = false;
659  while (!done) {
660  int flavor = data.GetU32(&offset);
661  uint32_t count = data.GetU32(&offset);
662  lldb::offset_t next_thread_state = offset + (count * 4);
663  switch (flavor) {
664  case GPRRegSet:
665  // x0-x29 + fp + lr + sp + pc (== 33 64-bit registers) plus cpsr (1
666  // 32-bit register)
667  if (count >= (33 * 2) + 1) {
668  for (uint32_t i = 0; i < 29; ++i)
669  gpr.x[i] = data.GetU64(&offset);
670  gpr.fp = data.GetU64(&offset);
671  gpr.lr = data.GetU64(&offset);
672  gpr.sp = data.GetU64(&offset);
673  gpr.pc = data.GetU64(&offset);
674  gpr.cpsr = data.GetU32(&offset);
675  SetError(GPRRegSet, Read, 0);
676  }
677  offset = next_thread_state;
678  break;
679  case FPURegSet: {
680  uint8_t *fpu_reg_buf = (uint8_t *)&fpu.v[0];
681  const int fpu_reg_buf_size = sizeof(fpu);
682  if (fpu_reg_buf_size == count * sizeof(uint32_t) &&
683  data.ExtractBytes(offset, fpu_reg_buf_size, eByteOrderLittle,
684  fpu_reg_buf) == fpu_reg_buf_size) {
685  SetError(FPURegSet, Read, 0);
686  } else {
687  done = true;
688  }
689  }
690  offset = next_thread_state;
691  break;
692  case EXCRegSet:
693  if (count == 4) {
694  exc.far = data.GetU64(&offset);
695  exc.esr = data.GetU32(&offset);
696  exc.exception = data.GetU32(&offset);
697  SetError(EXCRegSet, Read, 0);
698  }
699  offset = next_thread_state;
700  break;
701  default:
702  done = true;
703  break;
704  }
705  }
706  }
707 
708  static bool Create_LC_THREAD(Thread *thread, Stream &data) {
709  RegisterContextSP reg_ctx_sp(thread->GetRegisterContext());
710  if (reg_ctx_sp) {
711  RegisterContext *reg_ctx = reg_ctx_sp.get();
712 
713  data.PutHex32(GPRRegSet); // Flavor
714  data.PutHex32(GPRWordCount);
715  PrintRegisterValue(reg_ctx, "x0", nullptr, 8, data);
716  PrintRegisterValue(reg_ctx, "x1", nullptr, 8, data);
717  PrintRegisterValue(reg_ctx, "x2", nullptr, 8, data);
718  PrintRegisterValue(reg_ctx, "x3", nullptr, 8, data);
719  PrintRegisterValue(reg_ctx, "x4", nullptr, 8, data);
720  PrintRegisterValue(reg_ctx, "x5", nullptr, 8, data);
721  PrintRegisterValue(reg_ctx, "x6", nullptr, 8, data);
722  PrintRegisterValue(reg_ctx, "x7", nullptr, 8, data);
723  PrintRegisterValue(reg_ctx, "x8", nullptr, 8, data);
724  PrintRegisterValue(reg_ctx, "x9", nullptr, 8, data);
725  PrintRegisterValue(reg_ctx, "x10", nullptr, 8, data);
726  PrintRegisterValue(reg_ctx, "x11", nullptr, 8, data);
727  PrintRegisterValue(reg_ctx, "x12", nullptr, 8, data);
728  PrintRegisterValue(reg_ctx, "x13", nullptr, 8, data);
729  PrintRegisterValue(reg_ctx, "x14", nullptr, 8, data);
730  PrintRegisterValue(reg_ctx, "x15", nullptr, 8, data);
731  PrintRegisterValue(reg_ctx, "x16", nullptr, 8, data);
732  PrintRegisterValue(reg_ctx, "x17", nullptr, 8, data);
733  PrintRegisterValue(reg_ctx, "x18", nullptr, 8, data);
734  PrintRegisterValue(reg_ctx, "x19", nullptr, 8, data);
735  PrintRegisterValue(reg_ctx, "x20", nullptr, 8, data);
736  PrintRegisterValue(reg_ctx, "x21", nullptr, 8, data);
737  PrintRegisterValue(reg_ctx, "x22", nullptr, 8, data);
738  PrintRegisterValue(reg_ctx, "x23", nullptr, 8, data);
739  PrintRegisterValue(reg_ctx, "x24", nullptr, 8, data);
740  PrintRegisterValue(reg_ctx, "x25", nullptr, 8, data);
741  PrintRegisterValue(reg_ctx, "x26", nullptr, 8, data);
742  PrintRegisterValue(reg_ctx, "x27", nullptr, 8, data);
743  PrintRegisterValue(reg_ctx, "x28", nullptr, 8, data);
744  PrintRegisterValue(reg_ctx, "fp", nullptr, 8, data);
745  PrintRegisterValue(reg_ctx, "lr", nullptr, 8, data);
746  PrintRegisterValue(reg_ctx, "sp", nullptr, 8, data);
747  PrintRegisterValue(reg_ctx, "pc", nullptr, 8, data);
748  PrintRegisterValue(reg_ctx, "cpsr", nullptr, 4, data);
749  data.PutHex32(0); // uint32_t pad at the end
750 
751  // Write out the EXC registers
752  data.PutHex32(EXCRegSet);
753  data.PutHex32(EXCWordCount);
754  PrintRegisterValue(reg_ctx, "far", nullptr, 8, data);
755  PrintRegisterValue(reg_ctx, "esr", nullptr, 4, data);
756  PrintRegisterValue(reg_ctx, "exception", nullptr, 4, data);
757  return true;
758  }
759  return false;
760  }
761 
762 protected:
763  int DoReadGPR(lldb::tid_t tid, int flavor, GPR &gpr) override { return -1; }
764 
765  int DoReadFPU(lldb::tid_t tid, int flavor, FPU &fpu) override { return -1; }
766 
767  int DoReadEXC(lldb::tid_t tid, int flavor, EXC &exc) override { return -1; }
768 
769  int DoReadDBG(lldb::tid_t tid, int flavor, DBG &dbg) override { return -1; }
770 
771  int DoWriteGPR(lldb::tid_t tid, int flavor, const GPR &gpr) override {
772  return 0;
773  }
774 
775  int DoWriteFPU(lldb::tid_t tid, int flavor, const FPU &fpu) override {
776  return 0;
777  }
778 
779  int DoWriteEXC(lldb::tid_t tid, int flavor, const EXC &exc) override {
780  return 0;
781  }
782 
783  int DoWriteDBG(lldb::tid_t tid, int flavor, const DBG &dbg) override {
784  return -1;
785  }
786 };
787 
789  switch (magic) {
790  case MH_MAGIC:
791  case MH_CIGAM:
792  return sizeof(struct llvm::MachO::mach_header);
793 
794  case MH_MAGIC_64:
795  case MH_CIGAM_64:
796  return sizeof(struct llvm::MachO::mach_header_64);
797  break;
798 
799  default:
800  break;
801  }
802  return 0;
803 }
804 
805 #define MACHO_NLIST_ARM_SYMBOL_IS_THUMB 0x0008
806 
808 
810  PluginManager::RegisterPlugin(
811  GetPluginNameStatic(), GetPluginDescriptionStatic(), CreateInstance,
812  CreateMemoryInstance, GetModuleSpecifications, SaveCore);
813 }
814 
816  PluginManager::UnregisterPlugin(CreateInstance);
817 }
818 
819 ObjectFile *ObjectFileMachO::CreateInstance(const lldb::ModuleSP &module_sp,
820  DataBufferSP data_sp,
821  lldb::offset_t data_offset,
822  const FileSpec *file,
823  lldb::offset_t file_offset,
824  lldb::offset_t length) {
825  if (!data_sp) {
826  data_sp = MapFileData(*file, length, file_offset);
827  if (!data_sp)
828  return nullptr;
829  data_offset = 0;
830  }
831 
832  if (!ObjectFileMachO::MagicBytesMatch(data_sp, data_offset, length))
833  return nullptr;
834 
835  // Update the data to contain the entire file if it doesn't already
836  if (data_sp->GetByteSize() < length) {
837  data_sp = MapFileData(*file, length, file_offset);
838  if (!data_sp)
839  return nullptr;
840  data_offset = 0;
841  }
842  auto objfile_up = std::make_unique<ObjectFileMachO>(
843  module_sp, data_sp, data_offset, file, file_offset, length);
844  if (!objfile_up || !objfile_up->ParseHeader())
845  return nullptr;
846 
847  return objfile_up.release();
848 }
849 
851  const lldb::ModuleSP &module_sp, WritableDataBufferSP data_sp,
852  const ProcessSP &process_sp, lldb::addr_t header_addr) {
853  if (ObjectFileMachO::MagicBytesMatch(data_sp, 0, data_sp->GetByteSize())) {
854  std::unique_ptr<ObjectFile> objfile_up(
855  new ObjectFileMachO(module_sp, data_sp, process_sp, header_addr));
856  if (objfile_up.get() && objfile_up->ParseHeader())
857  return objfile_up.release();
858  }
859  return nullptr;
860 }
861 
863  const lldb_private::FileSpec &file, lldb::DataBufferSP &data_sp,
864  lldb::offset_t data_offset, lldb::offset_t file_offset,
866  const size_t initial_count = specs.GetSize();
867 
868  if (ObjectFileMachO::MagicBytesMatch(data_sp, 0, data_sp->GetByteSize())) {
869  DataExtractor data;
870  data.SetData(data_sp);
871  llvm::MachO::mach_header header;
872  if (ParseHeader(data, &data_offset, header)) {
873  size_t header_and_load_cmds =
874  header.sizeofcmds + MachHeaderSizeFromMagic(header.magic);
875  if (header_and_load_cmds >= data_sp->GetByteSize()) {
876  data_sp = MapFileData(file, header_and_load_cmds, file_offset);
877  data.SetData(data_sp);
878  data_offset = MachHeaderSizeFromMagic(header.magic);
879  }
880  if (data_sp) {
881  ModuleSpec base_spec;
882  base_spec.GetFileSpec() = file;
883  base_spec.SetObjectOffset(file_offset);
884  base_spec.SetObjectSize(length);
885  GetAllArchSpecs(header, data, data_offset, base_spec, specs);
886  }
887  }
888  }
889  return specs.GetSize() - initial_count;
890 }
891 
893  static ConstString g_segment_name_TEXT("__TEXT");
894  return g_segment_name_TEXT;
895 }
896 
898  static ConstString g_segment_name_DATA("__DATA");
899  return g_segment_name_DATA;
900 }
901 
903  static ConstString g_segment_name("__DATA_DIRTY");
904  return g_segment_name;
905 }
906 
908  static ConstString g_segment_name("__DATA_CONST");
909  return g_segment_name;
910 }
911 
913  static ConstString g_segment_name_OBJC("__OBJC");
914  return g_segment_name_OBJC;
915 }
916 
918  static ConstString g_section_name_LINKEDIT("__LINKEDIT");
919  return g_section_name_LINKEDIT;
920 }
921 
923  static ConstString g_section_name("__DWARF");
924  return g_section_name;
925 }
926 
928  static ConstString g_section_name_eh_frame("__eh_frame");
929  return g_section_name_eh_frame;
930 }
931 
932 bool ObjectFileMachO::MagicBytesMatch(DataBufferSP data_sp,
933  lldb::addr_t data_offset,
934  lldb::addr_t data_length) {
935  DataExtractor data;
936  data.SetData(data_sp, data_offset, data_length);
937  lldb::offset_t offset = 0;
938  uint32_t magic = data.GetU32(&offset);
939  return MachHeaderSizeFromMagic(magic) != 0;
940 }
941 
942 ObjectFileMachO::ObjectFileMachO(const lldb::ModuleSP &module_sp,
943  DataBufferSP data_sp,
944  lldb::offset_t data_offset,
945  const FileSpec *file,
946  lldb::offset_t file_offset,
947  lldb::offset_t length)
948  : ObjectFile(module_sp, file, file_offset, length, data_sp, data_offset),
949  m_mach_segments(), m_mach_sections(), m_entry_point_address(),
950  m_thread_context_offsets(), m_thread_context_offsets_valid(false),
951  m_reexported_dylibs(), m_allow_assembly_emulation_unwind_plans(true) {
952  ::memset(&m_header, 0, sizeof(m_header));
953  ::memset(&m_dysymtab, 0, sizeof(m_dysymtab));
954 }
955 
956 ObjectFileMachO::ObjectFileMachO(const lldb::ModuleSP &module_sp,
957  lldb::WritableDataBufferSP header_data_sp,
958  const lldb::ProcessSP &process_sp,
959  lldb::addr_t header_addr)
960  : ObjectFile(module_sp, process_sp, header_addr, header_data_sp),
961  m_mach_segments(), m_mach_sections(), m_entry_point_address(),
962  m_thread_context_offsets(), m_thread_context_offsets_valid(false),
963  m_reexported_dylibs(), m_allow_assembly_emulation_unwind_plans(true) {
964  ::memset(&m_header, 0, sizeof(m_header));
965  ::memset(&m_dysymtab, 0, sizeof(m_dysymtab));
966 }
967 
// Parses a Mach-O header out of `data`, starting at *data_offset_ptr, into
// `header`.
//
// The 32-bit magic is read first and kept exactly as read. For each of the
// four recognised magics the extractor's address size is set to 4 or 8 bytes.
// When the magic is recognised, the six 32-bit fields starting at
// header.cputype are read and, for the 64-bit variants, four more bytes are
// skipped (presumably the trailing `reserved` word -- confirm against the
// header layout). Returns true in that case. For an unknown magic `header` is
// zeroed and false is returned.
//
// NOTE(review): the first signature line and the statements that configure
// the extractor's byte order in each case are absent from this listing.
969  lldb::offset_t *data_offset_ptr,
970  llvm::MachO::mach_header &header) {
972  // Leave magic in the original byte order
973  header.magic = data.GetU32(data_offset_ptr);
974  bool can_parse = false;
975  bool is_64_bit = false;
976  switch (header.magic) {
977  case MH_MAGIC:
979  data.SetAddressByteSize(4);
980  can_parse = true;
981  break;
982 
983  case MH_MAGIC_64:
985  data.SetAddressByteSize(8);
986  can_parse = true;
987  is_64_bit = true;
988  break;
989 
990  case MH_CIGAM:
993  : eByteOrderBig);
994  data.SetAddressByteSize(4);
995  can_parse = true;
996  break;
997 
998  case MH_CIGAM_64:
1001  : eByteOrderBig);
1002  data.SetAddressByteSize(8);
1003  is_64_bit = true;
1004  can_parse = true;
1005  break;
1006 
1007  default:
1008  break;
1009  }
1010 
1011  if (can_parse) {
      // Read the six 32-bit fields that follow the magic, starting at cputype.
1012  data.GetU32(data_offset_ptr, &header.cputype, 6);
      // The 64-bit header carries one extra 32-bit word; step over it.
1013  if (is_64_bit)
1014  *data_offset_ptr += 4;
1015  return true;
1016  } else {
1017  memset(&header, 0, sizeof(header));
1018  }
1019  return false;
1020 }
1021 
// Parses the Mach-O header held in m_data into m_header and selects the
// module's architecture.
//
// Returns false when the module is gone, when the magic is not one of the
// four Mach-O magics (m_header is zeroed in that case), or when none of the
// candidate architectures collected in `all_specs` is compatible with the
// module's architecture. Returns true as soon as one candidate passes the
// compatibility check. If m_data is smaller than the header plus load
// commands, the missing bytes are fetched from process memory or from the
// file on disk.
//
// NOTE(review): the signature line, the byte-order / address-size setup in
// each case, and the statements that populate `all_specs` and `mach_arch`
// are absent from this listing.
1023  ModuleSP module_sp(GetModule());
1024  if (!module_sp)
1025  return false;
1026 
      // Hold the module's mutex for the whole parse.
1027  std::lock_guard<std::recursive_mutex> guard(module_sp->GetMutex());
1028  bool can_parse = false;
1029  lldb::offset_t offset = 0;
1031  // Leave magic in the original byte order
1032  m_header.magic = m_data.GetU32(&offset);
1033  switch (m_header.magic) {
1034  case MH_MAGIC:
1037  can_parse = true;
1038  break;
1039 
1040  case MH_MAGIC_64:
1043  can_parse = true;
1044  break;
1045 
1046  case MH_CIGAM:
1049  : eByteOrderBig);
1051  can_parse = true;
1052  break;
1053 
1054  case MH_CIGAM_64:
1057  : eByteOrderBig);
1059  can_parse = true;
1060  break;
1061 
1062  default:
1063  break;
1064  }
1065 
1066  if (can_parse) {
      // Read the six 32-bit fields that follow the magic, starting at cputype.
1067  m_data.GetU32(&offset, &m_header.cputype, 6);
1068 
1069  ModuleSpecList all_specs;
1070  ModuleSpec base_spec;
1072  base_spec, all_specs);
1073 
1074  for (unsigned i = 0, e = all_specs.GetSize(); i != e; ++i) {
1075  ArchSpec mach_arch =
1077 
1078  // Check if the module has a required architecture
1079  const ArchSpec &module_arch = module_sp->GetArchitecture();
1080  if (module_arch.IsValid() && !module_arch.IsCompatibleMatch(mach_arch))
1081  continue;
1082 
1083  if (SetModulesArchitecture(mach_arch)) {
      // Total bytes needed to cover the mach header plus every load command.
1084  const size_t header_and_lc_size =
1085  m_header.sizeofcmds + MachHeaderSizeFromMagic(m_header.magic);
1086  if (m_data.GetByteSize() < header_and_lc_size) {
1087  DataBufferSP data_sp;
1088  ProcessSP process_sp(m_process_wp.lock());
1089  if (process_sp) {
1090  data_sp = ReadMemory(process_sp, m_memory_addr, header_and_lc_size);
1091  } else {
1092  // Read only the header and load command data from the file on disk
1093  data_sp = MapFileData(m_file, header_and_lc_size, m_file_offset);
      // NOTE(review): data_sp is dereferenced here without a null check,
      // while the in-memory path above is guarded by `if (data_sp)` below --
      // confirm MapFileData cannot return an empty pointer.
1094  if (data_sp->GetByteSize() != header_and_lc_size)
1095  continue;
1096  }
1097  if (data_sp)
1098  m_data.SetData(data_sp);
1099  }
1100  }
      // The first architecture that passed the compatibility check ends the
      // search, whether or not SetModulesArchitecture() succeeded.
1101  return true;
1102  }
1103  // None found.
1104  return false;
1105  } else {
1106  memset(&m_header, 0, sizeof(struct llvm::MachO::mach_header));
1107  }
1108  return false;
1109 }
1110 
// Returns the byte order currently configured on m_data.
// NOTE(review): the signature line is absent from this listing.
1112  return m_data.GetByteOrder();
1113 }
1114 
// True when the parsed header's filetype is MH_EXECUTE.
// NOTE(review): the signature line is absent from this listing.
1116  return m_header.filetype == MH_EXECUTE;
1117 }
1118 
// True when the parsed header's filetype is MH_DYLINKER.
// NOTE(review): the signature line is absent from this listing.
1120  return m_header.filetype == MH_DYLINKER;
1121 }
1122 
// Non-zero (true) when the MH_DYLIB_IN_CACHE bit is set in the parsed
// header's flags.
// NOTE(review): the signature line is absent from this listing.
1124  return m_header.flags & MH_DYLIB_IN_CACHE;
1125 }
1126 
// Returns the address size, in bytes, currently configured on m_data.
// NOTE(review): the signature line is absent from this listing.
1128  return m_data.GetAddressByteSize();
1129 }
1130 
// Classifies `file_addr` by looking up the symbol that contains it.
//
// If the symbol's value is an address that lives in a section, the section
// type decides the class. Otherwise (or when the section lookup yields
// nothing) the symbol's own type decides. For code on CPU_TYPE_ARM the
// MACHO_NLIST_ARM_SYMBOL_IS_THUMB symbol flag selects eCodeAlternateISA
// instead of eCode. Returns AddressClass::eUnknown when there is no symbol
// table or no containing symbol.
//
// NOTE(review): the signature line and a number of `case` labels are absent
// from this listing; the surviving `return` after each gap applies to the
// missing labels as well.
1132  Symtab *symtab = GetSymtab();
1133  if (!symtab)
1134  return AddressClass::eUnknown;
1135 
1136  Symbol *symbol = symtab->FindSymbolContainingFileAddress(file_addr);
1137  if (symbol) {
1138  if (symbol->ValueIsAddress()) {
1139  SectionSP section_sp(symbol->GetAddressRef().GetSection());
1140  if (section_sp) {
      // First choice: classify by the type of the section holding the symbol.
1141  const lldb::SectionType section_type = section_sp->GetType();
1142  switch (section_type) {
1143  case eSectionTypeInvalid:
1144  return AddressClass::eUnknown;
1145 
1146  case eSectionTypeCode:
1147  if (m_header.cputype == llvm::MachO::CPU_TYPE_ARM) {
1148  // For ARM we have a bit in the n_desc field of the symbol that
1149  // tells us ARM/Thumb which is bit 0x0008.
1150  if (symbol->GetFlags() & MACHO_NLIST_ARM_SYMBOL_IS_THUMB)
1151  return AddressClass::eCodeAlternateISA;
1152  }
1153  return AddressClass::eCode;
1154 
1155  case eSectionTypeContainer:
1156  return AddressClass::eUnknown;
1157 
1158  case eSectionTypeData:
1162  case eSectionTypeData4:
1163  case eSectionTypeData8:
1164  case eSectionTypeData16:
1166  case eSectionTypeZeroFill:
1169  case eSectionTypeGoSymtab:
1170  return AddressClass::eData;
1171 
1172  case eSectionTypeDebug:
1207  return AddressClass::eDebug;
1208 
1209  case eSectionTypeEHFrame:
1210  case eSectionTypeARMexidx:
1211  case eSectionTypeARMextab:
1213  return AddressClass::eRuntime;
1214 
1220  case eSectionTypeOther:
1221  return AddressClass::eUnknown;
1222  }
1223  }
1224  }
1225 
      // Fallback: classify by the symbol's own type.
1226  const SymbolType symbol_type = symbol->GetType();
1227  switch (symbol_type) {
1228  case eSymbolTypeAny:
1229  return AddressClass::eUnknown;
1230  case eSymbolTypeAbsolute:
1231  return AddressClass::eUnknown;
1232 
1233  case eSymbolTypeCode:
1234  case eSymbolTypeTrampoline:
1235  case eSymbolTypeResolver:
1236  if (m_header.cputype == llvm::MachO::CPU_TYPE_ARM) {
1237  // For ARM we have a bit in the n_desc field of the symbol that tells
1238  // us ARM/Thumb which is bit 0x0008.
1239  if (symbol->GetFlags() & MACHO_NLIST_ARM_SYMBOL_IS_THUMB)
1240  return AddressClass::eCodeAlternateISA;
1241  }
1242  return AddressClass::eCode;
1243 
1244  case eSymbolTypeData:
1245  return AddressClass::eData;
1246  case eSymbolTypeRuntime:
1247  return AddressClass::eRuntime;
1248  case eSymbolTypeException:
1249  return AddressClass::eRuntime;
1250  case eSymbolTypeSourceFile:
1251  return AddressClass::eDebug;
1252  case eSymbolTypeHeaderFile:
1253  return AddressClass::eDebug;
1254  case eSymbolTypeObjectFile:
1255  return AddressClass::eDebug;
1257  return AddressClass::eDebug;
1258  case eSymbolTypeBlock:
1259  return AddressClass::eDebug;
1260  case eSymbolTypeLocal:
1261  return AddressClass::eData;
1262  case eSymbolTypeParam:
1263  return AddressClass::eData;
1264  case eSymbolTypeVariable:
1265  return AddressClass::eData;
1267  return AddressClass::eDebug;
1268  case eSymbolTypeLineEntry:
1269  return AddressClass::eDebug;
1270  case eSymbolTypeLineHeader:
1271  return AddressClass::eDebug;
1272  case eSymbolTypeScopeBegin:
1273  return AddressClass::eDebug;
1274  case eSymbolTypeScopeEnd:
1275  return AddressClass::eDebug;
1276  case eSymbolTypeAdditional:
1277  return AddressClass::eUnknown;
1278  case eSymbolTypeCompiler:
1279  return AddressClass::eDebug;
1281  return AddressClass::eDebug;
1282  case eSymbolTypeUndefined:
1283  return AddressClass::eUnknown;
1284  case eSymbolTypeObjCClass:
1285  return AddressClass::eRuntime;
1287  return AddressClass::eRuntime;
1288  case eSymbolTypeObjCIVar:
1289  return AddressClass::eRuntime;
1290  case eSymbolTypeReExported:
1291  return AddressClass::eRuntime;
1292  }
1293  }
1294  return AddressClass::eUnknown;
1295 }
1296 
// Reports whether the LC_DYSYMTAB load command lists at most one local
// symbol.
//
// m_dysymtab acts as a cache: while its `cmd` field is still 0 the load
// commands are walked looking for LC_DYSYMTAB, and the command's 32-bit
// fields are read into m_dysymtab. Returns false when no LC_DYSYMTAB was
// found (or it could not be read completely).
//
// NOTE(review): the signature line and the declaration of `offset` are absent
// from this listing.
1298  if (m_dysymtab.cmd == 0) {
1299  ModuleSP module_sp(GetModule());
1300  if (module_sp) {
1302  for (uint32_t i = 0; i < m_header.ncmds; ++i) {
1303  const lldb::offset_t load_cmd_offset = offset;
1304 
1305  llvm::MachO::load_command lc;
      // Every load command starts with two 32-bit words: cmd and cmdsize.
1306  if (m_data.GetU32(&offset, &lc.cmd, 2) == nullptr)
1307  break;
1308  if (lc.cmd == LC_DYSYMTAB) {
1309  m_dysymtab.cmd = lc.cmd;
1310  m_dysymtab.cmdsize = lc.cmdsize;
      // Read every remaining 32-bit field of the struct; the "- 2" accounts
      // for cmd and cmdsize, which were consumed above.
1311  if (m_data.GetU32(&offset, &m_dysymtab.ilocalsym,
1312  (sizeof(m_dysymtab) / sizeof(uint32_t)) - 2) ==
1313  nullptr) {
1314  // Clear m_dysymtab if we were unable to read all items from the
1315  // load command
1316  ::memset(&m_dysymtab, 0, sizeof(m_dysymtab));
1317  }
1318  }
      // Advance by the command's declared size, not by what was consumed.
1319  offset = load_cmd_offset + lc.cmdsize;
1320  }
1321  }
1322  }
1323  if (m_dysymtab.cmd)
1324  return m_dysymtab.nlocalsym <= 1;
1325  return false;
1326 }
1327 
// Collects the file ranges described by LC_ENCRYPTION_INFO /
// LC_ENCRYPTION_INFO_64 load commands whose cryptid is non-zero.
//
// Each qualifying command contributes one entry with base `cryptoff` and size
// `cryptsize`. The walk stops early if a load command header cannot be read.
//
// NOTE(review): the signature line and the declarations of `offset` and
// `entry` are absent from this listing.
1329  EncryptedFileRanges result;
1331 
1332  llvm::MachO::encryption_info_command encryption_cmd;
1333  for (uint32_t i = 0; i < m_header.ncmds; ++i) {
1334  const lldb::offset_t load_cmd_offset = offset;
      // Read the common two-word prefix (cmd, cmdsize) of the load command.
1335  if (m_data.GetU32(&offset, &encryption_cmd, 2) == nullptr)
1336  break;
1337 
1338  // LC_ENCRYPTION_INFO and LC_ENCRYPTION_INFO_64 have the same sizes for the
1339  // 3 fields we care about, so treat them the same.
1340  if (encryption_cmd.cmd == LC_ENCRYPTION_INFO ||
1341  encryption_cmd.cmd == LC_ENCRYPTION_INFO_64) {
      // Three 32-bit words starting at cryptoff (cryptoff, cryptsize, cryptid
      // are the fields used below).
1342  if (m_data.GetU32(&offset, &encryption_cmd.cryptoff, 3)) {
1343  if (encryption_cmd.cryptid != 0) {
1345  entry.SetRangeBase(encryption_cmd.cryptoff);
1346  entry.SetByteSize(encryption_cmd.cryptsize);
1347  result.Append(entry);
1348  }
1349  }
1350  }
      // Advance by the command's declared size, not by what was consumed.
1351  offset = load_cmd_offset + encryption_cmd.cmdsize;
1352  }
1353 
1354  return result;
1355 }
1356 
1358  llvm::MachO::segment_command_64 &seg_cmd, uint32_t cmd_idx) {
1359  if (m_length == 0 || seg_cmd.filesize == 0)
1360  return;
1361 
1362  if (IsSharedCacheBinary() && !IsInMemory()) {
1363  // In shared cache images, the load commands are relative to the
1364  // shared cache file, and not the specific image we are
1365  // examining. Let's fix this up so that it looks like a normal
1366  // image.
1367  if (strncmp(seg_cmd.segname, "__TEXT", sizeof(seg_cmd.segname)) == 0)
1368  m_text_address = seg_cmd.vmaddr;
1369  if (strncmp(seg_cmd.segname, "__LINKEDIT", sizeof(seg_cmd.segname)) == 0)
1370  m_linkedit_original_offset = seg_cmd.fileoff;
1371 
1372  seg_cmd.fileoff = seg_cmd.vmaddr - m_text_address;
1373  }
1374 
1375  if (seg_cmd.fileoff > m_length) {
1376  // We have a load command that says it extends past the end of the file.
1377  // This is likely a corrupt file. We don't have any way to return an error
1378  // condition here (this method was likely invoked from something like
1379  // ObjectFile::GetSectionList()), so we just null out the section contents,
1380  // and dump a message to stdout. The most common case here is core file
1381  // debugging with a truncated file.
1382  const char *lc_segment_name =
1383  seg_cmd.cmd == LC_SEGMENT_64 ? "LC_SEGMENT_64" : "LC_SEGMENT";
1384  GetModule()->ReportWarning(
1385  "load command %u %s has a fileoff (0x%" PRIx64
1386  ") that extends beyond the end of the file (0x%" PRIx64
1387  "), ignoring this section",
1388  cmd_idx, lc_segment_name, seg_cmd.fileoff, m_length);
1389 
1390  seg_cmd.fileoff = 0;
1391  seg_cmd.filesize = 0;
1392  }
1393 
1394  if (seg_cmd.fileoff + seg_cmd.filesize > m_length) {
1395  // We have a load command that says it extends past the end of the file.
1396  // This is likely a corrupt file. We don't have any way to return an error
1397  // condition here (this method was likely invoked from something like
1398  // ObjectFile::GetSectionList()), so we just null out the section contents,
1399  // and dump a message to stdout. The most common case here is core file
1400  // debugging with a truncated file.
1401  const char *lc_segment_name =
1402  seg_cmd.cmd == LC_SEGMENT_64 ? "LC_SEGMENT_64" : "LC_SEGMENT";
1403  GetModule()->ReportWarning(
1404  "load command %u %s has a fileoff + filesize (0x%" PRIx64
1405  ") that extends beyond the end of the file (0x%" PRIx64
1406  "), the segment will be truncated to match",
1407  cmd_idx, lc_segment_name, seg_cmd.fileoff + seg_cmd.filesize, m_length);
1408 
1409  // Truncate the length
1410  seg_cmd.filesize = m_length - seg_cmd.fileoff;
1411  }
1412 }
1413 
1414 static uint32_t
1415 GetSegmentPermissions(const llvm::MachO::segment_command_64 &seg_cmd) {
1416  uint32_t result = 0;
1417  if (seg_cmd.initprot & VM_PROT_READ)
1418  result |= ePermissionsReadable;
1419  if (seg_cmd.initprot & VM_PROT_WRITE)
1420  result |= ePermissionsWritable;
1421  if (seg_cmd.initprot & VM_PROT_EXECUTE)
1422  result |= ePermissionsExecutable;
1423  return result;
1424 }
1425 
// Maps a Mach-O section (its flags word and its name) to an lldb section
// type.
//
// Order of precedence, as written below:
//   1. Either instruction attribute bit set in `flags` -> eSectionTypeCode.
//   2. A match of `section_name` against the well-known names declared as
//      function-local statics.
//   3. The low SECTION_TYPE bits of `flags`.
//
// NOTE(review): the first signature line and most of the `return` statements
// that follow the name comparisons are absent from this listing, so the
// specific type returned for those names cannot be read off here.
1427  ConstString section_name) {
1428 
1429  if (flags & (S_ATTR_PURE_INSTRUCTIONS | S_ATTR_SOME_INSTRUCTIONS))
1430  return eSectionTypeCode;
1431 
1432  uint32_t mach_sect_type = flags & SECTION_TYPE;
1433  static ConstString g_sect_name_objc_data("__objc_data");
1434  static ConstString g_sect_name_objc_msgrefs("__objc_msgrefs");
1435  static ConstString g_sect_name_objc_selrefs("__objc_selrefs");
1436  static ConstString g_sect_name_objc_classrefs("__objc_classrefs");
1437  static ConstString g_sect_name_objc_superrefs("__objc_superrefs");
1438  static ConstString g_sect_name_objc_const("__objc_const");
1439  static ConstString g_sect_name_objc_classlist("__objc_classlist");
1440  static ConstString g_sect_name_cfstring("__cfstring");
1441 
1442  static ConstString g_sect_name_dwarf_debug_abbrev("__debug_abbrev");
1443  static ConstString g_sect_name_dwarf_debug_aranges("__debug_aranges");
1444  static ConstString g_sect_name_dwarf_debug_frame("__debug_frame");
1445  static ConstString g_sect_name_dwarf_debug_info("__debug_info");
1446  static ConstString g_sect_name_dwarf_debug_line("__debug_line");
1447  static ConstString g_sect_name_dwarf_debug_loc("__debug_loc");
1448  static ConstString g_sect_name_dwarf_debug_loclists("__debug_loclists");
1449  static ConstString g_sect_name_dwarf_debug_macinfo("__debug_macinfo");
1450  static ConstString g_sect_name_dwarf_debug_names("__debug_names");
1451  static ConstString g_sect_name_dwarf_debug_pubnames("__debug_pubnames");
1452  static ConstString g_sect_name_dwarf_debug_pubtypes("__debug_pubtypes");
1453  static ConstString g_sect_name_dwarf_debug_ranges("__debug_ranges");
1454  static ConstString g_sect_name_dwarf_debug_str("__debug_str");
1455  static ConstString g_sect_name_dwarf_debug_types("__debug_types");
1456  static ConstString g_sect_name_dwarf_apple_names("__apple_names");
1457  static ConstString g_sect_name_dwarf_apple_types("__apple_types");
      // The literal below is 16 characters long, the same size used for the
      // fixed-width name fields elsewhere in this file, hence the truncated
      // spelling.
1458  static ConstString g_sect_name_dwarf_apple_namespaces("__apple_namespac");
1459  static ConstString g_sect_name_dwarf_apple_objc("__apple_objc");
1460  static ConstString g_sect_name_eh_frame("__eh_frame");
1461  static ConstString g_sect_name_compact_unwind("__unwind_info");
1462  static ConstString g_sect_name_text("__text");
1463  static ConstString g_sect_name_data("__data");
1464  static ConstString g_sect_name_go_symtab("__gosymtab");
1465 
1466  if (section_name == g_sect_name_dwarf_debug_abbrev)
1468  if (section_name == g_sect_name_dwarf_debug_aranges)
1470  if (section_name == g_sect_name_dwarf_debug_frame)
1472  if (section_name == g_sect_name_dwarf_debug_info)
1474  if (section_name == g_sect_name_dwarf_debug_line)
1476  if (section_name == g_sect_name_dwarf_debug_loc)
1478  if (section_name == g_sect_name_dwarf_debug_loclists)
1480  if (section_name == g_sect_name_dwarf_debug_macinfo)
1482  if (section_name == g_sect_name_dwarf_debug_names)
1484  if (section_name == g_sect_name_dwarf_debug_pubnames)
1486  if (section_name == g_sect_name_dwarf_debug_pubtypes)
1488  if (section_name == g_sect_name_dwarf_debug_ranges)
1490  if (section_name == g_sect_name_dwarf_debug_str)
1492  if (section_name == g_sect_name_dwarf_debug_types)
1494  if (section_name == g_sect_name_dwarf_apple_names)
1496  if (section_name == g_sect_name_dwarf_apple_types)
1498  if (section_name == g_sect_name_dwarf_apple_namespaces)
1500  if (section_name == g_sect_name_dwarf_apple_objc)
1502  if (section_name == g_sect_name_objc_selrefs)
1504  if (section_name == g_sect_name_objc_msgrefs)
1506  if (section_name == g_sect_name_eh_frame)
1507  return eSectionTypeEHFrame;
1508  if (section_name == g_sect_name_compact_unwind)
1510  if (section_name == g_sect_name_cfstring)
1512  if (section_name == g_sect_name_go_symtab)
1513  return eSectionTypeGoSymtab;
1514  if (section_name == g_sect_name_objc_data ||
1515  section_name == g_sect_name_objc_classrefs ||
1516  section_name == g_sect_name_objc_superrefs ||
1517  section_name == g_sect_name_objc_const ||
1518  section_name == g_sect_name_objc_classlist) {
1519  return eSectionTypeDataPointers;
1520  }
1521 
      // No name matched: fall back to the Mach-O section type bits.
1522  switch (mach_sect_type) {
1523  // TODO: categorize sections by other flags for regular sections
1524  case S_REGULAR:
1525  if (section_name == g_sect_name_text)
1526  return eSectionTypeCode;
1527  if (section_name == g_sect_name_data)
1528  return eSectionTypeData;
1529  return eSectionTypeOther;
1530  case S_ZEROFILL:
1531  return eSectionTypeZeroFill;
1532  case S_CSTRING_LITERALS: // section with only literal C strings
1533  return eSectionTypeDataCString;
1534  case S_4BYTE_LITERALS: // section with only 4 byte literals
1535  return eSectionTypeData4;
1536  case S_8BYTE_LITERALS: // section with only 8 byte literals
1537  return eSectionTypeData8;
1538  case S_LITERAL_POINTERS: // section with only pointers to literals
1539  return eSectionTypeDataPointers;
1540  case S_NON_LAZY_SYMBOL_POINTERS: // section with only non-lazy symbol pointers
1541  return eSectionTypeDataPointers;
1542  case S_LAZY_SYMBOL_POINTERS: // section with only lazy symbol pointers
1543  return eSectionTypeDataPointers;
1544  case S_SYMBOL_STUBS: // section with only symbol stubs, byte size of stub in
1545  // the reserved2 field
1546  return eSectionTypeCode;
1547  case S_MOD_INIT_FUNC_POINTERS: // section with only function pointers for
1548  // initialization
1549  return eSectionTypeDataPointers;
1550  case S_MOD_TERM_FUNC_POINTERS: // section with only function pointers for
1551  // termination
1552  return eSectionTypeDataPointers;
1553  case S_COALESCED:
1554  return eSectionTypeOther;
1555  case S_GB_ZEROFILL:
1556  return eSectionTypeZeroFill;
1557  case S_INTERPOSING: // section with only pairs of function pointers for
1558  // interposing
1559  return eSectionTypeCode;
1560  case S_16BYTE_LITERALS: // section with only 16 byte literals
1561  return eSectionTypeData16;
1562  case S_DTRACE_DOF:
1563  return eSectionTypeDebug;
1564  case S_LAZY_DYLIB_SYMBOL_POINTERS:
1565  return eSectionTypeDataPointers;
1566  default:
1567  return eSectionTypeOther;
1568  }
1569 }
1570 
// Set while parsing segments when an existing section's file address was
// overwritten; the section-creation loop checks it afterwards and then calls
// SectionFileAddressesChanged() on the module.
// NOTE(review): the struct header and its other members are absent from this
// listing.
1576  bool FileAddressesChanged = false;
1577 
1581 };
1582 
// Parses one LC_SEGMENT / LC_SEGMENT_64 load command and creates the
// corresponding Section objects.
//
// `load_cmd_` carries the already-read cmd/cmdsize words, `offset` points just
// past them in m_data, `cmd_idx` is the load command index (used for
// warnings), and `context` carries the running segment/section counters, the
// encrypted ranges and the unified section list shared across object files.
//
// The segment command is read into a 64-bit layout regardless of flavour,
// sanitized against the file length, remembered in m_mach_segments, and turned
// into a container Section. Each of its sections is then read, remembered in
// m_mach_sections, and added as a child Section. Parsing stops silently if any
// read from m_data fails.
//
// NOTE(review): the first signature line is absent from this listing.
1584  const llvm::MachO::load_command &load_cmd_, lldb::offset_t offset,
1585  uint32_t cmd_idx, SegmentParsingContext &context) {
1586  llvm::MachO::segment_command_64 load_cmd;
      // Only the leading bytes that make up load_cmd_ are copied; the remaining
      // fields are filled in from m_data below.
1587  memcpy(&load_cmd, &load_cmd_, sizeof(load_cmd_));
1588 
1589  if (!m_data.GetU8(&offset, (uint8_t *)load_cmd.segname, 16))
1590  return;
1591 
1592  ModuleSP module_sp = GetModule();
1593  const bool is_core = GetType() == eTypeCoreFile;
1594  const bool is_dsym = (m_header.filetype == MH_DSYM);
1595  bool add_section = true;
1596  bool add_to_unified = true;
      // segname is a fixed-width field that need not be NUL terminated, hence
      // the strnlen bound.
1597  ConstString const_segname(
1598  load_cmd.segname, strnlen(load_cmd.segname, sizeof(load_cmd.segname)));
1599 
1600  SectionSP unified_section_sp(
1601  context.UnifiedList.FindSectionByName(const_segname));
1602  if (is_dsym && unified_section_sp) {
1603  if (const_segname == GetSegmentNameLINKEDIT()) {
1604  // We need to keep the __LINKEDIT segment private to this object file
1605  // only
1606  add_to_unified = false;
1607  } else {
1608  // This is the dSYM file and this section has already been created by the
1609  // object file, no need to create it.
1610  add_section = false;
1611  }
1612  }
      // GetAddress() reads as many bytes as m_data's configured address size,
      // so the same code reads both the 32-bit and the 64-bit command layout.
1613  load_cmd.vmaddr = m_data.GetAddress(&offset);
1614  load_cmd.vmsize = m_data.GetAddress(&offset);
1615  load_cmd.fileoff = m_data.GetAddress(&offset);
1616  load_cmd.filesize = m_data.GetAddress(&offset);
      // Four 32-bit words starting at maxprot; initprot, nsects and flags are
      // all used further down.
1617  if (!m_data.GetU32(&offset, &load_cmd.maxprot, 4))
1618  return;
1619 
1620  SanitizeSegmentCommand(load_cmd, cmd_idx);
1621 
1622  const uint32_t segment_permissions = GetSegmentPermissions(load_cmd);
1623  const bool segment_is_encrypted =
1624  (load_cmd.flags & SG_PROTECTED_VERSION_1) != 0;
1625 
1626  // Keep a list of mach segments around in case we need to get at data that
1627  // isn't stored in the abstracted Sections.
1628  m_mach_segments.push_back(load_cmd);
1629 
1630  // Use a segment ID of the segment index shifted left by 8 so they never
1631  // conflict with any of the sections.
1632  SectionSP segment_sp;
1633  if (add_section && (const_segname || is_core)) {
1634  segment_sp = std::make_shared<Section>(
1635  module_sp, // Module to which this section belongs
1636  this, // Object file to which this sections belongs
1637  ++context.NextSegmentIdx
1638  << 8, // Section ID is the 1 based segment index
1639  // shifted left by 8 bits so as not to collide with any of the 256
1640  // section IDs that are possible
1641  const_segname, // Name of this section
1642  eSectionTypeContainer, // This section is a container of other
1643  // sections.
1644  load_cmd.vmaddr, // File VM address == addresses as they are
1645  // found in the object file
1646  load_cmd.vmsize, // VM size in bytes of this section
1647  load_cmd.fileoff, // Offset to the data for this section in
1648  // the file
1649  load_cmd.filesize, // Size in bytes of this section as found
1650  // in the file
1651  0, // Segments have no alignment information
1652  load_cmd.flags); // Flags for this section
1653 
1654  segment_sp->SetIsEncrypted(segment_is_encrypted);
1655  m_sections_up->AddSection(segment_sp);
1656  segment_sp->SetPermissions(segment_permissions);
1657  if (add_to_unified)
1658  context.UnifiedList.AddSection(segment_sp);
1659  } else if (unified_section_sp) {
1660  // If this is a dSYM and the file addresses in the dSYM differ from the
1661  // file addresses in the ObjectFile, we must use the file base address for
1662  // the Section from the dSYM for the DWARF to resolve correctly.
1663  // This only happens with binaries in the shared cache in practice;
1664  // normally a mismatch like this would give a binary & dSYM that do not
1665  // match UUIDs. When a binary is included in the shared cache, its
1666  // segments are rearranged to optimize the shared cache, so its file
1667  // addresses will differ from what the ObjectFile had originally,
1668  // and what the dSYM has.
1669  if (is_dsym && unified_section_sp->GetFileAddress() != load_cmd.vmaddr) {
1670  Log *log = GetLog(LLDBLog::Symbols);
1671  if (log) {
1672  log->Printf(
1673  "Installing dSYM's %s segment file address over ObjectFile's "
1674  "so symbol table/debug info resolves correctly for %s",
1675  const_segname.AsCString(),
1676  module_sp->GetFileSpec().GetFilename().AsCString());
1677  }
1678 
1679  // Make sure we've parsed the symbol table from the ObjectFile before
1680  // we go around changing its Sections.
1681  module_sp->GetObjectFile()->GetSymtab();
1682  // eh_frame would present the same problems but we parse that on a per-
1683  // function basis as-needed so it's more difficult to remove its use of
1684  // the Sections. Realistically, the environments where this code path
1685  // will be taken will not have eh_frame sections.
1686 
1687  unified_section_sp->SetFileAddress(load_cmd.vmaddr);
1688 
1689  // Notify the module that the section addresses have been changed once
1690  // we're done so any file-address caches can be updated.
1691  context.FileAddressesChanged = true;
1692  }
1693  m_sections_up->AddSection(unified_section_sp);
1694  }
1695 
1696  llvm::MachO::section_64 sect64;
1697  ::memset(&sect64, 0, sizeof(sect64));
1698  // Push a section into our mach sections for the section at index zero
1699  // (NO_SECT) if we don't have any mach sections yet...
1700  if (m_mach_sections.empty())
1701  m_mach_sections.push_back(sect64);
1702  uint32_t segment_sect_idx;
      // ID that the first section created for this segment will receive.
1703  const lldb::user_id_t first_segment_sectID = context.NextSectionIdx + 1;
1704 
      // Number of 32-bit words that follow addr/size in a section record: one
      // fewer for LC_SEGMENT than for LC_SEGMENT_64.
1705  const uint32_t num_u32s = load_cmd.cmd == LC_SEGMENT ? 7 : 8;
1706  for (segment_sect_idx = 0; segment_sect_idx < load_cmd.nsects;
1707  ++segment_sect_idx) {
1708  if (m_data.GetU8(&offset, (uint8_t *)sect64.sectname,
1709  sizeof(sect64.sectname)) == nullptr)
1710  break;
1711  if (m_data.GetU8(&offset, (uint8_t *)sect64.segname,
1712  sizeof(sect64.segname)) == nullptr)
1713  break;
1714  sect64.addr = m_data.GetAddress(&offset);
1715  sect64.size = m_data.GetAddress(&offset);
1716 
1717  if (m_data.GetU32(&offset, &sect64.offset, num_u32s) == nullptr)
1718  break;
1719 
      // Same fix-up as applied to the segment's file offset in
      // SanitizeSegmentCommand: make the offset relative to the image's __TEXT
      // address.
1720  if (IsSharedCacheBinary() && !IsInMemory()) {
1721  sect64.offset = sect64.addr - m_text_address;
1722  }
1723 
1724  // Keep a list of mach sections around in case we need to get at data that
1725  // isn't stored in the abstracted Sections.
1726  m_mach_sections.push_back(sect64);
1727 
1728  if (add_section) {
1729  ConstString section_name(
1730  sect64.sectname, strnlen(sect64.sectname, sizeof(sect64.sectname)));
1731  if (!const_segname) {
1732  // We have a segment with no name so we need to conjure up segments
1733  // that correspond to the section's segname if there isn't already such
1734  // a section. If there is such a section, we resize the section so that
1735  // it spans all sections. We also mark these sections as fake so
1736  // address matches don't hit if they land in the gaps between the child
1737  // sections.
1738  const_segname.SetTrimmedCStringWithLength(sect64.segname,
1739  sizeof(sect64.segname));
1740  segment_sp = context.UnifiedList.FindSectionByName(const_segname);
1741  if (segment_sp.get()) {
1742  Section *segment = segment_sp.get();
1743  // Grow the section size as needed.
1744  const lldb::addr_t sect64_min_addr = sect64.addr;
1745  const lldb::addr_t sect64_max_addr = sect64_min_addr + sect64.size;
1746  const lldb::addr_t curr_seg_byte_size = segment->GetByteSize();
1747  const lldb::addr_t curr_seg_min_addr = segment->GetFileAddress();
1748  const lldb::addr_t curr_seg_max_addr =
1749  curr_seg_min_addr + curr_seg_byte_size;
1750  if (sect64_min_addr >= curr_seg_min_addr) {
1751  const lldb::addr_t new_seg_byte_size =
1752  sect64_max_addr - curr_seg_min_addr;
1753  // Only grow the section size if needed
1754  if (new_seg_byte_size > curr_seg_byte_size)
1755  segment->SetByteSize(new_seg_byte_size);
1756  } else {
1757  // We need to change the base address of the segment and adjust the
1758  // child section offsets for all existing children.
      // slide_amount is computed in unsigned arithmetic; in this branch the
      // section starts below the segment, so the value wraps and acts as a
      // negative slide.
1759  const lldb::addr_t slide_amount =
1760  sect64_min_addr - curr_seg_min_addr;
1761  segment->Slide(slide_amount, false);
1762  segment->GetChildren().Slide(-slide_amount, false);
1763  segment->SetByteSize(curr_seg_max_addr - sect64_min_addr);
1764  }
1765 
1766  // Grow the section's file range as needed.
1767  if (sect64.offset) {
1768  const lldb::addr_t segment_min_file_offset =
1769  segment->GetFileOffset();
1770  const lldb::addr_t segment_max_file_offset =
1771  segment_min_file_offset + segment->GetFileSize();
1772 
1773  const lldb::addr_t section_min_file_offset = sect64.offset;
1774  const lldb::addr_t section_max_file_offset =
1775  section_min_file_offset + sect64.size;
1776  const lldb::addr_t new_file_offset =
1777  std::min(section_min_file_offset, segment_min_file_offset);
1778  const lldb::addr_t new_file_size =
1779  std::max(section_max_file_offset, segment_max_file_offset) -
1780  new_file_offset;
1781  segment->SetFileOffset(new_file_offset);
1782  segment->SetFileSize(new_file_size);
1783  }
1784  } else {
1785  // Create a fake section for the section's named segment
1786  segment_sp = std::make_shared<Section>(
1787  segment_sp, // Parent section
1788  module_sp, // Module to which this section belongs
1789  this, // Object file to which this section belongs
1790  ++context.NextSegmentIdx
1791  << 8, // Section ID is the 1 based segment index
1792  // shifted left by 8 bits so as not to
1793  // collide with any of the 256 section IDs
1794  // that are possible
1795  const_segname, // Name of this section
1796  eSectionTypeContainer, // This section is a container of
1797  // other sections.
1798  sect64.addr, // File VM address == addresses as they are
1799  // found in the object file
1800  sect64.size, // VM size in bytes of this section
1801  sect64.offset, // Offset to the data for this section in
1802  // the file
1803  sect64.offset ? sect64.size : 0, // Size in bytes of
1804  // this section as
1805  // found in the file
1806  sect64.align,
1807  load_cmd.flags); // Flags for this section
1808  segment_sp->SetIsFake(true);
1809  segment_sp->SetPermissions(segment_permissions);
1810  m_sections_up->AddSection(segment_sp);
1811  if (add_to_unified)
1812  context.UnifiedList.AddSection(segment_sp);
1813  segment_sp->SetIsEncrypted(segment_is_encrypted);
1814  }
1815  }
1816  assert(segment_sp.get());
1817 
1818  lldb::SectionType sect_type = GetSectionType(sect64.flags, section_name);
1819 
      // The child's address is stored relative to its parent segment; a zero
      // file offset is treated as "no file contents".
1820  SectionSP section_sp(new Section(
1821  segment_sp, module_sp, this, ++context.NextSectionIdx, section_name,
1822  sect_type, sect64.addr - segment_sp->GetFileAddress(), sect64.size,
1823  sect64.offset, sect64.offset == 0 ? 0 : sect64.size, sect64.align,
1824  sect64.flags));
1825  // Set the section to be encrypted to match the segment
1826 
1827  bool section_is_encrypted = false;
1828  if (!segment_is_encrypted && load_cmd.filesize != 0)
1829  section_is_encrypted = context.EncryptedRanges.FindEntryThatContains(
1830  sect64.offset) != nullptr;
1831 
1832  section_sp->SetIsEncrypted(segment_is_encrypted || section_is_encrypted);
1833  section_sp->SetPermissions(segment_permissions);
1834  segment_sp->GetChildren().AddSection(section_sp);
1835 
      // A fake segment was conjured from this section's segname; forget it so
      // the next section goes through the same lookup again.
1836  if (segment_sp->IsFake()) {
1837  segment_sp.reset();
1838  const_segname.Clear();
1839  }
1840  }
1841  }
      // For dSYM files, give zero-sized sections a size: the distance to the
      // next section when there is one, otherwise the segment's VM size.
1842  if (segment_sp && is_dsym) {
1843  if (first_segment_sectID <= context.NextSectionIdx) {
1844  lldb::user_id_t sect_uid;
1845  for (sect_uid = first_segment_sectID; sect_uid <= context.NextSectionIdx;
1846  ++sect_uid) {
1847  SectionSP curr_section_sp(
1848  segment_sp->GetChildren().FindSectionByID(sect_uid));
1849  SectionSP next_section_sp;
1850  if (sect_uid + 1 <= context.NextSectionIdx)
1851  next_section_sp =
1852  segment_sp->GetChildren().FindSectionByID(sect_uid + 1);
1853 
1854  if (curr_section_sp.get()) {
1855  if (curr_section_sp->GetByteSize() == 0) {
1856  if (next_section_sp.get() != nullptr)
1857  curr_section_sp->SetByteSize(next_section_sp->GetFileAddress() -
1858  curr_section_sp->GetFileAddress());
1859  else
1860  curr_section_sp->SetByteSize(load_cmd.vmsize);
1861  }
1862  }
1863  }
1864  }
1865  }
1866 }
1867 
// Caches an LC_DYSYMTAB load command in m_dysymtab.
//
// cmd and cmdsize come from the already-read `load_cmd`; every remaining
// 32-bit field of m_dysymtab, starting at ilocalsym, is read from m_data at
// `offset`.
//
// NOTE(review): the result of GetU32() is ignored here, whereas the other
// LC_DYSYMTAB reader in this file clears m_dysymtab when the read fails --
// confirm whether a short read should be handled the same way.
// NOTE(review): the first signature line is absent from this listing.
1869  const llvm::MachO::load_command &load_cmd, lldb::offset_t offset) {
1870  m_dysymtab.cmd = load_cmd.cmd;
1871  m_dysymtab.cmdsize = load_cmd.cmdsize;
1872  m_data.GetU32(&offset, &m_dysymtab.ilocalsym,
1873  (sizeof(m_dysymtab) / sizeof(uint32_t)) - 2);
1874 }
1875 
1876 void ObjectFileMachO::CreateSections(SectionList &unified_section_list) {
1877  if (m_sections_up)
1878  return;
1879 
1880  m_sections_up = std::make_unique<SectionList>();
1881 
1883  // bool dump_sections = false;
1884  ModuleSP module_sp(GetModule());
1885 
1886  offset = MachHeaderSizeFromMagic(m_header.magic);
1887 
1888  SegmentParsingContext context(GetEncryptedFileRanges(), unified_section_list);
1889  llvm::MachO::load_command load_cmd;
1890  for (uint32_t i = 0; i < m_header.ncmds; ++i) {
1891  const lldb::offset_t load_cmd_offset = offset;
1892  if (m_data.GetU32(&offset, &load_cmd, 2) == nullptr)
1893  break;
1894 
1895  if (load_cmd.cmd == LC_SEGMENT || load_cmd.cmd == LC_SEGMENT_64)
1896  ProcessSegmentCommand(load_cmd, offset, i, context);
1897  else if (load_cmd.cmd == LC_DYSYMTAB)
1898  ProcessDysymtabCommand(load_cmd, offset);
1899 
1900  offset = load_cmd_offset + load_cmd.cmdsize;
1901  }
1902 
1903  if (context.FileAddressesChanged && module_sp)
1904  module_sp->SectionFileAddressesChanged();
1905 }
1906 
// Helper that maps a symbol's section index (n_sect) to a Section, caching
// each section and its address range in m_section_infos on first use.
// NOTE(review): the class header, the constructor's signature, the final
// return of GetSection() and some member declarations are absent from this
// listing.
1908 public:
1910  : m_section_list(section_list), m_section_infos() {
1911  // Get the number of sections down to a depth of 1 to include all segments
1912  // and their sections, but no other sections that may be added for debug
1913  // map or
1914  m_section_infos.resize(section_list->GetNumSections(1));
1915  }
1916 
      // Returns the section with ID `n_sect` if `file_addr` lies inside it, or
      // if the section is empty and starts exactly at `file_addr`. Index 0
      // always yields an empty SectionSP.
1917  SectionSP GetSection(uint8_t n_sect, addr_t file_addr) {
1918  if (n_sect == 0)
1919  return SectionSP();
1920  if (n_sect < m_section_infos.size()) {
1921  if (!m_section_infos[n_sect].section_sp) {
      // First request for this index: look the section up and cache it
      // together with its address range.
1922  SectionSP section_sp(m_section_list->FindSectionByID(n_sect));
1923  m_section_infos[n_sect].section_sp = section_sp;
1924  if (section_sp) {
1925  m_section_infos[n_sect].vm_range.SetBaseAddress(
1926  section_sp->GetFileAddress());
1927  m_section_infos[n_sect].vm_range.SetByteSize(
1928  section_sp->GetByteSize());
1929  } else {
      // Lookup failed: log an error naming the file of the first section, if
      // there is one. The cache slot stays empty, so a later call repeats the
      // lookup.
1930  std::string filename = "<unknown>";
1931  SectionSP first_section_sp(m_section_list->GetSectionAtIndex(0));
1932  if (first_section_sp)
1933  filename = first_section_sp->GetObjectFile()->GetFileSpec().GetPath();
1934 
1935  Host::SystemLog(Host::eSystemLogError,
1936  "error: unable to find section %d for a symbol in "
1937  "%s, corrupt file?\n",
1938  n_sect, filename.c_str());
1939  }
1940  }
1941  if (m_section_infos[n_sect].vm_range.Contains(file_addr)) {
1942  // Symbol is in section.
1943  return m_section_infos[n_sect].section_sp;
1944  } else if (m_section_infos[n_sect].vm_range.GetByteSize() == 0 &&
1945  m_section_infos[n_sect].vm_range.GetBaseAddress() ==
1946  file_addr) {
1947  // Symbol is in section with zero size, but has the same start address
1948  // as the section. This can happen with linker symbols (symbols that
1949  // start with the letter 'l' or 'L').
1950  return m_section_infos[n_sect].section_sp;
1951  }
1952  }
1954  }
1955 
1956 protected:
      // Per-section cache entry; GetSection() also uses a `vm_range` member
      // whose declaration is not visible here.
1957  struct SectionInfo {
1959 
1961  SectionSP section_sp;
1962  };
      // Indexed by section ID (n_sect); sized once in the constructor.
1964  std::vector<SectionInfo> m_section_infos;
1965 };
1966 
// Bit 63; listed below as one of the values that may be stored in
// TrieEntry::flags.
1967 #define TRIE_SYMBOL_IS_THUMB (1ULL << 63)
// One terminal entry decoded from the exports trie; ParseTrieEntries fills in
// the fields.
// NOTE(review): the declarations of `name`, `address` and `import_name`, all
// used by Dump() below, are missing from this listing (embedded numbers 1979,
// 1980 and 1985 are absent) -- confirm against the real file.
1968 struct TrieEntry {
// Debug helper: prints address, flags and other as 16-digit hex values and
// the quoted name, then " -> " and the quoted import name when import_name
// tests true. Always ends the line.
1969  void Dump() const {
1970  printf("0x%16.16llx 0x%16.16llx 0x%16.16llx \"%s\"",
1971  static_cast<unsigned long long>(address),
1972  static_cast<unsigned long long>(flags),
1973  static_cast<unsigned long long>(other), name.GetCString());
1974  if (import_name)
1975  printf(" -> \"%s\"\n", import_name.GetCString());
1976  else
1977  printf("\n");
1978  }
1981  uint64_t flags =
1982  0; // EXPORT_SYMBOL_FLAGS_REEXPORT, EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER,
1983  // TRIE_SYMBOL_IS_THUMB
// Extra value whose meaning depends on flags. ParseTrieEntries stores the
// dylib ordinal here for re-exports, the second ULEB128 value for
// stub-and-resolver entries, and 0 otherwise.
1984  uint64_t other = 0;
1986 };
1987 
1991 
1993 
// Tail of the TrieEntryWithOffset struct, which pairs a TrieEntry (`entry`)
// with a trie offset (`nodeOffset`).
// NOTE(review): the struct header, member declarations and constructor are
// missing from this listing (embedded numbers 1988-1992) -- confirm against
// the real file.
//
// Debug helper: prints the caller-supplied index and nodeOffset, then
// delegates the rest of the line to entry.Dump().
1994  void Dump(uint32_t idx) const {
1995  printf("[%3u] 0x%16.16llx: ", idx,
1996  static_cast<unsigned long long>(nodeOffset));
1997  entry.Dump();
1998  }
1999 
// Orders entries by nodeOffset only; the TrieEntry contents are ignored.
2000  bool operator<(const TrieEntryWithOffset &other) const {
2001  return (nodeOffset < other.nodeOffset);
2002  }
2003 };
2004 
// ParseTrieEntries: recursively decodes the exports-trie node at `offset` in
// `data` and every child node reachable from it.
// NOTE(review): the first line of the signature (embedded number 2005,
// declaring `data` and `offset`) and two lines inside the body (2070-2071)
// are missing from this listing -- confirm against the real file.
//
//   is_arm             - when true, resolver addresses are masked with
//                        THUMB_ADDRESS_BIT_MASK.
//   text_seg_base_addr - added to decoded addresses unless it is
//                        LLDB_INVALID_ADDRESS.
//   nameSlices         - edge labels on the path from the root to this node;
//                        their concatenation is the symbol name.
//   resolver_addresses - receives the resolver address of every
//                        stub-and-resolver entry.
//   reexports          - receives re-export entries with a non-empty import
//                        name.
//   ext_symbols        - receives entries whose flags are exactly 0.
//
// Returns false when a child edge label cannot be read or a recursive call
// fails, true otherwise. An invalid starting offset also returns true.
// NOTE(review): child offsets come straight from `data` and nothing here
// bounds the recursion depth or detects cycles -- confirm malformed input
// cannot recurse without end.
2006  const bool is_arm, addr_t text_seg_base_addr,
2007  std::vector<llvm::StringRef> &nameSlices,
2008  std::set<lldb::addr_t> &resolver_addresses,
2009  std::vector<TrieEntryWithOffset> &reexports,
2010  std::vector<TrieEntryWithOffset> &ext_symbols) {
2011  if (!data.ValidOffset(offset))
2012  return true;
2013 
2014  // Terminal node -- end of a branch, possibly add this to
2015  // the symbol table or resolver table.
2016  const uint64_t terminalSize = data.GetULEB128(&offset);
// The child records begin right after the terminal payload.
2017  lldb::offset_t children_offset = offset + terminalSize;
2018  if (terminalSize != 0) {
2019  TrieEntryWithOffset e(offset);
2020  e.entry.flags = data.GetULEB128(&offset);
2021  const char *import_name = nullptr;
2022  if (e.entry.flags & EXPORT_SYMBOL_FLAGS_REEXPORT) {
// Re-export: no address; payload is a dylib ordinal followed by the
// imported name as a C string.
2023  e.entry.address = 0;
2024  e.entry.other = data.GetULEB128(&offset); // dylib ordinal
2025  import_name = data.GetCStr(&offset);
2026  } else {
// Regular export: payload starts with an address, rebased onto
// text_seg_base_addr when that is valid.
2027  e.entry.address = data.GetULEB128(&offset);
2028  if (text_seg_base_addr != LLDB_INVALID_ADDRESS)
2029  e.entry.address += text_seg_base_addr;
2030  if (e.entry.flags & EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER) {
// Stub-and-resolver: a second ULEB128 value follows. It is stored raw in
// `other`, and its rebased (and, on ARM, masked) form is recorded as a
// resolver address.
2031  e.entry.other = data.GetULEB128(&offset);
2032  uint64_t resolver_addr = e.entry.other;
2033  if (text_seg_base_addr != LLDB_INVALID_ADDRESS)
2034  resolver_addr += text_seg_base_addr;
2035  if (is_arm)
2036  resolver_addr &= THUMB_ADDRESS_BIT_MASK;
2037  resolver_addresses.insert(resolver_addr);
2038  } else
2039  e.entry.other = 0;
2040  }
// Only two kinds of entry are kept: re-exports with a non-empty import name,
// and entries whose flags are exactly 0. Anything else (including
// stub-and-resolver entries) is dropped here.
2041  bool add_this_entry = false;
2042  if (Flags(e.entry.flags).Test(EXPORT_SYMBOL_FLAGS_REEXPORT) &&
2043  import_name && import_name[0]) {
2044  // add symbols that are reexport symbols with a valid import name.
2045  add_this_entry = true;
2046  } else if (e.entry.flags == 0 &&
2047  (import_name == nullptr || import_name[0] == '\0')) {
2048  // add externally visible symbols, in case the nlist record has
2049  // been stripped/omitted.
2050  add_this_entry = true;
2051  }
2052  if (add_this_entry) {
// Rebuild the full symbol name from the edge labels collected so far.
2053  std::string name;
2054  if (!nameSlices.empty()) {
2055  for (auto name_slice : nameSlices)
2056  name.append(name_slice.data(), name_slice.size());
2057  }
// Names of length 0 or 1 leave e.entry.name unset.
2058  if (name.size() > 1) {
2059  // Skip the leading '_'
2060  e.entry.name.SetCStringWithLength(name.c_str() + 1, name.size() - 1);
2061  }
// import_name is only non-null for re-exports, and those reach this point
// only with a non-empty string, so `import_name + 1` stays inside it.
2062  if (import_name) {
2063  // Skip the leading '_'
2064  e.entry.import_name.SetCString(import_name + 1);
2065  }
2066  if (Flags(e.entry.flags).Test(EXPORT_SYMBOL_FLAGS_REEXPORT)) {
2067  reexports.push_back(e);
2068  } else {
// NOTE(review): the body of this `if` (embedded numbers 2070-2071) is
// missing from this listing.
2069  if (is_arm && (e.entry.address & 1)) {
2072  }
2073  ext_symbols.push_back(e);
2074  }
2075  }
2076  }
2077 
// Child records: a one-byte count, then for each child a C-string edge label
// followed by the ULEB128 offset of the child node. A child offset of 0 is
// not followed.
2078  const uint8_t childrenCount = data.GetU8(&children_offset);
2079  for (uint8_t i = 0; i < childrenCount; ++i) {
2080  const char *cstr = data.GetCStr(&children_offset);
2081  if (cstr)
2082  nameSlices.push_back(llvm::StringRef(cstr));
2083  else
2084  return false; // Corrupt data
2085  lldb::offset_t childNodeOffset = data.GetULEB128(&children_offset);
2086  if (childNodeOffset) {
// NOTE(review): on failure this returns before the pop_back below, so
// nameSlices still holds this child's label when false is returned.
2087  if (!ParseTrieEntries(data, childNodeOffset, is_arm, text_seg_base_addr,
2088  nameSlices, resolver_addresses, reexports,
2089  ext_symbols)) {
2090  return false;
2091  }
2092  }
2093  nameSlices.pop_back();
2094  }
2095  return true;
2096 }
2097 
// Classify a symbol from the section that contains it.
//   symbol_name              - in/out; advanced past a recognised
//                              OBJC_CLASS_$_, OBJC_METACLASS_$_ or
//                              OBJC_IVAR_$_ prefix. May be null.
//   demangled_is_synthesized - set to true whenever such a prefix is
//                              stripped; otherwise left untouched.
//   text_section_sp, data_section_sp, data_dirty_section_sp,
//   data_const_section_sp    - segments tested with IsDescendant.
//   symbol_section           - section containing the symbol. It is
//                              dereferenced unconditionally, so callers must
//                              pass a non-empty pointer.
// Returns the value of `type` after classification.
// NOTE(review): the declaration and initial value of `type` (embedded number
// 2105) are missing from this listing, so the result when no branch matches
// cannot be confirmed from here.
2098 static SymbolType GetSymbolType(const char *&symbol_name,
2099  bool &demangled_is_synthesized,
2100  const SectionSP &text_section_sp,
2101  const SectionSP &data_section_sp,
2102  const SectionSP &data_dirty_section_sp,
2103  const SectionSP &data_const_section_sp,
2104  const SectionSP &symbol_section) {
2106 
2107  const char *symbol_sect_name = symbol_section->GetName().AsCString();
2108  if (symbol_section->IsDescendant(text_section_sp.get())) {
// Inside the text segment: presumably IsClear() is true only when none of
// the listed instruction attribute bits are set, in which case the symbol is
// treated as data rather than code -- TODO confirm IsClear semantics.
2109  if (symbol_section->IsClear(S_ATTR_PURE_INSTRUCTIONS |
2110  S_ATTR_SELF_MODIFYING_CODE |
2111  S_ATTR_SOME_INSTRUCTIONS))
2112  type = eSymbolTypeData;
2113  else
2114  type = eSymbolTypeCode;
2115  } else if (symbol_section->IsDescendant(data_section_sp.get()) ||
2116  symbol_section->IsDescendant(data_dirty_section_sp.get()) ||
2117  symbol_section->IsDescendant(data_const_section_sp.get())) {
// `strstr(s, p) == s` is a "starts with" test on the section name.
2118  if (symbol_sect_name &&
2119  ::strstr(symbol_sect_name, "__objc") == symbol_sect_name) {
2120  type = eSymbolTypeRuntime;
2121 
2122  if (symbol_name) {
2123  llvm::StringRef symbol_name_ref(symbol_name);
2124  if (symbol_name_ref.startswith("OBJC_")) {
// Recognised prefixes are stripped from the caller's symbol_name and the
// type is refined to class / metaclass / ivar.
2125  static const llvm::StringRef g_objc_v2_prefix_class("OBJC_CLASS_$_");
2126  static const llvm::StringRef g_objc_v2_prefix_metaclass(
2127  "OBJC_METACLASS_$_");
2128  static const llvm::StringRef g_objc_v2_prefix_ivar("OBJC_IVAR_$_");
2129  if (symbol_name_ref.startswith(g_objc_v2_prefix_class)) {
2130  symbol_name = symbol_name + g_objc_v2_prefix_class.size();
2131  type = eSymbolTypeObjCClass;
2132  demangled_is_synthesized = true;
2133  } else if (symbol_name_ref.startswith(g_objc_v2_prefix_metaclass)) {
2134  symbol_name = symbol_name + g_objc_v2_prefix_metaclass.size();
2135  type = eSymbolTypeObjCMetaClass;
2136  demangled_is_synthesized = true;
2137  } else if (symbol_name_ref.startswith(g_objc_v2_prefix_ivar)) {
2138  symbol_name = symbol_name + g_objc_v2_prefix_ivar.size();
2139  type = eSymbolTypeObjCIVar;
2140  demangled_is_synthesized = true;
2141  }
2142  }
2143  }
2144  } else if (symbol_sect_name &&
2145  ::strstr(symbol_sect_name, "__gcc_except_tab") ==
2146  symbol_sect_name) {
2147  type = eSymbolTypeException;
2148  } else {
2149  type = eSymbolTypeData;
2150  }
2151  } else if (symbol_sect_name &&
2152  ::strstr(symbol_sect_name, "__IMPORT") == symbol_sect_name) {
// Outside the text and data segments: a section whose name starts with
// "__IMPORT" marks the symbol as a trampoline.
2153  type = eSymbolTypeTrampoline;
2154  }
2155  return type;
2156 }
2157 
2158 // Read the UUID out of a dyld_shared_cache file on-disk.
// NOTE(review): the first line of the signature (embedded number 2159,
// carrying the function name and the `dyld_shared_cache` parameter) is
// missing from this listing -- confirm against the real file.
//
// Maps the first sizeof(lldb_copy_dyld_cache_header_v1) bytes of the file,
// checks that they begin with "dyld_v", and reads the UUID from the header's
// `uuid` field. Returns a default-constructed UUID when the file cannot be
// mapped or the prefix does not match.
2160  const ByteOrder byte_order,
2161  const uint32_t addr_byte_size) {
2162  UUID dsc_uuid;
2163  DataBufferSP DscData = MapFileData(
2164  dyld_shared_cache, sizeof(struct lldb_copy_dyld_cache_header_v1), 0);
2165  if (!DscData)
2166  return dsc_uuid;
2167  DataExtractor dsc_header_data(DscData, byte_order, addr_byte_size);
2168 
// Compare the first six bytes of the header against "dyld_v".
// NOTE(review): the GetData() result is passed straight to memcpy. If
// GetData can return null when fewer than 6 bytes were mapped, this would
// dereference a null pointer -- confirm.
2169  char version_str[7];
2170  lldb::offset_t offset = 0;
2171  memcpy(version_str, dsc_header_data.GetData(&offset, 6), 6);
2172  version_str[6] = '\0';
2173  if (strcmp(version_str, "dyld_v") == 0) {
2174  offset = offsetof(struct lldb_copy_dyld_cache_header_v1, uuid);
2175  dsc_uuid = UUID::fromOptionalData(
2176  dsc_header_data.GetData(&offset, sizeof(uuid_t)), sizeof(uuid_t));
2177  }
// The UUID is logged only when the Symbols log is on and the UUID is valid.
2178  Log *log = GetLog(LLDBLog::Symbols);
2179  if (log && dsc_uuid.IsValid()) {
2180  LLDB_LOGF(log, "Shared cache %s has UUID %s",
2181  dyld_shared_cache.GetPath().c_str(),
2182  dsc_uuid.GetAsString().c_str());
2183  }
2184  return dsc_uuid;
2185 }
2186 
2187 static llvm::Optional<struct nlist_64>
2188 ParseNList(DataExtractor &nlist_data, lldb::offset_t &nlist_data_offset,
2189  size_t nlist_byte_size) {
2190  struct nlist_64 nlist;
2191  if (!nlist_data.ValidOffsetForDataOfSize(nlist_data_offset, nlist_byte_size))
2192  return {};
2193  nlist.n_strx = nlist_data.GetU32_unchecked(&nlist_data_offset);
2194  nlist.n_type = nlist_data.GetU8_unchecked(&nlist_data_offset);
2195  nlist.n_sect = nlist_data.GetU8_unchecked(&nlist_data_offset);
2196  nlist.n_desc = nlist_data.GetU16_unchecked(&nlist_data_offset);
2197  nlist.n_value = nlist_data.GetAddress_unchecked(&nlist_data_offset);
2198  return nlist;
2199 }
2200 
// Named stand-ins for true/false.
// NOTE(review): presumably passed where a boolean "is a debug symbol"
// argument is expected; the uses are not visible in this part of the file --
// confirm.
2201 enum { DebugSymbols = true, NonDebugSymbols = false };
2202 
2204  ModuleSP module_sp(GetModule());
2205  if (!module_sp)
2206  return;
2207 
2208  const FileSpec &file = m_file ? m_file : module_sp->GetFileSpec();
2209  const char *file_name = file.GetFilename().AsCString("<Unknown>");
2210  LLDB_SCOPED_TIMERF("ObjectFileMachO::ParseSymtab () module = %s", file_name);
2211  Progress progress(llvm::formatv("Parsing symbol table for {0}", file_name));
2212 
2213  llvm::MachO::symtab_command symtab_load_command = {0, 0, 0, 0, 0, 0};
2214  llvm::MachO::linkedit_data_command function_starts_load_command = {0, 0, 0, 0};
2215  llvm::MachO::linkedit_data_command exports_trie_load_command = {0, 0, 0, 0};
2216  llvm::MachO::dyld_info_command dyld_info = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2217  llvm::MachO::dysymtab_command dysymtab = m_dysymtab;
2218  // The data element of type bool indicates that this entry is thumb
2219  // code.
2220  typedef AddressDataArray<lldb::addr_t, bool, 100> FunctionStarts;
2221 
2222  // Record the address of every function/data that we add to the symtab.
2223  // We add symbols to the table in the order of most information (nlist
2224  // records) to least (function starts), and avoid duplicating symbols
2225  // via this set.
2226  llvm::DenseSet<addr_t> symbols_added;
2227 
2228  // We are using a llvm::DenseSet for "symbols_added" so we must be sure we
2229  // do not add the tombstone or empty keys to the set.
2230  auto add_symbol_addr = [&symbols_added](lldb::addr_t file_addr) {
2231  // Don't add the tombstone or empty keys.
2232  if (file_addr == UINT64_MAX || file_addr == UINT64_MAX - 1)
2233  return;
2234  symbols_added.insert(file_addr);
2235  };
2236  FunctionStarts function_starts;
2238  uint32_t i;
2239  FileSpecList dylib_files;
2240  Log *log = GetLog(LLDBLog::Symbols);
2241  llvm::StringRef g_objc_v2_prefix_class("_OBJC_CLASS_$_");
2242  llvm::StringRef g_objc_v2_prefix_metaclass("_OBJC_METACLASS_$_");
2243  llvm::StringRef g_objc_v2_prefix_ivar("_OBJC_IVAR_$_");
2244  UUID image_uuid;
2245 
2246  for (i = 0; i < m_header.ncmds; ++i) {
2247  const lldb::offset_t cmd_offset = offset;
2248  // Read in the load command and load command size
2249  llvm::MachO::load_command lc;
2250  if (m_data.GetU32(&offset, &lc, 2) == nullptr)
2251  break;
2252  // Watch for the symbol table load command
2253  switch (lc.cmd) {
2254  case LC_SYMTAB:
2255  symtab_load_command.cmd = lc.cmd;
2256  symtab_load_command.cmdsize = lc.cmdsize;
2257  // Read in the rest of the symtab load command
2258  if (m_data.GetU32(&offset, &symtab_load_command.symoff, 4) ==
2259  nullptr) // fill in symoff, nsyms, stroff, strsize fields
2260  return;
2261  break;
2262 
2263  case LC_DYLD_INFO:
2264  case LC_DYLD_INFO_ONLY:
2265  if (m_data.GetU32(&offset, &dyld_info.rebase_off, 10)) {
2266  dyld_info.cmd = lc.cmd;
2267  dyld_info.cmdsize = lc.cmdsize;
2268  } else {
2269  memset(&dyld_info, 0, sizeof(dyld_info));
2270  }
2271  break;
2272 
2273  case LC_LOAD_DYLIB:
2274  case LC_LOAD_WEAK_DYLIB:
2275  case LC_REEXPORT_DYLIB:
2276  case LC_LOADFVMLIB:
2277  case LC_LOAD_UPWARD_DYLIB: {
2278  uint32_t name_offset = cmd_offset + m_data.GetU32(&offset);
2279  const char *path = m_data.PeekCStr(name_offset);
2280  if (path) {
2281  FileSpec file_spec(path);
2282  // Strip the path if there is @rpath, @executable, etc so we just use
2283  // the basename
2284  if (path[0] == '@')
2285  file_spec.GetDirectory().Clear();
2286 
2287  if (lc.cmd == LC_REEXPORT_DYLIB) {
2288  m_reexported_dylibs.AppendIfUnique(file_spec);
2289  }
2290 
2291  dylib_files.Append(file_spec);
2292  }
2293  } break;
2294 
2295  case LC_DYLD_EXPORTS_TRIE:
2296  exports_trie_load_command.cmd = lc.cmd;
2297  exports_trie_load_command.cmdsize = lc.cmdsize;
2298  if (m_data.GetU32(&offset, &exports_trie_load_command.dataoff, 2) ==
2299  nullptr) // fill in offset and size fields
2300  memset(&exports_trie_load_command, 0,
2301  sizeof(exports_trie_load_command));
2302  break;
2303  case LC_FUNCTION_STARTS:
2304  function_starts_load_command.cmd = lc.cmd;
2305  function_starts_load_command.cmdsize = lc.cmdsize;
2306  if (m_data.GetU32(&offset, &function_starts_load_command.dataoff, 2) ==
2307  nullptr) // fill in data offset and size fields
2308  memset(&function_starts_load_command, 0,
2309  sizeof(function_starts_load_command));
2310  break;
2311 
2312  case LC_UUID: {
2313  const uint8_t *uuid_bytes = m_data.PeekData(offset, 16);
2314 
2315  if (uuid_bytes)
2316  image_uuid = UUID::fromOptionalData(uuid_bytes, 16);
2317  break;
2318  }
2319 
2320  default:
2321  break;
2322  }
2323  offset = cmd_offset + lc.cmdsize;
2324  }
2325 
2326  if (!symtab_load_command.cmd)
2327  return;
2328 
2329  SectionList *section_list = GetSectionList();
2330  if (section_list == nullptr)
2331  return;
2332 
2333  const uint32_t addr_byte_size = m_data.GetAddressByteSize();
2334  const ByteOrder byte_order = m_data.GetByteOrder();
2335  bool bit_width_32 = addr_byte_size == 4;
2336  const size_t nlist_byte_size =
2337  bit_width_32 ? sizeof(struct nlist) : sizeof(struct nlist_64);
2338 
2339  DataExtractor nlist_data(nullptr, 0, byte_order, addr_byte_size);
2340  DataExtractor strtab_data(nullptr, 0, byte_order, addr_byte_size);
2341  DataExtractor function_starts_data(nullptr, 0, byte_order, addr_byte_size);
2342  DataExtractor indirect_symbol_index_data(nullptr, 0, byte_order,
2343  addr_byte_size);
2344  DataExtractor dyld_trie_data(nullptr, 0, byte_order, addr_byte_size);
2345 
2346  const addr_t nlist_data_byte_size =
2347  symtab_load_command.nsyms * nlist_byte_size;
2348  const addr_t strtab_data_byte_size = symtab_load_command.strsize;
2349  addr_t strtab_addr = LLDB_INVALID_ADDRESS;
2350 
2351  ProcessSP process_sp(m_process_wp.lock());
2352  Process *process = process_sp.get();
2353 
2354  uint32_t memory_module_load_level = eMemoryModuleLoadLevelComplete;
2355  bool is_shared_cache_image = IsSharedCacheBinary();
2356  bool is_local_shared_cache_image = is_shared_cache_image && !IsInMemory();
2357  SectionSP linkedit_section_sp(
2358  section_list->FindSectionByName(GetSegmentNameLINKEDIT()));
2359 
2360  if (process && m_header.filetype != llvm::MachO::MH_OBJECT &&
2361  !is_local_shared_cache_image) {
2362  Target &target = process->GetTarget();
2363 
2364  memory_module_load_level = target.GetMemoryModuleLoadLevel();
2365 
2366  // Reading mach file from memory in a process or core file...
2367 
2368  if (linkedit_section_sp) {
2369  addr_t linkedit_load_addr =
2370  linkedit_section_sp->GetLoadBaseAddress(&target);
2371  if (linkedit_load_addr == LLDB_INVALID_ADDRESS) {
2372  // We might be trying to access the symbol table before the
2373  // __LINKEDIT's load address has been set in the target. We can't
2374  // fail to read the symbol table, so calculate the right address
2375  // manually
2376  linkedit_load_addr = CalculateSectionLoadAddressForMemoryImage(
2377  m_memory_addr, GetMachHeaderSection(), linkedit_section_sp.get());
2378  }
2379 
2380  const addr_t linkedit_file_offset = linkedit_section_sp->GetFileOffset();
2381  const addr_t symoff_addr = linkedit_load_addr +
2382  symtab_load_command.symoff -
2383  linkedit_file_offset;
2384  strtab_addr = linkedit_load_addr + symtab_load_command.stroff -
2385  linkedit_file_offset;
2386 
2387  // Always load dyld - the dynamic linker - from memory if we didn't
2388  // find a binary anywhere else. lldb will not register
2389  // dylib/framework/bundle loads/unloads if we don't have the dyld
2390  // symbols, we force dyld to load from memory despite the user's
2391  // target.memory-module-load-level setting.
2392  if (memory_module_load_level == eMemoryModuleLoadLevelComplete ||
2393  m_header.filetype == llvm::MachO::MH_DYLINKER) {
2394  DataBufferSP nlist_data_sp(
2395  ReadMemory(process_sp, symoff_addr, nlist_data_byte_size));
2396  if (nlist_data_sp)
2397  nlist_data.SetData(nlist_data_sp, 0, nlist_data_sp->GetByteSize());
2398  if (dysymtab.nindirectsyms != 0) {
2399  const addr_t indirect_syms_addr = linkedit_load_addr +
2400  dysymtab.indirectsymoff -
2401  linkedit_file_offset;
2402  DataBufferSP indirect_syms_data_sp(ReadMemory(
2403  process_sp, indirect_syms_addr, dysymtab.nindirectsyms * 4));
2404  if (indirect_syms_data_sp)
2405  indirect_symbol_index_data.SetData(
2406  indirect_syms_data_sp, 0,
2407  indirect_syms_data_sp->GetByteSize());
2408  // If this binary is outside the shared cache,
2409  // cache the string table.
2410  // Binaries in the shared cache all share a giant string table,
2411  // and we can't share the string tables across multiple
2412  // ObjectFileMachO's, so we'd end up re-reading this mega-strtab
2413  // for every binary in the shared cache - it would be a big perf
2414  // problem. For binaries outside the shared cache, it's faster to
2415  // read the entire strtab at once instead of piece-by-piece as we
2416  // process the nlist records.
2417  if (!is_shared_cache_image) {
2418  DataBufferSP strtab_data_sp(
2419  ReadMemory(process_sp, strtab_addr, strtab_data_byte_size));
2420  if (strtab_data_sp) {
2421  strtab_data.SetData(strtab_data_sp, 0,
2422  strtab_data_sp->GetByteSize());
2423  }
2424  }
2425  }
2426  if (memory_module_load_level >= eMemoryModuleLoadLevelPartial) {
2427  if (function_starts_load_command.cmd) {
2428  const addr_t func_start_addr =
2429  linkedit_load_addr + function_starts_load_command.dataoff -
2430  linkedit_file_offset;
2431  DataBufferSP func_start_data_sp(
2432  ReadMemory(process_sp, func_start_addr,
2433  function_starts_load_command.datasize));
2434  if (func_start_data_sp)
2435  function_starts_data.SetData(func_start_data_sp, 0,
2436  func_start_data_sp->GetByteSize());
2437  }
2438  }
2439  }
2440  }
2441  } else {
2442  if (is_local_shared_cache_image) {
2443  // The load commands in shared cache images are relative to the
2444  // beginning of the shared cache, not the library image. The
2445  // data we get handed when creating the ObjectFileMachO starts
2446  // at the beginning of a specific library and spans to the end
2447  // of the cache to be able to reach the shared LINKEDIT
2448  // segments. We need to convert the load command offsets to be
2449  // relative to the beginning of our specific image.
2450  lldb::addr_t linkedit_offset = linkedit_section_sp->GetFileOffset();
2451  lldb::offset_t linkedit_slide =
2452  linkedit_offset - m_linkedit_original_offset;
2453  symtab_load_command.symoff += linkedit_slide;
2454  symtab_load_command.stroff += linkedit_slide;
2455  dyld_info.export_off += linkedit_slide;
2456  dysymtab.indirectsymoff += linkedit_slide;
2457  function_starts_load_command.dataoff += linkedit_slide;
2458  exports_trie_load_command.dataoff += linkedit_slide;
2459  }
2460 
2461  nlist_data.SetData(m_data, symtab_load_command.symoff,
2462  nlist_data_byte_size);
2463  strtab_data.SetData(m_data, symtab_load_command.stroff,
2464  strtab_data_byte_size);
2465 
2466  // We shouldn't have exports data from both the LC_DYLD_INFO command
2467  // AND the LC_DYLD_EXPORTS_TRIE command in the same binary:
2468  lldbassert(!((dyld_info.export_size > 0)
2469  && (exports_trie_load_command.datasize > 0)));
2470  if (dyld_info.export_size > 0) {
2471  dyld_trie_data.SetData(m_data, dyld_info.export_off,
2472  dyld_info.export_size);
2473  } else if (exports_trie_load_command.datasize > 0) {
2474  dyld_trie_data.SetData(m_data, exports_trie_load_command.dataoff,
2475  exports_trie_load_command.datasize);
2476  }
2477 
2478  if (dysymtab.nindirectsyms != 0) {
2479  indirect_symbol_index_data.SetData(m_data, dysymtab.indirectsymoff,
2480  dysymtab.nindirectsyms * 4);
2481  }
2482  if (function_starts_load_command.cmd) {
2483  function_starts_data.SetData(m_data, function_starts_load_command.dataoff,
2484  function_starts_load_command.datasize);
2485  }
2486  }
2487 
2488  const bool have_strtab_data = strtab_data.GetByteSize() > 0;
2489 
2490  ConstString g_segment_name_TEXT = GetSegmentNameTEXT();
2491  ConstString g_segment_name_DATA = GetSegmentNameDATA();
2492  ConstString g_segment_name_DATA_DIRTY = GetSegmentNameDATA_DIRTY();
2493  ConstString g_segment_name_DATA_CONST = GetSegmentNameDATA_CONST();
2494  ConstString g_segment_name_OBJC = GetSegmentNameOBJC();
2495  ConstString g_section_name_eh_frame = GetSectionNameEHFrame();
2496  SectionSP text_section_sp(
2497  section_list->FindSectionByName(g_segment_name_TEXT));
2498  SectionSP data_section_sp(
2499  section_list->FindSectionByName(g_segment_name_DATA));
2500  SectionSP data_dirty_section_sp(
2501  section_list->FindSectionByName(g_segment_name_DATA_DIRTY));
2502  SectionSP data_const_section_sp(
2503  section_list->FindSectionByName(g_segment_name_DATA_CONST));
2504  SectionSP objc_section_sp(
2505  section_list->FindSectionByName(g_segment_name_OBJC));
2506  SectionSP eh_frame_section_sp;
2507  if (text_section_sp.get())
2508  eh_frame_section_sp = text_section_sp->GetChildren().FindSectionByName(
2509  g_section_name_eh_frame);
2510  else
2511  eh_frame_section_sp =
2512  section_list->FindSectionByName(g_section_name_eh_frame);
2513 
2514  const bool is_arm = (m_header.cputype == llvm::MachO::CPU_TYPE_ARM);
2515  const bool always_thumb = GetArchitecture().IsAlwaysThumbInstructions();
2516 
2517  // lldb works best if it knows the start address of all functions in a
2518  // module. Linker symbols or debug info are normally the best source of
2519  // information for start addr / size but they may be stripped in a released
2520  // binary. Two additional sources of information exist in Mach-O binaries:
2521  // LC_FUNCTION_STARTS - a list of ULEB128 encoded offsets of each
2522  // function's start address in the
2523  // binary, relative to the text section.
2524  // eh_frame - the eh_frame FDEs have the start addr & size of
2525  // each function
2526  // LC_FUNCTION_STARTS is the fastest source to read in, and is present on
2527  // all modern binaries.
2528  // Binaries built to run on older releases may need to use eh_frame
2529  // information.
2530 
2531  if (text_section_sp && function_starts_data.GetByteSize()) {
2532  FunctionStarts::Entry function_start_entry;
2533  function_start_entry.data = false;
2534  lldb::offset_t function_start_offset = 0;
2535  function_start_entry.addr = text_section_sp->GetFileAddress();
2536  uint64_t delta;
2537  while ((delta = function_starts_data.GetULEB128(&function_start_offset)) >
2538  0) {
2539  // Now append the current entry
2540  function_start_entry.addr += delta;
2541  if (is_arm) {
2542  if (function_start_entry.addr & 1) {
2543  function_start_entry.addr &= THUMB_ADDRESS_BIT_MASK;
2544  function_start_entry.data = true;
2545  } else if (always_thumb) {
2546  function_start_entry.data = true;
2547  }
2548  }
2549  function_starts.Append(function_start_entry);
2550  }
2551  } else {
2552  // If m_type is eTypeDebugInfo, then this is a dSYM - it will have the
2553  // load command claiming an eh_frame but it doesn't actually have the
2554  // eh_frame content. And if we have a dSYM, we don't need to do any of
2555  // this fill-in-the-missing-symbols work anyway - the debug info should
2556  // give us all the functions in the module.
2557  if (text_section_sp.get() && eh_frame_section_sp.get() &&
2558  m_type != eTypeDebugInfo) {
2559  DWARFCallFrameInfo eh_frame(*this, eh_frame_section_sp,
2560  DWARFCallFrameInfo::EH);
2562  eh_frame.GetFunctionAddressAndSizeVector(functions);
2563  addr_t text_base_addr = text_section_sp->GetFileAddress();
2564  size_t count = functions.GetSize();
2565  for (size_t i = 0; i < count; ++i) {
2567  functions.GetEntryAtIndex(i);
2568  if (func) {
2569  FunctionStarts::Entry function_start_entry;
2570  function_start_entry.addr = func->base - text_base_addr;
2571  if (is_arm) {
2572  if (function_start_entry.addr & 1) {
2573  function_start_entry.addr &= THUMB_ADDRESS_BIT_MASK;
2574  function_start_entry.data = true;
2575  } else if (always_thumb) {
2576  function_start_entry.data = true;
2577  }
2578  }
2579  function_starts.Append(function_start_entry);
2580  }
2581  }
2582  }
2583  }
2584 
2585  const size_t function_starts_count = function_starts.GetSize();
2586 
2587  // For user process binaries (executables, dylibs, frameworks, bundles), if
2588  // we don't have LC_FUNCTION_STARTS/eh_frame section in this binary, we're
2589  // going to assume the binary has been stripped. Don't allow assembly
2590  // language instruction emulation because we don't know proper function
2591  // start boundaries.
2592  //
2593  // For all other types of binaries (kernels, stand-alone bare board
2594  // binaries, kexts), they may not have LC_FUNCTION_STARTS / eh_frame
2595  // sections - we should not make any assumptions about them based on that.
2596  if (function_starts_count == 0 && CalculateStrata() == eStrataUser) {
2598  Log *unwind_or_symbol_log(GetLog(LLDBLog::Symbols | LLDBLog::Unwind));
2599 
2600  if (unwind_or_symbol_log)
2601  module_sp->LogMessage(
2602  unwind_or_symbol_log,
2603  "no LC_FUNCTION_STARTS, will not allow assembly profiled unwinds");
2604  }
2605 
2606  const user_id_t TEXT_eh_frame_sectID = eh_frame_section_sp.get()
2607  ? eh_frame_section_sp->GetID()
2608  : static_cast<user_id_t>(NO_SECT);
2609 
2610  uint32_t N_SO_index = UINT32_MAX;
2611 
2612  MachSymtabSectionInfo section_info(section_list);
2613  std::vector<uint32_t> N_FUN_indexes;
2614  std::vector<uint32_t> N_NSYM_indexes;
2615  std::vector<uint32_t> N_INCL_indexes;
2616  std::vector<uint32_t> N_BRAC_indexes;
2617  std::vector<uint32_t> N_COMM_indexes;
2618  typedef std::multimap<uint64_t, uint32_t> ValueToSymbolIndexMap;
2619  typedef llvm::DenseMap<uint32_t, uint32_t> NListIndexToSymbolIndexMap;
2620  typedef llvm::DenseMap<const char *, uint32_t> ConstNameToSymbolIndexMap;
2621  ValueToSymbolIndexMap N_FUN_addr_to_sym_idx;
2622  ValueToSymbolIndexMap N_STSYM_addr_to_sym_idx;
2623  ConstNameToSymbolIndexMap N_GSYM_name_to_sym_idx;
2624  // Any symbols that get merged into another will get an entry in this map
2625  // so we know
2626  NListIndexToSymbolIndexMap m_nlist_idx_to_sym_idx;
2627  uint32_t nlist_idx = 0;
2628  Symbol *symbol_ptr = nullptr;
2629 
2630  uint32_t sym_idx = 0;
2631  Symbol *sym = nullptr;
2632  size_t num_syms = 0;
2633  std::string memory_symbol_name;
2634  uint32_t unmapped_local_symbols_found = 0;
2635 
2636  std::vector<TrieEntryWithOffset> reexport_trie_entries;
2637  std::vector<TrieEntryWithOffset> external_sym_trie_entries;
2638  std::set<lldb::addr_t> resolver_addresses;
2639 
2640  if (dyld_trie_data.GetByteSize() > 0) {
2641  ConstString text_segment_name("__TEXT");
2642  SectionSP text_segment_sp =
2643  GetSectionList()->FindSectionByName(text_segment_name);
2644  lldb::addr_t text_segment_file_addr = LLDB_INVALID_ADDRESS;
2645  if (text_segment_sp)
2646  text_segment_file_addr = text_segment_sp->GetFileAddress();
2647  std::vector<llvm::StringRef> nameSlices;
2648  ParseTrieEntries(dyld_trie_data, 0, is_arm, text_segment_file_addr,
2649  nameSlices, resolver_addresses, reexport_trie_entries,
2650  external_sym_trie_entries);
2651  }
2652 
2653  typedef std::set<ConstString> IndirectSymbols;
2654  IndirectSymbols indirect_symbol_names;
2655 
2656 #if TARGET_OS_IPHONE
2657 
2658  // Some recent builds of the dyld_shared_cache (hereafter: DSC) have been
2659  // optimized by moving LOCAL symbols out of the memory mapped portion of
2660  // the DSC. The symbol information has all been retained, but it isn't
2661  // available in the normal nlist data. However, there *are* duplicate
2662  // entries of *some*
2663  // LOCAL symbols in the normal nlist data. To handle this situation
2664  // correctly, we must first attempt
2665  // to parse any DSC unmapped symbol information. If we find any, we set a
2666  // flag that tells the normal nlist parser to ignore all LOCAL symbols.
2667 
2668  if (IsSharedCacheBinary()) {
2669  // Before we can start mapping the DSC, we need to make certain the
2670  // target process is actually using the cache we can find.
2671 
2672  // Next we need to determine the correct path for the dyld shared cache.
2673 
2674  ArchSpec header_arch = GetArchitecture();
2675 
2676  UUID dsc_uuid;
2677  UUID process_shared_cache_uuid;
2678  addr_t process_shared_cache_base_addr;
2679 
2680  if (process) {
2681  GetProcessSharedCacheUUID(process, process_shared_cache_base_addr,
2682  process_shared_cache_uuid);
2683  }
2684 
2685  __block bool found_image = false;
2686  __block void *nlist_buffer = nullptr;
2687  __block unsigned nlist_count = 0;
2688  __block char *string_table = nullptr;
2689  __block vm_offset_t vm_nlist_memory = 0;
2690  __block mach_msg_type_number_t vm_nlist_bytes_read = 0;
2691  __block vm_offset_t vm_string_memory = 0;
2692  __block mach_msg_type_number_t vm_string_bytes_read = 0;
2693 
2694  auto _ = llvm::make_scope_exit(^{
2695  if (vm_nlist_memory)
2696  vm_deallocate(mach_task_self(), vm_nlist_memory, vm_nlist_bytes_read);
2697  if (vm_string_memory)
2698  vm_deallocate(mach_task_self(), vm_string_memory, vm_string_bytes_read);
2699  });
2700 
2701  typedef llvm::DenseMap<ConstString, uint16_t> UndefinedNameToDescMap;
2702  typedef llvm::DenseMap<uint32_t, ConstString> SymbolIndexToName;
2703  UndefinedNameToDescMap undefined_name_to_desc;
2704  SymbolIndexToName reexport_shlib_needs_fixup;
2705 
2706  dyld_for_each_installed_shared_cache(^(dyld_shared_cache_t shared_cache) {
2707  uuid_t cache_uuid;
2708  dyld_shared_cache_copy_uuid(shared_cache, &cache_uuid);
2709  if (found_image)
2710  return;
2711 
2712  if (process_shared_cache_uuid.IsValid() &&
2713  process_shared_cache_uuid != UUID::fromOptionalData(&cache_uuid, 16))
2714  return;
2715 
2716  dyld_shared_cache_for_each_image(shared_cache, ^(dyld_image_t image) {
2717  uuid_t dsc_image_uuid;
2718  if (found_image)
2719  return;
2720 
2721  dyld_image_copy_uuid(image, &dsc_image_uuid);
2722  if (image_uuid != UUID::fromOptionalData(dsc_image_uuid, 16))
2723  return;
2724 
2725  found_image = true;
2726 
2727  // Compute the size of the string table. We need to ask dyld for a
2728  // new SPI to avoid this step.
2729  dyld_image_local_nlist_content_4Symbolication(
2730  image, ^(const void *nlistStart, uint64_t nlistCount,
2731  const char *stringTable) {
2732  if (!nlistStart || !nlistCount)
2733  return;
2734 
2735  // The buffers passed here are valid only inside the block.
2736  // Use vm_read to make a cheap copy of them available for our
2737  // processing later.
2738  kern_return_t ret =
2739  vm_read(mach_task_self(), (vm_address_t)nlistStart,
2740  nlist_byte_size * nlistCount, &vm_nlist_memory,
2741  &vm_nlist_bytes_read);
2742  if (ret != KERN_SUCCESS)
2743  return;
2744  assert(vm_nlist_bytes_read == nlist_byte_size * nlistCount);
2745 
2746  // We don't know the size of the string table. It's cheaper
2747  // to map the whole VM region than to determine the size by
2748  // parsing all the nlist entries.
2749  vm_address_t string_address = (vm_address_t)stringTable;
2750  vm_size_t region_size;
2751  mach_msg_type_number_t info_count = VM_REGION_BASIC_INFO_COUNT_64;
2752  vm_region_basic_info_data_t info;
2753  memory_object_name_t object;
2754  ret = vm_region_64(mach_task_self(), &string_address,
2755  &region_size, VM_REGION_BASIC_INFO_64,
2756  (vm_region_info_t)&info, &info_count, &object);
2757  if (ret != KERN_SUCCESS)
2758  return;
2759 
2760  ret = vm_read(mach_task_self(), (vm_address_t)stringTable,
2761  region_size -
2762  ((vm_address_t)stringTable - string_address),
2763  &vm_string_memory, &vm_string_bytes_read);
2764  if (ret != KERN_SUCCESS)
2765  return;
2766 
2767  nlist_buffer = (void *)vm_nlist_memory;
2768  string_table = (char *)vm_string_memory;
2769  nlist_count = nlistCount;
2770  });
2771  });
2772  });
2773  if (nlist_buffer) {
2774  DataExtractor dsc_local_symbols_data(nlist_buffer,
2775  nlist_count * nlist_byte_size,
2776  byte_order, addr_byte_size);
2777  unmapped_local_symbols_found = nlist_count;
2778 
2779  // The normal nlist code cannot correctly size the Symbols
2780  // array, we need to allocate it here.
2781  sym = symtab.Resize(
2782  symtab_load_command.nsyms + m_dysymtab.nindirectsyms +
2783  unmapped_local_symbols_found - m_dysymtab.nlocalsym);
2784  num_syms = symtab.GetNumSymbols();
2785 
2786  lldb::offset_t nlist_data_offset = 0;
2787 
2788  for (uint32_t nlist_index = 0;
2789  nlist_index < nlist_count;
2790  nlist_index++) {
2791  /////////////////////////////
2792  {
2793  llvm::Optional<struct nlist_64> nlist_maybe =
2794  ParseNList(dsc_local_symbols_data, nlist_data_offset,
2795  nlist_byte_size);
2796  if (!nlist_maybe)
2797  break;
2798  struct nlist_64 nlist = *nlist_maybe;
2799 
2801  const char *symbol_name = string_table + nlist.n_strx;
2802 
2803  if (symbol_name == NULL) {
2804  // No symbol should be NULL, even the symbols with no
2805  // string values should have an offset zero which
2806  // points to an empty C-string
2807  Host::SystemLog(
2808  Host::eSystemLogError,
2809  "error: DSC unmapped local symbol[%u] has invalid "
2810  "string table offset 0x%x in %s, ignoring symbol\n",
2811  nlist_index, nlist.n_strx,
2812  module_sp->GetFileSpec().GetPath().c_str());
2813  continue;
2814  }
2815  if (symbol_name[0] == '\0')
2816  symbol_name = NULL;
2817 
2818  const char *symbol_name_non_abi_mangled = NULL;
2819 
2820  SectionSP symbol_section;
2821  uint32_t symbol_byte_size = 0;
2822  bool add_nlist = true;
2823  bool is_debug = ((nlist.n_type & N_STAB) != 0);
2824  bool demangled_is_synthesized = false;
2825  bool is_gsym = false;
2826  bool set_value = true;
2827 
2828  assert(sym_idx < num_syms);
2829 
2830  sym[sym_idx].SetDebug(is_debug);
2831 
2832  if (is_debug) {
2833  switch (nlist.n_type) {
2834  case N_GSYM:
2835  // global symbol: name,,NO_SECT,type,0
2836  // Sometimes the N_GSYM value contains the address.
2837 
2838  // FIXME: In the .o files, we have a GSYM and a debug
2839  // symbol for all the ObjC data. They
2840  // have the same address, but we want to ensure that
2841  // we always find only the real symbol, 'cause we
2842  // don't currently correctly attribute the
2843  // GSYM one to the ObjCClass/Ivar/MetaClass
2844  // symbol type. This is a temporary hack to make
2845  // sure the ObjectiveC symbols get treated correctly.
2846  // To do this right, we should coalesce all the GSYM
2847  // & global symbols that have the same address.
2848 
2849  is_gsym = true;
2850  sym[sym_idx].SetExternal(true);
2851 
2852  if (symbol_name && symbol_name[0] == '_' &&
2853  symbol_name[1] == 'O') {
2854  llvm::StringRef symbol_name_ref(symbol_name);
2855  if (symbol_name_ref.startswith(
2856  g_objc_v2_prefix_class)) {
2857  symbol_name_non_abi_mangled = symbol_name + 1;
2858  symbol_name =
2859  symbol_name + g_objc_v2_prefix_class.size();
2860  type = eSymbolTypeObjCClass;
2861  demangled_is_synthesized = true;
2862 
2863  } else if (symbol_name_ref.startswith(
2864  g_objc_v2_prefix_metaclass)) {
2865  symbol_name_non_abi_mangled = symbol_name + 1;
2866  symbol_name =
2867  symbol_name + g_objc_v2_prefix_metaclass.size();
2868  type = eSymbolTypeObjCMetaClass;
2869  demangled_is_synthesized = true;
2870  } else if (symbol_name_ref.startswith(
2871  g_objc_v2_prefix_ivar)) {
2872  symbol_name_non_abi_mangled = symbol_name + 1;
2873  symbol_name =
2874  symbol_name + g_objc_v2_prefix_ivar.size();
2875  type = eSymbolTypeObjCIVar;
2876  demangled_is_synthesized = true;
2877  }
2878  } else {
2879  if (nlist.n_value != 0)
2880  symbol_section = section_info.GetSection(
2881  nlist.n_sect, nlist.n_value);
2882  type = eSymbolTypeData;
2883  }
2884  break;
2885 
2886  case N_FNAME:
2887  // procedure name (f77 kludge): name,,NO_SECT,0,0
2888  type = eSymbolTypeCompiler;
2889  break;
2890 
2891  case N_FUN:
2892  // procedure: name,,n_sect,linenumber,address
2893  if (symbol_name) {
2894  type = eSymbolTypeCode;
2895  symbol_section = section_info.GetSection(
2896  nlist.n_sect, nlist.n_value);
2897 
2898  N_FUN_addr_to_sym_idx.insert(
2899  std::make_pair(nlist.n_value, sym_idx));
2900  // We use the current number of symbols in the
2901  // symbol table in lieu of using nlist_idx in case
2902  // we ever start trimming entries out
2903  N_FUN_indexes.push_back(sym_idx);
2904  } else {
2905  type = eSymbolTypeCompiler;
2906 
2907  if (!N_FUN_indexes.empty()) {
2908  // Copy the size of the function into the
2909  // original
2910  // STAB entry so we don't have
2911  // to hunt for it later
2912  symtab.SymbolAtIndex(N_FUN_indexes.back())
2913  ->SetByteSize(nlist.n_value);
2914  N_FUN_indexes.pop_back();
2915  // We don't really need the end function STAB as
2916  // it contains the size which we already placed
2917  // with the original symbol, so don't add it if
2918  // we want a minimal symbol table
2919  add_nlist = false;
2920  }
2921  }
2922  break;
2923 
2924  case N_STSYM:
2925  // static symbol: name,,n_sect,type,address
2926  N_STSYM_addr_to_sym_idx.insert(
2927  std::make_pair(nlist.n_value, sym_idx));
2928  symbol_section = section_info.GetSection(nlist.n_sect,
2929  nlist.n_value);
2930  if (symbol_name && symbol_name[0]) {
2931  type = ObjectFile::GetSymbolTypeFromName(
2932  symbol_name + 1, eSymbolTypeData);
2933  }
2934  break;
2935 
2936  case N_LCSYM:
2937  // .lcomm symbol: name,,n_sect,type,address
2938  symbol_section = section_info.GetSection(nlist.n_sect,
2939  nlist.n_value);
2940  type = eSymbolTypeCommonBlock;
2941  break;
2942 
2943  case N_BNSYM:
2944  // We use the current number of symbols in the symbol
2945  // table in lieu of using nlist_idx in case we ever
2946  // start trimming entries out. Skip these if we want
2947  // minimal symbol tables
2948  add_nlist = false;
2949  break;
2950 
2951  case N_ENSYM:
2952  // Set the size of the N_BNSYM to the terminating
2953  // index of this N_ENSYM so that we can always skip
2954  // the entire symbol if we need to navigate more
2955  // quickly at the source level when parsing STABS
2956  // Skip these if we want minimal symbol tables
2957  add_nlist = false;
2958  break;
2959 
2960  case N_OPT:
2961  // emitted with gcc2_compiled and in gcc source
2962  type = eSymbolTypeCompiler;
2963  break;
2964 
2965  case N_RSYM:
2966  // register sym: name,,NO_SECT,type,register
2967  type = eSymbolTypeVariable;
2968  break;
2969 
2970  case N_SLINE:
2971  // src line: 0,,n_sect,linenumber,address
2972  symbol_section = section_info.GetSection(nlist.n_sect,
2973  nlist.n_value);
2974  type = eSymbolTypeLineEntry;
2975  break;
2976 
2977  case N_SSYM:
2978  // structure elt: name,,NO_SECT,type,struct_offset
2979  type = eSymbolTypeVariableType;
2980  break;
2981 
2982  case N_SO:
2983  // source file name
2984  type = eSymbolTypeSourceFile;
2985  if (symbol_name == NULL) {
2986  add_nlist = false;
2987  if (N_SO_index != UINT32_MAX) {
2988  // Set the size of the N_SO to the terminating
2989  // index of this N_SO so that we can always skip
2990  // the entire N_SO if we need to navigate more
2991  // quickly at the source level when parsing STABS
2992  symbol_ptr = symtab.SymbolAtIndex(N_SO_index);
2993  symbol_ptr->SetByteSize(sym_idx);
2994  symbol_ptr->SetSizeIsSibling(true);
2995  }
2996  N_NSYM_indexes.clear();
2997  N_INCL_indexes.clear();
2998  N_BRAC_indexes.clear();
2999  N_COMM_indexes.clear();
3000  N_FUN_indexes.clear();
3001  N_SO_index = UINT32_MAX;
3002  } else {
3003  // We use the current number of symbols in the
3004  // symbol table in lieu of using nlist_idx in case
3005  // we ever start trimming entries out
3006  const bool N_SO_has_full_path = symbol_name[0] == '/';
3007  if (N_SO_has_full_path) {
3008  if ((N_SO_index == sym_idx - 1) &&
3009  ((sym_idx - 1) < num_syms)) {
3010  // We have two consecutive N_SO entries where
3011  // the first contains a directory and the
3012  // second contains a full path.
3013  sym[sym_idx - 1].GetMangled().SetValue(
3014  ConstString(symbol_name), false);
3015  m_nlist_idx_to_sym_idx[nlist_idx] = sym_idx - 1;
3016  add_nlist = false;
3017  } else {
3018  // This is the first entry in a N_SO that
3019  // contains a directory or
3020  // a full path to the source file
3021  N_SO_index = sym_idx;
3022  }
3023  } else if ((N_SO_index == sym_idx - 1) &&
3024  ((sym_idx - 1) < num_syms)) {
3025  // This is usually the second N_SO entry that
3026  // contains just the filename, so here we combine
3027  // it with the first one if we are minimizing the
3028  // symbol table
3029  const char *so_path = sym[sym_idx - 1]
3030  .GetMangled()
3031  .GetDemangledName()
3032  .AsCString();
3033  if (so_path && so_path[0]) {
3034  std::string full_so_path(so_path);
3035  const size_t double_slash_pos =
3036  full_so_path.find("//");
3037  if (double_slash_pos != std::string::npos) {
3038  // The linker has been generating bad N_SO
3039  // entries with doubled up paths
3040  // in the format "%s%s" where the first
3041  // string is the DW_AT_comp_dir, and the
3042  // second is the directory for the source
3043  // file so you end up with a path that looks
3044  // like "/tmp/src//tmp/src/"
3045  FileSpec so_dir(so_path);
3046  if (!FileSystem::Instance().Exists(so_dir)) {
3047  so_dir.SetFile(
3048  &full_so_path[double_slash_pos + 1],
3049  FileSpec::Style::native);
3050  if (FileSystem::Instance().Exists(so_dir)) {
3051  // Trim off the incorrect path
3052  full_so_path.erase(0, double_slash_pos + 1);
3053  }
3054  }
3055  }
3056  if (*full_so_path.rbegin() != '/')
3057  full_so_path += '/';
3058  full_so_path += symbol_name;
3059  sym[sym_idx - 1].GetMangled().SetValue(
3060  ConstString(full_so_path.c_str()), false);
3061  add_nlist = false;
3062  m_nlist_idx_to_sym_idx[nlist_idx] = sym_idx - 1;
3063  }
3064  } else {
3065  // This could be a relative path to a N_SO
3066  N_SO_index = sym_idx;
3067  }
3068  }
3069  break;
3070 
3071  case N_OSO:
3072  // object file name: name,,0,0,st_mtime
3073  type = eSymbolTypeObjectFile;
3074  break;
3075 
3076  case N_LSYM:
3077  // local sym: name,,NO_SECT,type,offset
3078  type = eSymbolTypeLocal;
3079  break;
3080 
3081  // INCL scopes
3082  case N_BINCL:
3083  // include file beginning: name,,NO_SECT,0,sum. We use
3084  // the current number of symbols in the symbol table
3085  // in lieu of using nlist_idx in case we ever start
3086  // trimming entries out
3087  N_INCL_indexes.push_back(sym_idx);
3088  type = eSymbolTypeScopeBegin;
3089  break;
3090 
3091  case N_EINCL:
3092  // include file end: name,,NO_SECT,0,0
3093  // Set the size of the N_BINCL to the terminating
3094  // index of this N_EINCL so that we can always skip
3095  // the entire symbol if we need to navigate more
3096  // quickly at the source level when parsing STABS
3097  if (!N_INCL_indexes.empty()) {
3098  symbol_ptr =
3099  symtab.SymbolAtIndex(N_INCL_indexes.back());
3100  symbol_ptr->SetByteSize(sym_idx + 1);
3101  symbol_ptr->SetSizeIsSibling(true);
3102  N_INCL_indexes.pop_back();
3103  }
3104  type = eSymbolTypeScopeEnd;
3105  break;
3106 
3107  case N_SOL:
3108  // #included file name: name,,n_sect,0,address
3109  type = eSymbolTypeHeaderFile;
3110 
3111  // We currently don't use the header files on darwin
3112  add_nlist = false;
3113  break;
3114 
3115  case N_PARAMS:
3116  // compiler parameters: name,,NO_SECT,0,0
3117  type = eSymbolTypeCompiler;
3118  break;
3119 
3120  case N_VERSION:
3121  // compiler version: name,,NO_SECT,0,0
3122  type = eSymbolTypeCompiler;
3123  break;
3124 
3125  case N_OLEVEL:
3126  // compiler -O level: name,,NO_SECT,0,0
3127  type = eSymbolTypeCompiler;
3128  break;
3129 
3130  case N_PSYM:
3131  // parameter: name,,NO_SECT,type,offset
3132  type = eSymbolTypeVariable;
3133  break;
3134 
3135  case N_ENTRY:
3136  // alternate entry: name,,n_sect,linenumber,address
3137  symbol_section = section_info.GetSection(nlist.n_sect,
3138  nlist.n_value);
3139  type = eSymbolTypeLineEntry;
3140  break;
3141 
3142  // Left and Right Braces
3143  case N_LBRAC:
3144  // left bracket: 0,,NO_SECT,nesting level,address. We
3145  // use the current number of symbols in the symbol
3146  // table in lieu of using nlist_idx in case we ever
3147  // start trimming entries out
3148  symbol_section = section_info.GetSection(nlist.n_sect,
3149  nlist.n_value);
3150  N_BRAC_indexes.push_back(sym_idx);
3151  type = eSymbolTypeScopeBegin;
3152  break;
3153 
3154  case N_RBRAC:
3155  // right bracket: 0,,NO_SECT,nesting level,address
3156  // Set the size of the N_LBRAC to the terminating
3157  // index of this N_RBRAC so that we can always skip
3158  // the entire symbol if we need to navigate more
3159  // quickly at the source level when parsing STABS
3160  symbol_section = section_info.GetSection(nlist.n_sect,
3161  nlist.n_value);
3162  if (!N_BRAC_indexes.empty()) {
3163  symbol_ptr =
3164  symtab.SymbolAtIndex(N_BRAC_indexes.back());
3165  symbol_ptr->SetByteSize(sym_idx + 1);
3166  symbol_ptr->SetSizeIsSibling(true);
3167  N_BRAC_indexes.pop_back();
3168  }
3169  type = eSymbolTypeScopeEnd;
3170  break;
3171 
3172  case N_EXCL:
3173  // deleted include file: name,,NO_SECT,0,sum
3174  type = eSymbolTypeHeaderFile;
3175  break;
3176 
3177  // COMM scopes
3178  case N_BCOMM:
3179  // begin common: name,,NO_SECT,0,0
3180  // We use the current number of symbols in the symbol
3181  // table in lieu of using nlist_idx in case we ever
3182  // start trimming entries out
3183  type = eSymbolTypeScopeBegin;
3184  N_COMM_indexes.push_back(sym_idx);
3185  break;
3186 
3187  case N_ECOML:
3188  // end common (local name): 0,,n_sect,0,address
3189  symbol_section = section_info.GetSection(nlist.n_sect,
3190  nlist.n_value);
3191  // Fall through
3192 
3193  case N_ECOMM:
3194  // end common: name,,n_sect,0,0
3195  // Set the size of the N_BCOMM to the terminating
3196  // index of this N_ECOMM/N_ECOML so that we can
3197  // always skip the entire symbol if we need to
3198  // navigate more quickly at the source level when
3199  // parsing STABS
3200  if (!N_COMM_indexes.empty()) {
3201  symbol_ptr =
3202  symtab.SymbolAtIndex(N_COMM_indexes.back());
3203  symbol_ptr->SetByteSize(sym_idx + 1);
3204  symbol_ptr->SetSizeIsSibling(true);
3205  N_COMM_indexes.pop_back();
3206  }
3207  type = eSymbolTypeScopeEnd;
3208  break;
3209 
3210  case N_LENG:
3211  // second stab entry with length information
3212  type = eSymbolTypeAdditional;
3213  break;
3214 
3215  default:
3216  break;
3217  }
3218  } else {
3219  // uint8_t n_pext = N_PEXT & nlist.n_type;
3220  uint8_t n_type = N_TYPE & nlist.n_type;
3221  sym[sym_idx].SetExternal((N_EXT & nlist.n_type) != 0);
3222 
3223  switch (n_type) {
3224  case N_INDR: {
3225  const char *reexport_name_cstr =
3226  strtab_data.PeekCStr(nlist.n_value);
3227  if (reexport_name_cstr && reexport_name_cstr[0]) {
3228  type = eSymbolTypeReExported;
3229  ConstString reexport_name(
3230  reexport_name_cstr +
3231  ((reexport_name_cstr[0] == '_') ? 1 : 0));
3232  sym[sym_idx].SetReExportedSymbolName(reexport_name);
3233  set_value = false;
3234  reexport_shlib_needs_fixup[sym_idx] = reexport_name;
3235  indirect_symbol_names.insert(ConstString(
3236  symbol_name + ((symbol_name[0] == '_') ? 1 : 0)));
3237  } else
3238  type = eSymbolTypeUndefined;
3239  } break;
3240 
3241  case N_UNDF:
3242  if (symbol_name && symbol_name[0]) {
3243  ConstString undefined_name(
3244  symbol_name + ((symbol_name[0] == '_') ? 1 : 0));
3245  undefined_name_to_desc[undefined_name] = nlist.n_desc;
3246  }
3247  // Fall through
3248  case N_PBUD:
3249  type = eSymbolTypeUndefined;
3250  break;
3251 
3252  case N_ABS:
3253  type = eSymbolTypeAbsolute;
3254  break;
3255 
3256  case N_SECT: {
3257  symbol_section = section_info.GetSection(nlist.n_sect,
3258  nlist.n_value);
3259 
3260  if (symbol_section == NULL) {
3261  // TODO: warn about this?
3262  add_nlist = false;
3263  break;
3264  }
3265 
3266  if (TEXT_eh_frame_sectID == nlist.n_sect) {
3267  type = eSymbolTypeException;
3268  } else {
3269  uint32_t section_type =
3270  symbol_section->Get() & SECTION_TYPE;
3271 
3272  switch (section_type) {
3273  case S_CSTRING_LITERALS:
3274  type = eSymbolTypeData;
3275  break; // section with only literal C strings
3276  case S_4BYTE_LITERALS:
3277  type = eSymbolTypeData;
3278  break; // section with only 4 byte literals
3279  case S_8BYTE_LITERALS:
3280  type = eSymbolTypeData;
3281  break; // section with only 8 byte literals
3282  case S_LITERAL_POINTERS:
3283  type = eSymbolTypeTrampoline;
3284  break; // section with only pointers to literals
3285  case S_NON_LAZY_SYMBOL_POINTERS:
3286  type = eSymbolTypeTrampoline;
3287  break; // section with only non-lazy symbol
3288  // pointers
3289  case S_LAZY_SYMBOL_POINTERS:
3290  type = eSymbolTypeTrampoline;
3291  break; // section with only lazy symbol pointers
3292  case S_SYMBOL_STUBS:
3293  type = eSymbolTypeTrampoline;
3294  break; // section with only symbol stubs, byte
3295  // size of stub in the reserved2 field
3296  case S_MOD_INIT_FUNC_POINTERS:
3297  type = eSymbolTypeCode;
3298  break; // section with only function pointers for
3299  // initialization
3300  case S_MOD_TERM_FUNC_POINTERS:
3301  type = eSymbolTypeCode;
3302  break; // section with only function pointers for
3303  // termination
3304  case S_INTERPOSING:
3305  type = eSymbolTypeTrampoline;
3306  break; // section with only pairs of function
3307  // pointers for interposing
3308  case S_16BYTE_LITERALS:
3309  type = eSymbolTypeData;
3310  break; // section with only 16 byte literals
3311  case S_DTRACE_DOF:
3313  break;
3314  case S_LAZY_DYLIB_SYMBOL_POINTERS:
3315  type = eSymbolTypeTrampoline;
3316  break;
3317  default:
3318  switch (symbol_section->GetType()) {
3320  type = eSymbolTypeCode;
3321  break;
3322  case eSectionTypeData:
3323  case eSectionTypeDataCString: // Inlined C string
3324  // data
3325  case eSectionTypeDataCStringPointers: // Pointers
3326  // to C
3327  // string
3328  // data
3329  case eSectionTypeDataSymbolAddress: // Address of
3330  // a symbol in
3331  // the symbol
3332  // table
3333  case eSectionTypeData4:
3334  case eSectionTypeData8:
3335  case eSectionTypeData16:
3336  type = eSymbolTypeData;
3337  break;
3338  default:
3339  break;
3340  }
3341  break;
3342  }
3343 
3344  if (type == eSymbolTypeInvalid) {
3345  const char *symbol_sect_name =
3346  symbol_section->GetName().AsCString();
3347  if (symbol_section->IsDescendant(
3348  text_section_sp.get())) {
3349  if (symbol_section->IsClear(
3350  S_ATTR_PURE_INSTRUCTIONS |
3351  S_ATTR_SELF_MODIFYING_CODE |
3352  S_ATTR_SOME_INSTRUCTIONS))
3353  type = eSymbolTypeData;
3354  else
3355  type = eSymbolTypeCode;
3356  } else if (symbol_section->IsDescendant(
3357  data_section_sp.get()) ||
3358  symbol_section->IsDescendant(
3359  data_dirty_section_sp.get()) ||
3360  symbol_section->IsDescendant(
3361  data_const_section_sp.get())) {
3362  if (symbol_sect_name &&
3363  ::strstr(symbol_sect_name, "__objc") ==
3364  symbol_sect_name) {
3365  type = eSymbolTypeRuntime;
3366 
3367  if (symbol_name) {
3368  llvm::StringRef symbol_name_ref(symbol_name);
3369  if (symbol_name_ref.startswith("_OBJC_")) {
3370  llvm::StringRef
3371  g_objc_v2_prefix_class(
3372  "_OBJC_CLASS_$_");
3373  llvm::StringRef
3374  g_objc_v2_prefix_metaclass(
3375  "_OBJC_METACLASS_$_");
3376  llvm::StringRef
3377  g_objc_v2_prefix_ivar("_OBJC_IVAR_$_");
3378  if (symbol_name_ref.startswith(
3379  g_objc_v2_prefix_class)) {
3380  symbol_name_non_abi_mangled =
3381  symbol_name + 1;
3382  symbol_name =
3383  symbol_name +
3384  g_objc_v2_prefix_class.size();
3385  type = eSymbolTypeObjCClass;
3386  demangled_is_synthesized = true;
3387  } else if (
3388  symbol_name_ref.startswith(
3389  g_objc_v2_prefix_metaclass)) {
3390  symbol_name_non_abi_mangled =
3391  symbol_name + 1;
3392  symbol_name =
3393  symbol_name +
3394  g_objc_v2_prefix_metaclass.size();
3395  type = eSymbolTypeObjCMetaClass;
3396  demangled_is_synthesized = true;
3397  } else if (symbol_name_ref.startswith(
3398  g_objc_v2_prefix_ivar)) {
3399  symbol_name_non_abi_mangled =
3400  symbol_name + 1;
3401  symbol_name =
3402  symbol_name +
3403  g_objc_v2_prefix_ivar.size();
3404  type = eSymbolTypeObjCIVar;
3405  demangled_is_synthesized = true;
3406  }
3407  }
3408  }
3409  } else if (symbol_sect_name &&
3410  ::strstr(symbol_sect_name,
3411  "__gcc_except_tab") ==
3412  symbol_sect_name) {
3413  type = eSymbolTypeException;
3414  } else {
3415  type = eSymbolTypeData;
3416  }
3417  } else if (symbol_sect_name &&
3418  ::strstr(symbol_sect_name, "__IMPORT") ==
3419  symbol_sect_name) {
3420  type = eSymbolTypeTrampoline;
3421  } else if (symbol_section->IsDescendant(
3422  objc_section_sp.get())) {
3423  type = eSymbolTypeRuntime;
3424  if (symbol_name && symbol_name[0] == '.') {
3425  llvm::StringRef symbol_name_ref(symbol_name);
3426  llvm::StringRef
3427  g_objc_v1_prefix_class(".objc_class_name_");
3428  if (symbol_name_ref.startswith(
3429  g_objc_v1_prefix_class)) {
3430  symbol_name_non_abi_mangled = symbol_name;
3431  symbol_name = symbol_name +
3432  g_objc_v1_prefix_class.size();
3433  type = eSymbolTypeObjCClass;
3434  demangled_is_synthesized = true;
3435  }
3436  }
3437  }
3438  }
3439  }
3440  } break;
3441  }
3442  }
3443 
3444  if (add_nlist) {
3445  uint64_t symbol_value = nlist.n_value;
3446  if (symbol_name_non_abi_mangled) {
3447  sym[sym_idx].GetMangled().SetMangledName(
3448  ConstString(symbol_name_non_abi_mangled));
3449  sym[sym_idx].GetMangled().SetDemangledName(
3450  ConstString(symbol_name));
3451  } else {
3452  bool symbol_name_is_mangled = false;
3453 
3454  if (symbol_name && symbol_name[0] == '_') {
3455  symbol_name_is_mangled = symbol_name[1] == '_';
3456  symbol_name++; // Skip the leading underscore
3457  }
3458 
3459  if (symbol_name) {
3460  ConstString const_symbol_name(symbol_name);
3461  sym[sym_idx].GetMangled().SetValue(
3462  const_symbol_name, symbol_name_is_mangled);
3463  if (is_gsym && is_debug) {
3464  const char *gsym_name =
3465  sym[sym_idx]
3466  .GetMangled()
3467  .GetName(Mangled::ePreferMangled)
3468  .GetCString();
3469  if (gsym_name)
3470  N_GSYM_name_to_sym_idx[gsym_name] = sym_idx;
3471  }
3472  }
3473  }
3474  if (symbol_section) {
3475  const addr_t section_file_addr =
3476  symbol_section->GetFileAddress();
3477  if (symbol_byte_size == 0 &&
3478  function_starts_count > 0) {
3479  addr_t symbol_lookup_file_addr = nlist.n_value;
3480  // Do an exact address match for non-ARM addresses,
3481  // else get the closest since the symbol might be a
3482  // thumb symbol which has an address with bit zero
3483  // set
3484  FunctionStarts::Entry *func_start_entry =
3485  function_starts.FindEntry(symbol_lookup_file_addr,
3486  !is_arm);
3487  if (is_arm && func_start_entry) {
3488  // Verify that the function start address is the
3489  // symbol address (ARM) or the symbol address + 1
3490  // (thumb)
3491  if (func_start_entry->addr !=
3492  symbol_lookup_file_addr &&
3493  func_start_entry->addr !=
3494  (symbol_lookup_file_addr + 1)) {
3495  // Not the right entry, NULL it out...
3496  func_start_entry = NULL;
3497  }
3498  }
3499  if (func_start_entry) {
3500  func_start_entry->data = true;
3501 
3502  addr_t symbol_file_addr = func_start_entry->addr;
3503  uint32_t symbol_flags = 0;
3504  if (is_arm) {
3505  if (symbol_file_addr & 1)
3506  symbol_flags = MACHO_NLIST_ARM_SYMBOL_IS_THUMB;
3507  symbol_file_addr &= THUMB_ADDRESS_BIT_MASK;
3508  }
3509 
3510  const FunctionStarts::Entry *next_func_start_entry =
3511  function_starts.FindNextEntry(func_start_entry);
3512  const addr_t section_end_file_addr =
3513  section_file_addr +
3514  symbol_section->GetByteSize();
3515  if (next_func_start_entry) {
3516  addr_t next_symbol_file_addr =
3517  next_func_start_entry->addr;
3518  // Be sure to clear the Thumb address bit when
3519  // we calculate the size from the current and
3520  // next address
3521  if (is_arm)
3522  next_symbol_file_addr &= THUMB_ADDRESS_BIT_MASK;
3523  symbol_byte_size = std::min<lldb::addr_t>(
3524  next_symbol_file_addr - symbol_file_addr,
3525  section_end_file_addr - symbol_file_addr);
3526  } else {
3527  symbol_byte_size =
3528  section_end_file_addr - symbol_file_addr;
3529  }
3530  }
3531  }
3532  symbol_value -= section_file_addr;
3533  }
3534 
3535  if (is_debug == false) {
3536  if (type == eSymbolTypeCode) {
3537  // See if we can find a N_FUN entry for any code
3538  // symbols. If we do find a match, and the name
3539  // matches, then we can merge the two into just the
3540  // function symbol to avoid duplicate entries in
3541  // the symbol table
3542  auto range =
3543  N_FUN_addr_to_sym_idx.equal_range(nlist.n_value);
3544  if (range.first != range.second) {
3545  bool found_it = false;
3546  for (auto pos = range.first; pos != range.second;
3547  ++pos) {
3548  if (sym[sym_idx].GetMangled().GetName(
3549  Mangled::ePreferMangled) ==
3550  sym[pos->second].GetMangled().GetName(
3551  Mangled::ePreferMangled)) {
3552  m_nlist_idx_to_sym_idx[nlist_idx] = pos->second;
3553  // We just need the flags from the linker
3554  // symbol, so put these flags
3555  // into the N_FUN flags to avoid duplicate
3556  // symbols in the symbol table
3557  sym[pos->second].SetExternal(
3558  sym[sym_idx].IsExternal());
3559  sym[pos->second].SetFlags(nlist.n_type << 16 |
3560  nlist.n_desc);
3561  if (resolver_addresses.find(nlist.n_value) !=
3562  resolver_addresses.end())
3563  sym[pos->second].SetType(eSymbolTypeResolver);
3564  sym[sym_idx].Clear();
3565  found_it = true;
3566  break;
3567  }
3568  }
3569  if (found_it)
3570  continue;
3571  } else {
3572  if (resolver_addresses.find(nlist.n_value) !=
3573  resolver_addresses.end())
3574  type = eSymbolTypeResolver;
3575  }
3576  } else if (type == eSymbolTypeData ||
3577  type == eSymbolTypeObjCClass ||
3578  type == eSymbolTypeObjCMetaClass ||
3579  type == eSymbolTypeObjCIVar) {
3580  // See if we can find a N_STSYM entry for any data
3581  // symbols. If we do find a match, and the name
3582  // matches, then we can merge the two into just the
3583  // Static symbol to avoid duplicate entries in the
3584  // symbol table
3585  auto range = N_STSYM_addr_to_sym_idx.equal_range(
3586  nlist.n_value);
3587  if (range.first != range.second) {
3588  bool found_it = false;
3589  for (auto pos = range.first; pos != range.second;
3590  ++pos) {
3591  if (sym[sym_idx].GetMangled().GetName(
3592  Mangled::ePreferMangled) ==
3593  sym[pos->second].GetMangled().GetName(
3594  Mangled::ePreferMangled)) {
3595  m_nlist_idx_to_sym_idx[nlist_idx] = pos->second;
3596  // We just need the flags from the linker
3597  // symbol, so put these flags
3598  // into the N_STSYM flags to avoid duplicate
3599  // symbols in the symbol table
3600  sym[pos->second].SetExternal(
3601  sym[sym_idx].IsExternal());
3602  sym[pos->second].SetFlags(nlist.n_type << 16 |
3603  nlist.n_desc);
3604  sym[sym_idx].Clear();
3605  found_it = true;
3606  break;
3607  }
3608  }
3609  if (found_it)
3610  continue;
3611  } else {
3612  const char *gsym_name =
3613  sym[sym_idx]
3614  .GetMangled()
3615  .GetName(Mangled::ePreferMangled)
3616  .GetCString();
3617  if (gsym_name) {
3618  // Combine N_GSYM stab entries with the non
3619  // stab symbol
3620  ConstNameToSymbolIndexMap::const_iterator pos =
3621  N_GSYM_name_to_sym_idx.find(gsym_name);
3622  if (pos != N_GSYM_name_to_sym_idx.end()) {
3623  const uint32_t GSYM_sym_idx = pos->second;
3624  m_nlist_idx_to_sym_idx[nlist_idx] =
3625  GSYM_sym_idx;
3626  // Copy the address, because often the N_GSYM
3627  // address has an invalid address of zero
3628  // when the global is a common symbol
3629  sym[GSYM_sym_idx].GetAddressRef().SetSection(
3630  symbol_section);
3631  sym[GSYM_sym_idx].GetAddressRef().SetOffset(
3632  symbol_value);
3633  add_symbol_addr(sym[GSYM_sym_idx]
3634  .GetAddress()
3635  .GetFileAddress());
3636  // We just need the flags from the linker
3637  // symbol, so put these flags
3638  // into the N_GSYM flags to avoid duplicate
3639  // symbols in the symbol table
3640  sym[GSYM_sym_idx].SetFlags(nlist.n_type << 16 |
3641  nlist.n_desc);
3642  sym[sym_idx].Clear();
3643  continue;
3644  }
3645  }
3646  }
3647  }
3648  }
3649 
3650  sym[sym_idx].SetID(nlist_idx);
3651  sym[sym_idx].SetType(type);
3652  if (set_value) {
3653  sym[sym_idx].GetAddressRef().SetSection(symbol_section);
3654  sym[sym_idx].GetAddressRef().SetOffset(symbol_value);
3655  add_symbol_addr(
3656  sym[sym_idx].GetAddress().GetFileAddress());
3657  }
3658  sym[sym_idx].SetFlags(nlist.n_type << 16 | nlist.n_desc);
3659 
3660  if (symbol_byte_size > 0)
3661  sym[sym_idx].SetByteSize(symbol_byte_size);
3662 
3663  if (demangled_is_synthesized)
3664  sym[sym_idx].SetDemangledNameIsSynthesized(true);
3665  ++sym_idx;
3666  } else {
3667  sym[sym_idx].Clear();
3668  }
3669  }
3670  /////////////////////////////
3671  }
3672  }
3673 
3674  for (const auto &pos : reexport_shlib_needs_fixup) {
3675  const auto undef_pos = undefined_name_to_desc.find(pos.second);
3676  if (undef_pos != undefined_name_to_desc.end()) {
3677  const uint8_t dylib_ordinal =
3678  llvm::MachO::GET_LIBRARY_ORDINAL(undef_pos->second);
3679  if (dylib_ordinal > 0 && dylib_ordinal < dylib_files.GetSize())
3680  sym[pos.first].SetReExportedSymbolSharedLibrary(
3681  dylib_files.GetFileSpecAtIndex(dylib_ordinal - 1));
3682  }
3683  }
3684  }
3685 
3686 #endif
3687  lldb::offset_t nlist_data_offset = 0;
3688 
3689  if (nlist_data.GetByteSize() > 0) {
3690 
3691  // If the sym array was not created while parsing the DSC unmapped
3692  // symbols, create it now.
3693  if (sym == nullptr) {
3694  sym =
3695  symtab.Resize(symtab_load_command.nsyms + m_dysymtab.nindirectsyms);
3696  num_syms = symtab.GetNumSymbols();
3697  }
3698 
3699  if (unmapped_local_symbols_found) {
3700  assert(m_dysymtab.ilocalsym == 0);
3701  nlist_data_offset += (m_dysymtab.nlocalsym * nlist_byte_size);
3702  nlist_idx = m_dysymtab.nlocalsym;
3703  } else {
3704  nlist_idx = 0;
3705  }
3706 
3707  typedef llvm::DenseMap<ConstString, uint16_t> UndefinedNameToDescMap;
3708  typedef llvm::DenseMap<uint32_t, ConstString> SymbolIndexToName;
3709  UndefinedNameToDescMap undefined_name_to_desc;
3710  SymbolIndexToName reexport_shlib_needs_fixup;
3711 
3712  // Symtab parsing is a huge mess. Everything is entangled and the code
3713  // requires access to a ridiculous amount of variables. LLDB depends
3714  // heavily on the proper merging of symbols and to get that right we need
3715  // to make sure we have parsed all the debug symbols first. Therefore we
3716  // invoke the lambda twice, once to parse only the debug symbols and then
3717  // once more to parse the remaining symbols.
// ParseSymbolLambda handles a single nlist entry.
//
// nlist:      the entry to process.
// nlist_idx:  index of the entry; used as the symbol ID, as the key into
//             m_nlist_idx_to_sym_idx, and in the error message below.
// debug_only: selects the pass. An entry is processed only when "has N_STAB
//             bits set" equals this flag; otherwise it is left untouched.
//
// Every return path in the lambda returns true. sym_idx is advanced only when
// the entry ends up as its own symbol; entries dropped via add_nlist == false
// or merged into an earlier symbol clear sym[sym_idx] and leave sym_idx
// unchanged.
3718  auto ParseSymbolLambda = [&](struct nlist_64 &nlist, uint32_t nlist_idx,
3719  bool debug_only) {
// Skip entries that do not belong to the pass selected by debug_only.
3720  const bool is_debug = ((nlist.n_type & N_STAB) != 0);
3721  if (is_debug != debug_only)
3722  return true;
3723 
3724  const char *symbol_name_non_abi_mangled = nullptr;
3725  const char *symbol_name = nullptr;
3726 
// Fetch the name from the string table data when it is available, otherwise
// read it from process memory.
3727  if (have_strtab_data) {
3728  symbol_name = strtab_data.PeekCStr(nlist.n_strx);
3729 
3730  if (symbol_name == nullptr) {
3731  // No symbol should be NULL, even the symbols with no string values
3732  // should have an offset zero which points to an empty C-string
3733  Host::SystemLog(Host::eSystemLogError,
3734  "error: symbol[%u] has invalid string table offset "
3735  "0x%x in %s, ignoring symbol\n",
3736  nlist_idx, nlist.n_strx,
3737  module_sp->GetFileSpec().GetPath().c_str());
3738  return true;
3739  }
// From here on an empty name is represented as nullptr.
3740  if (symbol_name[0] == '\0')
3741  symbol_name = nullptr;
3742  } else {
3743  const addr_t str_addr = strtab_addr + nlist.n_strx;
3744  Status str_error;
3745  if (process->ReadCStringFromMemory(str_addr, memory_symbol_name,
3746  str_error))
3747  symbol_name = memory_symbol_name.c_str();
3748  }
3749 
3751  SectionSP symbol_section;
3752  lldb::addr_t symbol_byte_size = 0;
3753  bool add_nlist = true;
3754  bool is_gsym = false;
3755  bool demangled_is_synthesized = false;
3756  bool set_value = true;
3757 
3758  assert(sym_idx < num_syms);
3759  sym[sym_idx].SetDebug(is_debug);
3760 
3761  if (is_debug) {
3762  switch (nlist.n_type) {
3763  case N_GSYM:
3764  // global symbol: name,,NO_SECT,type,0
3765  // Sometimes the N_GSYM value contains the address.
3766 
3767  // FIXME: In the .o files, we have a GSYM and a debug symbol for all
3768  // the ObjC data. They
3769  // have the same address, but we want to ensure that we always find
3770  // only the real symbol, 'cause we don't currently correctly
3771  // attribute the GSYM one to the ObjCClass/Ivar/MetaClass symbol
3772  // type. This is a temporary hack to make sure the ObjectiveC
3773  // symbols get treated correctly. To do this right, we should
3774  // coalesce all the GSYM & global symbols that have the same
3775  // address.
3776  is_gsym = true;
3777  sym[sym_idx].SetExternal(true);
3778 
3779  if (symbol_name && symbol_name[0] == '_' && symbol_name[1] == 'O') {
3780  llvm::StringRef symbol_name_ref(symbol_name);
3781  if (symbol_name_ref.startswith(g_objc_v2_prefix_class)) {
3782  symbol_name_non_abi_mangled = symbol_name + 1;
3783  symbol_name = symbol_name + g_objc_v2_prefix_class.size();
3784  type = eSymbolTypeObjCClass;
3785  demangled_is_synthesized = true;
3786 
3787  } else if (symbol_name_ref.startswith(g_objc_v2_prefix_metaclass)) {
3788  symbol_name_non_abi_mangled = symbol_name + 1;
3789  symbol_name = symbol_name + g_objc_v2_prefix_metaclass.size();
3790  type = eSymbolTypeObjCMetaClass;
3791  demangled_is_synthesized = true;
3792  } else if (symbol_name_ref.startswith(g_objc_v2_prefix_ivar)) {
3793  symbol_name_non_abi_mangled = symbol_name + 1;
3794  symbol_name = symbol_name + g_objc_v2_prefix_ivar.size();
3795  type = eSymbolTypeObjCIVar;
3796  demangled_is_synthesized = true;
3797  }
3798  } else {
3799  if (nlist.n_value != 0)
3800  symbol_section =
3801  section_info.GetSection(nlist.n_sect, nlist.n_value);
3802  type = eSymbolTypeData;
3803  }
3804  break;
3805 
3806  case N_FNAME:
3807  // procedure name (f77 kludge): name,,NO_SECT,0,0
3808  type = eSymbolTypeCompiler;
3809  break;
3810 
3811  case N_FUN:
3812  // procedure: name,,n_sect,linenumber,address
3813  if (symbol_name) {
3814  type = eSymbolTypeCode;
3815  symbol_section =
3816  section_info.GetSection(nlist.n_sect, nlist.n_value);
3817 
3818  N_FUN_addr_to_sym_idx.insert(
3819  std::make_pair(nlist.n_value, sym_idx));
3820  // We use the current number of symbols in the symbol table in
3821  // lieu of using nlist_idx in case we ever start trimming entries
3822  // out
3823  N_FUN_indexes.push_back(sym_idx);
3824  } else {
3825  type = eSymbolTypeCompiler;
3826 
3827  if (!N_FUN_indexes.empty()) {
3828  // Copy the size of the function into the original STAB entry
3829  // so we don't have to hunt for it later
3830  symtab.SymbolAtIndex(N_FUN_indexes.back())
3831  ->SetByteSize(nlist.n_value);
3832  N_FUN_indexes.pop_back();
3833  // We don't really need the end function STAB as it contains
3834  // the size which we already placed with the original symbol,
3835  // so don't add it if we want a minimal symbol table
3836  add_nlist = false;
3837  }
3838  }
3839  break;
3840 
3841  case N_STSYM:
3842  // static symbol: name,,n_sect,type,address
3843  N_STSYM_addr_to_sym_idx.insert(
3844  std::make_pair(nlist.n_value, sym_idx));
3845  symbol_section = section_info.GetSection(nlist.n_sect, nlist.n_value);
3846  if (symbol_name && symbol_name[0]) {
3847  type = ObjectFile::GetSymbolTypeFromName(symbol_name + 1,
3848  eSymbolTypeData);
3849  }
3850  break;
3851 
3852  case N_LCSYM:
3853  // .lcomm symbol: name,,n_sect,type,address
3854  symbol_section = section_info.GetSection(nlist.n_sect, nlist.n_value);
3855  type = eSymbolTypeCommonBlock;
3856  break;
3857 
3858  case N_BNSYM:
3859  // We use the current number of symbols in the symbol table in lieu
3860  // of using nlist_idx in case we ever start trimming entries out
3861  // Skip these if we want minimal symbol tables
3862  add_nlist = false;
3863  break;
3864 
3865  case N_ENSYM:
3866  // Set the size of the N_BNSYM to the terminating index of this
3867  // N_ENSYM so that we can always skip the entire symbol if we need
3868  // to navigate more quickly at the source level when parsing STABS
3869  // Skip these if we want minimal symbol tables
3870  add_nlist = false;
3871  break;
3872 
3873  case N_OPT:
3874  // emitted with gcc2_compiled and in gcc source
3875  type = eSymbolTypeCompiler;
3876  break;
3877 
3878  case N_RSYM:
3879  // register sym: name,,NO_SECT,type,register
3880  type = eSymbolTypeVariable;
3881  break;
3882 
3883  case N_SLINE:
3884  // src line: 0,,n_sect,linenumber,address
3885  symbol_section = section_info.GetSection(nlist.n_sect, nlist.n_value);
3886  type = eSymbolTypeLineEntry;
3887  break;
3888 
3889  case N_SSYM:
3890  // structure elt: name,,NO_SECT,type,struct_offset
3891  type = eSymbolTypeVariableType;
3892  break;
3893 
3894  case N_SO:
3895  // source file name
3896  type = eSymbolTypeSourceFile;
3897  if (symbol_name == nullptr) {
3898  add_nlist = false;
3899  if (N_SO_index != UINT32_MAX) {
3900  // Set the size of the N_SO to the terminating index of this
3901  // N_SO so that we can always skip the entire N_SO if we need
3902  // to navigate more quickly at the source level when parsing
3903  // STABS
3904  symbol_ptr = symtab.SymbolAtIndex(N_SO_index);
3905  symbol_ptr->SetByteSize(sym_idx);
3906  symbol_ptr->SetSizeIsSibling(true);
3907  }
3908  N_NSYM_indexes.clear();
3909  N_INCL_indexes.clear();
3910  N_BRAC_indexes.clear();
3911  N_COMM_indexes.clear();
3912  N_FUN_indexes.clear();
3913  N_SO_index = UINT32_MAX;
3914  } else {
3915  // We use the current number of symbols in the symbol table in
3916  // lieu of using nlist_idx in case we ever start trimming entries
3917  // out
3918  const bool N_SO_has_full_path = symbol_name[0] == '/';
3919  if (N_SO_has_full_path) {
3920  if ((N_SO_index == sym_idx - 1) && ((sym_idx - 1) < num_syms)) {
3921  // We have two consecutive N_SO entries where the first
3922  // contains a directory and the second contains a full path.
3923  sym[sym_idx - 1].GetMangled().SetValue(ConstString(symbol_name),
3924  false);
3925  m_nlist_idx_to_sym_idx[nlist_idx] = sym_idx - 1;
3926  add_nlist = false;
3927  } else {
3928  // This is the first entry in a N_SO that contains a
3929  // directory or a full path to the source file
3930  N_SO_index = sym_idx;
3931  }
3932  } else if ((N_SO_index == sym_idx - 1) &&
3933  ((sym_idx - 1) < num_syms)) {
3934  // This is usually the second N_SO entry that contains just the
3935  // filename, so here we combine it with the first one if we are
3936  // minimizing the symbol table
3937  const char *so_path =
3938  sym[sym_idx - 1].GetMangled().GetDemangledName().AsCString();
3939  if (so_path && so_path[0]) {
3940  std::string full_so_path(so_path);
3941  const size_t double_slash_pos = full_so_path.find("//");
3942  if (double_slash_pos != std::string::npos) {
3943  // The linker has been generating bad N_SO entries with
3944  // doubled up paths in the format "%s%s" where the first
3945  // string in the DW_AT_comp_dir, and the second is the
3946  // directory for the source file so you end up with a path
3947  // that looks like "/tmp/src//tmp/src/"
3948  FileSpec so_dir(so_path);
3949  if (!FileSystem::Instance().Exists(so_dir)) {
3950  so_dir.SetFile(&full_so_path[double_slash_pos + 1],
3951  FileSpec::Style::native);
3952  if (FileSystem::Instance().Exists(so_dir)) {
3953  // Trim off the incorrect path
3954  full_so_path.erase(0, double_slash_pos + 1);
3955  }
3956  }
3957  }
3958  if (*full_so_path.rbegin() != '/')
3959  full_so_path += '/';
3960  full_so_path += symbol_name;
3961  sym[sym_idx - 1].GetMangled().SetValue(
3962  ConstString(full_so_path.c_str()), false);
3963  add_nlist = false;
3964  m_nlist_idx_to_sym_idx[nlist_idx] = sym_idx - 1;
3965  }
3966  } else {
3967  // This could be a relative path to a N_SO
3968  N_SO_index = sym_idx;
3969  }
3970  }
3971  break;
3972 
3973  case N_OSO:
3974  // object file name: name,,0,0,st_mtime
3975  type = eSymbolTypeObjectFile;
3976  break;
3977 
3978  case N_LSYM:
3979  // local sym: name,,NO_SECT,type,offset
3980  type = eSymbolTypeLocal;
3981  break;
3982 
3983  // INCL scopes
3984  case N_BINCL:
3985  // include file beginning: name,,NO_SECT,0,sum We use the current
3986  // number of symbols in the symbol table in lieu of using nlist_idx
3987  // in case we ever start trimming entries out
3988  N_INCL_indexes.push_back(sym_idx);
3989  type = eSymbolTypeScopeBegin;
3990  break;
3991 
3992  case N_EINCL:
3993  // include file end: name,,NO_SECT,0,0
3994  // Set the size of the N_BINCL to the terminating index of this
3995  // N_EINCL so that we can always skip the entire symbol if we need
3996  // to navigate more quickly at the source level when parsing STABS
3997  if (!N_INCL_indexes.empty()) {
3998  symbol_ptr = symtab.SymbolAtIndex(N_INCL_indexes.back());
3999  symbol_ptr->SetByteSize(sym_idx + 1);
4000  symbol_ptr->SetSizeIsSibling(true);
4001  N_INCL_indexes.pop_back();
4002  }
4003  type = eSymbolTypeScopeEnd;
4004  break;
4005 
4006  case N_SOL:
4007  // #included file name: name,,n_sect,0,address
4008  type = eSymbolTypeHeaderFile;
4009 
4010  // We currently don't use the header files on darwin
4011  add_nlist = false;
4012  break;
4013 
4014  case N_PARAMS:
4015  // compiler parameters: name,,NO_SECT,0,0
4016  type = eSymbolTypeCompiler;
4017  break;
4018 
4019  case N_VERSION:
4020  // compiler version: name,,NO_SECT,0,0
4021  type = eSymbolTypeCompiler;
4022  break;
4023 
4024  case N_OLEVEL:
4025  // compiler -O level: name,,NO_SECT,0,0
4026  type = eSymbolTypeCompiler;
4027  break;
4028 
4029  case N_PSYM:
4030  // parameter: name,,NO_SECT,type,offset
4031  type = eSymbolTypeVariable;
4032  break;
4033 
4034  case N_ENTRY:
4035  // alternate entry: name,,n_sect,linenumber,address
4036  symbol_section = section_info.GetSection(nlist.n_sect, nlist.n_value);
4037  type = eSymbolTypeLineEntry;
4038  break;
4039 
4040  // Left and Right Braces
4041  case N_LBRAC:
4042  // left bracket: 0,,NO_SECT,nesting level,address We use the
4043  // current number of symbols in the symbol table in lieu of using
4044  // nlist_idx in case we ever start trimming entries out
4045  symbol_section = section_info.GetSection(nlist.n_sect, nlist.n_value);
4046  N_BRAC_indexes.push_back(sym_idx);
4047  type = eSymbolTypeScopeBegin;
4048  break;
4049 
4050  case N_RBRAC:
4051  // right bracket: 0,,NO_SECT,nesting level,address Set the size of
4052  // the N_LBRAC to the terminating index of this N_RBRAC so that we
4053  // can always skip the entire symbol if we need to navigate more
4054  // quickly at the source level when parsing STABS
4055  symbol_section = section_info.GetSection(nlist.n_sect, nlist.n_value);
4056  if (!N_BRAC_indexes.empty()) {
4057  symbol_ptr = symtab.SymbolAtIndex(N_BRAC_indexes.back());
4058  symbol_ptr->SetByteSize(sym_idx + 1);
4059  symbol_ptr->SetSizeIsSibling(true);
4060  N_BRAC_indexes.pop_back();
4061  }
4062  type = eSymbolTypeScopeEnd;
4063  break;
4064 
4065  case N_EXCL:
4066  // deleted include file: name,,NO_SECT,0,sum
4067  type = eSymbolTypeHeaderFile;
4068  break;
4069 
4070  // COMM scopes
4071  case N_BCOMM:
4072  // begin common: name,,NO_SECT,0,0
4073  // We use the current number of symbols in the symbol table in lieu
4074  // of using nlist_idx in case we ever start trimming entries out
4075  type = eSymbolTypeScopeBegin;
4076  N_COMM_indexes.push_back(sym_idx);
4077  break;
4078 
4079  case N_ECOML:
4080  // end common (local name): 0,,n_sect,0,address
4081  symbol_section = section_info.GetSection(nlist.n_sect, nlist.n_value);
4082  LLVM_FALLTHROUGH;
4083 
4084  case N_ECOMM:
4085  // end common: name,,n_sect,0,0
4086  // Set the size of the N_BCOMM to the terminating index of this
4087  // N_ECOMM/N_ECOML so that we can always skip the entire symbol if
4088  // we need to navigate more quickly at the source level when
4089  // parsing STABS
4090  if (!N_COMM_indexes.empty()) {
4091  symbol_ptr = symtab.SymbolAtIndex(N_COMM_indexes.back());
4092  symbol_ptr->SetByteSize(sym_idx + 1);
4093  symbol_ptr->SetSizeIsSibling(true);
4094  N_COMM_indexes.pop_back();
4095  }
4096  type = eSymbolTypeScopeEnd;
4097  break;
4098 
4099  case N_LENG:
4100  // second stab entry with length information
4101  type = eSymbolTypeAdditional;
4102  break;
4103 
4104  default:
4105  break;
4106  }
4107  } else {
4108  uint8_t n_type = N_TYPE & nlist.n_type;
4109  sym[sym_idx].SetExternal((N_EXT & nlist.n_type) != 0);
4110 
4111  switch (n_type) {
4112  case N_INDR: {
// NOTE(review): the re-exported name is always peeked from strtab_data, even
// on the path where have_strtab_data is false -- confirm this is intended.
4113  const char *reexport_name_cstr = strtab_data.PeekCStr(nlist.n_value);
4114  if (reexport_name_cstr && reexport_name_cstr[0]) {
4115  type = eSymbolTypeReExported;
4116  ConstString reexport_name(reexport_name_cstr +
4117  ((reexport_name_cstr[0] == '_') ? 1 : 0));
4118  sym[sym_idx].SetReExportedSymbolName(reexport_name);
4119  set_value = false;
4120  reexport_shlib_needs_fixup[sym_idx] = reexport_name;
// NOTE(review): symbol_name may be nullptr here (it is reset to nullptr above
// for an empty name, and stays nullptr if the memory read fails), in which
// case symbol_name[0] dereferences a null pointer -- confirm / guard.
4121  indirect_symbol_names.insert(
4122  ConstString(symbol_name + ((symbol_name[0] == '_') ? 1 : 0)));
4123  } else
4124  type = eSymbolTypeUndefined;
4125  } break;
4126 
4127  case N_UNDF:
// Remember n_desc for each named undefined symbol, keyed by the name with
// one leading underscore removed.
4128  if (symbol_name && symbol_name[0]) {
4129  ConstString undefined_name(symbol_name +
4130  ((symbol_name[0] == '_') ? 1 : 0));
4131  undefined_name_to_desc[undefined_name] = nlist.n_desc;
4132  }
4133  LLVM_FALLTHROUGH;
4134 
4135  case N_PBUD:
4136  type = eSymbolTypeUndefined;
4137  break;
4138 
4139  case N_ABS:
4140  type = eSymbolTypeAbsolute;
4141  break;
4142 
4143  case N_SECT: {
4144  symbol_section = section_info.GetSection(nlist.n_sect, nlist.n_value);
4145 
4146  if (!symbol_section) {
4147  // TODO: warn about this?
4148  add_nlist = false;
4149  break;
4150  }
4151 
4152  if (TEXT_eh_frame_sectID == nlist.n_sect) {
4153  type = eSymbolTypeException;
4154  } else {
4155  uint32_t section_type = symbol_section->Get() & SECTION_TYPE;
4156 
4157  switch (section_type) {
4158  case S_CSTRING_LITERALS:
4159  type = eSymbolTypeData;
4160  break; // section with only literal C strings
4161  case S_4BYTE_LITERALS:
4162  type = eSymbolTypeData;
4163  break; // section with only 4 byte literals
4164  case S_8BYTE_LITERALS:
4165  type = eSymbolTypeData;
4166  break; // section with only 8 byte literals
4167  case S_LITERAL_POINTERS:
4168  type = eSymbolTypeTrampoline;
4169  break; // section with only pointers to literals
4170  case S_NON_LAZY_SYMBOL_POINTERS:
4171  type = eSymbolTypeTrampoline;
4172  break; // section with only non-lazy symbol pointers
4173  case S_LAZY_SYMBOL_POINTERS:
4174  type = eSymbolTypeTrampoline;
4175  break; // section with only lazy symbol pointers
4176  case S_SYMBOL_STUBS:
4177  type = eSymbolTypeTrampoline;
4178  break; // section with only symbol stubs, byte size of stub in
4179  // the reserved2 field
4180  case S_MOD_INIT_FUNC_POINTERS:
4181  type = eSymbolTypeCode;
4182  break; // section with only function pointers for initialization
4183  case S_MOD_TERM_FUNC_POINTERS:
4184  type = eSymbolTypeCode;
4185  break; // section with only function pointers for termination
4186  case S_INTERPOSING:
4187  type = eSymbolTypeTrampoline;
4188  break; // section with only pairs of function pointers for
4189  // interposing
4190  case S_16BYTE_LITERALS:
4191  type = eSymbolTypeData;
4192  break; // section with only 16 byte literals
4193  case S_DTRACE_DOF:
4195  break;
4196  case S_LAZY_DYLIB_SYMBOL_POINTERS:
4197  type = eSymbolTypeTrampoline;
4198  break;
4199  default:
4200  switch (symbol_section->GetType()) {
4202  type = eSymbolTypeCode;
4203  break;
4204  case eSectionTypeData:
4205  case eSectionTypeDataCString: // Inlined C string data
4206  case eSectionTypeDataCStringPointers: // Pointers to C string
4207  // data
4208  case eSectionTypeDataSymbolAddress: // Address of a symbol in
4209  // the symbol table
4210  case eSectionTypeData4:
4211  case eSectionTypeData8:
4212  case eSectionTypeData16:
4213  type = eSymbolTypeData;
4214  break;
4215  default:
4216  break;
4217  }
4218  break;
4219  }
4220 
// The section type did not decide the symbol type; fall back to which
// top-level section contains the symbol and to the section/symbol names.
4221  if (type == eSymbolTypeInvalid) {
4222  const char *symbol_sect_name =
4223  symbol_section->GetName().AsCString();
4224  if (symbol_section->IsDescendant(text_section_sp.get())) {
4225  if (symbol_section->IsClear(S_ATTR_PURE_INSTRUCTIONS |
4226  S_ATTR_SELF_MODIFYING_CODE |
4227  S_ATTR_SOME_INSTRUCTIONS))
4228  type = eSymbolTypeData;
4229  else
4230  type = eSymbolTypeCode;
4231  } else if (symbol_section->IsDescendant(data_section_sp.get()) ||
4232  symbol_section->IsDescendant(
4233  data_dirty_section_sp.get()) ||
4234  symbol_section->IsDescendant(
4235  data_const_section_sp.get())) {
4236  if (symbol_sect_name &&
4237  ::strstr(symbol_sect_name, "__objc") == symbol_sect_name) {
4238  type = eSymbolTypeRuntime;
4239 
4240  if (symbol_name) {
4241  llvm::StringRef symbol_name_ref(symbol_name);
4242  if (symbol_name_ref.startswith("_OBJC_")) {
4243  llvm::StringRef g_objc_v2_prefix_class(
4244  "_OBJC_CLASS_$_");
4245  llvm::StringRef g_objc_v2_prefix_metaclass(
4246  "_OBJC_METACLASS_$_");
4247  llvm::StringRef g_objc_v2_prefix_ivar(
4248  "_OBJC_IVAR_$_");
4249  if (symbol_name_ref.startswith(g_objc_v2_prefix_class)) {
4250  symbol_name_non_abi_mangled = symbol_name + 1;
4251  symbol_name =
4252  symbol_name + g_objc_v2_prefix_class.size();
4253  type = eSymbolTypeObjCClass;
4254  demangled_is_synthesized = true;
4255  } else if (symbol_name_ref.startswith(
4256  g_objc_v2_prefix_metaclass)) {
4257  symbol_name_non_abi_mangled = symbol_name + 1;
4258  symbol_name =
4259  symbol_name + g_objc_v2_prefix_metaclass.size();
4260  type = eSymbolTypeObjCMetaClass;
4261  demangled_is_synthesized = true;
4262  } else if (symbol_name_ref.startswith(
4263  g_objc_v2_prefix_ivar)) {
4264  symbol_name_non_abi_mangled = symbol_name + 1;
4265  symbol_name =
4266  symbol_name + g_objc_v2_prefix_ivar.size();
4267  type = eSymbolTypeObjCIVar;
4268  demangled_is_synthesized = true;
4269  }
4270  }
4271  }
4272  } else if (symbol_sect_name &&
4273  ::strstr(symbol_sect_name, "__gcc_except_tab") ==
4274  symbol_sect_name) {
4275  type = eSymbolTypeException;
4276  } else {
4277  type = eSymbolTypeData;
4278  }
4279  } else if (symbol_sect_name &&
4280  ::strstr(symbol_sect_name, "__IMPORT") ==
4281  symbol_sect_name) {
4282  type = eSymbolTypeTrampoline;
4283  } else if (symbol_section->IsDescendant(objc_section_sp.get())) {
4284  type = eSymbolTypeRuntime;
4285  if (symbol_name && symbol_name[0] == '.') {
4286  llvm::StringRef symbol_name_ref(symbol_name);
4287  llvm::StringRef g_objc_v1_prefix_class(
4288  ".objc_class_name_");
4289  if (symbol_name_ref.startswith(g_objc_v1_prefix_class)) {
4290  symbol_name_non_abi_mangled = symbol_name;
4291  symbol_name = symbol_name + g_objc_v1_prefix_class.size();
4292  type = eSymbolTypeObjCClass;
4293  demangled_is_synthesized = true;
4294  }
4295  }
4296  }
4297  }
4298  }
4299  } break;
4300  }
4301  }
4302 
// Entries flagged as not needed are dropped without consuming a symbol slot.
4303  if (!add_nlist) {
4304  sym[sym_idx].Clear();
4305  return true;
4306  }
4307 
4308  uint64_t symbol_value = nlist.n_value;
4309 
4310  if (symbol_name_non_abi_mangled) {
4311  sym[sym_idx].GetMangled().SetMangledName(
4312  ConstString(symbol_name_non_abi_mangled));
4313  sym[sym_idx].GetMangled().SetDemangledName(ConstString(symbol_name));
4314  } else {
4315  bool symbol_name_is_mangled = false;
4316 
// A name that starts with two underscores is passed to SetValue flagged as
// mangled; in either case one leading underscore is stripped first.
4317  if (symbol_name && symbol_name[0] == '_') {
4318  symbol_name_is_mangled = symbol_name[1] == '_';
4319  symbol_name++; // Skip the leading underscore
4320  }
4321 
4322  if (symbol_name) {
4323  ConstString const_symbol_name(symbol_name);
4324  sym[sym_idx].GetMangled().SetValue(const_symbol_name,
4325  symbol_name_is_mangled);
4326  }
4327  }
4328 
// Record N_GSYM entries by name so the non-debug pass can merge into them.
4329  if (is_gsym) {
4330  const char *gsym_name = sym[sym_idx]
4331  .GetMangled()
4332  .GetName(Mangled::ePreferMangled)
4333  .GetCString();
4334  if (gsym_name)
4335  N_GSYM_name_to_sym_idx[gsym_name] = sym_idx;
4336  }
4337 
4338  if (symbol_section) {
4339  const addr_t section_file_addr = symbol_section->GetFileAddress();
4340  if (symbol_byte_size == 0 && function_starts_count > 0) {
4341  addr_t symbol_lookup_file_addr = nlist.n_value;
4342  // Do an exact address match for non-ARM addresses, else get the
4343  // closest since the symbol might be a thumb symbol which has an
4344  // address with bit zero set.
4345  FunctionStarts::Entry *func_start_entry =
4346  function_starts.FindEntry(symbol_lookup_file_addr, !is_arm);
4347  if (is_arm && func_start_entry) {
4348  // Verify that the function start address is the symbol address
4349  // (ARM) or the symbol address + 1 (thumb).
4350  if (func_start_entry->addr != symbol_lookup_file_addr &&
4351  func_start_entry->addr != (symbol_lookup_file_addr + 1)) {
4352  // Not the right entry, NULL it out...
4353  func_start_entry = nullptr;
4354  }
4355  }
4356  if (func_start_entry) {
4357  func_start_entry->data = true;
4358 
4359  addr_t symbol_file_addr = func_start_entry->addr;
4360  if (is_arm)
4361  symbol_file_addr &= THUMB_ADDRESS_BIT_MASK;
4362 
4363  const FunctionStarts::Entry *next_func_start_entry =
4364  function_starts.FindNextEntry(func_start_entry);
4365  const addr_t section_end_file_addr =
4366  section_file_addr + symbol_section->GetByteSize();
4367  if (next_func_start_entry) {
4368  addr_t next_symbol_file_addr = next_func_start_entry->addr;
4369  // Be sure to clear the Thumb address bit when we calculate the
4370  // size from the current and next address
4371  if (is_arm)
4372  next_symbol_file_addr &= THUMB_ADDRESS_BIT_MASK;
4373  symbol_byte_size = std::min<lldb::addr_t>(
4374  next_symbol_file_addr - symbol_file_addr,
4375  section_end_file_addr - symbol_file_addr);
4376  } else {
4377  symbol_byte_size = section_end_file_addr - symbol_file_addr;
4378  }
4379  }
4380  }
// The value is stored below as an offset from the section's file address.
4381  symbol_value -= section_file_addr;
4382  }
4383 
4384  if (!is_debug) {
4385  if (type == eSymbolTypeCode) {
4386  // See if we can find a N_FUN entry for any code symbols. If we do
4387  // find a match, and the name matches, then we can merge the two into
4388  // just the function symbol to avoid duplicate entries in the symbol
4389  // table.
4390  std::pair<ValueToSymbolIndexMap::const_iterator,
4391  ValueToSymbolIndexMap::const_iterator>
4392  range;
4393  range = N_FUN_addr_to_sym_idx.equal_range(nlist.n_value);
4394  if (range.first != range.second) {
4395  for (ValueToSymbolIndexMap::const_iterator pos = range.first;
4396  pos != range.second; ++pos) {
4397  if (sym[sym_idx].GetMangled().GetName(Mangled::ePreferMangled) ==
4398  sym[pos->second].GetMangled().GetName(
4399  Mangled::ePreferMangled)) {
4400  m_nlist_idx_to_sym_idx[nlist_idx] = pos->second;
4401  // We just need the flags from the linker symbol, so put these
4402  // flags into the N_FUN flags to avoid duplicate symbols in the
4403  // symbol table.
4404  sym[pos->second].SetExternal(sym[sym_idx].IsExternal());
4405  sym[pos->second].SetFlags(nlist.n_type << 16 | nlist.n_desc);
4406  if (resolver_addresses.find(nlist.n_value) !=
4407  resolver_addresses.end())
4408  sym[pos->second].SetType(eSymbolTypeResolver);
4409  sym[sym_idx].Clear();
4410  return true;
4411  }
4412  }
4413  } else {
4414  if (resolver_addresses.find(nlist.n_value) !=
4415  resolver_addresses.end())
4416  type = eSymbolTypeResolver;
4417  }
4418  } else if (type == eSymbolTypeData || type == eSymbolTypeObjCClass ||
4419  type == eSymbolTypeObjCMetaClass ||
4420  type == eSymbolTypeObjCIVar) {
4421  // See if we can find a N_STSYM entry for any data symbols. If we do
4422  // find a match, and the name matches, then we can merge the two into
4423  // just the Static symbol to avoid duplicate entries in the symbol
4424  // table.
4425  std::pair<ValueToSymbolIndexMap::const_iterator,
4426  ValueToSymbolIndexMap::const_iterator>
4427  range;
4428  range = N_STSYM_addr_to_sym_idx.equal_range(nlist.n_value);
4429  if (range.first != range.second) {
4430  for (ValueToSymbolIndexMap::const_iterator pos = range.first;
4431  pos != range.second; ++pos) {
4432  if (sym[sym_idx].GetMangled().GetName(Mangled::ePreferMangled) ==
4433  sym[pos->second].GetMangled().GetName(
4434  Mangled::ePreferMangled)) {
4435  m_nlist_idx_to_sym_idx[nlist_idx] = pos->second;
4436  // We just need the flags from the linker symbol, so put these
4437  // flags into the N_STSYM flags to avoid duplicate symbols in
4438  // the symbol table.
4439  sym[pos->second].SetExternal(sym[sym_idx].IsExternal());
4440  sym[pos->second].SetFlags(nlist.n_type << 16 | nlist.n_desc);
4441  sym[sym_idx].Clear();
4442  return true;
4443  }
4444  }
4445  } else {
4446  // Combine N_GSYM stab entries with the non stab symbol.
4447  const char *gsym_name = sym[sym_idx]
4448  .GetMangled()
4449  .GetName(Mangled::ePreferMangled)
4450  .GetCString();
4451  if (gsym_name) {
4452  ConstNameToSymbolIndexMap::const_iterator pos =
4453  N_GSYM_name_to_sym_idx.find(gsym_name);
4454  if (pos != N_GSYM_name_to_sym_idx.end()) {
4455  const uint32_t GSYM_sym_idx = pos->second;
4456  m_nlist_idx_to_sym_idx[nlist_idx] = GSYM_sym_idx;
4457  // Copy the address, because often the N_GSYM address has an
4458  // invalid address of zero when the global is a common symbol.
4459  sym[GSYM_sym_idx].GetAddressRef().SetSection(symbol_section);
4460  sym[GSYM_sym_idx].GetAddressRef().SetOffset(symbol_value);
4461  add_symbol_addr(
4462  sym[GSYM_sym_idx].GetAddress().GetFileAddress());
4463  // We just need the flags from the linker symbol, so put these
4464  // flags into the N_GSYM flags to avoid duplicate symbols in
4465  // the symbol table.
4466  sym[GSYM_sym_idx].SetFlags(nlist.n_type << 16 | nlist.n_desc);
4467  sym[sym_idx].Clear();
4468  return true;
4469  }
4470  }
4471  }
4472  }
4473  }
4474 
// No merge happened: commit this entry as its own symbol and consume the
// slot.
4475  sym[sym_idx].SetID(nlist_idx);
4476  sym[sym_idx].SetType(type);
4477  if (set_value) {
4478  sym[sym_idx].GetAddressRef().SetSection(symbol_section);
4479  sym[sym_idx].GetAddressRef().SetOffset(symbol_value);
4480  if (symbol_section)
4481  add_symbol_addr(sym[sym_idx].GetAddress().GetFileAddress());
4482  }
4483  sym[sym_idx].SetFlags(nlist.n_type << 16 | nlist.n_desc);
4484  if (nlist.n_desc & N_WEAK_REF)
4485  sym[sym_idx].SetIsWeak(true);
4486 
4487  if (symbol_byte_size > 0)
4488  sym[sym_idx].SetByteSize(symbol_byte_size);
4489 
4490  if (demangled_is_synthesized)
4491  sym[sym_idx].SetDemangledNameIsSynthesized(true);
4492 
4493  ++sym_idx;
4494  return true;
4495  };
4496 
4497  // First parse all the nlists but don't process them yet. See the next
4498  // comment for an explanation why.
4499  std::vector<struct nlist_64> nlists;
4500  nlists.reserve(symtab_load_command.nsyms);
4501  for (; nlist_idx < symtab_load_command.nsyms; ++nlist_idx) {
4502  if (auto nlist =
4503  ParseNList(nlist_data, nlist_data_offset, nlist_byte_size))
4504  nlists.push_back(*nlist);
4505  else
4506  break;
4507  }
4508 
4509  // Now parse all the debug symbols. This is needed to merge non-debug
4510  // symbols in the next step. Non-debug symbols are always coalesced into
4511  // the debug symbol. Doing this in one step would mean that some symbols
4512  // won't be merged.
4513  nlist_idx = 0;
4514  for (auto &nlist : nlists) {
4515  if (!ParseSymbolLambda(nlist, nlist_idx++, DebugSymbols))
4516  break;
4517  }
4518 
4519  // Finally parse all the non debug symbols.
4520  nlist_idx = 0;
4521  for (auto &nlist : nlists) {
4522  if (!ParseSymbolLambda(nlist, nlist_idx++, NonDebugSymbols))
4523  break;
4524  }
4525 
4526  for (const auto &pos : reexport_shlib_needs_fixup) {
4527  const auto undef_pos = undefined_name_to_desc.find(pos.second);
4528  if (undef_pos != undefined_name_to_desc.end()) {
4529  const uint8_t dylib_ordinal =
4530  llvm::MachO::GET_LIBRARY_ORDINAL(undef_pos->second);
4531  if (dylib_ordinal > 0 && dylib_ordinal < dylib_files.GetSize())
4532  sym[pos.first].SetReExportedSymbolSharedLibrary(
4533  dylib_files.GetFileSpecAtIndex(dylib_ordinal - 1));
4534  }
4535  }
4536  }
4537 
4538  // Count how many trie symbols we'll add to the symbol table
4539  int trie_symbol_table_augment_count = 0;
4540  for (auto &e : external_sym_trie_entries) {
4541  if (symbols_added.find(e.entry.address) == symbols_added.end())
4542  trie_symbol_table_augment_count++;
4543  }
4544 
4545  if (num_syms < sym_idx + trie_symbol_table_augment_count) {
4546  num_syms = sym_idx + trie_symbol_table_augment_count;
4547  sym = symtab.Resize(num_syms);
4548  }
4549  uint32_t synthetic_sym_id = symtab_load_command.nsyms;
4550 
4551  // Add symbols from the trie to the symbol table.
4552  for (auto &e : external_sym_trie_entries) {
4553  if (symbols_added.contains(e.entry.address))
4554  continue;
4555 
4556  // Find the section that this trie address is in, use that to annotate
4557  // symbol type as we add the trie address and name to the symbol table.
4558  Address symbol_addr;
4559  if (module_sp->ResolveFileAddress(e.entry.address, symbol_addr)) {
4560  SectionSP symbol_section(symbol_addr.GetSection());
4561  const char *symbol_name = e.entry.name.GetCString();
4562  bool demangled_is_synthesized = false;
4563  SymbolType type =
4564  GetSymbolType(symbol_name, demangled_is_synthesized, text_section_sp,
4565  data_section_sp, data_dirty_section_sp,
4566  data_const_section_sp, symbol_section);
4567 
4568  sym[sym_idx].SetType(type);
4569  if (symbol_section) {
4570  sym[sym_idx].SetID(synthetic_sym_id++);
4571  sym[sym_idx].GetMangled().SetMangledName(ConstString(symbol_name));
4572  if (demangled_is_synthesized)
4573  sym[sym_idx].SetDemangledNameIsSynthesized(true);
4574  sym[sym_idx].SetIsSynthetic(true);
4575  sym[sym_idx].SetExternal(true);
4576  sym[sym_idx].GetAddressRef() = symbol_addr;
4577  add_symbol_addr(symbol_addr.GetFileAddress());
4578  if (e.entry.flags & TRIE_SYMBOL_IS_THUMB)
4580  ++sym_idx;
4581  }
4582  }
4583  }
4584 
4585  if (function_starts_count > 0) {
4586  uint32_t num_synthetic_function_symbols = 0;
4587  for (i = 0; i < function_starts_count; ++i) {
4588  if (symbols_added.find(function_starts.GetEntryRef(i).addr) ==
4589  symbols_added.end())
4590  ++num_synthetic_function_symbols;
4591  }
4592 
4593  if (num_synthetic_function_symbols > 0) {
4594  if (num_syms < sym_idx + num_synthetic_function_symbols) {
4595  num_syms = sym_idx + num_synthetic_function_symbols;
4596  sym = symtab.Resize(num_syms);
4597  }
4598  for (i = 0; i < function_starts_count; ++i) {
4599  const FunctionStarts::Entry *func_start_entry =
4600  function_starts.GetEntryAtIndex(i);
4601  if (symbols_added.find(func_start_entry->addr) == symbols_added.end()) {
4602  addr_t symbol_file_addr = func_start_entry->addr;
4603  uint32_t symbol_flags = 0;
4604  if (func_start_entry->data)
4605  symbol_flags = MACHO_NLIST_ARM_SYMBOL_IS_THUMB;
4606  Address symbol_addr;
4607  if (module_sp->ResolveFileAddress(symbol_file_addr, symbol_addr)) {
4608  SectionSP symbol_section(symbol_addr.GetSection());
4609  uint32_t symbol_byte_size = 0;
4610  if (symbol_section) {
4611  const addr_t section_file_addr = symbol_section->GetFileAddress();
4612  const FunctionStarts::Entry *next_func_start_entry =
4613  function_starts.FindNextEntry(func_start_entry);
4614  const addr_t section_end_file_addr =
4615  section_file_addr + symbol_section->GetByteSize();
4616  if (next_func_start_entry) {
4617  addr_t next_symbol_file_addr = next_func_start_entry->addr;
4618  if (is_arm)
4619  next_symbol_file_addr &= THUMB_ADDRESS_BIT_MASK;
4620  symbol_byte_size = std::min<lldb::addr_t>(
4621  next_symbol_file_addr - symbol_file_addr,
4622  section_end_file_addr - symbol_file_addr);
4623  } else {
4624  symbol_byte_size = section_end_file_addr - symbol_file_addr;
4625  }
4626  sym[sym_idx].SetID(synthetic_sym_id++);
4627  // Don't set the name for any synthetic symbols, the Symbol
4628  // object will generate one if needed when the name is accessed
4629  // via accessors.
4630  sym[sym_idx].GetMangled().SetDemangledName(ConstString());
4631  sym[sym_idx].SetType(eSymbolTypeCode);
4632  sym[sym_idx].SetIsSynthetic(true);
4633  sym[sym_idx].GetAddressRef() = symbol_addr;
4634  add_symbol_addr(symbol_addr.GetFileAddress());
4635  if (symbol_flags)
4636  sym[sym_idx].SetFlags(symbol_flags);
4637  if (symbol_byte_size)
4638  sym[sym_idx].SetByteSize(symbol_byte_size);
4639  ++sym_idx;
4640  }
4641  }
4642  }
4643  }
4644  }
4645  }
4646 
4647  // Trim our symbols down to just what we ended up with after removing any
4648  // symbols.
4649  if (sym_idx < num_syms) {
4650  num_syms = sym_idx;
4651  sym = symtab.Resize(num_syms);
4652  }
4653 
4654  // Now synthesize indirect symbols
4655  if (m_dysymtab.nindirectsyms != 0) {
4656  if (indirect_symbol_index_data.GetByteSize()) {
4657  NListIndexToSymbolIndexMap::const_iterator end_index_pos =
4658  m_nlist_idx_to_sym_idx.end();
4659 
4660  for (uint32_t sect_idx = 1; sect_idx < m_mach_sections.size();
4661  ++sect_idx) {
4662  if ((m_mach_sections[sect_idx].flags & SECTION_TYPE) ==
4663  S_SYMBOL_STUBS) {
4664  uint32_t symbol_stub_byte_size = m_mach_sections[sect_idx].reserved2;
4665  if (symbol_stub_byte_size == 0)
4666  continue;
4667 
4668  const uint32_t num_symbol_stubs =
4669  m_mach_sections[sect_idx].size / symbol_stub_byte_size;
4670 
4671  if (num_symbol_stubs == 0)
4672  continue;
4673 
4674  const uint32_t symbol_stub_index_offset =
4675  m_mach_sections[sect_idx].reserved1;
4676  for (uint32_t stub_idx = 0; stub_idx < num_symbol_stubs; ++stub_idx) {
4677  const uint32_t symbol_stub_index =
4678  symbol_stub_index_offset + stub_idx;
4679  const lldb::addr_t symbol_stub_addr =
4680  m_mach_sections[sect_idx].addr +
4681  (stub_idx * symbol_stub_byte_size);
4682  lldb::offset_t symbol_stub_offset = symbol_stub_index * 4;
4683  if (indirect_symbol_index_data.ValidOffsetForDataOfSize(
4684  symbol_stub_offset, 4)) {
4685  const uint32_t stub_sym_id =
4686  indirect_symbol_index_data.GetU32(&symbol_stub_offset);
4687  if (stub_sym_id & (INDIRECT_SYMBOL_ABS | INDIRECT_SYMBOL_LOCAL))
4688  continue;
4689 
4690  NListIndexToSymbolIndexMap::const_iterator index_pos =
4691  m_nlist_idx_to_sym_idx.find(stub_sym_id);
4692  Symbol *stub_symbol = nullptr;
4693  if (index_pos != end_index_pos) {
4694  // We have a remapping from the original nlist index to a
4695  // current symbol index, so just look this up by index
4696  stub_symbol = symtab.SymbolAtIndex(index_pos->second);
4697  } else {
4698  // We need to lookup a symbol using the original nlist symbol
4699  // index since this index is coming from the S_SYMBOL_STUBS
4700  stub_symbol = symtab.FindSymbolByID(stub_sym_id);
4701  }
4702 
4703  if (stub_symbol) {
4704  Address so_addr(symbol_stub_addr, section_list);
4705 
4706  if (stub_symbol->GetType() == eSymbolTypeUndefined) {
4707  // Change the external symbol into a trampoline that makes
4708  // sense These symbols were N_UNDF N_EXT, and are useless
4709  // to us, so we can re-use them so we don't have to make up
4710  // a synthetic symbol for no good reason.
4711  if (resolver_addresses.find(symbol_stub_addr) ==
4712  resolver_addresses.end())
4713  stub_symbol->SetType(eSymbolTypeTrampoline);
4714  else
4715  stub_symbol->SetType(eSymbolTypeResolver);
4716  stub_symbol->SetExternal(false);
4717  stub_symbol->GetAddressRef() = so_addr;
4718  stub_symbol->SetByteSize(symbol_stub_byte_size);
4719  } else {
4720  // Make a synthetic symbol to describe the trampoline stub
4721  Mangled stub_symbol_mangled_name(stub_symbol->GetMangled());
4722  if (sym_idx >= num_syms) {
4723  sym = symtab.Resize(++num_syms);
4724  stub_symbol = nullptr; // this pointer no longer valid
4725  }
4726  sym[sym_idx].SetID(synthetic_sym_id++);
4727  sym[sym_idx].GetMangled() = stub_symbol_mangled_name;
4728  if (resolver_addresses.find(symbol_stub_addr) ==
4729  resolver_addresses.end())
4730  sym[sym_idx].SetType(eSymbolTypeTrampoline);
4731  else
4732  sym[sym_idx].SetType(eSymbolTypeResolver);
4733  sym[sym_idx].SetIsSynthetic(true);
4734  sym[sym_idx].GetAddressRef() = so_addr;
4735  add_symbol_addr(so_addr.GetFileAddress());
4736  sym[sym_idx].SetByteSize(symbol_stub_byte_size);
4737  ++sym_idx;
4738  }
4739  } else {
4740  if (log)
4741  log->Warning("symbol stub referencing symbol table symbol "
4742  "%u that isn't in our minimal symbol table, "
4743  "fix this!!!",
4744  stub_sym_id);
4745  }
4746  }
4747  }
4748  }
4749  }
4750  }
4751  }
4752 
4753  if (!reexport_trie_entries.empty()) {
4754  for (const auto &e : reexport_trie_entries) {
4755  if (e.entry.import_name) {
4756  // Only add indirect symbols from the Trie entries if we didn't have
4757  // a N_INDR nlist entry for this already
4758  if (indirect_symbol_names.find(e.entry.name) ==
4759  indirect_symbol_names.end()) {
4760  // Make a synthetic symbol to describe re-exported symbol.
4761  if (sym_idx >= num_syms)
4762  sym = symtab.Resize(++num_syms);
4763  sym[sym_idx].SetID(synthetic_sym_id++);
4764  sym[sym_idx].GetMangled() = Mangled(e.entry.name);
4765  sym[sym_idx].SetType(eSymbolTypeReExported);
4766  sym[sym_idx].SetIsSynthetic(true);
4767  sym[sym_idx].SetReExportedSymbolName(e.entry.import_name);
4768  if (e.entry.other > 0 && e.entry.other <= dylib_files.GetSize()) {
4769  sym[sym_idx].SetReExportedSymbolSharedLibrary(
4770  dylib_files.GetFileSpecAtIndex(e.entry.other - 1));
4771  }
4772  ++sym_idx;
4773  }
4774  }
4775  }
4776  }
4777 }
4778 
4780  ModuleSP module_sp(GetModule());
4781  if (module_sp) {
4782  std::lock_guard<std::recursive_mutex> guard(module_sp->GetMutex());
4783  s->Printf("%p: ", static_cast<void *>(this));
4784  s->Indent();
4785  if (m_header.magic == MH_MAGIC_64 || m_header.magic == MH_CIGAM_64)
4786  s->PutCString("ObjectFileMachO64");
4787  else
4788  s->PutCString("ObjectFileMachO32");
4789 
4790  *s << ", file = '" << m_file;
4791  ModuleSpecList all_specs;
4792  ModuleSpec base_spec;
4794  base_spec, all_specs);
4795  for (unsigned i = 0, e = all_specs.GetSize(); i != e; ++i) {
4796  *s << "', triple";
4797  if (e)
4798  s->Printf("[%d]", i);
4799  *s << " = ";
4800  *s << all_specs.GetModuleSpecRefAtIndex(i)
4801  .GetArchitecture()
4802  .GetTriple()
4803  .getTriple();
4804  }
4805  *s << "\n";
4806  SectionList *sections = GetSectionList();
4807  if (sections)
4808  sections->Dump(s->AsRawOstream(), s->GetIndentLevel(), nullptr, true,
4809  UINT32_MAX);
4810 
4811  if (m_symtab_up)
4812  m_symtab_up->Dump(s, nullptr, eSortOrderNone);
4813  }
4814 }
4815 
4816 UUID ObjectFileMachO::GetUUID(const llvm::MachO::mach_header &header,
4817  const lldb_private::DataExtractor &data,
4818  lldb::offset_t lc_offset) {
4819  uint32_t i;
4820  llvm::MachO::uuid_command load_cmd;
4821 
4822  lldb::offset_t offset = lc_offset;
4823  for (i = 0; i < header.ncmds; ++i) {
4824  const lldb::offset_t cmd_offset = offset;
4825  if (data.GetU32(&offset, &load_cmd, 2) == nullptr)
4826  break;
4827 
4828  if (load_cmd.cmd == LC_UUID) {
4829  const uint8_t *uuid_bytes = data.PeekData(offset, 16);
4830 
4831  if (uuid_bytes) {
4832  // OpenCL on Mac OS X uses the same UUID for each of its object files.
4833  // We pretend these object files have no UUID to prevent crashing.
4834 
4835  const uint8_t opencl_uuid[] = {0x8c, 0x8e, 0xb3, 0x9b, 0x3b, 0xa8,
4836  0x4b, 0x16, 0xb6, 0xa4, 0x27, 0x63,
4837  0xbb, 0x14, 0xf0, 0x0d};
4838 
4839  if (!memcmp(uuid_bytes, opencl_uuid, 16))
4840  return UUID();
4841 
4842  return UUID::fromOptionalData(uuid_bytes, 16);
4843  }
4844  return UUID();
4845  }
4846  offset = cmd_offset + load_cmd.cmdsize;
4847  }
4848  return UUID();
4849 }
4850 
4851 static llvm::StringRef GetOSName(uint32_t cmd) {
4852  switch (cmd) {
4853  case llvm::MachO::LC_VERSION_MIN_IPHONEOS:
4854  return llvm::Triple::getOSTypeName(llvm::Triple::IOS);
4855  case llvm::MachO::LC_VERSION_MIN_MACOSX:
4856  return llvm::Triple::getOSTypeName(llvm::Triple::MacOSX);
4857  case llvm::MachO::LC_VERSION_MIN_TVOS:
4858  return llvm::Triple::getOSTypeName(llvm::Triple::TvOS);
4859  case llvm::MachO::LC_VERSION_MIN_WATCHOS:
4860  return llvm::Triple::getOSTypeName(llvm::Triple::WatchOS);
4861  default:
4862  llvm_unreachable("unexpected LC_VERSION load command");
4863  }
4864 }
4865 
4866 namespace {
4867 struct OSEnv {
4868  llvm::StringRef os_type;
4869  llvm::StringRef environment;
4870  OSEnv(uint32_t cmd) {
4871  switch (cmd) {
4872  case llvm::MachO::PLATFORM_MACOS:
4873  os_type = llvm::Triple::getOSTypeName(llvm::Triple::MacOSX);
4874  return;
4875  case llvm::MachO::PLATFORM_IOS:
4876  os_type = llvm::Triple::getOSTypeName(llvm::Triple::IOS);
4877  return;
4878  case llvm::MachO::PLATFORM_TVOS:
4879  os_type = llvm::Triple::getOSTypeName(llvm::Triple::TvOS);
4880  return;
4881  case llvm::MachO::PLATFORM_WATCHOS:
4882  os_type = llvm::Triple::getOSTypeName(llvm::Triple::WatchOS);
4883  return;
4884  // TODO: add BridgeOS & DriverKit once in llvm/lib/Support/Triple.cpp
4885  // NEED_BRIDGEOS_TRIPLE
4886  // case llvm::MachO::PLATFORM_BRIDGEOS:
4887  // os_type = llvm::Triple::getOSTypeName(llvm::Triple::BridgeOS);
4888  // return;
4889  // case llvm::MachO::PLATFORM_DRIVERKIT:
4890  // os_type = llvm::Triple::getOSTypeName(llvm::Triple::DriverKit);
4891  // return;
4892  case llvm::MachO::PLATFORM_MACCATALYST:
4893  os_type = llvm::Triple::getOSTypeName(llvm::Triple::IOS);
4894  environment = llvm::Triple::getEnvironmentTypeName(llvm::Triple::MacABI);
4895  return;
4896  case llvm::MachO::PLATFORM_IOSSIMULATOR:
4897  os_type = llvm::Triple::getOSTypeName(llvm::Triple::IOS);
4898  environment =
4899  llvm::Triple::getEnvironmentTypeName(llvm::Triple::Simulator);
4900  return;
4901  case llvm::MachO::PLATFORM_TVOSSIMULATOR:
4902  os_type = llvm::Triple::getOSTypeName(llvm::Triple::TvOS);
4903  environment =
4904  llvm::Triple::getEnvironmentTypeName(llvm::Triple::Simulator);
4905  return;
4906  case llvm::MachO::PLATFORM_WATCHOSSIMULATOR:
4907  os_type = llvm::Triple::getOSTypeName(llvm::Triple::WatchOS);
4908  environment =
4909  llvm::Triple::getEnvironmentTypeName(llvm::Triple::Simulator);
4910  return;
4911  default: {
4912  Log *log(GetLog(LLDBLog::Symbols | LLDBLog::Process));
4913  LLDB_LOGF(log, "unsupported platform in LC_BUILD_VERSION");
4914  }
4915  }
4916  }
4917 };
4918 
// Unpacks a 32-bit version word: the upper 16 bits are the major version, the
// next 8 bits the minor version, and the low 8 bits the patch version.
struct MinOS {
  uint32_t major_version;
  uint32_t minor_version;
  uint32_t patch_version;
  MinOS(uint32_t version) {
    patch_version = version & 0xffu;
    minor_version = (version >> 8) & 0xffu;
    major_version = version >> 16;
  }
};
4925 } // namespace
4926 
4927 void ObjectFileMachO::GetAllArchSpecs(const llvm::MachO::mach_header &header,
4928  const lldb_private::DataExtractor &data,
4929  lldb::offset_t lc_offset,
4930  ModuleSpec &base_spec,
4931  lldb_private::ModuleSpecList &all_specs) {
4932  auto &base_arch = base_spec.GetArchitecture();
4933  base_arch.SetArchitecture(eArchTypeMachO, header.cputype, header.cpusubtype);
4934  if (!base_arch.IsValid())
4935  return;
4936 
4937  bool found_any = false;
4938  auto add_triple = [&](const llvm::Triple &triple) {
4939  auto spec = base_spec;
4940  spec.GetArchitecture().GetTriple() = triple;
4941  if (spec.GetArchitecture().IsValid()) {
4942  spec.GetUUID() = ObjectFileMachO::GetUUID(header, data, lc_offset);
4943  all_specs.Append(spec);
4944  found_any = true;
4945  }
4946  };
4947 
4948  // Set OS to an unspecified unknown or a "*" so it can match any OS
4949  llvm::Triple base_triple = base_arch.GetTriple();
4950  base_triple.setOS(llvm::Triple::UnknownOS);
4951  base_triple.setOSName(llvm::StringRef());
4952 
4953  if (header.filetype == MH_PRELOAD) {
4954  if (header.cputype == CPU_TYPE_ARM) {
4955  // If this is a 32-bit arm binary, and it's a standalone binary, force
4956  // the Vendor to Apple so we don't accidentally pick up the generic
4957  // armv7 ABI at runtime. Apple's armv7 ABI always uses r7 for the
4958  // frame pointer register; most other armv7 ABIs use a combination of
4959  // r7 and r11.
4960  base_triple.setVendor(llvm::Triple::Apple);
4961  } else {
4962  // Set vendor to an unspecified unknown or a "*" so it can match any
4963  // vendor This is required for correct behavior of EFI debugging on
4964  // x86_64
4965  base_triple.setVendor(llvm::Triple::UnknownVendor);
4966  base_triple.setVendorName(llvm::StringRef());
4967  }
4968  return add_triple(base_triple);
4969  }
4970 
4971  llvm::MachO::load_command load_cmd;
4972 
4973  // See if there is an LC_VERSION_MIN_* load command that can give
4974  // us the OS type.
4975  lldb::offset_t offset = lc_offset;
4976  for (uint32_t i = 0; i < header.ncmds; ++i) {
4977  const lldb::offset_t cmd_offset = offset;
4978  if (data.GetU32(&offset, &load_cmd, 2) == nullptr)
4979  break;
4980 
4981  llvm::MachO::version_min_command version_min;
4982  switch (load_cmd.cmd) {
4983  case llvm::MachO::LC_VERSION_MIN_MACOSX:
4984  case llvm::MachO::LC_VERSION_MIN_IPHONEOS:
4985  case llvm::MachO::LC_VERSION_MIN_TVOS:
4986  case llvm::MachO::LC_VERSION_MIN_WATCHOS: {
4987  if (load_cmd.cmdsize != sizeof(version_min))
4988  break;
4989  if (data.ExtractBytes(cmd_offset, sizeof(version_min),
4990  data.GetByteOrder(), &version_min) == 0)
4991  break;
4992  MinOS min_os(version_min.version);
4993  llvm::SmallString<32> os_name;
4994  llvm::raw_svector_ostream os(os_name);
4995  os << GetOSName(load_cmd.cmd) << min_os.major_version << '.'
4996  << min_os.minor_version << '.' << min_os.patch_version;
4997 
4998  auto triple = base_triple;
4999  triple.setOSName(os.str());
5000 
5001  // Disambiguate legacy simulator platforms.
5002  if (load_cmd.cmd != llvm::MachO::LC_VERSION_MIN_MACOSX &&
5003  (base_triple.getArch() == llvm::Triple::x86_64 ||
5004  base_triple.getArch() == llvm::Triple::x86)) {
5005  // The combination of legacy LC_VERSION_MIN load command and
5006  // x86 architecture always indicates a simulator environment.
5007  // The combination of LC_VERSION_MIN and arm architecture only
5008  // appears for native binaries. Back-deploying simulator
5009  // binaries on Apple Silicon Macs use the modern unambigous
5010  // LC_BUILD_VERSION load commands; no special handling required.
5011  triple.setEnvironment(llvm::Triple::Simulator);
5012  }
5013  add_triple(triple);
5014  break;
5015  }
5016  default:
5017  break;
5018  }
5019 
5020  offset = cmd_offset + load_cmd.cmdsize;
5021  }
5022 
5023  // See if there are LC_BUILD_VERSION load commands that can give
5024  // us the OS type.
5025  offset = lc_offset;
5026  for (uint32_t i = 0; i < header.ncmds; ++i) {
5027  const lldb::offset_t cmd_offset = offset;
5028  if (data.GetU32(&offset, &load_cmd, 2) == nullptr)
5029  break;
5030 
5031  do {
5032  if (load_cmd.cmd == llvm::MachO::LC_BUILD_VERSION) {
5033  llvm::MachO::build_version_command build_version;
5034  if (load_cmd.cmdsize < sizeof(build_version)) {
5035  // Malformed load command.
5036  break;
5037  }
5038  if (data.ExtractBytes(cmd_offset, sizeof(build_version),
5039  data.GetByteOrder(), &build_version) == 0)
5040  break;
5041  MinOS min_os(build_version.minos);
5042  OSEnv os_env(build_version.platform);
5043  llvm::SmallString<16> os_name;
5044  llvm::raw_svector_ostream os(os_name);
5045  os << os_env.os_type << min_os.major_version << '.'
5046  << min_os.minor_version << '.' << min_os.patch_version;
5047  auto triple = base_triple;
5048  triple.setOSName(os.str());
5049  os_name.clear();
5050  if (!os_env.environment.empty())
5051  triple.setEnvironmentName(os_env.environment);
5052  add_triple(triple);
5053  }
5054  } while (false);
5055  offset = cmd_offset + load_cmd.cmdsize;
5056  }
5057 
5058  if (!found_any) {
5059  add_triple(base_triple);
5060  }
5061 }
5062 
5064  ModuleSP module_sp, const llvm::MachO::mach_header &header,
5065  const lldb_private::DataExtractor &data, lldb::offset_t lc_offset) {
5066  ModuleSpecList all_specs;
5067  ModuleSpec base_spec;
5068  GetAllArchSpecs(header, data, MachHeaderSizeFromMagic(header.magic),
5069  base_spec, all_specs);
5070 
5071  // If the object file offers multiple alternative load commands,
5072  // pick the one that matches the module.
5073  if (module_sp) {
5074  const ArchSpec &module_arch = module_sp->GetArchitecture();
5075  for (unsigned i = 0, e = all_specs.GetSize(); i != e; ++i) {
5076  ArchSpec mach_arch =
5078  if (module_arch.IsCompatibleMatch(mach_arch))
5079  return mach_arch;
5080  }
5081  }
5082 
5083  // Return the first arch we found.
5084  if (all_specs.GetSize() == 0)
5085  return {};
5086  return all_specs.GetModuleSpecRefAtIndex(0).GetArchitecture();
5087 }
5088 
5090  ModuleSP module_sp(GetModule());
5091  if (module_sp) {
5092  std::lock_guard<std::recursive_mutex> guard(module_sp->GetMutex());
5094  return GetUUID(m_header, m_data, offset);
5095  }
5096  return UUID();
5097 }
5098 
5100  uint32_t count = 0;
5101  ModuleSP module_sp(GetModule());
5102  if (module_sp) {
5103  std::lock_guard<std::recursive_mutex> guard(module_sp->GetMutex());
5104  llvm::MachO::load_command load_cmd;
5106  std::vector<std::string> rpath_paths;
5107  std::vector<std::string> rpath_relative_paths;
5108  std::vector<std::string> at_exec_relative_paths;
5109  uint32_t i;
5110  for (i = 0; i < m_header.ncmds; ++i) {
5111  const uint32_t cmd_offset = offset;
5112  if (m_data.GetU32(&offset, &load_cmd, 2) == nullptr)
5113  break;
5114 
5115  switch (load_cmd.cmd) {
5116  case LC_RPATH:
5117  case LC_LOAD_DYLIB:
5118  case LC_LOAD_WEAK_DYLIB:
5119  case LC_REEXPORT_DYLIB:
5120  case LC_LOAD_DYLINKER:
5121  case LC_LOADFVMLIB:
5122  case LC_LOAD_UPWARD_DYLIB: {
5123  uint32_t name_offset = cmd_offset + m_data.GetU32(&offset);
5124  const char *path = m_data.PeekCStr(name_offset);
5125  if (path) {
5126  if (load_cmd.cmd == LC_RPATH)
5127  rpath_paths.push_back(path);
5128  else {
5129  if (path[0] == '@') {
5130  if (strncmp(path, "@rpath", strlen("@rpath")) == 0)
5131  rpath_relative_paths.push_back(path + strlen("@rpath"));
5132  else if (strncmp(path, "@executable_path",
5133  strlen("@executable_path")) == 0)
5134  at_exec_relative_paths.push_back(path +
5135  strlen("@executable_path"));
5136  } else {
5137  FileSpec file_spec(path);
5138  if (files.AppendIfUnique(file_spec))
5139  count++;
5140  }
5141  }
5142  }
5143  } break;
5144 
5145  default:
5146  break;
5147  }
5148  offset = cmd_offset + load_cmd.cmdsize;
5149  }
5150 
5151  FileSpec this_file_spec(m_file);
5152  FileSystem::Instance().Resolve(this_file_spec);
5153 
5154  if (!rpath_paths.empty()) {
5155  // Fixup all LC_RPATH values to be absolute paths
5156  std::string loader_path("@loader_path");
5157  std::string executable_path("@executable_path");
5158  for (auto &rpath : rpath_paths) {
5159  if (llvm::StringRef(rpath).startswith(loader_path)) {
5160  rpath.erase(0, loader_path.size());
5161  rpath.insert(0, this_file_spec.GetDirectory().GetCString());
5162  } else if (llvm::StringRef(rpath).startswith(executable_path)) {
5163  rpath.erase(0, executable_path.size());
5164  rpath.insert(0, this_file_spec.GetDirectory().GetCString());
5165  }
5166  }
5167 
5168  for (const auto &rpath_relative_path : rpath_relative_paths) {
5169  for (const auto &rpath : rpath_paths) {
5170  std::string path = rpath;
5171  path += rpath_relative_path;
5172  // It is OK to resolve this path because we must find a file on disk
5173  // for us to accept it anyway if it is rpath relative.
5174  FileSpec file_spec(path);
5175  FileSystem::Instance().Resolve(file_spec);
5176  if (FileSystem::Instance().Exists(file_spec) &&
5177  files.AppendIfUnique(file_spec)) {
5178  count++;
5179  break;
5180  }
5181  }
5182  }
5183  }
5184 
5185  // We may have @executable_paths but no RPATHS. Figure those out here.
5186  // Only do this if this object file is the executable. We have no way to
5187  // get back to the actual executable otherwise, so we won't get the right
5188  // path.
5189  if (!at_exec_relative_paths.empty() && CalculateType() == eTypeExecutable) {
5190  FileSpec exec_dir = this_file_spec.CopyByRemovingLastPathComponent();
5191  for (const auto &at_exec_relative_path : at_exec_relative_paths) {
5192  FileSpec file_spec =
5193  exec_dir.CopyByAppendingPathComponent(at_exec_relative_path);
5194  if (FileSystem::Instance().Exists(file_spec) &&
5195  files.AppendIfUnique(file_spec))
5196  count++;
5197  }
5198  }
5199  }
5200  return count;
5201 }
5202 
5204  // If the object file is not an executable it can't hold the entry point.
5205  // m_entry_point_address is initialized to an invalid address, so we can just
5206  // return that. If m_entry_point_address is valid it means we've found it
5207  // already, so return the cached value.
5208 
5209  if ((!IsExecutable() && !IsDynamicLoader()) ||
5211  return m_entry_point_address;
5212  }
5213 
5214  // Otherwise, look for the UnixThread or Thread command. The data for the
5215  // Thread command is given in /usr/include/mach-o.h, but it is basically:
5216  //
5217  // uint32_t flavor - this is the flavor argument you would pass to
5218  // thread_get_state
5219  // uint32_t count - this is the count of longs in the thread state data
5220  // struct XXX_thread_state state - this is the structure from
5221  // <machine/thread_status.h> corresponding to the flavor.
5222  // <repeat this trio>
5223  //
5224  // So we just keep reading the various register flavors till we find the GPR
5225  // one, then read the PC out of there.
5226  // FIXME: We will need to have a "RegisterContext data provider" class at some
5227  // point that can get all the registers
5228  // out of data in this form & attach them to a given thread. That should
5229  // underlie the MacOS X User process plugin, and we'll also need it for the
5230  // MacOS X Core File process plugin. When we have that we can also use it
5231  // here.
5232  //
5233  // For now we hard-code the offsets and flavors we need:
5234  //
5235  //
5236 
5237  ModuleSP module_sp(GetModule());
5238  if (module_sp) {
5239  std::lock_guard<std::recursive_mutex> guard(module_sp->GetMutex());
5240  llvm::MachO::load_command load_cmd;
5242  uint32_t i;
5243  lldb::addr_t start_address = LLDB_INVALID_ADDRESS;
5244  bool done = false;
5245 
5246  for (i = 0; i < m_header.ncmds; ++i) {
5247  const lldb::offset_t cmd_offset = offset;
5248  if (m_data.GetU32(&offset, &load_cmd, 2) == nullptr)
5249  break;
5250 
5251  switch (load_cmd.cmd) {
5252  case LC_UNIXTHREAD:
5253  case LC_THREAD: {
5254  while (offset < cmd_offset + load_cmd.cmdsize) {
5255  uint32_t flavor = m_data.GetU32(&offset);
5256  uint32_t count = m_data.GetU32(&offset);
5257  if (count == 0) {
5258  // We've gotten off somehow, log and exit;
5259  return m_entry_point_address;
5260  }
5261 
5262  switch (m_header.cputype) {
5263  case llvm::MachO::CPU_TYPE_ARM:
5264  if (flavor == 1 ||
5265  flavor == 9) // ARM_THREAD_STATE/ARM_THREAD_STATE32
5266  // from mach/arm/thread_status.h
5267  {
5268  offset += 60; // This is the offset of pc in the GPR thread state
5269  // data structure.
5270  start_address = m_data.GetU32(&offset);
5271  done = true;
5272  }
5273  break;
5276  if (flavor == 6) // ARM_THREAD_STATE64 from mach/arm/thread_status.h
5277  {
5278  offset += 256; // This is the offset of pc in the GPR thread state
5279  // data structure.
5280  start_address = m_data.GetU64(&offset);
5281  done = true;
5282  }
5283  break;
5284  case llvm::MachO::CPU_TYPE_I386:
5285  if (flavor ==
5286  1) // x86_THREAD_STATE32 from mach/i386/thread_status.h
5287  {
5288  offset += 40; // This is the offset of eip in the GPR thread state
5289  // data structure.
5290  start_address = m_data.GetU32(&offset);
5291  done = true;
5292  }
5293  break;
5294  case llvm::MachO::CPU_TYPE_X86_64:
5295  if (flavor ==
5296  4) // x86_THREAD_STATE64 from mach/i386/thread_status.h
5297  {
5298  offset += 16 * 8; // This is the offset of rip in the GPR thread
5299  // state data structure.
5300  start_address = m_data.GetU64(&offset);
5301  done = true;
5302  }
5303  break;
5304  default:
5305  return m_entry_point_address;
5306  }
5307  // Haven't found the GPR flavor yet, skip over the data for this
5308  // flavor:
5309  if (done)
5310  break;
5311  offset += count * 4;
5312  }
5313  } break;
5314  case LC_MAIN: {
5315  ConstString text_segment_name("__TEXT");
5316  uint64_t entryoffset = m_data.GetU64(&offset);
5317  SectionSP text_segment_sp =
5318  GetSectionList()->FindSectionByName(text_segment_name);
5319  if (text_segment_sp) {
5320  done = true;
5321  start_address = text_segment_sp->GetFileAddress() + entryoffset;
5322  }
5323  } break;
5324 
5325  default:
5326  break;
5327  }
5328  if (done)
5329  break;
5330 
5331  // Go to the next load command:
5332  offset = cmd_offset + load_cmd.cmdsize;
5333  }
5334 
5335  if (start_address == LLDB_INVALID_ADDRESS && IsDynamicLoader()) {
5336  if (GetSymtab()) {
5337  Symbol *dyld_start_sym = GetSymtab()->FindFirstSymbolWithNameAndType(
5338  ConstString("_dyld_start"), SymbolType::eSymbolTypeCode,
5339  Symtab::eDebugAny, Symtab::eVisibilityAny);
5340  if (dyld_start_sym && dyld_start_sym->GetAddress().IsValid()) {
5341  start_address = dyld_start_sym->GetAddress().GetFileAddress();
5342  }
5343  }
5344  }
5345 
5346  if (start_address != LLDB_INVALID_ADDRESS) {
5347  // We got the start address from the load commands, so now resolve that
5348  // address in the sections of this ObjectFile:
5350  start_address, GetSectionList())) {
5352  }
5353  } else {
5354  // We couldn't read the UnixThread load command - maybe it wasn't there.
5355  // As a fallback look for the "start" symbol in the main executable.
5356 
5357  ModuleSP module_sp(GetModule());
5358 
5359  if (module_sp) {
5360  SymbolContextList contexts;
5361  SymbolContext context;
5362  module_sp->FindSymbolsWithNameAndType(ConstString("start"),
5363  eSymbolTypeCode, contexts);
5364  if (contexts.GetSize()) {
5365  if (contexts.GetContextAtIndex(0, context))
5367  }
5368  }
5369  }
5370  }
5371 
5372  return m_entry_point_address;
5373 }
5374 
5376  lldb_private::Address header_addr;
5377  SectionList *section_list = GetSectionList();
5378  if (section_list) {
5379  SectionSP text_segment_sp(
5380  section_list->FindSectionByName(GetSegmentNameTEXT()));
5381  if (text_segment_sp) {
5382  header_addr.SetSection(text_segment_sp);
5383  header_addr.SetOffset(0);
5384  }
5385  }
5386  return header_addr;
5387 }
5388 
5390  ModuleSP module_sp(GetModule());
5391  if (module_sp) {
5392  std::lock_guard<std::recursive_mutex> guard(module_sp->GetMutex());
5396  FileRangeArray::Entry file_range;
5397  llvm::MachO::thread_command thread_cmd;
5398  for (uint32_t i = 0; i < m_header.ncmds; ++i) {
5399  const uint32_t cmd_offset = offset;
5400  if (m_data.GetU32(&offset, &thread_cmd, 2) == nullptr)
5401  break;
5402 
5403  if (thread_cmd.cmd == LC_THREAD) {
5404  file_range.SetRangeBase(offset);
5405  file_range.SetByteSize(thread_cmd.cmdsize - 8);
5406  m_thread_context_offsets.Append(file_range);
5407  }
5408  offset = cmd_offset + thread_cmd.cmdsize;
5409  }
5410  }
5411  }
5413 }
5414 
5416  std::string result;
5417  ModuleSP module_sp(GetModule());
5418  if (module_sp) {
5419  std::lock_guard<std::recursive_mutex> guard(module_sp->GetMutex());
5420 
5421  // First, look over the load commands for an LC_NOTE load command with
5422  // data_owner string "kern ver str" & use that if found.
5424  for (uint32_t i = 0; i < m_header.ncmds; ++i) {
5425  const uint32_t cmd_offset = offset;
5426  llvm::MachO::load_command lc;
5427  if (m_data.GetU32(&offset, &lc.cmd, 2) == nullptr)
5428  break;
5429  if (lc.cmd == LC_NOTE) {
5430  char data_owner[17];
5431  m_data.CopyData(offset, 16, data_owner);
5432  data_owner[16] = '\0';
5433  offset += 16;
5434  uint64_t fileoff = m_data.GetU64_unchecked(&offset);
5435  uint64_t size = m_data.GetU64_unchecked(&offset);
5436 
5437  // "kern ver str" has a uint32_t version and then a nul terminated
5438  // c-string.
5439  if (strcmp("kern ver str", data_owner) == 0) {
5440  offset = fileoff;
5441  uint32_t version;
5442  if (m_data.GetU32(&offset, &version, 1) != nullptr) {
5443  if (version == 1) {
5444  uint32_t strsize = size - sizeof(uint32_t);
5445  char *buf = (char *)malloc(strsize);
5446  if (buf) {
5447  m_data.CopyData(offset, strsize, buf);
5448  buf[strsize - 1] = '\0';
5449  result = buf;
5450  if (buf)
5451  free(buf);
5452  return result;
5453  }
5454  }
5455  }
5456  }
5457  }
5458  offset = cmd_offset + lc.cmdsize;
5459  }
5460 
5461  // Second, make a pass over the load commands looking for an obsolete
5462  // LC_IDENT load command.
5463  offset = MachHeaderSizeFromMagic(m_header.magic);
5464  for (uint32_t i = 0; i < m_header.ncmds; ++i) {
5465  const uint32_t cmd_offset = offset;
5466  llvm::MachO::ident_command ident_command;
5467  if (m_data.GetU32(&offset, &ident_command, 2) == nullptr)
5468  break;
5469  if (ident_command.cmd == LC_IDENT && ident_command.cmdsize != 0) {
5470  char *buf = (char *)malloc(ident_command.cmdsize);
5471  if (buf != nullptr && m_data.CopyData(offset, ident_command.cmdsize,
5472  buf) == ident_command.cmdsize) {
5473  buf[ident_command.cmdsize - 1] = '\0';
5474  result = buf;
5475  }
5476  if (buf)
5477  free(buf);
5478  }
5479  offset = cmd_offset + ident_command.cmdsize;
5480  }
5481  }
5482  return result;
5483 }
5484 
5486  addr_t mask = 0;
5487  ModuleSP module_sp(GetModule());
5488  if (module_sp) {
5489  std::lock_guard<std::recursive_mutex> guard(module_sp->GetMutex());
5491  for (uint32_t i = 0; i < m_header.ncmds; ++i) {
5492  const uint32_t cmd_offset = offset;
5493  llvm::MachO::load_command lc;
5494  if (m_data.GetU32(&offset, &lc.cmd, 2) == nullptr)
5495  break;
5496  if (lc.cmd == LC_NOTE) {
5497  char data_owner[17];
5498  m_data.CopyData(offset, 16, data_owner);
5499  data_owner[16] = '\0';
5500  offset += 16;
5501  uint64_t fileoff = m_data.GetU64_unchecked(&offset);
5502 
5503  // "addrable bits" has a uint32_t version and a uint32_t
5504  // number of bits used in addressing.
5505  if (strcmp("addrable bits", data_owner) == 0) {
5506  offset = fileoff;
5507  uint32_t version;
5508  if (m_data.GetU32(&offset, &version, 1) != nullptr) {
5509  if (version == 3) {
5510  uint32_t num_addr_bits = m_data.GetU32_unchecked(&offset);
5511  if (num_addr_bits != 0) {
5512  mask = ~((1ULL << num_addr_bits) - 1);
5513  }
5514  break;
5515  }
5516  }
5517  }
5518  }
5519  offset = cmd_offset + lc.cmdsize;
5520  }
5521  }
5522  return mask;
5523 }
5524 
// Tail of the parameter list and the body of a function whose first signature
// line (listing line 5525) is not present in this extract; `value` is
// presumably declared on that missing line. The function searches the load
// commands for an LC_NOTE named "main bin spec" and decodes its payload.
//
// Outputs:
//   value           - reset to LLDB_INVALID_ADDRESS, then set to the note's
//                     address field, or to its slide field when the address is
//                     LLDB_INVALID_ADDRESS and a slide was supplied.
//   value_is_offset - reset to false; set to true only when `value` was taken
//                     from the slide field.
//   uuid            - cleared, then built from the note's 16 raw uuid bytes.
//   type            - not reset here; written only when the note's type field
//                     is 0, 1, 2 or 3.
//
// Returns true once a note with version <= 2 has been decoded. Returns false
// when no matching note is found or when one of the field reads fails; in the
// failure case `value` may already have been overwritten.
5526  bool &value_is_offset,
5527  UUID &uuid,
5528  ObjectFile::BinaryType &type) {
5529  value = LLDB_INVALID_ADDRESS;
5530  value_is_offset = false;
5531  uuid.Clear();
5532  uint32_t log2_pagesize = 0; // not currently passed up to caller
5533  uint32_t platform = 0; // not currently passed up to caller
5534  ModuleSP module_sp(GetModule());
5535  if (module_sp) {
5536  std::lock_guard<std::recursive_mutex> guard(module_sp->GetMutex());
      // NOTE(review): the declaration of `offset` (listing line 5537) is
      // missing from this extract; presumably it starts just past the Mach-O
      // header -- confirm against the real file.
5538  for (uint32_t i = 0; i < m_header.ncmds; ++i) {
5539  const uint32_t cmd_offset = offset;
5540  llvm::MachO::load_command lc;
      // Reads two consecutive 32-bit words starting at lc.cmd; lc.cmdsize is
      // used below, so this presumably fills both cmd and cmdsize.
5541  if (m_data.GetU32(&offset, &lc.cmd, 2) == nullptr)
5542  break;
5543  if (lc.cmd == LC_NOTE) {
      // The buffer is zero-filled first, so the 16-byte owner name is always
      // NUL-terminated even though the CopyData result is not checked.
5544  char data_owner[17];
5545  memset(data_owner, 0, sizeof(data_owner));
5546  m_data.CopyData(offset, 16, data_owner);
5547  offset += 16;
5548  uint64_t fileoff = m_data.GetU64_unchecked(&offset);
5549  uint64_t size = m_data.GetU64_unchecked(&offset);
5550 
5551  // struct main_bin_spec
5552  // {
5553  // uint32_t version; // currently 2
5554  // uint32_t type; // 0 == unspecified, 1 == kernel,
5555  // // 2 == user process,
5556  // // 3 == standalone binary
5557  // uint64_t address; // UINT64_MAX if address not specified
5558  // uint64_t slide; // slide, UINT64_MAX if unspecified
5559  // // 0 if no slide needs to be applied to
5560  // // file address
5561  // uuid_t uuid; // all zero's if uuid not specified
5562  // uint32_t log2_pagesize; // process page size in log base 2,
5563  // // e.g. 4k pages are 12.
5564  // // 0 for unspecified
5565  // uint32_t platform; // The Mach-O platform for this corefile.
5566  // // 0 for unspecified.
5567  // // The values are defined in
5568  // // <mach-o/loader.h>, PLATFORM_*.
5569  // } __attribute((packed));
5570 
      // NOTE(review): the description that follows is the older version 1
      // layout (no slide field, trailing word unused). The code below accepts
      // both: the slide and platform fields are read only when version > 1.
5571  // "main bin spec" (main binary specification) data payload is
5572  // formatted:
5573  // uint32_t version [currently 1]
5574  // uint32_t type [0 == unspecified, 1 == kernel,
5575  // 2 == user process, 3 == firmware ]
5576  // uint64_t address [ UINT64_MAX if address not specified ]
5577  // uuid_t uuid [ all zero's if uuid not specified ]
5578  // uint32_t log2_pagesize [ process page size in log base
5579  // 2, e.g. 4k pages are 12.
5580  // 0 for unspecified ]
5581  // uint32_t unused [ for alignment ]
5582 
      // NOTE(review): 32 bytes is version + type + address + uuid of the
      // version 1 layout. A version 2 payload is longer, so the later reads
      // rely on the extractor's own bounds checks rather than on `size`.
5583  if (strcmp("main bin spec", data_owner) == 0 && size >= 32) {
      // Jump to the note payload; `offset` is restored from cmd_offset at the
      // bottom of the loop if this note is not accepted.
5584  offset = fileoff;
5585  uint32_t version;
5586  if (m_data.GetU32(&offset, &version, 1) != nullptr && version <= 2) {
5587  uint32_t binspec_type = 0;
5588  uuid_t raw_uuid;
5589  memset(raw_uuid, 0, sizeof(uuid_t));
5590 
5591  if (!m_data.GetU32(&offset, &binspec_type, 1))
5592  return false;
      // The address field is stored directly into the caller's `value`.
5593  if (!m_data.GetU64(&offset, &value, 1))
5594  return false;
      // The slide field exists only in version 2 payloads; for version 1 it
      // stays at LLDB_INVALID_ADDRESS, i.e. "not specified".
5595  uint64_t slide = LLDB_INVALID_ADDRESS;
5596  if (version > 1 && !m_data.GetU64(&offset, &slide, 1))
5597  return false;
      // With no address but a slide present, report the slide and flag it as
      // an offset rather than an address.
5598  if (value == LLDB_INVALID_ADDRESS &&
5599  slide != LLDB_INVALID_ADDRESS) {
5600  value = slide;
5601  value_is_offset = true;
5602  }
5603 
      // NOTE(review): any non-zero CopyData result is accepted here;
      // presumably a short copy would also pass -- confirm CopyData semantics.
      // NOTE(review): CopyData takes `offset` by value and `offset` is not
      // advanced by sizeof(uuid_t) afterwards (compare the manual
      // `offset += 16` above), so the log2_pagesize and platform reads below
      // start at the uuid bytes. Harmless today because neither value is
      // passed to the caller, but it looks unintended.
5604  if (m_data.CopyData(offset, sizeof(uuid_t), raw_uuid) != 0) {
5605  uuid = UUID::fromOptionalData(raw_uuid, sizeof(uuid_t));
5606  // convert the "main bin spec" type into our
5607  // ObjectFile::BinaryType enum
      // There is no default case: for any other type value the caller's
      // `type` is left as it was on entry.
5608  switch (binspec_type) {
5609  case 0:
5610  type = eBinaryTypeUnknown;
5611  break;
5612  case 1:
5613  type = eBinaryTypeKernel;
5614  break;
5615  case 2:
5616  type = eBinaryTypeUser;
5617  break;
5618  case 3:
5619  type = eBinaryTypeStandalone;
5620  break;
5621  }
5622  if (!m_data.GetU32(&offset, &log2_pagesize, 1))
5623  return false;
5624  if (version > 1 && !m_data.GetU32(&offset, &platform, 1))
5625  return false;
5626  return true;
5627  }
5628  }
5629  }
5630  }
      // Advance to the next load command. This also undoes the jump to
      // `fileoff` when a "main bin spec" note was inspected but not accepted.
5631  offset = cmd_offset + lc.cmdsize;
5632  }
5633  }
5634  return false;
5635 }
5636 
// Builds a register context for `thread` from a slice of this object file's
// data, choosing the concrete context class from the Mach-O header's cputype.
// The line carrying the function name and any leading parameters (listing
// line 5638) is not present in this extract.
//
// Returns an empty RegisterContextSP when the module is unavailable, when no
// thread-context file range was found, or when cputype matches none of the
// switch cases.
5637 lldb::RegisterContextSP
5639  lldb_private::Thread &thread) {
5640  lldb::RegisterContextSP reg_ctx_sp;
5641 
5642  ModuleSP module_sp(GetModule());
5643  if (module_sp) {
5644  std::lock_guard<std::recursive_mutex> guard(module_sp->GetMutex());
      // NOTE(review): listing lines 5645-5646 are missing from this extract;
      // whatever preparation they perform is not visible here.
5647 
      // NOTE(review): the expression that initializes this pointer (listing
      // line 5649) is missing; confirm how the entry is looked up.
5648  const FileRangeArray::Entry *thread_context_file_range =
5650  if (thread_context_file_range) {
5651 
      // Extractor built from m_data using the range's base and byte size;
      // presumably a view restricted to this thread's context bytes.
5652  DataExtractor data(m_data, thread_context_file_range->GetRangeBase(),
5653  thread_context_file_range->GetByteSize());
5654 
      // There is no default case: an unhandled cputype leaves reg_ctx_sp
      // empty.
5655  switch (m_header.cputype) {
      // NOTE(review): the case label(s) for this first branch (listing lines
      // 5656-5657) are missing; presumably the ARM64 cpu types -- confirm.
5658  reg_ctx_sp =
5659  std::make_shared<RegisterContextDarwin_arm64_Mach>(thread, data);
5660  break;
5661 
5662  case llvm::MachO::CPU_TYPE_ARM:
5663  reg_ctx_sp =
5664  std::make_shared<RegisterContextDarwin_arm_Mach>(thread, data);
5665  break;
5666 
5667  case llvm::MachO::CPU_TYPE_I386:
5668  reg_ctx_sp =
5669  std::make_shared<RegisterContextDarwin_i386_Mach>(thread, data);
5670  break;
5671 
5672  case llvm::MachO::CPU_TYPE_X86_64:
5673  reg_ctx_sp =
5674  std::make_shared<RegisterContextDarwin_x86_64_Mach>(thread, data);
5675  break;
5676  }
5677  }
5678  }
5679  return reg_ctx_sp;
5680 }
5681 
5683  switch (m_header.filetype) {
5684  case MH_OBJECT: // 0x1u
5685  if (GetAddressByteSize() == 4) {
5686  // 32 bit kexts are just object files, but they do have a valid
5687  // UUID load command.
5688  if (GetUUID()) {
5689  // this checking for the UUID load command is not enough we could
5690  // eventually look for the symbol named "OSKextGetCurrentIdentifier" as
5691  // this is required of kexts
5692  if (m_strata == eStrataInvalid)
5693  m_strata =