LLDB  mainline
ObjectFileMachO.cpp
Go to the documentation of this file.
1 //===-- ObjectFileMachO.cpp -----------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "llvm/ADT/ScopeExit.h"
10 #include "llvm/ADT/StringRef.h"
11 
16 #include "lldb/Core/Debugger.h"
17 #include "lldb/Core/FileSpecList.h"
18 #include "lldb/Core/Module.h"
19 #include "lldb/Core/ModuleSpec.h"
21 #include "lldb/Core/Progress.h"
22 #include "lldb/Core/Section.h"
23 #include "lldb/Core/StreamFile.h"
24 #include "lldb/Host/Host.h"
27 #include "lldb/Symbol/ObjectFile.h"
30 #include "lldb/Target/Platform.h"
31 #include "lldb/Target/Process.h"
33 #include "lldb/Target/Target.h"
34 #include "lldb/Target/Thread.h"
35 #include "lldb/Target/ThreadList.h"
36 #include "lldb/Utility/ArchSpec.h"
38 #include "lldb/Utility/FileSpec.h"
39 #include "lldb/Utility/LLDBLog.h"
40 #include "lldb/Utility/Log.h"
41 #include "lldb/Utility/RangeMap.h"
43 #include "lldb/Utility/Status.h"
45 #include "lldb/Utility/Timer.h"
46 #include "lldb/Utility/UUID.h"
47 
48 #include "lldb/Host/SafeMachO.h"
49 
50 #include "llvm/ADT/DenseSet.h"
51 #include "llvm/Support/FormatVariadic.h"
52 #include "llvm/Support/MemoryBuffer.h"
53 
54 #include "ObjectFileMachO.h"
55 
56 #if defined(__APPLE__)
57 #include <TargetConditionals.h>
58 // GetLLDBSharedCacheUUID() needs to call dlsym()
59 #include <dlfcn.h>
60 #include <mach/mach_init.h>
61 #include <mach/vm_map.h>
62 #include <lldb/Host/SafeMachO.h>
63 #endif
64 
65 #ifndef __APPLE__
67 #else
68 #include <uuid/uuid.h>
69 #endif
70 
71 #include <bitset>
72 #include <memory>
73 
74 // Unfortunately the signpost header pulls in the system MachO header, too.
75 #ifdef CPU_TYPE_ARM
76 #undef CPU_TYPE_ARM
77 #endif
78 #ifdef CPU_TYPE_ARM64
79 #undef CPU_TYPE_ARM64
80 #endif
81 #ifdef CPU_TYPE_ARM64_32
82 #undef CPU_TYPE_ARM64_32
83 #endif
84 #ifdef CPU_TYPE_I386
85 #undef CPU_TYPE_I386
86 #endif
87 #ifdef CPU_TYPE_X86_64
88 #undef CPU_TYPE_X86_64
89 #endif
90 #ifdef MH_DYLINKER
91 #undef MH_DYLINKER
92 #endif
93 #ifdef MH_OBJECT
94 #undef MH_OBJECT
95 #endif
96 #ifdef LC_VERSION_MIN_MACOSX
97 #undef LC_VERSION_MIN_MACOSX
98 #endif
99 #ifdef LC_VERSION_MIN_IPHONEOS
100 #undef LC_VERSION_MIN_IPHONEOS
101 #endif
102 #ifdef LC_VERSION_MIN_TVOS
103 #undef LC_VERSION_MIN_TVOS
104 #endif
105 #ifdef LC_VERSION_MIN_WATCHOS
106 #undef LC_VERSION_MIN_WATCHOS
107 #endif
108 #ifdef LC_BUILD_VERSION
109 #undef LC_BUILD_VERSION
110 #endif
111 #ifdef PLATFORM_MACOS
112 #undef PLATFORM_MACOS
113 #endif
114 #ifdef PLATFORM_MACCATALYST
115 #undef PLATFORM_MACCATALYST
116 #endif
117 #ifdef PLATFORM_IOS
118 #undef PLATFORM_IOS
119 #endif
120 #ifdef PLATFORM_IOSSIMULATOR
121 #undef PLATFORM_IOSSIMULATOR
122 #endif
123 #ifdef PLATFORM_TVOS
124 #undef PLATFORM_TVOS
125 #endif
126 #ifdef PLATFORM_TVOSSIMULATOR
127 #undef PLATFORM_TVOSSIMULATOR
128 #endif
129 #ifdef PLATFORM_WATCHOS
130 #undef PLATFORM_WATCHOS
131 #endif
132 #ifdef PLATFORM_WATCHOSSIMULATOR
133 #undef PLATFORM_WATCHOSSIMULATOR
134 #endif
135 
136 #define THUMB_ADDRESS_BIT_MASK 0xfffffffffffffffeull
137 using namespace lldb;
138 using namespace lldb_private;
139 using namespace llvm::MachO;
140 
142 
143 // Some structure definitions needed for parsing the dyld shared cache files
144 // found on iOS devices.
145 
147  char magic[16]; // e.g. "dyld_v0 i386", "dyld_v1 armv7", etc.
148  uint32_t mappingOffset; // file offset to first dyld_cache_mapping_info
149  uint32_t mappingCount; // number of dyld_cache_mapping_info entries
152  uint64_t dyldBaseAddress;
155  uint64_t slideInfoOffset;
156  uint64_t slideInfoSize;
159  uint8_t uuid[16]; // v1 and above, also recorded in dyld_all_image_infos v13
160  // and later
161 };
162 
163 static void PrintRegisterValue(RegisterContext *reg_ctx, const char *name,
164  const char *alt_name, size_t reg_byte_size,
165  Stream &data) {
166  const RegisterInfo *reg_info = reg_ctx->GetRegisterInfoByName(name);
167  if (reg_info == nullptr)
168  reg_info = reg_ctx->GetRegisterInfoByName(alt_name);
169  if (reg_info) {
170  lldb_private::RegisterValue reg_value;
171  if (reg_ctx->ReadRegister(reg_info, reg_value)) {
172  if (reg_info->byte_size >= reg_byte_size)
173  data.Write(reg_value.GetBytes(), reg_byte_size);
174  else {
175  data.Write(reg_value.GetBytes(), reg_info->byte_size);
176  for (size_t i = 0, n = reg_byte_size - reg_info->byte_size; i < n; ++i)
177  data.PutChar(0);
178  }
179  return;
180  }
181  }
182  // Just write zeros if all else fails
183  for (size_t i = 0; i < reg_byte_size; ++i)
184  data.PutChar(0);
185 }
186 
188 public:
190  const DataExtractor &data)
191  : RegisterContextDarwin_x86_64(thread, 0) {
192  SetRegisterDataFrom_LC_THREAD(data);
193  }
194 
195  void InvalidateAllRegisters() override {
196  // Do nothing... registers are always valid...
197  }
198 
200  lldb::offset_t offset = 0;
201  SetError(GPRRegSet, Read, -1);
202  SetError(FPURegSet, Read, -1);
203  SetError(EXCRegSet, Read, -1);
204  bool done = false;
205 
206  while (!done) {
207  int flavor = data.GetU32(&offset);
208  if (flavor == 0)
209  done = true;
210  else {
211  uint32_t i;
212  uint32_t count = data.GetU32(&offset);
213  switch (flavor) {
214  case GPRRegSet:
215  for (i = 0; i < count; ++i)
216  (&gpr.rax)[i] = data.GetU64(&offset);
217  SetError(GPRRegSet, Read, 0);
218  done = true;
219 
220  break;
221  case FPURegSet:
222  // TODO: fill in FPU regs....
223  // SetError (FPURegSet, Read, -1);
224  done = true;
225 
226  break;
227  case EXCRegSet:
228  exc.trapno = data.GetU32(&offset);
229  exc.err = data.GetU32(&offset);
230  exc.faultvaddr = data.GetU64(&offset);
231  SetError(EXCRegSet, Read, 0);
232  done = true;
233  break;
234  case 7:
235  case 8:
236  case 9:
237  // fancy flavors that encapsulate the above flavors...
238  break;
239 
240  default:
241  done = true;
242  break;
243  }
244  }
245  }
246  }
247 
248  static bool Create_LC_THREAD(Thread *thread, Stream &data) {
249  RegisterContextSP reg_ctx_sp(thread->GetRegisterContext());
250  if (reg_ctx_sp) {
251  RegisterContext *reg_ctx = reg_ctx_sp.get();
252 
253  data.PutHex32(GPRRegSet); // Flavor
254  data.PutHex32(GPRWordCount);
255  PrintRegisterValue(reg_ctx, "rax", nullptr, 8, data);
256  PrintRegisterValue(reg_ctx, "rbx", nullptr, 8, data);
257  PrintRegisterValue(reg_ctx, "rcx", nullptr, 8, data);
258  PrintRegisterValue(reg_ctx, "rdx", nullptr, 8, data);
259  PrintRegisterValue(reg_ctx, "rdi", nullptr, 8, data);
260  PrintRegisterValue(reg_ctx, "rsi", nullptr, 8, data);
261  PrintRegisterValue(reg_ctx, "rbp", nullptr, 8, data);
262  PrintRegisterValue(reg_ctx, "rsp", nullptr, 8, data);
263  PrintRegisterValue(reg_ctx, "r8", nullptr, 8, data);
264  PrintRegisterValue(reg_ctx, "r9", nullptr, 8, data);
265  PrintRegisterValue(reg_ctx, "r10", nullptr, 8, data);
266  PrintRegisterValue(reg_ctx, "r11", nullptr, 8, data);
267  PrintRegisterValue(reg_ctx, "r12", nullptr, 8, data);
268  PrintRegisterValue(reg_ctx, "r13", nullptr, 8, data);
269  PrintRegisterValue(reg_ctx, "r14", nullptr, 8, data);
270  PrintRegisterValue(reg_ctx, "r15", nullptr, 8, data);
271  PrintRegisterValue(reg_ctx, "rip", nullptr, 8, data);
272  PrintRegisterValue(reg_ctx, "rflags", nullptr, 8, data);
273  PrintRegisterValue(reg_ctx, "cs", nullptr, 8, data);
274  PrintRegisterValue(reg_ctx, "fs", nullptr, 8, data);
275  PrintRegisterValue(reg_ctx, "gs", nullptr, 8, data);
276 
277  // // Write out the FPU registers
278  // const size_t fpu_byte_size = sizeof(FPU);
279  // size_t bytes_written = 0;
280  // data.PutHex32 (FPURegSet);
281  // data.PutHex32 (fpu_byte_size/sizeof(uint64_t));
282  // bytes_written += data.PutHex32(0); // uint32_t pad[0]
283  // bytes_written += data.PutHex32(0); // uint32_t pad[1]
284  // bytes_written += WriteRegister (reg_ctx, "fcw", "fctrl", 2,
285  // data); // uint16_t fcw; // "fctrl"
286  // bytes_written += WriteRegister (reg_ctx, "fsw" , "fstat", 2,
287  // data); // uint16_t fsw; // "fstat"
288  // bytes_written += WriteRegister (reg_ctx, "ftw" , "ftag", 1,
289  // data); // uint8_t ftw; // "ftag"
290  // bytes_written += data.PutHex8 (0); // uint8_t pad1;
291  // bytes_written += WriteRegister (reg_ctx, "fop" , NULL, 2,
292  // data); // uint16_t fop; // "fop"
293  // bytes_written += WriteRegister (reg_ctx, "fioff", "ip", 4,
294  // data); // uint32_t ip; // "fioff"
295  // bytes_written += WriteRegister (reg_ctx, "fiseg", NULL, 2,
296  // data); // uint16_t cs; // "fiseg"
297  // bytes_written += data.PutHex16 (0); // uint16_t pad2;
298  // bytes_written += WriteRegister (reg_ctx, "dp", "fooff" , 4,
299  // data); // uint32_t dp; // "fooff"
300  // bytes_written += WriteRegister (reg_ctx, "foseg", NULL, 2,
301  // data); // uint16_t ds; // "foseg"
302  // bytes_written += data.PutHex16 (0); // uint16_t pad3;
303  // bytes_written += WriteRegister (reg_ctx, "mxcsr", NULL, 4,
304  // data); // uint32_t mxcsr;
305  // bytes_written += WriteRegister (reg_ctx, "mxcsrmask", NULL,
306  // 4, data);// uint32_t mxcsrmask;
307  // bytes_written += WriteRegister (reg_ctx, "stmm0", NULL,
308  // sizeof(MMSReg), data);
309  // bytes_written += WriteRegister (reg_ctx, "stmm1", NULL,
310  // sizeof(MMSReg), data);
311  // bytes_written += WriteRegister (reg_ctx, "stmm2", NULL,
312  // sizeof(MMSReg), data);
313  // bytes_written += WriteRegister (reg_ctx, "stmm3", NULL,
314  // sizeof(MMSReg), data);
315  // bytes_written += WriteRegister (reg_ctx, "stmm4", NULL,
316  // sizeof(MMSReg), data);
317  // bytes_written += WriteRegister (reg_ctx, "stmm5", NULL,
318  // sizeof(MMSReg), data);
319  // bytes_written += WriteRegister (reg_ctx, "stmm6", NULL,
320  // sizeof(MMSReg), data);
321  // bytes_written += WriteRegister (reg_ctx, "stmm7", NULL,
322  // sizeof(MMSReg), data);
323  // bytes_written += WriteRegister (reg_ctx, "xmm0" , NULL,
324  // sizeof(XMMReg), data);
325  // bytes_written += WriteRegister (reg_ctx, "xmm1" , NULL,
326  // sizeof(XMMReg), data);
327  // bytes_written += WriteRegister (reg_ctx, "xmm2" , NULL,
328  // sizeof(XMMReg), data);
329  // bytes_written += WriteRegister (reg_ctx, "xmm3" , NULL,
330  // sizeof(XMMReg), data);
331  // bytes_written += WriteRegister (reg_ctx, "xmm4" , NULL,
332  // sizeof(XMMReg), data);
333  // bytes_written += WriteRegister (reg_ctx, "xmm5" , NULL,
334  // sizeof(XMMReg), data);
335  // bytes_written += WriteRegister (reg_ctx, "xmm6" , NULL,
336  // sizeof(XMMReg), data);
337  // bytes_written += WriteRegister (reg_ctx, "xmm7" , NULL,
338  // sizeof(XMMReg), data);
339  // bytes_written += WriteRegister (reg_ctx, "xmm8" , NULL,
340  // sizeof(XMMReg), data);
341  // bytes_written += WriteRegister (reg_ctx, "xmm9" , NULL,
342  // sizeof(XMMReg), data);
343  // bytes_written += WriteRegister (reg_ctx, "xmm10", NULL,
344  // sizeof(XMMReg), data);
345  // bytes_written += WriteRegister (reg_ctx, "xmm11", NULL,
346  // sizeof(XMMReg), data);
347  // bytes_written += WriteRegister (reg_ctx, "xmm12", NULL,
348  // sizeof(XMMReg), data);
349  // bytes_written += WriteRegister (reg_ctx, "xmm13", NULL,
350  // sizeof(XMMReg), data);
351  // bytes_written += WriteRegister (reg_ctx, "xmm14", NULL,
352  // sizeof(XMMReg), data);
353  // bytes_written += WriteRegister (reg_ctx, "xmm15", NULL,
354  // sizeof(XMMReg), data);
355  //
356  // // Fill rest with zeros
357  // for (size_t i=0, n = fpu_byte_size - bytes_written; i<n; ++
358  // i)
359  // data.PutChar(0);
360 
361  // Write out the EXC registers
362  data.PutHex32(EXCRegSet);
363  data.PutHex32(EXCWordCount);
364  PrintRegisterValue(reg_ctx, "trapno", nullptr, 4, data);
365  PrintRegisterValue(reg_ctx, "err", nullptr, 4, data);
366  PrintRegisterValue(reg_ctx, "faultvaddr", nullptr, 8, data);
367  return true;
368  }
369  return false;
370  }
371 
372 protected:
373  int DoReadGPR(lldb::tid_t tid, int flavor, GPR &gpr) override { return 0; }
374 
375  int DoReadFPU(lldb::tid_t tid, int flavor, FPU &fpu) override { return 0; }
376 
377  int DoReadEXC(lldb::tid_t tid, int flavor, EXC &exc) override { return 0; }
378 
379  int DoWriteGPR(lldb::tid_t tid, int flavor, const GPR &gpr) override {
380  return 0;
381  }
382 
383  int DoWriteFPU(lldb::tid_t tid, int flavor, const FPU &fpu) override {
384  return 0;
385  }
386 
387  int DoWriteEXC(lldb::tid_t tid, int flavor, const EXC &exc) override {
388  return 0;
389  }
390 };
391 
393 public:
395  const DataExtractor &data)
396  : RegisterContextDarwin_i386(thread, 0) {
397  SetRegisterDataFrom_LC_THREAD(data);
398  }
399 
400  void InvalidateAllRegisters() override {
401  // Do nothing... registers are always valid...
402  }
403 
405  lldb::offset_t offset = 0;
406  SetError(GPRRegSet, Read, -1);
407  SetError(FPURegSet, Read, -1);
408  SetError(EXCRegSet, Read, -1);
409  bool done = false;
410 
411  while (!done) {
412  int flavor = data.GetU32(&offset);
413  if (flavor == 0)
414  done = true;
415  else {
416  uint32_t i;
417  uint32_t count = data.GetU32(&offset);
418  switch (flavor) {
419  case GPRRegSet:
420  for (i = 0; i < count; ++i)
421  (&gpr.eax)[i] = data.GetU32(&offset);
422  SetError(GPRRegSet, Read, 0);
423  done = true;
424 
425  break;
426  case FPURegSet:
427  // TODO: fill in FPU regs....
428  // SetError (FPURegSet, Read, -1);
429  done = true;
430 
431  break;
432  case EXCRegSet:
433  exc.trapno = data.GetU32(&offset);
434  exc.err = data.GetU32(&offset);
435  exc.faultvaddr = data.GetU32(&offset);
436  SetError(EXCRegSet, Read, 0);
437  done = true;
438  break;
439  case 7:
440  case 8:
441  case 9:
442  // fancy flavors that encapsulate the above flavors...
443  break;
444 
445  default:
446  done = true;
447  break;
448  }
449  }
450  }
451  }
452 
453  static bool Create_LC_THREAD(Thread *thread, Stream &data) {
454  RegisterContextSP reg_ctx_sp(thread->GetRegisterContext());
455  if (reg_ctx_sp) {
456  RegisterContext *reg_ctx = reg_ctx_sp.get();
457 
458  data.PutHex32(GPRRegSet); // Flavor
459  data.PutHex32(GPRWordCount);
460  PrintRegisterValue(reg_ctx, "eax", nullptr, 4, data);
461  PrintRegisterValue(reg_ctx, "ebx", nullptr, 4, data);
462  PrintRegisterValue(reg_ctx, "ecx", nullptr, 4, data);
463  PrintRegisterValue(reg_ctx, "edx", nullptr, 4, data);
464  PrintRegisterValue(reg_ctx, "edi", nullptr, 4, data);
465  PrintRegisterValue(reg_ctx, "esi", nullptr, 4, data);
466  PrintRegisterValue(reg_ctx, "ebp", nullptr, 4, data);
467  PrintRegisterValue(reg_ctx, "esp", nullptr, 4, data);
468  PrintRegisterValue(reg_ctx, "ss", nullptr, 4, data);
469  PrintRegisterValue(reg_ctx, "eflags", nullptr, 4, data);
470  PrintRegisterValue(reg_ctx, "eip", nullptr, 4, data);
471  PrintRegisterValue(reg_ctx, "cs", nullptr, 4, data);
472  PrintRegisterValue(reg_ctx, "ds", nullptr, 4, data);
473  PrintRegisterValue(reg_ctx, "es", nullptr, 4, data);
474  PrintRegisterValue(reg_ctx, "fs", nullptr, 4, data);
475  PrintRegisterValue(reg_ctx, "gs", nullptr, 4, data);
476 
477  // Write out the EXC registers
478  data.PutHex32(EXCRegSet);
479  data.PutHex32(EXCWordCount);
480  PrintRegisterValue(reg_ctx, "trapno", nullptr, 4, data);
481  PrintRegisterValue(reg_ctx, "err", nullptr, 4, data);
482  PrintRegisterValue(reg_ctx, "faultvaddr", nullptr, 4, data);
483  return true;
484  }
485  return false;
486  }
487 
488 protected:
489  int DoReadGPR(lldb::tid_t tid, int flavor, GPR &gpr) override { return 0; }
490 
491  int DoReadFPU(lldb::tid_t tid, int flavor, FPU &fpu) override { return 0; }
492 
493  int DoReadEXC(lldb::tid_t tid, int flavor, EXC &exc) override { return 0; }
494 
495  int DoWriteGPR(lldb::tid_t tid, int flavor, const GPR &gpr) override {
496  return 0;
497  }
498 
499  int DoWriteFPU(lldb::tid_t tid, int flavor, const FPU &fpu) override {
500  return 0;
501  }
502 
503  int DoWriteEXC(lldb::tid_t tid, int flavor, const EXC &exc) override {
504  return 0;
505  }
506 };
507 
509 public:
511  const DataExtractor &data)
512  : RegisterContextDarwin_arm(thread, 0) {
513  SetRegisterDataFrom_LC_THREAD(data);
514  }
515 
516  void InvalidateAllRegisters() override {
517  // Do nothing... registers are always valid...
518  }
519 
521  lldb::offset_t offset = 0;
522  SetError(GPRRegSet, Read, -1);
523  SetError(FPURegSet, Read, -1);
524  SetError(EXCRegSet, Read, -1);
525  bool done = false;
526 
527  while (!done) {
528  int flavor = data.GetU32(&offset);
529  uint32_t count = data.GetU32(&offset);
530  lldb::offset_t next_thread_state = offset + (count * 4);
531  switch (flavor) {
532  case GPRAltRegSet:
533  case GPRRegSet:
534  // On ARM, the CPSR register is also included in the count but it is
535  // not included in gpr.r so loop until (count-1).
536 
537  // Prevent static analysis warnings by explicitly constraining 'count'
538  // to an acceptable range. Handle possible underflow of count-1.
539  if (count > 0 && count <= sizeof(gpr.r) / sizeof(gpr.r[0])) {
540  for (uint32_t i = 0; i < (count - 1); ++i) {
541  gpr.r[i] = data.GetU32(&offset);
542  }
543  }
544  // Save cpsr explicitly.
545  gpr.cpsr = data.GetU32(&offset);
546 
547  SetError(GPRRegSet, Read, 0);
548  offset = next_thread_state;
549  break;
550 
551  case FPURegSet: {
552  uint8_t *fpu_reg_buf = (uint8_t *)&fpu.floats;
553  const int fpu_reg_buf_size = sizeof(fpu.floats);
554  if (data.ExtractBytes(offset, fpu_reg_buf_size, eByteOrderLittle,
555  fpu_reg_buf) == fpu_reg_buf_size) {
556  offset += fpu_reg_buf_size;
557  fpu.fpscr = data.GetU32(&offset);
558  SetError(FPURegSet, Read, 0);
559  } else {
560  done = true;
561  }
562  }
563  offset = next_thread_state;
564  break;
565 
566  case EXCRegSet:
567  if (count == 3) {
568  exc.exception = data.GetU32(&offset);
569  exc.fsr = data.GetU32(&offset);
570  exc.far = data.GetU32(&offset);
571  SetError(EXCRegSet, Read, 0);
572  }
573  done = true;
574  offset = next_thread_state;
575  break;
576 
577  // Unknown register set flavor, stop trying to parse.
578  default:
579  done = true;
580  }
581  }
582  }
583 
584  static bool Create_LC_THREAD(Thread *thread, Stream &data) {
585  RegisterContextSP reg_ctx_sp(thread->GetRegisterContext());
586  if (reg_ctx_sp) {
587  RegisterContext *reg_ctx = reg_ctx_sp.get();
588 
589  data.PutHex32(GPRRegSet); // Flavor
590  data.PutHex32(GPRWordCount);
591  PrintRegisterValue(reg_ctx, "r0", nullptr, 4, data);
592  PrintRegisterValue(reg_ctx, "r1", nullptr, 4, data);
593  PrintRegisterValue(reg_ctx, "r2", nullptr, 4, data);
594  PrintRegisterValue(reg_ctx, "r3", nullptr, 4, data);
595  PrintRegisterValue(reg_ctx, "r4", nullptr, 4, data);
596  PrintRegisterValue(reg_ctx, "r5", nullptr, 4, data);
597  PrintRegisterValue(reg_ctx, "r6", nullptr, 4, data);
598  PrintRegisterValue(reg_ctx, "r7", nullptr, 4, data);
599  PrintRegisterValue(reg_ctx, "r8", nullptr, 4, data);
600  PrintRegisterValue(reg_ctx, "r9", nullptr, 4, data);
601  PrintRegisterValue(reg_ctx, "r10", nullptr, 4, data);
602  PrintRegisterValue(reg_ctx, "r11", nullptr, 4, data);
603  PrintRegisterValue(reg_ctx, "r12", nullptr, 4, data);
604  PrintRegisterValue(reg_ctx, "sp", nullptr, 4, data);
605  PrintRegisterValue(reg_ctx, "lr", nullptr, 4, data);
606  PrintRegisterValue(reg_ctx, "pc", nullptr, 4, data);
607  PrintRegisterValue(reg_ctx, "cpsr", nullptr, 4, data);
608 
609  // Write out the EXC registers
610  // data.PutHex32 (EXCRegSet);
611  // data.PutHex32 (EXCWordCount);
612  // WriteRegister (reg_ctx, "exception", NULL, 4, data);
613  // WriteRegister (reg_ctx, "fsr", NULL, 4, data);
614  // WriteRegister (reg_ctx, "far", NULL, 4, data);
615  return true;
616  }
617  return false;
618  }
619 
620 protected:
621  int DoReadGPR(lldb::tid_t tid, int flavor, GPR &gpr) override { return -1; }
622 
623  int DoReadFPU(lldb::tid_t tid, int flavor, FPU &fpu) override { return -1; }
624 
625  int DoReadEXC(lldb::tid_t tid, int flavor, EXC &exc) override { return -1; }
626 
627  int DoReadDBG(lldb::tid_t tid, int flavor, DBG &dbg) override { return -1; }
628 
629  int DoWriteGPR(lldb::tid_t tid, int flavor, const GPR &gpr) override {
630  return 0;
631  }
632 
633  int DoWriteFPU(lldb::tid_t tid, int flavor, const FPU &fpu) override {
634  return 0;
635  }
636 
637  int DoWriteEXC(lldb::tid_t tid, int flavor, const EXC &exc) override {
638  return 0;
639  }
640 
641  int DoWriteDBG(lldb::tid_t tid, int flavor, const DBG &dbg) override {
642  return -1;
643  }
644 };
645 
647 public:
649  const DataExtractor &data)
650  : RegisterContextDarwin_arm64(thread, 0) {
651  SetRegisterDataFrom_LC_THREAD(data);
652  }
653 
654  void InvalidateAllRegisters() override {
655  // Do nothing... registers are always valid...
656  }
657 
659  lldb::offset_t offset = 0;
660  SetError(GPRRegSet, Read, -1);
661  SetError(FPURegSet, Read, -1);
662  SetError(EXCRegSet, Read, -1);
663  bool done = false;
664  while (!done) {
665  int flavor = data.GetU32(&offset);
666  uint32_t count = data.GetU32(&offset);
667  lldb::offset_t next_thread_state = offset + (count * 4);
668  switch (flavor) {
669  case GPRRegSet:
670  // x0-x29 + fp + lr + sp + pc (== 33 64-bit registers) plus cpsr (1
671  // 32-bit register)
672  if (count >= (33 * 2) + 1) {
673  for (uint32_t i = 0; i < 29; ++i)
674  gpr.x[i] = data.GetU64(&offset);
675  gpr.fp = data.GetU64(&offset);
676  gpr.lr = data.GetU64(&offset);
677  gpr.sp = data.GetU64(&offset);
678  gpr.pc = data.GetU64(&offset);
679  gpr.cpsr = data.GetU32(&offset);
680  SetError(GPRRegSet, Read, 0);
681  }
682  offset = next_thread_state;
683  break;
684  case FPURegSet: {
685  uint8_t *fpu_reg_buf = (uint8_t *)&fpu.v[0];
686  const int fpu_reg_buf_size = sizeof(fpu);
687  if (fpu_reg_buf_size == count * sizeof(uint32_t) &&
688  data.ExtractBytes(offset, fpu_reg_buf_size, eByteOrderLittle,
689  fpu_reg_buf) == fpu_reg_buf_size) {
690  SetError(FPURegSet, Read, 0);
691  } else {
692  done = true;
693  }
694  }
695  offset = next_thread_state;
696  break;
697  case EXCRegSet:
698  if (count == 4) {
699  exc.far = data.GetU64(&offset);
700  exc.esr = data.GetU32(&offset);
701  exc.exception = data.GetU32(&offset);
702  SetError(EXCRegSet, Read, 0);
703  }
704  offset = next_thread_state;
705  break;
706  default:
707  done = true;
708  break;
709  }
710  }
711  }
712 
713  static bool Create_LC_THREAD(Thread *thread, Stream &data) {
714  RegisterContextSP reg_ctx_sp(thread->GetRegisterContext());
715  if (reg_ctx_sp) {
716  RegisterContext *reg_ctx = reg_ctx_sp.get();
717 
718  data.PutHex32(GPRRegSet); // Flavor
719  data.PutHex32(GPRWordCount);
720  PrintRegisterValue(reg_ctx, "x0", nullptr, 8, data);
721  PrintRegisterValue(reg_ctx, "x1", nullptr, 8, data);
722  PrintRegisterValue(reg_ctx, "x2", nullptr, 8, data);
723  PrintRegisterValue(reg_ctx, "x3", nullptr, 8, data);
724  PrintRegisterValue(reg_ctx, "x4", nullptr, 8, data);
725  PrintRegisterValue(reg_ctx, "x5", nullptr, 8, data);
726  PrintRegisterValue(reg_ctx, "x6", nullptr, 8, data);
727  PrintRegisterValue(reg_ctx, "x7", nullptr, 8, data);
728  PrintRegisterValue(reg_ctx, "x8", nullptr, 8, data);
729  PrintRegisterValue(reg_ctx, "x9", nullptr, 8, data);
730  PrintRegisterValue(reg_ctx, "x10", nullptr, 8, data);
731  PrintRegisterValue(reg_ctx, "x11", nullptr, 8, data);
732  PrintRegisterValue(reg_ctx, "x12", nullptr, 8, data);
733  PrintRegisterValue(reg_ctx, "x13", nullptr, 8, data);
734  PrintRegisterValue(reg_ctx, "x14", nullptr, 8, data);
735  PrintRegisterValue(reg_ctx, "x15", nullptr, 8, data);
736  PrintRegisterValue(reg_ctx, "x16", nullptr, 8, data);
737  PrintRegisterValue(reg_ctx, "x17", nullptr, 8, data);
738  PrintRegisterValue(reg_ctx, "x18", nullptr, 8, data);
739  PrintRegisterValue(reg_ctx, "x19", nullptr, 8, data);
740  PrintRegisterValue(reg_ctx, "x20", nullptr, 8, data);
741  PrintRegisterValue(reg_ctx, "x21", nullptr, 8, data);
742  PrintRegisterValue(reg_ctx, "x22", nullptr, 8, data);
743  PrintRegisterValue(reg_ctx, "x23", nullptr, 8, data);
744  PrintRegisterValue(reg_ctx, "x24", nullptr, 8, data);
745  PrintRegisterValue(reg_ctx, "x25", nullptr, 8, data);
746  PrintRegisterValue(reg_ctx, "x26", nullptr, 8, data);
747  PrintRegisterValue(reg_ctx, "x27", nullptr, 8, data);
748  PrintRegisterValue(reg_ctx, "x28", nullptr, 8, data);
749  PrintRegisterValue(reg_ctx, "fp", nullptr, 8, data);
750  PrintRegisterValue(reg_ctx, "lr", nullptr, 8, data);
751  PrintRegisterValue(reg_ctx, "sp", nullptr, 8, data);
752  PrintRegisterValue(reg_ctx, "pc", nullptr, 8, data);
753  PrintRegisterValue(reg_ctx, "cpsr", nullptr, 4, data);
754  data.PutHex32(0); // uint32_t pad at the end
755 
756  // Write out the EXC registers
757  data.PutHex32(EXCRegSet);
758  data.PutHex32(EXCWordCount);
759  PrintRegisterValue(reg_ctx, "far", nullptr, 8, data);
760  PrintRegisterValue(reg_ctx, "esr", nullptr, 4, data);
761  PrintRegisterValue(reg_ctx, "exception", nullptr, 4, data);
762  return true;
763  }
764  return false;
765  }
766 
767 protected:
768  int DoReadGPR(lldb::tid_t tid, int flavor, GPR &gpr) override { return -1; }
769 
770  int DoReadFPU(lldb::tid_t tid, int flavor, FPU &fpu) override { return -1; }
771 
772  int DoReadEXC(lldb::tid_t tid, int flavor, EXC &exc) override { return -1; }
773 
774  int DoReadDBG(lldb::tid_t tid, int flavor, DBG &dbg) override { return -1; }
775 
776  int DoWriteGPR(lldb::tid_t tid, int flavor, const GPR &gpr) override {
777  return 0;
778  }
779 
780  int DoWriteFPU(lldb::tid_t tid, int flavor, const FPU &fpu) override {
781  return 0;
782  }
783 
784  int DoWriteEXC(lldb::tid_t tid, int flavor, const EXC &exc) override {
785  return 0;
786  }
787 
788  int DoWriteDBG(lldb::tid_t tid, int flavor, const DBG &dbg) override {
789  return -1;
790  }
791 };
792 
794  switch (magic) {
795  case MH_MAGIC:
796  case MH_CIGAM:
797  return sizeof(struct llvm::MachO::mach_header);
798 
799  case MH_MAGIC_64:
800  case MH_CIGAM_64:
801  return sizeof(struct llvm::MachO::mach_header_64);
802  break;
803 
804  default:
805  break;
806  }
807  return 0;
808 }
809 
810 #define MACHO_NLIST_ARM_SYMBOL_IS_THUMB 0x0008
811 
813 
815  PluginManager::RegisterPlugin(
816  GetPluginNameStatic(), GetPluginDescriptionStatic(), CreateInstance,
817  CreateMemoryInstance, GetModuleSpecifications, SaveCore);
818 }
819 
821  PluginManager::UnregisterPlugin(CreateInstance);
822 }
823 
824 ObjectFile *ObjectFileMachO::CreateInstance(const lldb::ModuleSP &module_sp,
825  DataBufferSP data_sp,
826  lldb::offset_t data_offset,
827  const FileSpec *file,
828  lldb::offset_t file_offset,
829  lldb::offset_t length) {
830  if (!data_sp) {
831  data_sp = MapFileData(*file, length, file_offset);
832  if (!data_sp)
833  return nullptr;
834  data_offset = 0;
835  }
836 
837  if (!ObjectFileMachO::MagicBytesMatch(data_sp, data_offset, length))
838  return nullptr;
839 
840  // Update the data to contain the entire file if it doesn't already
841  if (data_sp->GetByteSize() < length) {
842  data_sp = MapFileData(*file, length, file_offset);
843  if (!data_sp)
844  return nullptr;
845  data_offset = 0;
846  }
847  auto objfile_up = std::make_unique<ObjectFileMachO>(
848  module_sp, data_sp, data_offset, file, file_offset, length);
849  if (!objfile_up || !objfile_up->ParseHeader())
850  return nullptr;
851 
852  return objfile_up.release();
853 }
854 
856  const lldb::ModuleSP &module_sp, WritableDataBufferSP data_sp,
857  const ProcessSP &process_sp, lldb::addr_t header_addr) {
858  if (ObjectFileMachO::MagicBytesMatch(data_sp, 0, data_sp->GetByteSize())) {
859  std::unique_ptr<ObjectFile> objfile_up(
860  new ObjectFileMachO(module_sp, data_sp, process_sp, header_addr));
861  if (objfile_up.get() && objfile_up->ParseHeader())
862  return objfile_up.release();
863  }
864  return nullptr;
865 }
866 
868  const lldb_private::FileSpec &file, lldb::DataBufferSP &data_sp,
869  lldb::offset_t data_offset, lldb::offset_t file_offset,
871  const size_t initial_count = specs.GetSize();
872 
873  if (ObjectFileMachO::MagicBytesMatch(data_sp, 0, data_sp->GetByteSize())) {
874  DataExtractor data;
875  data.SetData(data_sp);
876  llvm::MachO::mach_header header;
877  if (ParseHeader(data, &data_offset, header)) {
878  size_t header_and_load_cmds =
879  header.sizeofcmds + MachHeaderSizeFromMagic(header.magic);
880  if (header_and_load_cmds >= data_sp->GetByteSize()) {
881  data_sp = MapFileData(file, header_and_load_cmds, file_offset);
882  data.SetData(data_sp);
883  data_offset = MachHeaderSizeFromMagic(header.magic);
884  }
885  if (data_sp) {
886  ModuleSpec base_spec;
887  base_spec.GetFileSpec() = file;
888  base_spec.SetObjectOffset(file_offset);
889  base_spec.SetObjectSize(length);
890  GetAllArchSpecs(header, data, data_offset, base_spec, specs);
891  }
892  }
893  }
894  return specs.GetSize() - initial_count;
895 }
896 
898  static ConstString g_segment_name_TEXT("__TEXT");
899  return g_segment_name_TEXT;
900 }
901 
903  static ConstString g_segment_name_DATA("__DATA");
904  return g_segment_name_DATA;
905 }
906 
908  static ConstString g_segment_name("__DATA_DIRTY");
909  return g_segment_name;
910 }
911 
913  static ConstString g_segment_name("__DATA_CONST");
914  return g_segment_name;
915 }
916 
918  static ConstString g_segment_name_OBJC("__OBJC");
919  return g_segment_name_OBJC;
920 }
921 
923  static ConstString g_section_name_LINKEDIT("__LINKEDIT");
924  return g_section_name_LINKEDIT;
925 }
926 
928  static ConstString g_section_name("__DWARF");
929  return g_section_name;
930 }
931 
933  static ConstString g_section_name_eh_frame("__eh_frame");
934  return g_section_name_eh_frame;
935 }
936 
937 bool ObjectFileMachO::MagicBytesMatch(DataBufferSP data_sp,
938  lldb::addr_t data_offset,
939  lldb::addr_t data_length) {
940  DataExtractor data;
941  data.SetData(data_sp, data_offset, data_length);
942  lldb::offset_t offset = 0;
943  uint32_t magic = data.GetU32(&offset);
944 
945  offset += 4; // cputype
946  offset += 4; // cpusubtype
947  uint32_t filetype = data.GetU32(&offset);
948 
949  // A fileset has a Mach-O header but is not an
950  // individual file and must be handled via an
951  // ObjectContainer plugin.
952  if (filetype == llvm::MachO::MH_FILESET)
953  return false;
954 
955  return MachHeaderSizeFromMagic(magic) != 0;
956 }
957 
958 ObjectFileMachO::ObjectFileMachO(const lldb::ModuleSP &module_sp,
959  DataBufferSP data_sp,
960  lldb::offset_t data_offset,
961  const FileSpec *file,
962  lldb::offset_t file_offset,
963  lldb::offset_t length)
964  : ObjectFile(module_sp, file, file_offset, length, data_sp, data_offset),
965  m_mach_segments(), m_mach_sections(), m_entry_point_address(),
966  m_thread_context_offsets(), m_thread_context_offsets_valid(false),
967  m_reexported_dylibs(), m_allow_assembly_emulation_unwind_plans(true) {
968  ::memset(&m_header, 0, sizeof(m_header));
969  ::memset(&m_dysymtab, 0, sizeof(m_dysymtab));
970 }
971 
972 ObjectFileMachO::ObjectFileMachO(const lldb::ModuleSP &module_sp,
973  lldb::WritableDataBufferSP header_data_sp,
974  const lldb::ProcessSP &process_sp,
975  lldb::addr_t header_addr)
976  : ObjectFile(module_sp, process_sp, header_addr, header_data_sp),
977  m_mach_segments(), m_mach_sections(), m_entry_point_address(),
978  m_thread_context_offsets(), m_thread_context_offsets_valid(false),
979  m_reexported_dylibs(), m_allow_assembly_emulation_unwind_plans(true) {
980  ::memset(&m_header, 0, sizeof(m_header));
981  ::memset(&m_dysymtab, 0, sizeof(m_dysymtab));
982 }
983 
// Decode a Mach-O header from `data` into `header`.
//
// The magic is read at *data_offset_ptr. When it is one of the four known
// magics the address size of `data` is configured and the six 32-bit fields
// starting at `cputype` are read; *data_offset_ptr is advanced past what was
// consumed. Returns true on success. On an unknown magic `header` is zeroed
// and false is returned.
985  lldb::offset_t *data_offset_ptr,
986  llvm::MachO::mach_header &header) {
988  // Leave magic in the original byte order
989  header.magic = data.GetU32(data_offset_ptr);
990  bool can_parse = false;
991  bool is_64_bit = false;
992  switch (header.magic) {
993  case MH_MAGIC:
995  data.SetAddressByteSize(4);
996  can_parse = true;
997  break;
998 
999  case MH_MAGIC_64:
1001  data.SetAddressByteSize(8);
1002  can_parse = true;
1003  is_64_bit = true;
1004  break;
1005 
1006  case MH_CIGAM:
1009  : eByteOrderBig);
1010  data.SetAddressByteSize(4);
1011  can_parse = true;
1012  break;
1013 
1014  case MH_CIGAM_64:
1017  : eByteOrderBig);
1018  data.SetAddressByteSize(8);
1019  is_64_bit = true;
1020  can_parse = true;
1021  break;
1022 
1023  default:
1024  break;
1025  }
1026 
1027  if (can_parse) {
// Read the 6 consecutive uint32_t fields that begin at header.cputype.
1028  data.GetU32(data_offset_ptr, &header.cputype, 6);
// A 64-bit header carries 4 more bytes than the 32-bit one; step over them.
1029  if (is_64_bit)
1030  *data_offset_ptr += 4;
1031  return true;
1032  } else {
1033  memset(&header, 0, sizeof(header));
1034  }
1035  return false;
1036 }
1037 
// Parse the Mach-O header held in m_data into m_header while holding the
// module's mutex.
//
// Returns false when the module is gone, when the magic is not recognized
// (m_header is zeroed in that case), or when the spec loop finishes without
// returning. Returns true after the first architecture accepted by
// SetModulesArchitecture(); on that path m_data is re-pointed at a buffer
// holding the header plus all load commands if the current buffer is smaller.
1039  ModuleSP module_sp(GetModule());
1040  if (!module_sp)
1041  return false;
1042 
1043  std::lock_guard<std::recursive_mutex> guard(module_sp->GetMutex());
1044  bool can_parse = false;
1045  lldb::offset_t offset = 0;
1047  // Leave magic in the original byte order
1048  m_header.magic = m_data.GetU32(&offset);
1049  switch (m_header.magic) {
1050  case MH_MAGIC:
1053  can_parse = true;
1054  break;
1055 
1056  case MH_MAGIC_64:
1059  can_parse = true;
1060  break;
1061 
1062  case MH_CIGAM:
1065  : eByteOrderBig);
1067  can_parse = true;
1068  break;
1069 
1070  case MH_CIGAM_64:
1073  : eByteOrderBig);
1075  can_parse = true;
1076  break;
1077 
1078  default:
1079  break;
1080  }
1081 
1082  if (can_parse) {
// Read the 6 consecutive uint32_t fields that begin at m_header.cputype.
1083  m_data.GetU32(&offset, &m_header.cputype, 6);
1084 
1085  ModuleSpecList all_specs;
1086  ModuleSpec base_spec;
1088  base_spec, all_specs);
1089 
1090  for (unsigned i = 0, e = all_specs.GetSize(); i != e; ++i) {
1091  ArchSpec mach_arch =
1093 
1094  // Check if the module has a required architecture
1095  const ArchSpec &module_arch = module_sp->GetArchitecture();
1096  if (module_arch.IsValid() && !module_arch.IsCompatibleMatch(mach_arch))
1097  continue;
1098 
1099  if (SetModulesArchitecture(mach_arch)) {
// Total bytes needed to hold the mach header followed by every load command.
1100  const size_t header_and_lc_size =
1101  m_header.sizeofcmds + MachHeaderSizeFromMagic(m_header.magic);
1102  if (m_data.GetByteSize() < header_and_lc_size) {
1103  DataBufferSP data_sp;
1104  ProcessSP process_sp(m_process_wp.lock());
1105  if (process_sp) {
1106  data_sp = ReadMemory(process_sp, m_memory_addr, header_and_lc_size);
1107  } else {
1108  // Read in only the header and load command data from the file on disk
1109  data_sp = MapFileData(m_file, header_and_lc_size, m_file_offset);
// NOTE(review): data_sp is dereferenced here without a null check, while the
// ReadMemory() path above is guarded by `if (data_sp)` below. Confirm that
// MapFileData() cannot return an empty pointer.
1110  if (data_sp->GetByteSize() != header_and_lc_size)
1111  continue;
1112  }
1113  if (data_sp)
1114  m_data.SetData(data_sp);
1115  }
1116  }
1117  return true;
1118  }
1119  // None found.
1120  return false;
1121  } else {
1122  memset(&m_header, 0, sizeof(struct llvm::MachO::mach_header));
1123  }
1124  return false;
1125 }
1126 
// Report the byte order that m_data is currently configured with.
1128  return m_data.GetByteOrder();
1129 }
1130 
// True when the parsed header's filetype is MH_EXECUTE.
1132  return m_header.filetype == MH_EXECUTE;
1133 }
1134 
// True when the parsed header's filetype is MH_DYLINKER.
1136  return m_header.filetype == MH_DYLINKER;
1137 }
1138 
// Non-zero when the MH_DYLIB_IN_CACHE bit is set in the parsed header flags.
1140  return m_header.flags & MH_DYLIB_IN_CACHE;
1141 }
1142 
// Report the address byte size that m_data is currently configured with.
1144  return m_data.GetAddressByteSize();
1145 }
1146 
// Classify `file_addr` using the symbol table.
//
// The symbol containing the address is looked up. If the symbol's value is an
// address with a section, the section type decides the class; otherwise (or if
// the section switch does not return) the symbol type decides. Returns
// AddressClass::eUnknown when there is no symbol table, no containing symbol,
// or no mapping for the type.
1148  Symtab *symtab = GetSymtab();
1149  if (!symtab)
1150  return AddressClass::eUnknown;
1151 
1152  Symbol *symbol = symtab->FindSymbolContainingFileAddress(file_addr);
1153  if (symbol) {
1154  if (symbol->ValueIsAddress()) {
1155  SectionSP section_sp(symbol->GetAddressRef().GetSection());
1156  if (section_sp) {
// First choice: classify by the type of the section the symbol lives in.
1157  const lldb::SectionType section_type = section_sp->GetType();
1158  switch (section_type) {
1159  case eSectionTypeInvalid:
1160  return AddressClass::eUnknown;
1161 
1162  case eSectionTypeCode:
1163  if (m_header.cputype == llvm::MachO::CPU_TYPE_ARM) {
1164  // For ARM we have a bit in the n_desc field of the symbol that
1165  // tells us ARM/Thumb which is bit 0x0008.
1166  if (symbol->GetFlags() & MACHO_NLIST_ARM_SYMBOL_IS_THUMB)
1167  return AddressClass::eCodeAlternateISA;
1168  }
1169  return AddressClass::eCode;
1170 
1171  case eSectionTypeContainer:
1172  return AddressClass::eUnknown;
1173 
1174  case eSectionTypeData:
1178  case eSectionTypeData4:
1179  case eSectionTypeData8:
1180  case eSectionTypeData16:
1182  case eSectionTypeZeroFill:
1185  case eSectionTypeGoSymtab:
1186  return AddressClass::eData;
1187 
1188  case eSectionTypeDebug:
1223  return AddressClass::eDebug;
1224 
1225  case eSectionTypeEHFrame:
1226  case eSectionTypeARMexidx:
1227  case eSectionTypeARMextab:
1229  return AddressClass::eRuntime;
1230 
1236  case eSectionTypeOther:
1237  return AddressClass::eUnknown;
1238  }
1239  }
1240  }
1241 
// Fallback: classify by the symbol's own type.
1242  const SymbolType symbol_type = symbol->GetType();
1243  switch (symbol_type) {
1244  case eSymbolTypeAny:
1245  return AddressClass::eUnknown;
1246  case eSymbolTypeAbsolute:
1247  return AddressClass::eUnknown;
1248 
1249  case eSymbolTypeCode:
1250  case eSymbolTypeTrampoline:
1251  case eSymbolTypeResolver:
1252  if (m_header.cputype == llvm::MachO::CPU_TYPE_ARM) {
1253  // For ARM we have a bit in the n_desc field of the symbol that tells
1254  // us ARM/Thumb which is bit 0x0008.
1255  if (symbol->GetFlags() & MACHO_NLIST_ARM_SYMBOL_IS_THUMB)
1256  return AddressClass::eCodeAlternateISA;
1257  }
1258  return AddressClass::eCode;
1259 
1260  case eSymbolTypeData:
1261  return AddressClass::eData;
1262  case eSymbolTypeRuntime:
1263  return AddressClass::eRuntime;
1264  case eSymbolTypeException:
1265  return AddressClass::eRuntime;
1266  case eSymbolTypeSourceFile:
1267  return AddressClass::eDebug;
1268  case eSymbolTypeHeaderFile:
1269  return AddressClass::eDebug;
1270  case eSymbolTypeObjectFile:
1271  return AddressClass::eDebug;
1273  return AddressClass::eDebug;
1274  case eSymbolTypeBlock:
1275  return AddressClass::eDebug;
1276  case eSymbolTypeLocal:
1277  return AddressClass::eData;
1278  case eSymbolTypeParam:
1279  return AddressClass::eData;
1280  case eSymbolTypeVariable:
1281  return AddressClass::eData;
1283  return AddressClass::eDebug;
1284  case eSymbolTypeLineEntry:
1285  return AddressClass::eDebug;
1286  case eSymbolTypeLineHeader:
1287  return AddressClass::eDebug;
1288  case eSymbolTypeScopeBegin:
1289  return AddressClass::eDebug;
1290  case eSymbolTypeScopeEnd:
1291  return AddressClass::eDebug;
1292  case eSymbolTypeAdditional:
1293  return AddressClass::eUnknown;
1294  case eSymbolTypeCompiler:
1295  return AddressClass::eDebug;
1297  return AddressClass::eDebug;
1298  case eSymbolTypeUndefined:
1299  return AddressClass::eUnknown;
1300  case eSymbolTypeObjCClass:
1301  return AddressClass::eRuntime;
1303  return AddressClass::eRuntime;
1304  case eSymbolTypeObjCIVar:
1305  return AddressClass::eRuntime;
1306  case eSymbolTypeReExported:
1307  return AddressClass::eRuntime;
1308  }
1309  }
1310  return AddressClass::eUnknown;
1311 }
1312 
// Decide whether the file looks stripped, based on LC_DYSYMTAB.
//
// If m_dysymtab has not been filled in yet (cmd == 0) and the module is still
// alive, the load commands are walked to find LC_DYSYMTAB and cache it in
// m_dysymtab. Returns true when a cached LC_DYSYMTAB records at most one
// local symbol, and false otherwise (including when no LC_DYSYMTAB exists).
1314  if (m_dysymtab.cmd == 0) {
1315  ModuleSP module_sp(GetModule());
1316  if (module_sp) {
1318  for (uint32_t i = 0; i < m_header.ncmds; ++i) {
1319  const lldb::offset_t load_cmd_offset = offset;
1320 
1321  llvm::MachO::load_command lc = {};
// Read the two leading uint32_t fields (cmd, cmdsize) shared by all commands.
1322  if (m_data.GetU32(&offset, &lc.cmd, 2) == nullptr)
1323  break;
1324  if (lc.cmd == LC_DYSYMTAB) {
1325  m_dysymtab.cmd = lc.cmd;
1326  m_dysymtab.cmdsize = lc.cmdsize;
// Read every remaining uint32_t of the struct; the "- 2" accounts for the
// cmd and cmdsize fields assigned just above.
1327  if (m_data.GetU32(&offset, &m_dysymtab.ilocalsym,
1328  (sizeof(m_dysymtab) / sizeof(uint32_t)) - 2) ==
1329  nullptr) {
1330  // Clear m_dysymtab if we were unable to read all items from the
1331  // load command
1332  ::memset(&m_dysymtab, 0, sizeof(m_dysymtab));
1333  }
1334  }
// Jump to the next load command using the size recorded in this one.
1335  offset = load_cmd_offset + lc.cmdsize;
1336  }
1337  }
1338  }
1339  if (m_dysymtab.cmd)
1340  return m_dysymtab.nlocalsym <= 1;
1341  return false;
1342 }
1343 
// Collect the file ranges that the load commands mark as encrypted.
//
// Walks every load command and, for LC_ENCRYPTION_INFO and
// LC_ENCRYPTION_INFO_64 commands with a non-zero cryptid, appends a range of
// [cryptoff, cryptoff + cryptsize) to the result.
1345  EncryptedFileRanges result;
1347 
1348  llvm::MachO::encryption_info_command encryption_cmd;
1349  for (uint32_t i = 0; i < m_header.ncmds; ++i) {
1350  const lldb::offset_t load_cmd_offset = offset;
// Read the two leading uint32_t fields (cmd, cmdsize); stop if data runs out.
1351  if (m_data.GetU32(&offset, &encryption_cmd, 2) == nullptr)
1352  break;
1353 
1354  // LC_ENCRYPTION_INFO and LC_ENCRYPTION_INFO_64 have the same sizes for the
1355  // 3 fields we care about, so treat them the same.
1356  if (encryption_cmd.cmd == LC_ENCRYPTION_INFO ||
1357  encryption_cmd.cmd == LC_ENCRYPTION_INFO_64) {
1358  if (m_data.GetU32(&offset, &encryption_cmd.cryptoff, 3)) {
// Only a non-zero cryptid produces an encrypted range.
1359  if (encryption_cmd.cryptid != 0) {
1361  entry.SetRangeBase(encryption_cmd.cryptoff);
1362  entry.SetByteSize(encryption_cmd.cryptsize);
1363  result.Append(entry);
1364  }
1365  }
1366  }
// Jump to the next load command using the size recorded in this one.
1367  offset = load_cmd_offset + encryption_cmd.cmdsize;
1368  }
1369 
1370  return result;
1371 }
1372 
1374  llvm::MachO::segment_command_64 &seg_cmd, uint32_t cmd_idx) {
1375  if (m_length == 0 || seg_cmd.filesize == 0)
1376  return;
1377 
1378  if (IsSharedCacheBinary() && !IsInMemory()) {
1379  // In shared cache images, the load commands are relative to the
1380  // shared cache file, and not the specific image we are
1381  // examining. Let's fix this up so that it looks like a normal
1382  // image.
1383  if (strncmp(seg_cmd.segname, "__TEXT", sizeof(seg_cmd.segname)) == 0)
1384  m_text_address = seg_cmd.vmaddr;
1385  if (strncmp(seg_cmd.segname, "__LINKEDIT", sizeof(seg_cmd.segname)) == 0)
1386  m_linkedit_original_offset = seg_cmd.fileoff;
1387 
1388  seg_cmd.fileoff = seg_cmd.vmaddr - m_text_address;
1389  }
1390 
1391  if (seg_cmd.fileoff > m_length) {
1392  // We have a load command that says it extends past the end of the file.
1393  // This is likely a corrupt file. We don't have any way to return an error
1394  // condition here (this method was likely invoked from something like
1395  // ObjectFile::GetSectionList()), so we just null out the section contents,
1396  // and dump a message to stdout. The most common case here is core file
1397  // debugging with a truncated file.
1398  const char *lc_segment_name =
1399  seg_cmd.cmd == LC_SEGMENT_64 ? "LC_SEGMENT_64" : "LC_SEGMENT";
1400  GetModule()->ReportWarning(
1401  "load command %u %s has a fileoff (0x%" PRIx64
1402  ") that extends beyond the end of the file (0x%" PRIx64
1403  "), ignoring this section",
1404  cmd_idx, lc_segment_name, seg_cmd.fileoff, m_length);
1405 
1406  seg_cmd.fileoff = 0;
1407  seg_cmd.filesize = 0;
1408  }
1409 
1410  if (seg_cmd.fileoff + seg_cmd.filesize > m_length) {
1411  // We have a load command that says it extends past the end of the file.
1412  // This is likely a corrupt file. We don't have any way to return an error
1413  // condition here (this method was likely invoked from something like
1414  // ObjectFile::GetSectionList()), so we just null out the section contents,
1415  // and dump a message to stdout. The most common case here is core file
1416  // debugging with a truncated file.
1417  const char *lc_segment_name =
1418  seg_cmd.cmd == LC_SEGMENT_64 ? "LC_SEGMENT_64" : "LC_SEGMENT";
1419  GetModule()->ReportWarning(
1420  "load command %u %s has a fileoff + filesize (0x%" PRIx64
1421  ") that extends beyond the end of the file (0x%" PRIx64
1422  "), the segment will be truncated to match",
1423  cmd_idx, lc_segment_name, seg_cmd.fileoff + seg_cmd.filesize, m_length);
1424 
1425  // Truncate the length
1426  seg_cmd.filesize = m_length - seg_cmd.fileoff;
1427  }
1428 }
1429 
1430 static uint32_t
1431 GetSegmentPermissions(const llvm::MachO::segment_command_64 &seg_cmd) {
1432  uint32_t result = 0;
1433  if (seg_cmd.initprot & VM_PROT_READ)
1434  result |= ePermissionsReadable;
1435  if (seg_cmd.initprot & VM_PROT_WRITE)
1436  result |= ePermissionsWritable;
1437  if (seg_cmd.initprot & VM_PROT_EXECUTE)
1438  result |= ePermissionsExecutable;
1439  return result;
1440 }
1441 
// Map Mach-O section flags plus a section name to a section type.
//
// Order of precedence as written below:
//   1. either "instructions" attribute bit in `flags` means code;
//   2. a set of well-known section names is checked;
//   3. the SECTION_TYPE bits of `flags` select the type, with
//      eSectionTypeOther as the default.
1443  ConstString section_name) {
1444 
1445  if (flags & (S_ATTR_PURE_INSTRUCTIONS | S_ATTR_SOME_INSTRUCTIONS))
1446  return eSectionTypeCode;
1447 
1448  uint32_t mach_sect_type = flags & SECTION_TYPE;
// Function-local statics: each ConstString is constructed once and reused on
// later calls.
1449  static ConstString g_sect_name_objc_data("__objc_data");
1450  static ConstString g_sect_name_objc_msgrefs("__objc_msgrefs");
1451  static ConstString g_sect_name_objc_selrefs("__objc_selrefs");
1452  static ConstString g_sect_name_objc_classrefs("__objc_classrefs");
1453  static ConstString g_sect_name_objc_superrefs("__objc_superrefs");
1454  static ConstString g_sect_name_objc_const("__objc_const");
1455  static ConstString g_sect_name_objc_classlist("__objc_classlist");
1456  static ConstString g_sect_name_cfstring("__cfstring");
1457 
1458  static ConstString g_sect_name_dwarf_debug_abbrev("__debug_abbrev");
1459  static ConstString g_sect_name_dwarf_debug_aranges("__debug_aranges");
1460  static ConstString g_sect_name_dwarf_debug_frame("__debug_frame");
1461  static ConstString g_sect_name_dwarf_debug_info("__debug_info");
1462  static ConstString g_sect_name_dwarf_debug_line("__debug_line");
1463  static ConstString g_sect_name_dwarf_debug_loc("__debug_loc");
1464  static ConstString g_sect_name_dwarf_debug_loclists("__debug_loclists");
1465  static ConstString g_sect_name_dwarf_debug_macinfo("__debug_macinfo");
1466  static ConstString g_sect_name_dwarf_debug_names("__debug_names");
1467  static ConstString g_sect_name_dwarf_debug_pubnames("__debug_pubnames");
1468  static ConstString g_sect_name_dwarf_debug_pubtypes("__debug_pubtypes");
1469  static ConstString g_sect_name_dwarf_debug_ranges("__debug_ranges");
1470  static ConstString g_sect_name_dwarf_debug_str("__debug_str");
1471  static ConstString g_sect_name_dwarf_debug_types("__debug_types");
1472  static ConstString g_sect_name_dwarf_apple_names("__apple_names");
1473  static ConstString g_sect_name_dwarf_apple_types("__apple_types");
// NOTE(review): "__apple_namespac" looks deliberately truncated, presumably to
// fit the fixed-width section name field - confirm before "fixing" it.
1474  static ConstString g_sect_name_dwarf_apple_namespaces("__apple_namespac");
1475  static ConstString g_sect_name_dwarf_apple_objc("__apple_objc");
1476  static ConstString g_sect_name_eh_frame("__eh_frame");
1477  static ConstString g_sect_name_compact_unwind("__unwind_info");
1478  static ConstString g_sect_name_text("__text");
1479  static ConstString g_sect_name_data("__data");
1480  static ConstString g_sect_name_go_symtab("__gosymtab");
1481 
// Name-based classification.
1482  if (section_name == g_sect_name_dwarf_debug_abbrev)
1484  if (section_name == g_sect_name_dwarf_debug_aranges)
1486  if (section_name == g_sect_name_dwarf_debug_frame)
1488  if (section_name == g_sect_name_dwarf_debug_info)
1490  if (section_name == g_sect_name_dwarf_debug_line)
1492  if (section_name == g_sect_name_dwarf_debug_loc)
1494  if (section_name == g_sect_name_dwarf_debug_loclists)
1496  if (section_name == g_sect_name_dwarf_debug_macinfo)
1498  if (section_name == g_sect_name_dwarf_debug_names)
1500  if (section_name == g_sect_name_dwarf_debug_pubnames)
1502  if (section_name == g_sect_name_dwarf_debug_pubtypes)
1504  if (section_name == g_sect_name_dwarf_debug_ranges)
1506  if (section_name == g_sect_name_dwarf_debug_str)
1508  if (section_name == g_sect_name_dwarf_debug_types)
1510  if (section_name == g_sect_name_dwarf_apple_names)
1512  if (section_name == g_sect_name_dwarf_apple_types)
1514  if (section_name == g_sect_name_dwarf_apple_namespaces)
1516  if (section_name == g_sect_name_dwarf_apple_objc)
1518  if (section_name == g_sect_name_objc_selrefs)
1520  if (section_name == g_sect_name_objc_msgrefs)
1522  if (section_name == g_sect_name_eh_frame)
1523  return eSectionTypeEHFrame;
1524  if (section_name == g_sect_name_compact_unwind)
1526  if (section_name == g_sect_name_cfstring)
1528  if (section_name == g_sect_name_go_symtab)
1529  return eSectionTypeGoSymtab;
1530  if (section_name == g_sect_name_objc_data ||
1531  section_name == g_sect_name_objc_classrefs ||
1532  section_name == g_sect_name_objc_superrefs ||
1533  section_name == g_sect_name_objc_const ||
1534  section_name == g_sect_name_objc_classlist) {
1535  return eSectionTypeDataPointers;
1536  }
1537 
// Nothing matched by name: fall back to the Mach-O section type bits.
1538  switch (mach_sect_type) {
1539  // TODO: categorize sections by other flags for regular sections
1540  case S_REGULAR:
1541  if (section_name == g_sect_name_text)
1542  return eSectionTypeCode;
1543  if (section_name == g_sect_name_data)
1544  return eSectionTypeData;
1545  return eSectionTypeOther;
1546  case S_ZEROFILL:
1547  return eSectionTypeZeroFill;
1548  case S_CSTRING_LITERALS: // section with only literal C strings
1549  return eSectionTypeDataCString;
1550  case S_4BYTE_LITERALS: // section with only 4 byte literals
1551  return eSectionTypeData4;
1552  case S_8BYTE_LITERALS: // section with only 8 byte literals
1553  return eSectionTypeData8;
1554  case S_LITERAL_POINTERS: // section with only pointers to literals
1555  return eSectionTypeDataPointers;
1556  case S_NON_LAZY_SYMBOL_POINTERS: // section with only non-lazy symbol pointers
1557  return eSectionTypeDataPointers;
1558  case S_LAZY_SYMBOL_POINTERS: // section with only lazy symbol pointers
1559  return eSectionTypeDataPointers;
1560  case S_SYMBOL_STUBS: // section with only symbol stubs, byte size of stub in
1561  // the reserved2 field
1562  return eSectionTypeCode;
1563  case S_MOD_INIT_FUNC_POINTERS: // section with only function pointers for
1564  // initialization
1565  return eSectionTypeDataPointers;
1566  case S_MOD_TERM_FUNC_POINTERS: // section with only function pointers for
1567  // termination
1568  return eSectionTypeDataPointers;
1569  case S_COALESCED:
1570  return eSectionTypeOther;
1571  case S_GB_ZEROFILL:
1572  return eSectionTypeZeroFill;
1573  case S_INTERPOSING: // section with only pairs of function pointers for
1574  // interposing
1575  return eSectionTypeCode;
1576  case S_16BYTE_LITERALS: // section with only 16 byte literals
1577  return eSectionTypeData16;
1578  case S_DTRACE_DOF:
1579  return eSectionTypeDebug;
1580  case S_LAZY_DYLIB_SYMBOL_POINTERS:
1581  return eSectionTypeDataPointers;
1582  default:
1583  return eSectionTypeOther;
1584  }
1585 }
1586 
// Set while processing segments when an existing section's file address is
// overwritten; CreateSections() checks it afterwards to notify the module.
1592  bool FileAddressesChanged = false;
1593 
1597 };
1598 
// Parse one LC_SEGMENT / LC_SEGMENT_64 load command and create the matching
// Section objects.
//
// load_cmd_ : the generic (cmd, cmdsize) header already read by the caller.
// offset    : offset in m_data of the first byte after that header.
// cmd_idx   : index of the load command, forwarded to the sanitizer.
// context   : shared parsing state (section/segment counters, unified section
//             list, encrypted ranges, FileAddressesChanged flag).
//
// The segment is recorded in m_mach_segments and each of its sections in
// m_mach_sections. Segment and section objects are added to m_sections_up and,
// unless suppressed for a dSYM's __LINKEDIT, to the unified list.
1600  const llvm::MachO::load_command &load_cmd_, lldb::offset_t offset,
1601  uint32_t cmd_idx, SegmentParsingContext &context) {
// Seed the wider segment_command_64 with the generic header bytes; the
// remaining fields are filled in from m_data below.
1602  llvm::MachO::segment_command_64 load_cmd;
1603  memcpy(&load_cmd, &load_cmd_, sizeof(load_cmd_));
1604 
1605  if (!m_data.GetU8(&offset, (uint8_t *)load_cmd.segname, 16))
1606  return;
1607 
1608  ModuleSP module_sp = GetModule();
1609  const bool is_core = GetType() == eTypeCoreFile;
1610  const bool is_dsym = (m_header.filetype == MH_DSYM);
1611  bool add_section = true;
1612  bool add_to_unified = true;
1613  ConstString const_segname(
1614  load_cmd.segname, strnlen(load_cmd.segname, sizeof(load_cmd.segname)));
1615 
1616  SectionSP unified_section_sp(
1617  context.UnifiedList.FindSectionByName(const_segname));
1618  if (is_dsym && unified_section_sp) {
1619  if (const_segname == GetSegmentNameLINKEDIT()) {
1620  // We need to keep the __LINKEDIT segment private to this object file
1621  // only
1622  add_to_unified = false;
1623  } else {
1624  // This is the dSYM file and this section has already been created by the
1625  // object file, no need to create it.
1626  add_section = false;
1627  }
1628  }
// These four fields are read with GetAddress(), i.e. at the address size
// currently configured on m_data.
1629  load_cmd.vmaddr = m_data.GetAddress(&offset);
1630  load_cmd.vmsize = m_data.GetAddress(&offset);
1631  load_cmd.fileoff = m_data.GetAddress(&offset);
1632  load_cmd.filesize = m_data.GetAddress(&offset);
// Read the 4 consecutive uint32_t fields that begin at load_cmd.maxprot.
1633  if (!m_data.GetU32(&offset, &load_cmd.maxprot, 4))
1634  return;
1635 
1636  SanitizeSegmentCommand(load_cmd, cmd_idx);
1637 
1638  const uint32_t segment_permissions = GetSegmentPermissions(load_cmd);
1639  const bool segment_is_encrypted =
1640  (load_cmd.flags & SG_PROTECTED_VERSION_1) != 0;
1641 
1642  // Keep a list of mach segments around in case we need to get at data that
1643  // isn't stored in the abstracted Sections.
1644  m_mach_segments.push_back(load_cmd);
1645 
1646  // Use a segment ID of the segment index shifted left by 8 so they never
1647  // conflict with any of the sections.
1648  SectionSP segment_sp;
1649  if (add_section && (const_segname || is_core)) {
1650  segment_sp = std::make_shared<Section>(
1651  module_sp, // Module to which this section belongs
1652  this, // Object file to which this sections belongs
1653  ++context.NextSegmentIdx
1654  << 8, // Section ID is the 1 based segment index
1655  // shifted left by 8 bits so as not to collide with any of the 256
1656  // section IDs that are possible
1657  const_segname, // Name of this section
1658  eSectionTypeContainer, // This section is a container of other
1659  // sections.
1660  load_cmd.vmaddr, // File VM address == addresses as they are
1661  // found in the object file
1662  load_cmd.vmsize, // VM size in bytes of this section
1663  load_cmd.fileoff, // Offset to the data for this section in
1664  // the file
1665  load_cmd.filesize, // Size in bytes of this section as found
1666  // in the file
1667  0, // Segments have no alignment information
1668  load_cmd.flags); // Flags for this section
1669 
1670  segment_sp->SetIsEncrypted(segment_is_encrypted);
1671  m_sections_up->AddSection(segment_sp);
1672  segment_sp->SetPermissions(segment_permissions);
1673  if (add_to_unified)
1674  context.UnifiedList.AddSection(segment_sp);
1675  } else if (unified_section_sp) {
1676  // If this is a dSYM and the file addresses in the dSYM differ from the
1677  // file addresses in the ObjectFile, we must use the file base address for
1678  // the Section from the dSYM for the DWARF to resolve correctly.
1679  // This only happens with binaries in the shared cache in practice;
1680  // normally a mismatch like this would give a binary & dSYM that do not
1681  // match UUIDs. When a binary is included in the shared cache, its
1682  // segments are rearranged to optimize the shared cache, so its file
1683  // addresses will differ from what the ObjectFile had originally,
1684  // and what the dSYM has.
1685  if (is_dsym && unified_section_sp->GetFileAddress() != load_cmd.vmaddr) {
1686  Log *log = GetLog(LLDBLog::Symbols);
1687  if (log) {
1688  log->Printf(
1689  "Installing dSYM's %s segment file address over ObjectFile's "
1690  "so symbol table/debug info resolves correctly for %s",
1691  const_segname.AsCString(),
1692  module_sp->GetFileSpec().GetFilename().AsCString());
1693  }
1694 
1695  // Make sure we've parsed the symbol table from the ObjectFile before
1696  // we go around changing its Sections.
1697  module_sp->GetObjectFile()->GetSymtab();
1698  // eh_frame would present the same problems but we parse that on a per-
1699  // function basis as-needed so it's more difficult to remove its use of
1700  // the Sections. Realistically, the environments where this code path
1701  // will be taken will not have eh_frame sections.
1702 
1703  unified_section_sp->SetFileAddress(load_cmd.vmaddr);
1704 
1705  // Notify the module that the section addresses have been changed once
1706  // we're done so any file-address caches can be updated.
1707  context.FileAddressesChanged = true;
1708  }
1709  m_sections_up->AddSection(unified_section_sp);
1710  }
1711 
1712  llvm::MachO::section_64 sect64;
1713  ::memset(&sect64, 0, sizeof(sect64));
1714  // Push a section into our mach sections for the section at index zero
1715  // (NO_SECT) if we don't have any mach sections yet...
1716  if (m_mach_sections.empty())
1717  m_mach_sections.push_back(sect64);
1718  uint32_t segment_sect_idx;
// ID the first section of this segment will receive (IDs are handed out by
// pre-incrementing context.NextSectionIdx below).
1719  const lldb::user_id_t first_segment_sectID = context.NextSectionIdx + 1;
1720 
// Number of trailing uint32_t fields to read per section record, starting at
// `offset`: 7 for LC_SEGMENT, 8 otherwise.
1721  const uint32_t num_u32s = load_cmd.cmd == LC_SEGMENT ? 7 : 8;
1722  for (segment_sect_idx = 0; segment_sect_idx < load_cmd.nsects;
1723  ++segment_sect_idx) {
1724  if (m_data.GetU8(&offset, (uint8_t *)sect64.sectname,
1725  sizeof(sect64.sectname)) == nullptr)
1726  break;
1727  if (m_data.GetU8(&offset, (uint8_t *)sect64.segname,
1728  sizeof(sect64.segname)) == nullptr)
1729  break;
1730  sect64.addr = m_data.GetAddress(&offset);
1731  sect64.size = m_data.GetAddress(&offset);
1732 
1733  if (m_data.GetU32(&offset, &sect64.offset, num_u32s) == nullptr)
1734  break;
1735 
// Same fix-up as for segments: make shared cache file offsets relative to
// the __TEXT address recorded earlier.
1736  if (IsSharedCacheBinary() && !IsInMemory()) {
1737  sect64.offset = sect64.addr - m_text_address;
1738  }
1739 
1740  // Keep a list of mach sections around in case we need to get at data that
1741  // isn't stored in the abstracted Sections.
1742  m_mach_sections.push_back(sect64);
1743 
1744  if (add_section) {
1745  ConstString section_name(
1746  sect64.sectname, strnlen(sect64.sectname, sizeof(sect64.sectname)));
1747  if (!const_segname) {
1748  // We have a segment with no name so we need to conjure up segments
1749  // that correspond to the section's segname if there isn't already such
1750  // a section. If there is such a section, we resize the section so that
1751  // it spans all sections. We also mark these sections as fake so
1752  // address matches don't hit if they land in the gaps between the child
1753  // sections.
1754  const_segname.SetTrimmedCStringWithLength(sect64.segname,
1755  sizeof(sect64.segname));
1756  segment_sp = context.UnifiedList.FindSectionByName(const_segname);
1757  if (segment_sp.get()) {
1758  Section *segment = segment_sp.get();
1759  // Grow the section size as needed.
1760  const lldb::addr_t sect64_min_addr = sect64.addr;
1761  const lldb::addr_t sect64_max_addr = sect64_min_addr + sect64.size;
1762  const lldb::addr_t curr_seg_byte_size = segment->GetByteSize();
1763  const lldb::addr_t curr_seg_min_addr = segment->GetFileAddress();
1764  const lldb::addr_t curr_seg_max_addr =
1765  curr_seg_min_addr + curr_seg_byte_size;
1766  if (sect64_min_addr >= curr_seg_min_addr) {
1767  const lldb::addr_t new_seg_byte_size =
1768  sect64_max_addr - curr_seg_min_addr;
1769  // Only grow the section size if needed
1770  if (new_seg_byte_size > curr_seg_byte_size)
1771  segment->SetByteSize(new_seg_byte_size);
1772  } else {
1773  // We need to change the base address of the segment and adjust the
1774  // child section offsets for all existing children.
1775  const lldb::addr_t slide_amount =
1776  sect64_min_addr - curr_seg_min_addr;
1777  segment->Slide(slide_amount, false);
1778  segment->GetChildren().Slide(-slide_amount, false);
1779  segment->SetByteSize(curr_seg_max_addr - sect64_min_addr);
1780  }
1781 
1782  // Grow the section size as needed.
1783  if (sect64.offset) {
1784  const lldb::addr_t segment_min_file_offset =
1785  segment->GetFileOffset();
1786  const lldb::addr_t segment_max_file_offset =
1787  segment_min_file_offset + segment->GetFileSize();
1788 
1789  const lldb::addr_t section_min_file_offset = sect64.offset;
1790  const lldb::addr_t section_max_file_offset =
1791  section_min_file_offset + sect64.size;
1792  const lldb::addr_t new_file_offset =
1793  std::min(section_min_file_offset, segment_min_file_offset);
1794  const lldb::addr_t new_file_size =
1795  std::max(section_max_file_offset, segment_max_file_offset) -
1796  new_file_offset;
1797  segment->SetFileOffset(new_file_offset);
1798  segment->SetFileSize(new_file_size);
1799  }
1800  } else {
1801  // Create a fake section for the section's named segment
1802  segment_sp = std::make_shared<Section>(
1803  segment_sp, // Parent section
1804  module_sp, // Module to which this section belongs
1805  this, // Object file to which this section belongs
1806  ++context.NextSegmentIdx
1807  << 8, // Section ID is the 1 based segment index
1808  // shifted left by 8 bits so as not to
1809  // collide with any of the 256 section IDs
1810  // that are possible
1811  const_segname, // Name of this section
1812  eSectionTypeContainer, // This section is a container of
1813  // other sections.
1814  sect64.addr, // File VM address == addresses as they are
1815  // found in the object file
1816  sect64.size, // VM size in bytes of this section
1817  sect64.offset, // Offset to the data for this section in
1818  // the file
1819  sect64.offset ? sect64.size : 0, // Size in bytes of
1820  // this section as
1821  // found in the file
1822  sect64.align,
1823  load_cmd.flags); // Flags for this section
1824  segment_sp->SetIsFake(true);
1825  segment_sp->SetPermissions(segment_permissions);
1826  m_sections_up->AddSection(segment_sp);
1827  if (add_to_unified)
1828  context.UnifiedList.AddSection(segment_sp);
1829  segment_sp->SetIsEncrypted(segment_is_encrypted);
1830  }
1831  }
1832  assert(segment_sp.get());
1833 
1834  lldb::SectionType sect_type = GetSectionType(sect64.flags, section_name);
1835 
// The child section's address is stored relative to its parent segment; a
// zero file offset is treated as "no bytes in the file".
1836  SectionSP section_sp(new Section(
1837  segment_sp, module_sp, this, ++context.NextSectionIdx, section_name,
1838  sect_type, sect64.addr - segment_sp->GetFileAddress(), sect64.size,
1839  sect64.offset, sect64.offset == 0 ? 0 : sect64.size, sect64.align,
1840  sect64.flags));
1841  // Set the section to be encrypted to match the segment
1842 
1843  bool section_is_encrypted = false;
1844  if (!segment_is_encrypted && load_cmd.filesize != 0)
1845  section_is_encrypted = context.EncryptedRanges.FindEntryThatContains(
1846  sect64.offset) != nullptr;
1847 
1848  section_sp->SetIsEncrypted(segment_is_encrypted || section_is_encrypted);
1849  section_sp->SetPermissions(segment_permissions);
1850  segment_sp->GetChildren().AddSection(section_sp);
1851 
// A fake segment was conjured for this one section only; reset so the next
// section looks up (or creates) its own segment by name.
1852  if (segment_sp->IsFake()) {
1853  segment_sp.reset();
1854  const_segname.Clear();
1855  }
1856  }
1857  }
// For dSYM files, give zero-sized sections of this segment a size: the gap to
// the next section when there is one, else the segment's vmsize.
1858  if (segment_sp && is_dsym) {
1859  if (first_segment_sectID <= context.NextSectionIdx) {
1860  lldb::user_id_t sect_uid;
1861  for (sect_uid = first_segment_sectID; sect_uid <= context.NextSectionIdx;
1862  ++sect_uid) {
1863  SectionSP curr_section_sp(
1864  segment_sp->GetChildren().FindSectionByID(sect_uid));
1865  SectionSP next_section_sp;
1866  if (sect_uid + 1 <= context.NextSectionIdx)
1867  next_section_sp =
1868  segment_sp->GetChildren().FindSectionByID(sect_uid + 1);
1869 
1870  if (curr_section_sp.get()) {
1871  if (curr_section_sp->GetByteSize() == 0) {
1872  if (next_section_sp.get() != nullptr)
1873  curr_section_sp->SetByteSize(next_section_sp->GetFileAddress() -
1874  curr_section_sp->GetFileAddress());
1875  else
1876  curr_section_sp->SetByteSize(load_cmd.vmsize);
1877  }
1878  }
1879  }
1880  }
1881  }
1882 }
1883 
1885  const llvm::MachO::load_command &load_cmd, lldb::offset_t offset) {
1886  m_dysymtab.cmd = load_cmd.cmd;
1887  m_dysymtab.cmdsize = load_cmd.cmdsize;
1888  m_data.GetU32(&offset, &m_dysymtab.ilocalsym,
1889  (sizeof(m_dysymtab) / sizeof(uint32_t)) - 2);
1890 }
1891 
1892 void ObjectFileMachO::CreateSections(SectionList &unified_section_list) {
1893  if (m_sections_up)
1894  return;
1895 
1896  m_sections_up = std::make_unique<SectionList>();
1897 
1899  // bool dump_sections = false;
1900  ModuleSP module_sp(GetModule());
1901 
1902  offset = MachHeaderSizeFromMagic(m_header.magic);
1903 
1904  SegmentParsingContext context(GetEncryptedFileRanges(), unified_section_list);
1905  llvm::MachO::load_command load_cmd;
1906  for (uint32_t i = 0; i < m_header.ncmds; ++i) {
1907  const lldb::offset_t load_cmd_offset = offset;
1908  if (m_data.GetU32(&offset, &load_cmd, 2) == nullptr)
1909  break;
1910 
1911  if (load_cmd.cmd == LC_SEGMENT || load_cmd.cmd == LC_SEGMENT_64)
1912  ProcessSegmentCommand(load_cmd, offset, i, context);
1913  else if (load_cmd.cmd == LC_DYSYMTAB)
1914  ProcessDysymtabCommand(load_cmd, offset);
1915 
1916  offset = load_cmd_offset + load_cmd.cmdsize;
1917  }
1918 
1919  if (context.FileAddressesChanged && module_sp)
1920  module_sp->SectionFileAddressesChanged();
1921 }
1922 
// Helper that resolves a symbol's section index (n_sect) to a SectionSP,
// caching the section and its address range per index in m_section_infos.
1924 public:
1926  : m_section_list(section_list), m_section_infos() {
1927  // Get the number of sections down to a depth of 1 to include all segments
1928  // and their sections, but no other sections that may be added for debug
1929  // map or
1930  m_section_infos.resize(section_list->GetNumSections(1));
1931  }
1932 
// Look up the section with ID `n_sect` and return it if `file_addr` lies
// inside it, or if the section is empty and starts exactly at `file_addr`.
// An n_sect of 0 yields an empty SectionSP. The first lookup of each index is
// cached; a lookup that finds no section reports an error through Debugger.
1933  SectionSP GetSection(uint8_t n_sect, addr_t file_addr) {
1934  if (n_sect == 0)
1935  return SectionSP();
1936  if (n_sect < m_section_infos.size()) {
1937  if (!m_section_infos[n_sect].section_sp) {
1938  SectionSP section_sp(m_section_list->FindSectionByID(n_sect));
1939  m_section_infos[n_sect].section_sp = section_sp;
1940  if (section_sp) {
1941  m_section_infos[n_sect].vm_range.SetBaseAddress(
1942  section_sp->GetFileAddress());
1943  m_section_infos[n_sect].vm_range.SetByteSize(
1944  section_sp->GetByteSize());
1945  } else {
// Use the path of the first section's object file, if any, to name the file
// in the error message.
1946  std::string filename = "<unknown>";
1947  SectionSP first_section_sp(m_section_list->GetSectionAtIndex(0));
1948  if (first_section_sp)
1949  filename = first_section_sp->GetObjectFile()->GetFileSpec().GetPath();
1950 
1951  Debugger::ReportError(
1952  llvm::formatv("unable to find section {0} for a symbol in "
1953  "{1}, corrupt file?",
1954  n_sect, filename));
1955  }
1956  }
1957  if (m_section_infos[n_sect].vm_range.Contains(file_addr)) {
1958  // Symbol is in section.
1959  return m_section_infos[n_sect].section_sp;
1960  } else if (m_section_infos[n_sect].vm_range.GetByteSize() == 0 &&
1961  m_section_infos[n_sect].vm_range.GetBaseAddress() ==
1962  file_addr) {
1963  // Symbol is in section with zero size, but has the same start address
1964  // as the section. This can happen with linker symbols (symbols that
1965  // start with the letter 'l' or 'L'.
1966  return m_section_infos[n_sect].section_sp;
1967  }
1968  }
1970  }
1971 
1972 protected:
// Per-index cache entry.
1973  struct SectionInfo {
1975 
1977  SectionSP section_sp;
1978  };
// Indexed by n_sect; sized once in the constructor.
1980  std::vector<SectionInfo> m_section_infos;
1981 };
1982 
1983 #define TRIE_SYMBOL_IS_THUMB (1ULL << 63)
1984 struct TrieEntry {
1985  void Dump() const {
1986  printf("0x%16.16llx 0x%16.16llx 0x%16.16llx \"%s\"",
1987  static_cast<unsigned long long>(address),
1988  static_cast<unsigned long long>(flags),
1989  static_cast<unsigned long long>(other), name.GetCString());
1990  if (import_name)
1991  printf(" -> \"%s\"\n", import_name.GetCString());
1992  else
1993  printf("\n");
1994  }
1997  uint64_t flags =
1998  0; // EXPORT_SYMBOL_FLAGS_REEXPORT, EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER,
1999  // TRIE_SYMBOL_IS_THUMB
2000  uint64_t other = 0;
2002 };
2003 
2007 
2009 
2010  void Dump(uint32_t idx) const {
2011  printf("[%3u] 0x%16.16llx: ", idx,
2012  static_cast<unsigned long long>(nodeOffset));
2013  entry.Dump();
2014  }
2015 
2016  bool operator<(const TrieEntryWithOffset &other) const {
2017  return (nodeOffset < other.nodeOffset);
2018  }
2019 };
2020 
2022  const bool is_arm, addr_t text_seg_base_addr,
2023  std::vector<llvm::StringRef> &nameSlices,
2024  std::set<lldb::addr_t> &resolver_addresses,
2025  std::vector<TrieEntryWithOffset> &reexports,
2026  std::vector<TrieEntryWithOffset> &ext_symbols) {
2027  if (!data.ValidOffset(offset))
2028  return true;
2029 
2030  // Terminal node -- end of a branch, possibly add this to
2031  // the symbol table or resolver table.
2032  const uint64_t terminalSize = data.GetULEB128(&offset);
2033  lldb::offset_t children_offset = offset + terminalSize;
2034  if (terminalSize != 0) {
2035  TrieEntryWithOffset e(offset);
2036  e.entry.flags = data.GetULEB128(&offset);
2037  const char *import_name = nullptr;
2038  if (e.entry.flags & EXPORT_SYMBOL_FLAGS_REEXPORT) {
2039  e.entry.address = 0;
2040  e.entry.other = data.GetULEB128(&offset); // dylib ordinal
2041  import_name = data.GetCStr(&offset);
2042  } else {
2043  e.entry.address = data.GetULEB128(&offset);
2044  if (text_seg_base_addr != LLDB_INVALID_ADDRESS)
2045  e.entry.address += text_seg_base_addr;
2046  if (e.entry.flags & EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER) {
2047  e.entry.other = data.GetULEB128(&offset);
2048  uint64_t resolver_addr = e.entry.other;
2049  if (text_seg_base_addr != LLDB_INVALID_ADDRESS)
2050  resolver_addr += text_seg_base_addr;
2051  if (is_arm)
2052  resolver_addr &= THUMB_ADDRESS_BIT_MASK;
2053  resolver_addresses.insert(resolver_addr);
2054  } else
2055  e.entry.other = 0;
2056  }
2057  bool add_this_entry = false;
2058  if (Flags(e.entry.flags).Test(EXPORT_SYMBOL_FLAGS_REEXPORT) &&
2059  import_name && import_name[0]) {
2060  // add symbols that are reexport symbols with a valid import name.
2061  add_this_entry = true;
2062  } else if (e.entry.flags == 0 &&
2063  (import_name == nullptr || import_name[0] == '\0')) {
2064  // add externally visible symbols, in case the nlist record has
2065  // been stripped/omitted.
2066  add_this_entry = true;
2067  }
2068  if (add_this_entry) {
2069  std::string name;
2070  if (!nameSlices.empty()) {
2071  for (auto name_slice : nameSlices)
2072  name.append(name_slice.data(), name_slice.size());
2073  }
2074  if (name.size() > 1) {
2075  // Skip the leading '_'
2076  e.entry.name.SetCStringWithLength(name.c_str() + 1, name.size() - 1);
2077  }
2078  if (import_name) {
2079  // Skip the leading '_'
2080  e.entry.import_name.SetCString(import_name + 1);
2081  }
2082  if (Flags(e.entry.flags).Test(EXPORT_SYMBOL_FLAGS_REEXPORT)) {
2083  reexports.push_back(e);
2084  } else {
2085  if (is_arm && (e.entry.address & 1)) {
2088  }
2089  ext_symbols.push_back(e);
2090  }
2091  }
2092  }
2093 
2094  const uint8_t childrenCount = data.GetU8(&children_offset);
2095  for (uint8_t i = 0; i < childrenCount; ++i) {
2096  const char *cstr = data.GetCStr(&children_offset);
2097  if (cstr)
2098  nameSlices.push_back(llvm::StringRef(cstr));
2099  else
2100  return false; // Corrupt data
2101  lldb::offset_t childNodeOffset = data.GetULEB128(&children_offset);
2102  if (childNodeOffset) {
2103  if (!ParseTrieEntries(data, childNodeOffset, is_arm, text_seg_base_addr,
2104  nameSlices, resolver_addresses, reexports,
2105  ext_symbols)) {
2106  return false;
2107  }
2108  }
2109  nameSlices.pop_back();
2110  }
2111  return true;
2112 }
2113 
2114 static SymbolType GetSymbolType(const char *&symbol_name,
2115  bool &demangled_is_synthesized,
2116  const SectionSP &text_section_sp,
2117  const SectionSP &data_section_sp,
2118  const SectionSP &data_dirty_section_sp,
2119  const SectionSP &data_const_section_sp,
2120  const SectionSP &symbol_section) {
2122 
2123  const char *symbol_sect_name = symbol_section->GetName().AsCString();
2124  if (symbol_section->IsDescendant(text_section_sp.get())) {
2125  if (symbol_section->IsClear(S_ATTR_PURE_INSTRUCTIONS |
2126  S_ATTR_SELF_MODIFYING_CODE |
2127  S_ATTR_SOME_INSTRUCTIONS))
2128  type = eSymbolTypeData;
2129  else
2130  type = eSymbolTypeCode;
2131  } else if (symbol_section->IsDescendant(data_section_sp.get()) ||
2132  symbol_section->IsDescendant(data_dirty_section_sp.get()) ||
2133  symbol_section->IsDescendant(data_const_section_sp.get())) {
2134  if (symbol_sect_name &&
2135  ::strstr(symbol_sect_name, "__objc") == symbol_sect_name) {
2136  type = eSymbolTypeRuntime;
2137 
2138  if (symbol_name) {
2139  llvm::StringRef symbol_name_ref(symbol_name);
2140  if (symbol_name_ref.startswith("OBJC_")) {
2141  static const llvm::StringRef g_objc_v2_prefix_class("OBJC_CLASS_$_");
2142  static const llvm::StringRef g_objc_v2_prefix_metaclass(
2143  "OBJC_METACLASS_$_");
2144  static const llvm::StringRef g_objc_v2_prefix_ivar("OBJC_IVAR_$_");
2145  if (symbol_name_ref.startswith(g_objc_v2_prefix_class)) {
2146  symbol_name = symbol_name + g_objc_v2_prefix_class.size();
2147  type = eSymbolTypeObjCClass;
2148  demangled_is_synthesized = true;
2149  } else if (symbol_name_ref.startswith(g_objc_v2_prefix_metaclass)) {
2150  symbol_name = symbol_name + g_objc_v2_prefix_metaclass.size();
2151  type = eSymbolTypeObjCMetaClass;
2152  demangled_is_synthesized = true;
2153  } else if (symbol_name_ref.startswith(g_objc_v2_prefix_ivar)) {
2154  symbol_name = symbol_name + g_objc_v2_prefix_ivar.size();
2155  type = eSymbolTypeObjCIVar;
2156  demangled_is_synthesized = true;
2157  }
2158  }
2159  }
2160  } else if (symbol_sect_name &&
2161  ::strstr(symbol_sect_name, "__gcc_except_tab") ==
2162  symbol_sect_name) {
2163  type = eSymbolTypeException;
2164  } else {
2165  type = eSymbolTypeData;
2166  }
2167  } else if (symbol_sect_name &&
2168  ::strstr(symbol_sect_name, "__IMPORT") == symbol_sect_name) {
2169  type = eSymbolTypeTrampoline;
2170  }
2171  return type;
2172 }
2173 
2174 // Read the UUID out of a dyld_shared_cache file on-disk.
2176  const ByteOrder byte_order,
2177  const uint32_t addr_byte_size) {
2178  UUID dsc_uuid;
2179  DataBufferSP DscData = MapFileData(
2180  dyld_shared_cache, sizeof(struct lldb_copy_dyld_cache_header_v1), 0);
2181  if (!DscData)
2182  return dsc_uuid;
2183  DataExtractor dsc_header_data(DscData, byte_order, addr_byte_size);
2184 
2185  char version_str[7];
2186  lldb::offset_t offset = 0;
2187  memcpy(version_str, dsc_header_data.GetData(&offset, 6), 6);
2188  version_str[6] = '\0';
2189  if (strcmp(version_str, "dyld_v") == 0) {
2190  offset = offsetof(struct lldb_copy_dyld_cache_header_v1, uuid);
2191  dsc_uuid = UUID(dsc_header_data.GetData(&offset, sizeof(uuid_t)),
2192  sizeof(uuid_t));
2193  }
2194  Log *log = GetLog(LLDBLog::Symbols);
2195  if (log && dsc_uuid.IsValid()) {
2196  LLDB_LOGF(log, "Shared cache %s has UUID %s",
2197  dyld_shared_cache.GetPath().c_str(),
2198  dsc_uuid.GetAsString().c_str());
2199  }
2200  return dsc_uuid;
2201 }
2202 
2203 static llvm::Optional<struct nlist_64>
2204 ParseNList(DataExtractor &nlist_data, lldb::offset_t &nlist_data_offset,
2205  size_t nlist_byte_size) {
2206  struct nlist_64 nlist;
2207  if (!nlist_data.ValidOffsetForDataOfSize(nlist_data_offset, nlist_byte_size))
2208  return {};
2209  nlist.n_strx = nlist_data.GetU32_unchecked(&nlist_data_offset);
2210  nlist.n_type = nlist_data.GetU8_unchecked(&nlist_data_offset);
2211  nlist.n_sect = nlist_data.GetU8_unchecked(&nlist_data_offset);
2212  nlist.n_desc = nlist_data.GetU16_unchecked(&nlist_data_offset);
2213  nlist.n_value = nlist_data.GetAddress_unchecked(&nlist_data_offset);
2214  return nlist;
2215 }
2216 
// Named stand-ins for a bare true/false flag telling debug symbols apart
// from non-debug ones. NOTE(review): the users of these constants are
// outside this excerpt; confirm the intended call sites.
enum { DebugSymbols = true, NonDebugSymbols = false };
2218 
2220  ModuleSP module_sp(GetModule());
2221  if (!module_sp)
2222  return;
2223 
2224  const FileSpec &file = m_file ? m_file : module_sp->GetFileSpec();
2225  const char *file_name = file.GetFilename().AsCString("<Unknown>");
2226  LLDB_SCOPED_TIMERF("ObjectFileMachO::ParseSymtab () module = %s", file_name);
2227  Progress progress(llvm::formatv("Parsing symbol table for {0}", file_name));
2228 
2229  llvm::MachO::symtab_command symtab_load_command = {0, 0, 0, 0, 0, 0};
2230  llvm::MachO::linkedit_data_command function_starts_load_command = {0, 0, 0, 0};
2231  llvm::MachO::linkedit_data_command exports_trie_load_command = {0, 0, 0, 0};
2232  llvm::MachO::dyld_info_command dyld_info = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2233  llvm::MachO::dysymtab_command dysymtab = m_dysymtab;
2234  // The data element of type bool indicates that this entry is thumb
2235  // code.
2236  typedef AddressDataArray<lldb::addr_t, bool, 100> FunctionStarts;
2237 
2238  // Record the address of every function/data that we add to the symtab.
2239  // We add symbols to the table in the order of most information (nlist
2240  // records) to least (function starts), and avoid duplicating symbols
2241  // via this set.
2242  llvm::DenseSet<addr_t> symbols_added;
2243 
2244  // We are using a llvm::DenseSet for "symbols_added" so we must be sure we
2245  // do not add the tombstone or empty keys to the set.
2246  auto add_symbol_addr = [&symbols_added](lldb::addr_t file_addr) {
2247  // Don't add the tombstone or empty keys.
2248  if (file_addr == UINT64_MAX || file_addr == UINT64_MAX - 1)
2249  return;
2250  symbols_added.insert(file_addr);
2251  };
2252  FunctionStarts function_starts;
2254  uint32_t i;
2255  FileSpecList dylib_files;
2256  Log *log = GetLog(LLDBLog::Symbols);
2257  llvm::StringRef g_objc_v2_prefix_class("_OBJC_CLASS_$_");
2258  llvm::StringRef g_objc_v2_prefix_metaclass("_OBJC_METACLASS_$_");
2259  llvm::StringRef g_objc_v2_prefix_ivar("_OBJC_IVAR_$_");
2260  UUID image_uuid;
2261 
2262  for (i = 0; i < m_header.ncmds; ++i) {
2263  const lldb::offset_t cmd_offset = offset;
2264  // Read in the load command and load command size
2265  llvm::MachO::load_command lc;
2266  if (m_data.GetU32(&offset, &lc, 2) == nullptr)
2267  break;
2268  // Watch for the symbol table load command
2269  switch (lc.cmd) {
2270  case LC_SYMTAB:
2271  symtab_load_command.cmd = lc.cmd;
2272  symtab_load_command.cmdsize = lc.cmdsize;
2273  // Read in the rest of the symtab load command
2274  if (m_data.GetU32(&offset, &symtab_load_command.symoff, 4) ==
2275  nullptr) // fill in symoff, nsyms, stroff, strsize fields
2276  return;
2277  break;
2278 
2279  case LC_DYLD_INFO:
2280  case LC_DYLD_INFO_ONLY:
2281  if (m_data.GetU32(&offset, &dyld_info.rebase_off, 10)) {
2282  dyld_info.cmd = lc.cmd;
2283  dyld_info.cmdsize = lc.cmdsize;
2284  } else {
2285  memset(&dyld_info, 0, sizeof(dyld_info));
2286  }
2287  break;
2288 
2289  case LC_LOAD_DYLIB:
2290  case LC_LOAD_WEAK_DYLIB:
2291  case LC_REEXPORT_DYLIB:
2292  case LC_LOADFVMLIB:
2293  case LC_LOAD_UPWARD_DYLIB: {
2294  uint32_t name_offset = cmd_offset + m_data.GetU32(&offset);
2295  const char *path = m_data.PeekCStr(name_offset);
2296  if (path) {
2297  FileSpec file_spec(path);
2298  // Strip the path if there is @rpath, @executable, etc so we just use
2299  // the basename
2300  if (path[0] == '@')
2301  file_spec.ClearDirectory();
2302 
2303  if (lc.cmd == LC_REEXPORT_DYLIB) {
2304  m_reexported_dylibs.AppendIfUnique(file_spec);
2305  }
2306 
2307  dylib_files.Append(file_spec);
2308  }
2309  } break;
2310 
2311  case LC_DYLD_EXPORTS_TRIE:
2312  exports_trie_load_command.cmd = lc.cmd;
2313  exports_trie_load_command.cmdsize = lc.cmdsize;
2314  if (m_data.GetU32(&offset, &exports_trie_load_command.dataoff, 2) ==
2315  nullptr) // fill in offset and size fields
2316  memset(&exports_trie_load_command, 0,
2317  sizeof(exports_trie_load_command));
2318  break;
2319  case LC_FUNCTION_STARTS:
2320  function_starts_load_command.cmd = lc.cmd;
2321  function_starts_load_command.cmdsize = lc.cmdsize;
2322  if (m_data.GetU32(&offset, &function_starts_load_command.dataoff, 2) ==
2323  nullptr) // fill in data offset and size fields
2324  memset(&function_starts_load_command, 0,
2325  sizeof(function_starts_load_command));
2326  break;
2327 
2328  case LC_UUID: {
2329  const uint8_t *uuid_bytes = m_data.PeekData(offset, 16);
2330 
2331  if (uuid_bytes)
2332  image_uuid = UUID(uuid_bytes, 16);
2333  break;
2334  }
2335 
2336  default:
2337  break;
2338  }
2339  offset = cmd_offset + lc.cmdsize;
2340  }
2341 
2342  if (!symtab_load_command.cmd)
2343  return;
2344 
2345  SectionList *section_list = GetSectionList();
2346  if (section_list == nullptr)
2347  return;
2348 
2349  const uint32_t addr_byte_size = m_data.GetAddressByteSize();
2350  const ByteOrder byte_order = m_data.GetByteOrder();
2351  bool bit_width_32 = addr_byte_size == 4;
2352  const size_t nlist_byte_size =
2353  bit_width_32 ? sizeof(struct nlist) : sizeof(struct nlist_64);
2354 
2355  DataExtractor nlist_data(nullptr, 0, byte_order, addr_byte_size);
2356  DataExtractor strtab_data(nullptr, 0, byte_order, addr_byte_size);
2357  DataExtractor function_starts_data(nullptr, 0, byte_order, addr_byte_size);
2358  DataExtractor indirect_symbol_index_data(nullptr, 0, byte_order,
2359  addr_byte_size);
2360  DataExtractor dyld_trie_data(nullptr, 0, byte_order, addr_byte_size);
2361 
2362  const addr_t nlist_data_byte_size =
2363  symtab_load_command.nsyms * nlist_byte_size;
2364  const addr_t strtab_data_byte_size = symtab_load_command.strsize;
2365  addr_t strtab_addr = LLDB_INVALID_ADDRESS;
2366 
2367  ProcessSP process_sp(m_process_wp.lock());
2368  Process *process = process_sp.get();
2369 
2370  uint32_t memory_module_load_level = eMemoryModuleLoadLevelComplete;
2371  bool is_shared_cache_image = IsSharedCacheBinary();
2372  bool is_local_shared_cache_image = is_shared_cache_image && !IsInMemory();
2373  SectionSP linkedit_section_sp(
2374  section_list->FindSectionByName(GetSegmentNameLINKEDIT()));
2375 
2376  if (process && m_header.filetype != llvm::MachO::MH_OBJECT &&
2377  !is_local_shared_cache_image) {
2378  Target &target = process->GetTarget();
2379 
2380  memory_module_load_level = target.GetMemoryModuleLoadLevel();
2381 
2382  // Reading mach file from memory in a process or core file...
2383 
2384  if (linkedit_section_sp) {
2385  addr_t linkedit_load_addr =
2386  linkedit_section_sp->GetLoadBaseAddress(&target);
2387  if (linkedit_load_addr == LLDB_INVALID_ADDRESS) {
2388  // We might be trying to access the symbol table before the
2389  // __LINKEDIT's load address has been set in the target. We can't
2390  // fail to read the symbol table, so calculate the right address
2391  // manually
2392  linkedit_load_addr = CalculateSectionLoadAddressForMemoryImage(
2393  m_memory_addr, GetMachHeaderSection(), linkedit_section_sp.get());
2394  }
2395 
2396  const addr_t linkedit_file_offset = linkedit_section_sp->GetFileOffset();
2397  const addr_t symoff_addr = linkedit_load_addr +
2398  symtab_load_command.symoff -
2399  linkedit_file_offset;
2400  strtab_addr = linkedit_load_addr + symtab_load_command.stroff -
2401  linkedit_file_offset;
2402 
2403  // Always load dyld - the dynamic linker - from memory if we didn't
2404  // find a binary anywhere else. lldb will not register
2405  // dylib/framework/bundle loads/unloads if we don't have the dyld
2406  // symbols, we force dyld to load from memory despite the user's
2407  // target.memory-module-load-level setting.
2408  if (memory_module_load_level == eMemoryModuleLoadLevelComplete ||
2409  m_header.filetype == llvm::MachO::MH_DYLINKER) {
2410  DataBufferSP nlist_data_sp(
2411  ReadMemory(process_sp, symoff_addr, nlist_data_byte_size));
2412  if (nlist_data_sp)
2413  nlist_data.SetData(nlist_data_sp, 0, nlist_data_sp->GetByteSize());
2414  if (dysymtab.nindirectsyms != 0) {
2415  const addr_t indirect_syms_addr = linkedit_load_addr +
2416  dysymtab.indirectsymoff -
2417  linkedit_file_offset;
2418  DataBufferSP indirect_syms_data_sp(ReadMemory(
2419  process_sp, indirect_syms_addr, dysymtab.nindirectsyms * 4));
2420  if (indirect_syms_data_sp)
2421  indirect_symbol_index_data.SetData(
2422  indirect_syms_data_sp, 0,
2423  indirect_syms_data_sp->GetByteSize());
2424  // If this binary is outside the shared cache,
2425  // cache the string table.
2426  // Binaries in the shared cache all share a giant string table,
2427  // and we can't share the string tables across multiple
2428  // ObjectFileMachO's, so we'd end up re-reading this mega-strtab
2429  // for every binary in the shared cache - it would be a big perf
2430  // problem. For binaries outside the shared cache, it's faster to
2431  // read the entire strtab at once instead of piece-by-piece as we
2432  // process the nlist records.
2433  if (!is_shared_cache_image) {
2434  DataBufferSP strtab_data_sp(
2435  ReadMemory(process_sp, strtab_addr, strtab_data_byte_size));
2436  if (strtab_data_sp) {
2437  strtab_data.SetData(strtab_data_sp, 0,
2438  strtab_data_sp->GetByteSize());
2439  }
2440  }
2441  }
2442  if (memory_module_load_level >= eMemoryModuleLoadLevelPartial) {
2443  if (function_starts_load_command.cmd) {
2444  const addr_t func_start_addr =
2445  linkedit_load_addr + function_starts_load_command.dataoff -
2446  linkedit_file_offset;
2447  DataBufferSP func_start_data_sp(
2448  ReadMemory(process_sp, func_start_addr,
2449  function_starts_load_command.datasize));
2450  if (func_start_data_sp)
2451  function_starts_data.SetData(func_start_data_sp, 0,
2452  func_start_data_sp->GetByteSize());
2453  }
2454  }
2455  }
2456  }
2457  } else {
2458  if (is_local_shared_cache_image) {
2459  // The load commands in shared cache images are relative to the
2460  // beginning of the shared cache, not the library image. The
2461  // data we get handed when creating the ObjectFileMachO starts
2462  // at the beginning of a specific library and spans to the end
2463  // of the cache to be able to reach the shared LINKEDIT
2464  // segments. We need to convert the load command offsets to be
2465  // relative to the beginning of our specific image.
2466  lldb::addr_t linkedit_offset = linkedit_section_sp->GetFileOffset();
2467  lldb::offset_t linkedit_slide =
2468  linkedit_offset - m_linkedit_original_offset;
2469  symtab_load_command.symoff += linkedit_slide;
2470  symtab_load_command.stroff += linkedit_slide;
2471  dyld_info.export_off += linkedit_slide;
2472  dysymtab.indirectsymoff += linkedit_slide;
2473  function_starts_load_command.dataoff += linkedit_slide;
2474  exports_trie_load_command.dataoff += linkedit_slide;
2475  }
2476 
2477  nlist_data.SetData(m_data, symtab_load_command.symoff,
2478  nlist_data_byte_size);
2479  strtab_data.SetData(m_data, symtab_load_command.stroff,
2480  strtab_data_byte_size);
2481 
2482  // We shouldn't have exports data from both the LC_DYLD_INFO command
2483  // AND the LC_DYLD_EXPORTS_TRIE command in the same binary:
2484  lldbassert(!((dyld_info.export_size > 0)
2485  && (exports_trie_load_command.datasize > 0)));
2486  if (dyld_info.export_size > 0) {
2487  dyld_trie_data.SetData(m_data, dyld_info.export_off,
2488  dyld_info.export_size);
2489  } else if (exports_trie_load_command.datasize > 0) {
2490  dyld_trie_data.SetData(m_data, exports_trie_load_command.dataoff,
2491  exports_trie_load_command.datasize);
2492  }
2493 
2494  if (dysymtab.nindirectsyms != 0) {
2495  indirect_symbol_index_data.SetData(m_data, dysymtab.indirectsymoff,
2496  dysymtab.nindirectsyms * 4);
2497  }
2498  if (function_starts_load_command.cmd) {
2499  function_starts_data.SetData(m_data, function_starts_load_command.dataoff,
2500  function_starts_load_command.datasize);
2501  }
2502  }
2503 
2504  const bool have_strtab_data = strtab_data.GetByteSize() > 0;
2505 
2506  ConstString g_segment_name_TEXT = GetSegmentNameTEXT();
2507  ConstString g_segment_name_DATA = GetSegmentNameDATA();
2508  ConstString g_segment_name_DATA_DIRTY = GetSegmentNameDATA_DIRTY();
2509  ConstString g_segment_name_DATA_CONST = GetSegmentNameDATA_CONST();
2510  ConstString g_segment_name_OBJC = GetSegmentNameOBJC();
2511  ConstString g_section_name_eh_frame = GetSectionNameEHFrame();
2512  SectionSP text_section_sp(
2513  section_list->FindSectionByName(g_segment_name_TEXT));
2514  SectionSP data_section_sp(
2515  section_list->FindSectionByName(g_segment_name_DATA));
2516  SectionSP data_dirty_section_sp(
2517  section_list->FindSectionByName(g_segment_name_DATA_DIRTY));
2518  SectionSP data_const_section_sp(
2519  section_list->FindSectionByName(g_segment_name_DATA_CONST));
2520  SectionSP objc_section_sp(
2521  section_list->FindSectionByName(g_segment_name_OBJC));
2522  SectionSP eh_frame_section_sp;
2523  if (text_section_sp.get())
2524  eh_frame_section_sp = text_section_sp->GetChildren().FindSectionByName(
2525  g_section_name_eh_frame);
2526  else
2527  eh_frame_section_sp =
2528  section_list->FindSectionByName(g_section_name_eh_frame);
2529 
2530  const bool is_arm = (m_header.cputype == llvm::MachO::CPU_TYPE_ARM);
2531  const bool always_thumb = GetArchitecture().IsAlwaysThumbInstructions();
2532 
2533  // lldb works best if it knows the start address of all functions in a
2534  // module. Linker symbols or debug info are normally the best source of
2535  // information for start addr / size but they may be stripped in a released
2536  // binary. Two additional sources of information exist in Mach-O binaries:
2537  // LC_FUNCTION_STARTS - a list of ULEB128 encoded offsets of each
2538  // function's start address in the
2539  // binary, relative to the text section.
2540  // eh_frame - the eh_frame FDEs have the start addr & size of
2541  // each function
2542  // LC_FUNCTION_STARTS is the fastest source to read in, and is present on
2543  // all modern binaries.
2544  // Binaries built to run on older releases may need to use eh_frame
2545  // information.
2546 
2547  if (text_section_sp && function_starts_data.GetByteSize()) {
2548  FunctionStarts::Entry function_start_entry;
2549  function_start_entry.data = false;
2550  lldb::offset_t function_start_offset = 0;
2551  function_start_entry.addr = text_section_sp->GetFileAddress();
2552  uint64_t delta;
2553  while ((delta = function_starts_data.GetULEB128(&function_start_offset)) >
2554  0) {
2555  // Now append the current entry
2556  function_start_entry.addr += delta;
2557  if (is_arm) {
2558  if (function_start_entry.addr & 1) {
2559  function_start_entry.addr &= THUMB_ADDRESS_BIT_MASK;
2560  function_start_entry.data = true;
2561  } else if (always_thumb) {
2562  function_start_entry.data = true;
2563  }
2564  }
2565  function_starts.Append(function_start_entry);
2566  }
2567  } else {
2568  // If m_type is eTypeDebugInfo, then this is a dSYM - it will have the
2569  // load command claiming an eh_frame but it doesn't actually have the
2570  // eh_frame content. And if we have a dSYM, we don't need to do any of
2571  // this fill-in-the-missing-symbols works anyway - the debug info should
2572  // give us all the functions in the module.
2573  if (text_section_sp.get() && eh_frame_section_sp.get() &&
2574  m_type != eTypeDebugInfo) {
2575  DWARFCallFrameInfo eh_frame(*this, eh_frame_section_sp,
2576  DWARFCallFrameInfo::EH);
2578  eh_frame.GetFunctionAddressAndSizeVector(functions);
2579  addr_t text_base_addr = text_section_sp->GetFileAddress();
2580  size_t count = functions.GetSize();
2581  for (size_t i = 0; i < count; ++i) {
2583  functions.GetEntryAtIndex(i);
2584  if (func) {
2585  FunctionStarts::Entry function_start_entry;
2586  function_start_entry.addr = func->base - text_base_addr;
2587  if (is_arm) {
2588  if (function_start_entry.addr & 1) {
2589  function_start_entry.addr &= THUMB_ADDRESS_BIT_MASK;
2590  function_start_entry.data = true;
2591  } else if (always_thumb) {
2592  function_start_entry.data = true;
2593  }
2594  }
2595  function_starts.Append(function_start_entry);
2596  }
2597  }
2598  }
2599  }
2600 
2601  const size_t function_starts_count = function_starts.GetSize();
2602 
2603  // For user process binaries (executables, dylibs, frameworks, bundles), if
2604  // we don't have LC_FUNCTION_STARTS/eh_frame section in this binary, we're
2605  // going to assume the binary has been stripped. Don't allow assembly
2606  // language instruction emulation because we don't know proper function
2607  // start boundaries.
2608  //
2609  // For all other types of binaries (kernels, stand-alone bare board
2610  // binaries, kexts), they may not have LC_FUNCTION_STARTS / eh_frame
2611  // sections - we should not make any assumptions about them based on that.
2612  if (function_starts_count == 0 && CalculateStrata() == eStrataUser) {
2614  Log *unwind_or_symbol_log(GetLog(LLDBLog::Symbols | LLDBLog::Unwind));
2615 
2616  if (unwind_or_symbol_log)
2617  module_sp->LogMessage(
2618  unwind_or_symbol_log,
2619  "no LC_FUNCTION_STARTS, will not allow assembly profiled unwinds");
2620  }
2621 
2622  const user_id_t TEXT_eh_frame_sectID = eh_frame_section_sp.get()
2623  ? eh_frame_section_sp->GetID()
2624  : static_cast<user_id_t>(NO_SECT);
2625 
2626  uint32_t N_SO_index = UINT32_MAX;
2627 
2628  MachSymtabSectionInfo section_info(section_list);
2629  std::vector<uint32_t> N_FUN_indexes;
2630  std::vector<uint32_t> N_NSYM_indexes;
2631  std::vector<uint32_t> N_INCL_indexes;
2632  std::vector<uint32_t> N_BRAC_indexes;
2633  std::vector<uint32_t> N_COMM_indexes;
2634  typedef std::multimap<uint64_t, uint32_t> ValueToSymbolIndexMap;
2635  typedef llvm::DenseMap<uint32_t, uint32_t> NListIndexToSymbolIndexMap;
2636  typedef llvm::DenseMap<const char *, uint32_t> ConstNameToSymbolIndexMap;
2637  ValueToSymbolIndexMap N_FUN_addr_to_sym_idx;
2638  ValueToSymbolIndexMap N_STSYM_addr_to_sym_idx;
2639  ConstNameToSymbolIndexMap N_GSYM_name_to_sym_idx;
2640  // Any symbols that get merged into another will get an entry in this map
2641  // so we know
2642  NListIndexToSymbolIndexMap m_nlist_idx_to_sym_idx;
2643  uint32_t nlist_idx = 0;
2644  Symbol *symbol_ptr = nullptr;
2645 
2646  uint32_t sym_idx = 0;
2647  Symbol *sym = nullptr;
2648  size_t num_syms = 0;
2649  std::string memory_symbol_name;
2650  uint32_t unmapped_local_symbols_found = 0;
2651 
2652  std::vector<TrieEntryWithOffset> reexport_trie_entries;
2653  std::vector<TrieEntryWithOffset> external_sym_trie_entries;
2654  std::set<lldb::addr_t> resolver_addresses;
2655 
2656  if (dyld_trie_data.GetByteSize() > 0) {
2657  ConstString text_segment_name("__TEXT");
2658  SectionSP text_segment_sp =
2659  GetSectionList()->FindSectionByName(text_segment_name);
2660  lldb::addr_t text_segment_file_addr = LLDB_INVALID_ADDRESS;
2661  if (text_segment_sp)
2662  text_segment_file_addr = text_segment_sp->GetFileAddress();
2663  std::vector<llvm::StringRef> nameSlices;
2664  ParseTrieEntries(dyld_trie_data, 0, is_arm, text_segment_file_addr,
2665  nameSlices, resolver_addresses, reexport_trie_entries,
2666  external_sym_trie_entries);
2667  }
2668 
2669  typedef std::set<ConstString> IndirectSymbols;
2670  IndirectSymbols indirect_symbol_names;
2671 
2672 #if TARGET_OS_IPHONE
2673 
2674  // Some recent builds of the dyld_shared_cache (hereafter: DSC) have been
2675  // optimized by moving LOCAL symbols out of the memory mapped portion of
2676  // the DSC. The symbol information has all been retained, but it isn't
2677  // available in the normal nlist data. However, there *are* duplicate
2678  // entries of *some*
2679  // LOCAL symbols in the normal nlist data. To handle this situation
2680  // correctly, we must first attempt
2681  // to parse any DSC unmapped symbol information. If we find any, we set a
2682  // flag that tells the normal nlist parser to ignore all LOCAL symbols.
2683 
2684  if (IsSharedCacheBinary()) {
2685  // Before we can start mapping the DSC, we need to make certain the
2686  // target process is actually using the cache we can find.
2687 
2688  // Next we need to determine the correct path for the dyld shared cache.
2689 
2690  ArchSpec header_arch = GetArchitecture();
2691 
2692  UUID dsc_uuid;
2693  UUID process_shared_cache_uuid;
2694  addr_t process_shared_cache_base_addr;
2695 
2696  if (process) {
2697  GetProcessSharedCacheUUID(process, process_shared_cache_base_addr,
2698  process_shared_cache_uuid);
2699  }
2700 
2701  __block bool found_image = false;
2702  __block void *nlist_buffer = nullptr;
2703  __block unsigned nlist_count = 0;
2704  __block char *string_table = nullptr;
2705  __block vm_offset_t vm_nlist_memory = 0;
2706  __block mach_msg_type_number_t vm_nlist_bytes_read = 0;
2707  __block vm_offset_t vm_string_memory = 0;
2708  __block mach_msg_type_number_t vm_string_bytes_read = 0;
2709 
2710  auto _ = llvm::make_scope_exit(^{
2711  if (vm_nlist_memory)
2712  vm_deallocate(mach_task_self(), vm_nlist_memory, vm_nlist_bytes_read);
2713  if (vm_string_memory)
2714  vm_deallocate(mach_task_self(), vm_string_memory, vm_string_bytes_read);
2715  });
2716 
2717  typedef llvm::DenseMap<ConstString, uint16_t> UndefinedNameToDescMap;
2718  typedef llvm::DenseMap<uint32_t, ConstString> SymbolIndexToName;
2719  UndefinedNameToDescMap undefined_name_to_desc;
2720  SymbolIndexToName reexport_shlib_needs_fixup;
2721 
2722  dyld_for_each_installed_shared_cache(^(dyld_shared_cache_t shared_cache) {
2723  uuid_t cache_uuid;
2724  dyld_shared_cache_copy_uuid(shared_cache, &cache_uuid);
2725  if (found_image)
2726  return;
2727 
2728  if (process_shared_cache_uuid.IsValid() &&
2729  process_shared_cache_uuid != UUID::fromData(&cache_uuid, 16))
2730  return;
2731 
2732  dyld_shared_cache_for_each_image(shared_cache, ^(dyld_image_t image) {
2733  uuid_t dsc_image_uuid;
2734  if (found_image)
2735  return;
2736 
2737  dyld_image_copy_uuid(image, &dsc_image_uuid);
2738  if (image_uuid != UUID::fromData(dsc_image_uuid, 16))
2739  return;
2740 
2741  found_image = true;
2742 
2743  // Compute the size of the string table. We need to ask dyld for a
2744  // new SPI to avoid this step.
2745  dyld_image_local_nlist_content_4Symbolication(
2746  image, ^(const void *nlistStart, uint64_t nlistCount,
2747  const char *stringTable) {
2748  if (!nlistStart || !nlistCount)
2749  return;
2750 
2751  // The buffers passed here are valid only inside the block.
2752  // Use vm_read to make a cheap copy of them available for our
2753  // processing later.
2754  kern_return_t ret =
2755  vm_read(mach_task_self(), (vm_address_t)nlistStart,
2756  nlist_byte_size * nlistCount, &vm_nlist_memory,
2757  &vm_nlist_bytes_read);
2758  if (ret != KERN_SUCCESS)
2759  return;
2760  assert(vm_nlist_bytes_read == nlist_byte_size * nlistCount);
2761 
2762  // We don't know the size of the string table. It's cheaper
2763  // to map the whol VM region than to determine the size by
2764  // parsing all teh nlist entries.
2765  vm_address_t string_address = (vm_address_t)stringTable;
2766  vm_size_t region_size;
2767  mach_msg_type_number_t info_count = VM_REGION_BASIC_INFO_COUNT_64;
2768  vm_region_basic_info_data_t info;
2769  memory_object_name_t object;
2770  ret = vm_region_64(mach_task_self(), &string_address,
2771  &region_size, VM_REGION_BASIC_INFO_64,
2772  (vm_region_info_t)&info, &info_count, &object);
2773  if (ret != KERN_SUCCESS)
2774  return;
2775 
2776  ret = vm_read(mach_task_self(), (vm_address_t)stringTable,
2777  region_size -
2778  ((vm_address_t)stringTable - string_address),
2779  &vm_string_memory, &vm_string_bytes_read);
2780  if (ret != KERN_SUCCESS)
2781  return;
2782 
2783  nlist_buffer = (void *)vm_nlist_memory;
2784  string_table = (char *)vm_string_memory;
2785  nlist_count = nlistCount;
2786  });
2787  });
2788  });
2789  if (nlist_buffer) {
2790  DataExtractor dsc_local_symbols_data(nlist_buffer,
2791  nlist_count * nlist_byte_size,
2792  byte_order, addr_byte_size);
2793  unmapped_local_symbols_found = nlist_count;
2794 
2795  // The normal nlist code cannot correctly size the Symbols
2796  // array, we need to allocate it here.
2797  sym = symtab.Resize(
2798  symtab_load_command.nsyms + m_dysymtab.nindirectsyms +
2799  unmapped_local_symbols_found - m_dysymtab.nlocalsym);
2800  num_syms = symtab.GetNumSymbols();
2801 
2802  lldb::offset_t nlist_data_offset = 0;
2803 
2804  for (uint32_t nlist_index = 0;
2805  nlist_index < nlist_count;
2806  nlist_index++) {
2807  /////////////////////////////
2808  {
2809  llvm::Optional<struct nlist_64> nlist_maybe =
2810  ParseNList(dsc_local_symbols_data, nlist_data_offset,
2811  nlist_byte_size);
2812  if (!nlist_maybe)
2813  break;
2814  struct nlist_64 nlist = *nlist_maybe;
2815 
2817  const char *symbol_name = string_table + nlist.n_strx;
2818 
2819  if (symbol_name == NULL) {
2820  // No symbol should be NULL, even the symbols with no
2821  // string values should have an offset zero which
2822  // points to an empty C-string
// Report the bad entry and skip it. The call nests formatv() inside
// ReportError(), so two closing parentheses are required.
2823  Debugger::ReportError(llvm::formatv(
2824  "DSC unmapped local symbol[{0}] has invalid "
2825  "string table offset {1:x} in {2}, ignoring symbol",
2826  nlist_index, nlist.n_strx,
2827  module_sp->GetFileSpec().GetPath()));
2828  continue;
2829  }
2830  if (symbol_name[0] == '\0')
2831  symbol_name = NULL;
2832 
2833  const char *symbol_name_non_abi_mangled = NULL;
2834 
2835  SectionSP symbol_section;
2836  uint32_t symbol_byte_size = 0;
2837  bool add_nlist = true;
2838  bool is_debug = ((nlist.n_type & N_STAB) != 0);
2839  bool demangled_is_synthesized = false;
2840  bool is_gsym = false;
2841  bool set_value = true;
2842 
2843  assert(sym_idx < num_syms);
2844 
2845  sym[sym_idx].SetDebug(is_debug);
2846 
2847  if (is_debug) {
2848  switch (nlist.n_type) {
2849  case N_GSYM:
2850  // global symbol: name,,NO_SECT,type,0
2851  // Sometimes the N_GSYM value contains the address.
2852 
2853  // FIXME: In the .o files, we have a GSYM and a debug
2854  // symbol for all the ObjC data. They
2855  // have the same address, but we want to ensure that
2856  // we always find only the real symbol, 'cause we
2857  // don't currently correctly attribute the
2858  // GSYM one to the ObjCClass/Ivar/MetaClass
2859  // symbol type. This is a temporary hack to make
2860  // sure the ObjectiveC symbols get treated correctly.
2861  // To do this right, we should coalesce all the GSYM
2862  // & global symbols that have the same address.
2863 
2864  is_gsym = true;
2865  sym[sym_idx].SetExternal(true);
2866 
2867  if (symbol_name && symbol_name[0] == '_' &&
2868  symbol_name[1] == 'O') {
2869  llvm::StringRef symbol_name_ref(symbol_name);
2870  if (symbol_name_ref.startswith(
2871  g_objc_v2_prefix_class)) {
2872  symbol_name_non_abi_mangled = symbol_name + 1;
2873  symbol_name =
2874  symbol_name + g_objc_v2_prefix_class.size();
2875  type = eSymbolTypeObjCClass;
2876  demangled_is_synthesized = true;
2877 
2878  } else if (symbol_name_ref.startswith(
2879  g_objc_v2_prefix_metaclass)) {
2880  symbol_name_non_abi_mangled = symbol_name + 1;
2881  symbol_name =
2882  symbol_name + g_objc_v2_prefix_metaclass.size();
2883  type = eSymbolTypeObjCMetaClass;
2884  demangled_is_synthesized = true;
2885  } else if (symbol_name_ref.startswith(
2886  g_objc_v2_prefix_ivar)) {
2887  symbol_name_non_abi_mangled = symbol_name + 1;
2888  symbol_name =
2889  symbol_name + g_objc_v2_prefix_ivar.size();
2890  type = eSymbolTypeObjCIVar;
2891  demangled_is_synthesized = true;
2892  }
2893  } else {
2894  if (nlist.n_value != 0)
2895  symbol_section = section_info.GetSection(
2896  nlist.n_sect, nlist.n_value);
2897  type = eSymbolTypeData;
2898  }
2899  break;
2900 
2901  case N_FNAME:
2902  // procedure name (f77 kludge): name,,NO_SECT,0,0
2903  type = eSymbolTypeCompiler;
2904  break;
2905 
2906  case N_FUN:
2907  // procedure: name,,n_sect,linenumber,address
2908  if (symbol_name) {
2909  type = eSymbolTypeCode;
2910  symbol_section = section_info.GetSection(
2911  nlist.n_sect, nlist.n_value);
2912 
2913  N_FUN_addr_to_sym_idx.insert(
2914  std::make_pair(nlist.n_value, sym_idx));
2915  // We use the current number of symbols in the
2916  // symbol table in lieu of using nlist_idx in case
2917  // we ever start trimming entries out
2918  N_FUN_indexes.push_back(sym_idx);
2919  } else {
2920  type = eSymbolTypeCompiler;
2921 
2922  if (!N_FUN_indexes.empty()) {
2923  // Copy the size of the function into the
2924  // original
2925  // STAB entry so we don't have
2926  // to hunt for it later
2927  symtab.SymbolAtIndex(N_FUN_indexes.back())
2928  ->SetByteSize(nlist.n_value);
2929  N_FUN_indexes.pop_back();
2930  // We don't really need the end function STAB as
2931  // it contains the size which we already placed
2932  // with the original symbol, so don't add it if
2933  // we want a minimal symbol table
2934  add_nlist = false;
2935  }
2936  }
2937  break;
2938 
2939  case N_STSYM:
2940  // static symbol: name,,n_sect,type,address
2941  N_STSYM_addr_to_sym_idx.insert(
2942  std::make_pair(nlist.n_value, sym_idx));
2943  symbol_section = section_info.GetSection(nlist.n_sect,
2944  nlist.n_value);
2945  if (symbol_name && symbol_name[0]) {
2946  type = ObjectFile::GetSymbolTypeFromName(
2947  symbol_name + 1, eSymbolTypeData);
2948  }
2949  break;
2950 
2951  case N_LCSYM:
2952  // .lcomm symbol: name,,n_sect,type,address
2953  symbol_section = section_info.GetSection(nlist.n_sect,
2954  nlist.n_value);
2955  type = eSymbolTypeCommonBlock;
2956  break;
2957 
2958  case N_BNSYM:
2959  // We use the current number of symbols in the symbol
2960  // table in lieu of using nlist_idx in case we ever
2961  // start trimming entries out Skip these if we want
2962  // minimal symbol tables
2963  add_nlist = false;
2964  break;
2965 
2966  case N_ENSYM:
2967  // Set the size of the N_BNSYM to the terminating
2968  // index of this N_ENSYM so that we can always skip
2969  // the entire symbol if we need to navigate more
2970  // quickly at the source level when parsing STABS
2971  // Skip these if we want minimal symbol tables
2972  add_nlist = false;
2973  break;
2974 
2975  case N_OPT:
2976  // emitted with gcc2_compiled and in gcc source
2977  type = eSymbolTypeCompiler;
2978  break;
2979 
2980  case N_RSYM:
2981  // register sym: name,,NO_SECT,type,register
2982  type = eSymbolTypeVariable;
2983  break;
2984 
2985  case N_SLINE:
2986  // src line: 0,,n_sect,linenumber,address
2987  symbol_section = section_info.GetSection(nlist.n_sect,
2988  nlist.n_value);
2989  type = eSymbolTypeLineEntry;
2990  break;
2991 
2992  case N_SSYM:
2993  // structure elt: name,,NO_SECT,type,struct_offset
2994  type = eSymbolTypeVariableType;
2995  break;
2996 
2997  case N_SO:
2998  // source file name
2999  type = eSymbolTypeSourceFile;
3000  if (symbol_name == NULL) {
3001  add_nlist = false;
3002  if (N_SO_index != UINT32_MAX) {
3003  // Set the size of the N_SO to the terminating
3004  // index of this N_SO so that we can always skip
3005  // the entire N_SO if we need to navigate more
3006  // quickly at the source level when parsing STABS
3007  symbol_ptr = symtab.SymbolAtIndex(N_SO_index);
3008  symbol_ptr->SetByteSize(sym_idx);
3009  symbol_ptr->SetSizeIsSibling(true);
3010  }
3011  N_NSYM_indexes.clear();
3012  N_INCL_indexes.clear();
3013  N_BRAC_indexes.clear();
3014  N_COMM_indexes.clear();
3015  N_FUN_indexes.clear();
3016  N_SO_index = UINT32_MAX;
3017  } else {
3018  // We use the current number of symbols in the
3019  // symbol table in lieu of using nlist_idx in case
3020  // we ever start trimming entries out
3021  const bool N_SO_has_full_path = symbol_name[0] == '/';
3022  if (N_SO_has_full_path) {
3023  if ((N_SO_index == sym_idx - 1) &&
3024  ((sym_idx - 1) < num_syms)) {
3025  // We have two consecutive N_SO entries where
3026  // the first contains a directory and the
3027  // second contains a full path.
3028  sym[sym_idx - 1].GetMangled().SetValue(
3029  ConstString(symbol_name), false);
3030  m_nlist_idx_to_sym_idx[nlist_idx] = sym_idx - 1;
3031  add_nlist = false;
3032  } else {
3033  // This is the first entry in a N_SO that
3034  // contains a directory or
3035  // a full path to the source file
3036  N_SO_index = sym_idx;
3037  }
3038  } else if ((N_SO_index == sym_idx - 1) &&
3039  ((sym_idx - 1) < num_syms)) {
3040  // This is usually the second N_SO entry that
3041  // contains just the filename, so here we combine
3042  // it with the first one if we are minimizing the
3043  // symbol table
3044  const char *so_path = sym[sym_idx - 1]
3045  .GetMangled()
3046  .GetDemangledName()
3047  .AsCString();
3048  if (so_path && so_path[0]) {
3049  std::string full_so_path(so_path);
3050  const size_t double_slash_pos =
3051  full_so_path.find("//");
3052  if (double_slash_pos != std::string::npos) {
3053  // The linker has been generating bad N_SO
3054  // entries with doubled up paths
3055  // in the format "%s%s" where the first
3056  // string is the DW_AT_comp_dir, and the
3057  // second is the directory for the source
3058  // file so you end up with a path that looks
3059  // like "/tmp/src//tmp/src/"
3060  FileSpec so_dir(so_path);
3061  if (!FileSystem::Instance().Exists(so_dir)) {
3062  so_dir.SetFile(
3063  &full_so_path[double_slash_pos + 1],
3064  FileSpec::Style::native);
3065  if (FileSystem::Instance().Exists(so_dir)) {
3066  // Trim off the incorrect path
3067  full_so_path.erase(0, double_slash_pos + 1);
3068  }
3069  }
3070  }
3071  if (*full_so_path.rbegin() != '/')
3072  full_so_path += '/';
3073  full_so_path += symbol_name;
3074  sym[sym_idx - 1].GetMangled().SetValue(
3075  ConstString(full_so_path.c_str()), false);
3076  add_nlist = false;
3077  m_nlist_idx_to_sym_idx[nlist_idx] = sym_idx - 1;
3078  }
3079  } else {
3080  // This could be a relative path to a N_SO
3081  N_SO_index = sym_idx;
3082  }
3083  }
3084  break;
3085 
3086  case N_OSO:
3087  // object file name: name,,0,0,st_mtime
3088  type = eSymbolTypeObjectFile;
3089  break;
3090 
3091  case N_LSYM:
3092  // local sym: name,,NO_SECT,type,offset
3093  type = eSymbolTypeLocal;
3094  break;
3095 
3096  // INCL scopes
3097  case N_BINCL:
3098  // include file beginning: name,,NO_SECT,0,sum We use
3099  // the current number of symbols in the symbol table
3100  // in lieu of using nlist_idx in case we ever start
3101  // trimming entries out
3102  N_INCL_indexes.push_back(sym_idx);
3103  type = eSymbolTypeScopeBegin;
3104  break;
3105 
3106  case N_EINCL:
3107  // include file end: name,,NO_SECT,0,0
3108  // Set the size of the N_BINCL to the terminating
3109  // index of this N_EINCL so that we can always skip
3110  // the entire symbol if we need to navigate more
3111  // quickly at the source level when parsing STABS
3112  if (!N_INCL_indexes.empty()) {
3113  symbol_ptr =
3114  symtab.SymbolAtIndex(N_INCL_indexes.back());
3115  symbol_ptr->SetByteSize(sym_idx + 1);
3116  symbol_ptr->SetSizeIsSibling(true);
3117  N_INCL_indexes.pop_back();
3118  }
3119  type = eSymbolTypeScopeEnd;
3120  break;
3121 
3122  case N_SOL:
3123  // #included file name: name,,n_sect,0,address
3124  type = eSymbolTypeHeaderFile;
3125 
3126  // We currently don't use the header files on darwin
3127  add_nlist = false;
3128  break;
3129 
3130  case N_PARAMS:
3131  // compiler parameters: name,,NO_SECT,0,0
3132  type = eSymbolTypeCompiler;
3133  break;
3134 
3135  case N_VERSION:
3136  // compiler version: name,,NO_SECT,0,0
3137  type = eSymbolTypeCompiler;
3138  break;
3139 
3140  case N_OLEVEL:
3141  // compiler -O level: name,,NO_SECT,0,0
3142  type = eSymbolTypeCompiler;
3143  break;
3144 
3145  case N_PSYM:
3146  // parameter: name,,NO_SECT,type,offset
3147  type = eSymbolTypeVariable;
3148  break;
3149 
3150  case N_ENTRY:
3151  // alternate entry: name,,n_sect,linenumber,address
3152  symbol_section = section_info.GetSection(nlist.n_sect,
3153  nlist.n_value);
3154  type = eSymbolTypeLineEntry;
3155  break;
3156 
3157  // Left and Right Braces
3158  case N_LBRAC:
3159  // left bracket: 0,,NO_SECT,nesting level,address We
3160  // use the current number of symbols in the symbol
3161  // table in lieu of using nlist_idx in case we ever
3162  // start trimming entries out
3163  symbol_section = section_info.GetSection(nlist.n_sect,
3164  nlist.n_value);
3165  N_BRAC_indexes.push_back(sym_idx);
3166  type = eSymbolTypeScopeBegin;
3167  break;
3168 
3169  case N_RBRAC:
3170  // right bracket: 0,,NO_SECT,nesting level,address
3171  // Set the size of the N_LBRAC to the terminating
3172  // index of this N_RBRAC so that we can always skip
3173  // the entire symbol if we need to navigate more
3174  // quickly at the source level when parsing STABS
3175  symbol_section = section_info.GetSection(nlist.n_sect,
3176  nlist.n_value);
3177  if (!N_BRAC_indexes.empty()) {
3178  symbol_ptr =
3179  symtab.SymbolAtIndex(N_BRAC_indexes.back());
3180  symbol_ptr->SetByteSize(sym_idx + 1);
3181  symbol_ptr->SetSizeIsSibling(true);
3182  N_BRAC_indexes.pop_back();
3183  }
3184  type = eSymbolTypeScopeEnd;
3185  break;
3186 
3187  case N_EXCL:
3188  // deleted include file: name,,NO_SECT,0,sum
3189  type = eSymbolTypeHeaderFile;
3190  break;
3191 
3192  // COMM scopes
3193  case N_BCOMM:
3194  // begin common: name,,NO_SECT,0,0
3195  // We use the current number of symbols in the symbol
3196  // table in lieu of using nlist_idx in case we ever
3197  // start trimming entries out
3198  type = eSymbolTypeScopeBegin;
3199  N_COMM_indexes.push_back(sym_idx);
3200  break;
3201 
3202  case N_ECOML:
3203  // end common (local name): 0,,n_sect,0,address
3204  symbol_section = section_info.GetSection(nlist.n_sect,
3205  nlist.n_value);
3206  // Fall through
3207 
3208  case N_ECOMM:
3209  // end common: name,,n_sect,0,0
3210  // Set the size of the N_BCOMM to the terminating
3211  // index of this N_ECOMM/N_ECOML so that we can
3212  // always skip the entire symbol if we need to
3213  // navigate more quickly at the source level when
3214  // parsing STABS
3215  if (!N_COMM_indexes.empty()) {
3216  symbol_ptr =
3217  symtab.SymbolAtIndex(N_COMM_indexes.back());
3218  symbol_ptr->SetByteSize(sym_idx + 1);
3219  symbol_ptr->SetSizeIsSibling(true);
3220  N_COMM_indexes.pop_back();
3221  }
3222  type = eSymbolTypeScopeEnd;
3223  break;
3224 
3225  case N_LENG:
3226  // second stab entry with length information
3227  type = eSymbolTypeAdditional;
3228  break;
3229 
3230  default:
3231  break;
3232  }
3233  } else {
3234  // uint8_t n_pext = N_PEXT & nlist.n_type;
3235  uint8_t n_type = N_TYPE & nlist.n_type;
3236  sym[sym_idx].SetExternal((N_EXT & nlist.n_type) != 0);
3237 
3238  switch (n_type) {
3239  case N_INDR: {
// Indirect symbol: n_value is the string-table offset of the name this
// symbol re-exports.
3240  const char *reexport_name_cstr =
3241  strtab_data.PeekCStr(nlist.n_value);
// symbol_name is reset to NULL earlier in this loop iteration when the
// string-table entry is empty, so it must be checked before symbol_name[0]
// is read below. A nameless entry is treated as undefined.
3242  if (reexport_name_cstr && reexport_name_cstr[0] && symbol_name) {
3243  type = eSymbolTypeReExported;
// Drop one leading underscore from the re-exported name, if present.
3244  ConstString reexport_name(
3245  reexport_name_cstr +
3246  ((reexport_name_cstr[0] == '_') ? 1 : 0));
3247  sym[sym_idx].SetReExportedSymbolName(reexport_name);
3248  set_value = false;
// Remember this symbol so its source library can be filled in after the
// loop.
3249  reexport_shlib_needs_fixup[sym_idx] = reexport_name;
3250  indirect_symbol_names.insert(ConstString(
3251  symbol_name + ((symbol_name[0] == '_') ? 1 : 0)));
3252  } else
3253  type = eSymbolTypeUndefined;
3254  } break;
3255 
3256  case N_UNDF:
3257  if (symbol_name && symbol_name[0]) {
3258  ConstString undefined_name(
3259  symbol_name + ((symbol_name[0] == '_') ? 1 : 0));
3260  undefined_name_to_desc[undefined_name] = nlist.n_desc;
3261  }
3262  // Fall through
3263  case N_PBUD:
3264  type = eSymbolTypeUndefined;
3265  break;
3266 
3267  case N_ABS:
3268  type = eSymbolTypeAbsolute;
3269  break;
3270 
3271  case N_SECT: {
3272  symbol_section = section_info.GetSection(nlist.n_sect,
3273  nlist.n_value);
3274 
3275  if (symbol_section == NULL) {
3276  // TODO: warn about this?
3277  add_nlist = false;
3278  break;
3279  }
3280 
3281  if (TEXT_eh_frame_sectID == nlist.n_sect) {
3282  type = eSymbolTypeException;
3283  } else {
3284  uint32_t section_type =
3285  symbol_section->Get() & SECTION_TYPE;
3286 
3287  switch (section_type) {
3288  case S_CSTRING_LITERALS:
3289  type = eSymbolTypeData;
3290  break; // section with only literal C strings
3291  case S_4BYTE_LITERALS:
3292  type = eSymbolTypeData;
3293  break; // section with only 4 byte literals
3294  case S_8BYTE_LITERALS:
3295  type = eSymbolTypeData;
3296  break; // section with only 8 byte literals
3297  case S_LITERAL_POINTERS:
3298  type = eSymbolTypeTrampoline;
3299  break; // section with only pointers to literals
3300  case S_NON_LAZY_SYMBOL_POINTERS:
3301  type = eSymbolTypeTrampoline;
3302  break; // section with only non-lazy symbol
3303  // pointers
3304  case S_LAZY_SYMBOL_POINTERS:
3305  type = eSymbolTypeTrampoline;
3306  break; // section with only lazy symbol pointers
3307  case S_SYMBOL_STUBS:
3308  type = eSymbolTypeTrampoline;
3309  break; // section with only symbol stubs, byte
3310  // size of stub in the reserved2 field
3311  case S_MOD_INIT_FUNC_POINTERS:
3312  type = eSymbolTypeCode;
3313  break; // section with only function pointers for
3314  // initialization
3315  case S_MOD_TERM_FUNC_POINTERS:
3316  type = eSymbolTypeCode;
3317  break; // section with only function pointers for
3318  // termination
3319  case S_INTERPOSING:
3320  type = eSymbolTypeTrampoline;
3321  break; // section with only pairs of function
3322  // pointers for interposing
3323  case S_16BYTE_LITERALS:
3324  type = eSymbolTypeData;
3325  break; // section with only 16 byte literals
3326  case S_DTRACE_DOF:
3328  break;
3329  case S_LAZY_DYLIB_SYMBOL_POINTERS:
3330  type = eSymbolTypeTrampoline;
3331  break;
3332  default:
3333  switch (symbol_section->GetType()) {
3335  type = eSymbolTypeCode;
3336  break;
3337  case eSectionTypeData:
3338  case eSectionTypeDataCString: // Inlined C string
3339  // data
3340  case eSectionTypeDataCStringPointers: // Pointers
3341  // to C
3342  // string
3343  // data
3344  case eSectionTypeDataSymbolAddress: // Address of
3345  // a symbol in
3346  // the symbol
3347  // table
3348  case eSectionTypeData4:
3349  case eSectionTypeData8:
3350  case eSectionTypeData16:
3351  type = eSymbolTypeData;
3352  break;
3353  default:
3354  break;
3355  }
3356  break;
3357  }
3358 
3359  if (type == eSymbolTypeInvalid) {
3360  const char *symbol_sect_name =
3361  symbol_section->GetName().AsCString();
3362  if (symbol_section->IsDescendant(
3363  text_section_sp.get())) {
3364  if (symbol_section->IsClear(
3365  S_ATTR_PURE_INSTRUCTIONS |
3366  S_ATTR_SELF_MODIFYING_CODE |
3367  S_ATTR_SOME_INSTRUCTIONS))
3368  type = eSymbolTypeData;
3369  else
3370  type = eSymbolTypeCode;
3371  } else if (symbol_section->IsDescendant(
3372  data_section_sp.get()) ||
3373  symbol_section->IsDescendant(
3374  data_dirty_section_sp.get()) ||
3375  symbol_section->IsDescendant(
3376  data_const_section_sp.get())) {
3377  if (symbol_sect_name &&
3378  ::strstr(symbol_sect_name, "__objc") ==
3379  symbol_sect_name) {
3380  type = eSymbolTypeRuntime;
3381 
3382  if (symbol_name) {
3383  llvm::StringRef symbol_name_ref(symbol_name);
3384  if (symbol_name_ref.startswith("_OBJC_")) {
3385  llvm::StringRef
3386  g_objc_v2_prefix_class(
3387  "_OBJC_CLASS_$_");
3388  llvm::StringRef
3389  g_objc_v2_prefix_metaclass(
3390  "_OBJC_METACLASS_$_");
3391  llvm::StringRef
3392  g_objc_v2_prefix_ivar("_OBJC_IVAR_$_");
3393  if (symbol_name_ref.startswith(
3394  g_objc_v2_prefix_class)) {
3395  symbol_name_non_abi_mangled =
3396  symbol_name + 1;
3397  symbol_name =
3398  symbol_name +
3399  g_objc_v2_prefix_class.size();
3400  type = eSymbolTypeObjCClass;
3401  demangled_is_synthesized = true;
3402  } else if (
3403  symbol_name_ref.startswith(
3404  g_objc_v2_prefix_metaclass)) {
3405  symbol_name_non_abi_mangled =
3406  symbol_name + 1;
3407  symbol_name =
3408  symbol_name +
3409  g_objc_v2_prefix_metaclass.size();
3410  type = eSymbolTypeObjCMetaClass;
3411  demangled_is_synthesized = true;
3412  } else if (symbol_name_ref.startswith(
3413  g_objc_v2_prefix_ivar)) {
3414  symbol_name_non_abi_mangled =
3415  symbol_name + 1;
3416  symbol_name =
3417  symbol_name +
3418  g_objc_v2_prefix_ivar.size();
3419  type = eSymbolTypeObjCIVar;
3420  demangled_is_synthesized = true;
3421  }
3422  }
3423  }
3424  } else if (symbol_sect_name &&
3425  ::strstr(symbol_sect_name,
3426  "__gcc_except_tab") ==
3427  symbol_sect_name) {
3428  type = eSymbolTypeException;
3429  } else {
3430  type = eSymbolTypeData;
3431  }
3432  } else if (symbol_sect_name &&
3433  ::strstr(symbol_sect_name, "__IMPORT") ==
3434  symbol_sect_name) {
3435  type = eSymbolTypeTrampoline;
3436  } else if (symbol_section->IsDescendant(
3437  objc_section_sp.get())) {
3438  type = eSymbolTypeRuntime;
3439  if (symbol_name && symbol_name[0] == '.') {
3440  llvm::StringRef symbol_name_ref(symbol_name);
3441  llvm::StringRef
3442  g_objc_v1_prefix_class(".objc_class_name_");
3443  if (symbol_name_ref.startswith(
3444  g_objc_v1_prefix_class)) {
3445  symbol_name_non_abi_mangled = symbol_name;
3446  symbol_name = symbol_name +
3447  g_objc_v1_prefix_class.size();
3448  type = eSymbolTypeObjCClass;
3449  demangled_is_synthesized = true;
3450  }
3451  }
3452  }
3453  }
3454  }
3455  } break;
3456  }
3457  }
3458 
3459  if (add_nlist) {
3460  uint64_t symbol_value = nlist.n_value;
3461  if (symbol_name_non_abi_mangled) {
3462  sym[sym_idx].GetMangled().SetMangledName(
3463  ConstString(symbol_name_non_abi_mangled));
3464  sym[sym_idx].GetMangled().SetDemangledName(
3465  ConstString(symbol_name));
3466  } else {
3467  bool symbol_name_is_mangled = false;
3468 
3469  if (symbol_name && symbol_name[0] == '_') {
3470  symbol_name_is_mangled = symbol_name[1] == '_';
3471  symbol_name++; // Skip the leading underscore
3472  }
3473 
3474  if (symbol_name) {
3475  ConstString const_symbol_name(symbol_name);
3476  sym[sym_idx].GetMangled().SetValue(
3477  const_symbol_name, symbol_name_is_mangled);
3478  if (is_gsym && is_debug) {
3479  const char *gsym_name =
3480  sym[sym_idx]
3481  .GetMangled()
3482  .GetName(Mangled::ePreferMangled)
3483  .GetCString();
3484  if (gsym_name)
3485  N_GSYM_name_to_sym_idx[gsym_name] = sym_idx;
3486  }
3487  }
3488  }
3489  if (symbol_section) {
3490  const addr_t section_file_addr =
3491  symbol_section->GetFileAddress();
3492  if (symbol_byte_size == 0 &&
3493  function_starts_count > 0) {
3494  addr_t symbol_lookup_file_addr = nlist.n_value;
3495  // Do an exact address match for non-ARM addresses,
3496  // else get the closest since the symbol might be a
3497  // thumb symbol which has an address with bit zero
3498  // set
3499  FunctionStarts::Entry *func_start_entry =
3500  function_starts.FindEntry(symbol_lookup_file_addr,
3501  !is_arm);
3502  if (is_arm && func_start_entry) {
3503  // Verify that the function start address is the
3504  // symbol address (ARM) or the symbol address + 1
3505  // (thumb)
3506  if (func_start_entry->addr !=
3507  symbol_lookup_file_addr &&
3508  func_start_entry->addr !=
3509  (symbol_lookup_file_addr + 1)) {
3510  // Not the right entry, NULL it out...
3511  func_start_entry = NULL;
3512  }
3513  }
3514  if (func_start_entry) {
3515  func_start_entry->data = true;
3516 
3517  addr_t symbol_file_addr = func_start_entry->addr;
3518  uint32_t symbol_flags = 0;
3519  if (is_arm) {
3520  if (symbol_file_addr & 1)
3521  symbol_flags = MACHO_NLIST_ARM_SYMBOL_IS_THUMB;
3522  symbol_file_addr &= THUMB_ADDRESS_BIT_MASK;
3523  }
3524 
3525  const FunctionStarts::Entry *next_func_start_entry =
3526  function_starts.FindNextEntry(func_start_entry);
3527  const addr_t section_end_file_addr =
3528  section_file_addr +
3529  symbol_section->GetByteSize();
3530  if (next_func_start_entry) {
3531  addr_t next_symbol_file_addr =
3532  next_func_start_entry->addr;
3533  // Be sure to clear the Thumb address bit when
3534  // we calculate the size from the current and
3535  // next address
3536  if (is_arm)
3537  next_symbol_file_addr &= THUMB_ADDRESS_BIT_MASK;
3538  symbol_byte_size = std::min<lldb::addr_t>(
3539  next_symbol_file_addr - symbol_file_addr,
3540  section_end_file_addr - symbol_file_addr);
3541  } else {
3542  symbol_byte_size =
3543  section_end_file_addr - symbol_file_addr;
3544  }
3545  }
3546  }
3547  symbol_value -= section_file_addr;
3548  }
3549 
3550  if (is_debug == false) {
3551  if (type == eSymbolTypeCode) {
3552  // See if we can find a N_FUN entry for any code
3553  // symbols. If we do find a match, and the name
3554  // matches, then we can merge the two into just the
3555  // function symbol to avoid duplicate entries in
3556  // the symbol table
3557  auto range =
3558  N_FUN_addr_to_sym_idx.equal_range(nlist.n_value);
3559  if (range.first != range.second) {
3560  bool found_it = false;
3561  for (auto pos = range.first; pos != range.second;
3562  ++pos) {
3563  if (sym[sym_idx].GetMangled().GetName(
3564  Mangled::ePreferMangled) ==
3565  sym[pos->second].GetMangled().GetName(
3566  Mangled::ePreferMangled)) {
3567  m_nlist_idx_to_sym_idx[nlist_idx] = pos->second;
3568  // We just need the flags from the linker
3569  // symbol, so put these flags
3570  // into the N_FUN flags to avoid duplicate
3571  // symbols in the symbol table
3572  sym[pos->second].SetExternal(
3573  sym[sym_idx].IsExternal());
3574  sym[pos->second].SetFlags(nlist.n_type << 16 |
3575  nlist.n_desc);
3576  if (resolver_addresses.find(nlist.n_value) !=
3577  resolver_addresses.end())
3578  sym[pos->second].SetType(eSymbolTypeResolver);
3579  sym[sym_idx].Clear();
3580  found_it = true;
3581  break;
3582  }
3583  }
3584  if (found_it)
3585  continue;
3586  } else {
3587  if (resolver_addresses.find(nlist.n_value) !=
3588  resolver_addresses.end())
3589  type = eSymbolTypeResolver;
3590  }
3591  } else if (type == eSymbolTypeData ||
3592  type == eSymbolTypeObjCClass ||
3593  type == eSymbolTypeObjCMetaClass ||
3594  type == eSymbolTypeObjCIVar) {
3595  // See if we can find a N_STSYM entry for any data
3596  // symbols. If we do find a match, and the name
3597  // matches, then we can merge the two into just the
3598  // Static symbol to avoid duplicate entries in the
3599  // symbol table
3600  auto range = N_STSYM_addr_to_sym_idx.equal_range(
3601  nlist.n_value);
3602  if (range.first != range.second) {
3603  bool found_it = false;
3604  for (auto pos = range.first; pos != range.second;
3605  ++pos) {
3606  if (sym[sym_idx].GetMangled().GetName(
3607  Mangled::ePreferMangled) ==
3608  sym[pos->second].GetMangled().GetName(
3609  Mangled::ePreferMangled)) {
3610  m_nlist_idx_to_sym_idx[nlist_idx] = pos->second;
3611  // We just need the flags from the linker
3612  // symbol, so put these flags
3613  // into the N_STSYM flags to avoid duplicate
3614  // symbols in the symbol table
3615  sym[pos->second].SetExternal(
3616  sym[sym_idx].IsExternal());
3617  sym[pos->second].SetFlags(nlist.n_type << 16 |
3618  nlist.n_desc);
3619  sym[sym_idx].Clear();
3620  found_it = true;
3621  break;
3622  }
3623  }
3624  if (found_it)
3625  continue;
3626  } else {
3627  const char *gsym_name =
3628  sym[sym_idx]
3629  .GetMangled()
3630  .GetName(Mangled::ePreferMangled)
3631  .GetCString();
3632  if (gsym_name) {
3633  // Combine N_GSYM stab entries with the non
3634  // stab symbol
3635  ConstNameToSymbolIndexMap::const_iterator pos =
3636  N_GSYM_name_to_sym_idx.find(gsym_name);
3637  if (pos != N_GSYM_name_to_sym_idx.end()) {
3638  const uint32_t GSYM_sym_idx = pos->second;
3639  m_nlist_idx_to_sym_idx[nlist_idx] =
3640  GSYM_sym_idx;
3641  // Copy the address, because often the N_GSYM
3642  // address has an invalid address of zero
3643  // when the global is a common symbol
3644  sym[GSYM_sym_idx].GetAddressRef().SetSection(
3645  symbol_section);
3646  sym[GSYM_sym_idx].GetAddressRef().SetOffset(
3647  symbol_value);
3648  add_symbol_addr(sym[GSYM_sym_idx]
3649  .GetAddress()
3650  .GetFileAddress());
3651  // We just need the flags from the linker
3652  // symbol, so put these flags
3653  // into the N_GSYM flags to avoid duplicate
3654  // symbols in the symbol table
3655  sym[GSYM_sym_idx].SetFlags(nlist.n_type << 16 |
3656  nlist.n_desc);
3657  sym[sym_idx].Clear();
3658  continue;
3659  }
3660  }
3661  }
3662  }
3663  }
3664 
3665  sym[sym_idx].SetID(nlist_idx);
3666  sym[sym_idx].SetType(type);
3667  if (set_value) {
3668  sym[sym_idx].GetAddressRef().SetSection(symbol_section);
3669  sym[sym_idx].GetAddressRef().SetOffset(symbol_value);
3670  add_symbol_addr(
3671  sym[sym_idx].GetAddress().GetFileAddress());
3672  }
3673  sym[sym_idx].SetFlags(nlist.n_type << 16 | nlist.n_desc);
3674 
3675  if (symbol_byte_size > 0)
3676  sym[sym_idx].SetByteSize(symbol_byte_size);
3677 
3678  if (demangled_is_synthesized)
3679  sym[sym_idx].SetDemangledNameIsSynthesized(true);
3680  ++sym_idx;
3681  } else {
3682  sym[sym_idx].Clear();
3683  }
3684  }
3685  /////////////////////////////
3686  }
3687  }
3688 
// Fix up re-exported symbols. reexport_shlib_needs_fixup maps a symbol index
// to the name it re-exports; undefined_name_to_desc maps an undefined
// symbol's name to its n_desc. When both have an entry for the same name,
// the library ordinal taken from that n_desc selects the dylib to record as
// the re-exported symbol's shared library.
3689  for (const auto &pos : reexport_shlib_needs_fixup) {
3690  const auto undef_pos = undefined_name_to_desc.find(pos.second);
3691  if (undef_pos != undefined_name_to_desc.end()) {
3692  const uint8_t dylib_ordinal =
3693  llvm::MachO::GET_LIBRARY_ORDINAL(undef_pos->second);
// NOTE(review): the lookup below uses dylib_ordinal - 1 as the index, so an
// ordinal equal to GetSize() would name the last entry but is rejected by
// the `<` bound. Confirm whether `<=` is intended.
3694  if (dylib_ordinal > 0 && dylib_ordinal < dylib_files.GetSize())
3695  sym[pos.first].SetReExportedSymbolSharedLibrary(
3696  dylib_files.GetFileSpecAtIndex(dylib_ordinal - 1));
3697  }
3698  }
3699  }
3700 
3701 #endif
3702  lldb::offset_t nlist_data_offset = 0;
3703 
3704  if (nlist_data.GetByteSize() > 0) {
3705 
3706  // If the sym array was not created while parsing the DSC unmapped
3707  // symbols, create it now.
3708  if (sym == nullptr) {
3709  sym =
3710  symtab.Resize(symtab_load_command.nsyms + m_dysymtab.nindirectsyms);
3711  num_syms = symtab.GetNumSymbols();
3712  }
3713 
3714  if (unmapped_local_symbols_found) {
3715  assert(m_dysymtab.ilocalsym == 0);
3716  nlist_data_offset += (m_dysymtab.nlocalsym * nlist_byte_size);
3717  nlist_idx = m_dysymtab.nlocalsym;
3718  } else {
3719  nlist_idx = 0;
3720  }
3721 
3722  typedef llvm::DenseMap<ConstString, uint16_t> UndefinedNameToDescMap;
3723  typedef llvm::DenseMap<uint32_t, ConstString> SymbolIndexToName;
3724  UndefinedNameToDescMap undefined_name_to_desc;
3725  SymbolIndexToName reexport_shlib_needs_fixup;
3726 
3727  // Symtab parsing is a huge mess. Everything is entangled and the code
3728  // requires access to a ridiculous amount of variables. LLDB depends
3729  // heavily on the proper merging of symbols and to get that right we need
3730  // to make sure we have parsed all the debug symbols first. Therefore we
3731  // invoke the lambda twice, once to parse only the debug symbols and then
3732  // once more to parse the remaining symbols.
3733  auto ParseSymbolLambda = [&](struct nlist_64 &nlist, uint32_t nlist_idx,
3734  bool debug_only) {
3735  const bool is_debug = ((nlist.n_type & N_STAB) != 0);
3736  if (is_debug != debug_only)
3737  return true;
3738 
3739  const char *symbol_name_non_abi_mangled = nullptr;
3740  const char *symbol_name = nullptr;
3741 
3742  if (have_strtab_data) {
3743  symbol_name = strtab_data.PeekCStr(nlist.n_strx);
3744 
3745  if (symbol_name == nullptr) {
3746  // No symbol should be NULL, even the symbols with no string values
3747  // should have an offset zero which points to an empty C-string
3748  Debugger::ReportError(llvm::formatv(
3749  "symbol[{0}] has invalid string table offset {1:x} in {2}, "
3750  "ignoring symbol",
3751  nlist_idx, nlist.n_strx, module_sp->GetFileSpec().GetPath()));
3752  return true;
3753  }
3754  if (symbol_name[0] == '\0')
3755  symbol_name = nullptr;
3756  } else {
3757  const addr_t str_addr = strtab_addr + nlist.n_strx;
3758  Status str_error;
3759  if (process->ReadCStringFromMemory(str_addr, memory_symbol_name,
3760  str_error))
3761  symbol_name = memory_symbol_name.c_str();
3762  }
3763 
3765  SectionSP symbol_section;
3766  lldb::addr_t symbol_byte_size = 0;
3767  bool add_nlist = true;
3768  bool is_gsym = false;
3769  bool demangled_is_synthesized = false;
3770  bool set_value = true;
3771 
3772  assert(sym_idx < num_syms);
3773  sym[sym_idx].SetDebug(is_debug);
3774 
3775  if (is_debug) {
3776  switch (nlist.n_type) {
3777  case N_GSYM:
3778  // global symbol: name,,NO_SECT,type,0
3779  // Sometimes the N_GSYM value contains the address.
3780 
3781  // FIXME: In the .o files, we have a GSYM and a debug symbol for all
3782  // the ObjC data. They
3783  // have the same address, but we want to ensure that we always find
3784  // only the real symbol, 'cause we don't currently correctly
3785  // attribute the GSYM one to the ObjCClass/Ivar/MetaClass symbol
3786  // type. This is a temporary hack to make sure the ObjectiveC
3787  // symbols get treated correctly. To do this right, we should
3788  // coalesce all the GSYM & global symbols that have the same
3789  // address.
3790  is_gsym = true;
3791  sym[sym_idx].SetExternal(true);
3792 
3793  if (symbol_name && symbol_name[0] == '_' && symbol_name[1] == 'O') {
3794  llvm::StringRef symbol_name_ref(symbol_name);
3795  if (symbol_name_ref.startswith(g_objc_v2_prefix_class)) {
3796  symbol_name_non_abi_mangled = symbol_name + 1;
3797  symbol_name = symbol_name + g_objc_v2_prefix_class.size();
3798  type = eSymbolTypeObjCClass;
3799  demangled_is_synthesized = true;
3800 
3801  } else if (symbol_name_ref.startswith(g_objc_v2_prefix_metaclass)) {
3802  symbol_name_non_abi_mangled = symbol_name + 1;
3803  symbol_name = symbol_name + g_objc_v2_prefix_metaclass.size();
3804  type = eSymbolTypeObjCMetaClass;
3805  demangled_is_synthesized = true;
3806  } else if (symbol_name_ref.startswith(g_objc_v2_prefix_ivar)) {
3807  symbol_name_non_abi_mangled = symbol_name + 1;
3808  symbol_name = symbol_name + g_objc_v2_prefix_ivar.size();
3809  type = eSymbolTypeObjCIVar;
3810  demangled_is_synthesized = true;
3811  }
3812  } else {
3813  if (nlist.n_value != 0)
3814  symbol_section =
3815  section_info.GetSection(nlist.n_sect, nlist.n_value);
3816  type = eSymbolTypeData;
3817  }
3818  break;
3819 
3820  case N_FNAME:
3821  // procedure name (f77 kludge): name,,NO_SECT,0,0
3822  type = eSymbolTypeCompiler;
3823  break;
3824 
3825  case N_FUN:
3826  // procedure: name,,n_sect,linenumber,address
3827  if (symbol_name) {
3828  type = eSymbolTypeCode;
3829  symbol_section =
3830  section_info.GetSection(nlist.n_sect, nlist.n_value);
3831 
3832  N_FUN_addr_to_sym_idx.insert(
3833  std::make_pair(nlist.n_value, sym_idx));
3834  // We use the current number of symbols in the symbol table in
3835  // lieu of using nlist_idx in case we ever start trimming entries
3836  // out
3837  N_FUN_indexes.push_back(sym_idx);
3838  } else {
3839  type = eSymbolTypeCompiler;
3840 
3841  if (!N_FUN_indexes.empty()) {
3842  // Copy the size of the function into the original STAB entry
3843  // so we don't have to hunt for it later
3844  symtab.SymbolAtIndex(N_FUN_indexes.back())
3845  ->SetByteSize(nlist.n_value);
3846  N_FUN_indexes.pop_back();
3847  // We don't really need the end function STAB as it contains
3848  // the size which we already placed with the original symbol,
3849  // so don't add it if we want a minimal symbol table
3850  add_nlist = false;
3851  }
3852  }
3853  break;
3854 
3855  case N_STSYM:
3856  // static symbol: name,,n_sect,type,address
3857  N_STSYM_addr_to_sym_idx.insert(
3858  std::make_pair(nlist.n_value, sym_idx));
3859  symbol_section = section_info.GetSection(nlist.n_sect, nlist.n_value);
3860  if (symbol_name && symbol_name[0]) {
3861  type = ObjectFile::GetSymbolTypeFromName(symbol_name + 1,
3862  eSymbolTypeData);
3863  }
3864  break;
3865 
3866  case N_LCSYM:
3867  // .lcomm symbol: name,,n_sect,type,address
3868  symbol_section = section_info.GetSection(nlist.n_sect, nlist.n_value);
3869  type = eSymbolTypeCommonBlock;
3870  break;
3871 
3872  case N_BNSYM:
3873  // We use the current number of symbols in the symbol table in lieu
3874  // of using nlist_idx in case we ever start trimming entries out
3875  // Skip these if we want minimal symbol tables
3876  add_nlist = false;
3877  break;
3878 
3879  case N_ENSYM:
3880  // Set the size of the N_BNSYM to the terminating index of this
3881  // N_ENSYM so that we can always skip the entire symbol if we need
3882  // to navigate more quickly at the source level when parsing STABS
3883  // Skip these if we want minimal symbol tables
3884  add_nlist = false;
3885  break;
3886 
3887  case N_OPT:
3888  // emitted with gcc2_compiled and in gcc source
3889  type = eSymbolTypeCompiler;
3890  break;
3891 
3892  case N_RSYM:
3893  // register sym: name,,NO_SECT,type,register
3894  type = eSymbolTypeVariable;
3895  break;
3896 
3897  case N_SLINE:
3898  // src line: 0,,n_sect,linenumber,address
3899  symbol_section = section_info.GetSection(nlist.n_sect, nlist.n_value);
3900  type = eSymbolTypeLineEntry;
3901  break;
3902 
3903  case N_SSYM:
3904  // structure elt: name,,NO_SECT,type,struct_offset
3905  type = eSymbolTypeVariableType;
3906  break;
3907 
3908  case N_SO:
3909  // source file name
3910  type = eSymbolTypeSourceFile;
3911  if (symbol_name == nullptr) {
3912  add_nlist = false;
3913  if (N_SO_index != UINT32_MAX) {
3914  // Set the size of the N_SO to the terminating index of this
3915  // N_SO so that we can always skip the entire N_SO if we need
3916  // to navigate more quickly at the source level when parsing
3917  // STABS
3918  symbol_ptr = symtab.SymbolAtIndex(N_SO_index);
3919  symbol_ptr->SetByteSize(sym_idx);
3920  symbol_ptr->SetSizeIsSibling(true);
3921  }
3922  N_NSYM_indexes.clear();
3923  N_INCL_indexes.clear();
3924  N_BRAC_indexes.clear();
3925  N_COMM_indexes.clear();
3926  N_FUN_indexes.clear();
3927  N_SO_index = UINT32_MAX;
3928  } else {
3929  // We use the current number of symbols in the symbol table in
3930  // lieu of using nlist_idx in case we ever start trimming entries
3931  // out
3932  const bool N_SO_has_full_path = symbol_name[0] == '/';
3933  if (N_SO_has_full_path) {
3934  if ((N_SO_index == sym_idx - 1) && ((sym_idx - 1) < num_syms)) {
3935  // We have two consecutive N_SO entries where the first
3936  // contains a directory and the second contains a full path.
3937  sym[sym_idx - 1].GetMangled().SetValue(ConstString(symbol_name),
3938  false);
3939  m_nlist_idx_to_sym_idx[nlist_idx] = sym_idx - 1;
3940  add_nlist = false;
3941  } else {
3942  // This is the first entry in a N_SO that contains a
3943  // directory or a full path to the source file
3944  N_SO_index = sym_idx;
3945  }
3946  } else if ((N_SO_index == sym_idx - 1) &&
3947  ((sym_idx - 1) < num_syms)) {
3948  // This is usually the second N_SO entry that contains just the
3949  // filename, so here we combine it with the first one if we are
3950  // minimizing the symbol table
3951  const char *so_path =
3952  sym[sym_idx - 1].GetMangled().GetDemangledName().AsCString();
3953  if (so_path && so_path[0]) {
3954  std::string full_so_path(so_path);
3955  const size_t double_slash_pos = full_so_path.find("//");
3956  if (double_slash_pos != std::string::npos) {
3957  // The linker has been generating bad N_SO entries with
3958  // doubled up paths in the format "%s%s" where the first
3959  // string is the DW_AT_comp_dir, and the second is the
3960  // directory for the source file so you end up with a path
3961  // that looks like "/tmp/src//tmp/src/"
3962  FileSpec so_dir(so_path);
3963  if (!FileSystem::Instance().Exists(so_dir)) {
3964  so_dir.SetFile(&full_so_path[double_slash_pos + 1],
3965  FileSpec::Style::native);
3966  if (FileSystem::Instance().Exists(so_dir)) {
3967  // Trim off the incorrect path
3968  full_so_path.erase(0, double_slash_pos + 1);
3969  }
3970  }
3971  }
3972  if (*full_so_path.rbegin() != '/')
3973  full_so_path += '/';
3974  full_so_path += symbol_name;
3975  sym[sym_idx - 1].GetMangled().SetValue(
3976  ConstString(full_so_path.c_str()), false);
3977  add_nlist = false;
3978  m_nlist_idx_to_sym_idx[nlist_idx] = sym_idx - 1;
3979  }
3980  } else {
3981  // This could be a relative path to a N_SO
3982  N_SO_index = sym_idx;
3983  }
3984  }
3985  break;
3986 
3987  case N_OSO:
3988  // object file name: name,,0,0,st_mtime
3989  type = eSymbolTypeObjectFile;
3990  break;
3991 
3992  case N_LSYM:
3993  // local sym: name,,NO_SECT,type,offset
3994  type = eSymbolTypeLocal;
3995  break;
3996 
3997  // INCL scopes
3998  case N_BINCL:
3999  // include file beginning: name,,NO_SECT,0,sum We use the current
4000  // number of symbols in the symbol table in lieu of using nlist_idx
4001  // in case we ever start trimming entries out
4002  N_INCL_indexes.push_back(sym_idx);
4003  type = eSymbolTypeScopeBegin;
4004  break;
4005 
4006  case N_EINCL:
4007  // include file end: name,,NO_SECT,0,0
4008  // Set the size of the N_BINCL to the terminating index of this
4009  // N_EINCL so that we can always skip the entire symbol if we need
4010  // to navigate more quickly at the source level when parsing STABS
4011  if (!N_INCL_indexes.empty()) {
4012  symbol_ptr = symtab.SymbolAtIndex(N_INCL_indexes.back());
4013  symbol_ptr->SetByteSize(sym_idx + 1);
4014  symbol_ptr->SetSizeIsSibling(true);
4015  N_INCL_indexes.pop_back();
4016  }
4017  type = eSymbolTypeScopeEnd;
4018  break;
4019 
4020  case N_SOL:
4021  // #included file name: name,,n_sect,0,address
4022  type = eSymbolTypeHeaderFile;
4023 
4024  // We currently don't use the header files on darwin
4025  add_nlist = false;
4026  break;
4027 
4028  case N_PARAMS:
4029  // compiler parameters: name,,NO_SECT,0,0
4030  type = eSymbolTypeCompiler;
4031  break;
4032 
4033  case N_VERSION:
4034  // compiler version: name,,NO_SECT,0,0
4035  type = eSymbolTypeCompiler;
4036  break;
4037 
4038  case N_OLEVEL:
4039  // compiler -O level: name,,NO_SECT,0,0
4040  type = eSymbolTypeCompiler;
4041  break;
4042 
4043  case N_PSYM:
4044  // parameter: name,,NO_SECT,type,offset
4045  type = eSymbolTypeVariable;
4046  break;
4047 
4048  case N_ENTRY:
4049  // alternate entry: name,,n_sect,linenumber,address
4050  symbol_section = section_info.GetSection(nlist.n_sect, nlist.n_value);
4051  type = eSymbolTypeLineEntry;
4052  break;
4053 
4054  // Left and Right Braces
4055  case N_LBRAC:
4056  // left bracket: 0,,NO_SECT,nesting level,address We use the
4057  // current number of symbols in the symbol table in lieu of using
4058  // nlist_idx in case we ever start trimming entries out
4059  symbol_section = section_info.GetSection(nlist.n_sect, nlist.n_value);
4060  N_BRAC_indexes.push_back(sym_idx);
4061  type = eSymbolTypeScopeBegin;
4062  break;
4063 
4064  case N_RBRAC:
4065  // right bracket: 0,,NO_SECT,nesting level,address Set the size of
4066  // the N_LBRAC to the terminating index of this N_RBRAC so that we
4067  // can always skip the entire symbol if we need to navigate more
4068  // quickly at the source level when parsing STABS
4069  symbol_section = section_info.GetSection(nlist.n_sect, nlist.n_value);
4070  if (!N_BRAC_indexes.empty()) {
4071  symbol_ptr = symtab.SymbolAtIndex(N_BRAC_indexes.back());
4072  symbol_ptr->SetByteSize(sym_idx + 1);
4073  symbol_ptr->SetSizeIsSibling(true);
4074  N_BRAC_indexes.pop_back();
4075  }
4076  type = eSymbolTypeScopeEnd;
4077  break;
4078 
4079  case N_EXCL:
4080  // deleted include file: name,,NO_SECT,0,sum
4081  type = eSymbolTypeHeaderFile;
4082  break;
4083 
4084  // COMM scopes
4085  case N_BCOMM:
4086  // begin common: name,,NO_SECT,0,0
4087  // We use the current number of symbols in the symbol table in lieu
4088  // of using nlist_idx in case we ever start trimming entries out
4089  type = eSymbolTypeScopeBegin;
4090  N_COMM_indexes.push_back(sym_idx);
4091  break;
4092 
4093  case N_ECOML:
4094  // end common (local name): 0,,n_sect,0,address
4095  symbol_section = section_info.GetSection(nlist.n_sect, nlist.n_value);
4096  [[fallthrough]];
4097 
4098  case N_ECOMM:
4099  // end common: name,,n_sect,0,0
4100  // Set the size of the N_BCOMM to the terminating index of this
4101  // N_ECOMM/N_ECOML so that we can always skip the entire symbol if
4102  // we need to navigate more quickly at the source level when
4103  // parsing STABS
4104  if (!N_COMM_indexes.empty()) {
4105  symbol_ptr = symtab.SymbolAtIndex(N_COMM_indexes.back());
4106  symbol_ptr->SetByteSize(sym_idx + 1);
4107  symbol_ptr->SetSizeIsSibling(true);
4108  N_COMM_indexes.pop_back();
4109  }
4110  type = eSymbolTypeScopeEnd;
4111  break;
4112 
4113  case N_LENG:
4114  // second stab entry with length information
4115  type = eSymbolTypeAdditional;
4116  break;
4117 
4118  default:
4119  break;
4120  }
4121  } else {
4122  uint8_t n_type = N_TYPE & nlist.n_type;
4123  sym[sym_idx].SetExternal((N_EXT & nlist.n_type) != 0);
4124 
4125  switch (n_type) {
4126  case N_INDR: {
4127  const char *reexport_name_cstr = strtab_data.PeekCStr(nlist.n_value);
4128  if (reexport_name_cstr && reexport_name_cstr[0] && symbol_name) {
4129  type = eSymbolTypeReExported;
4130  ConstString reexport_name(reexport_name_cstr +
4131  ((reexport_name_cstr[0] == '_') ? 1 : 0));
4132  sym[sym_idx].SetReExportedSymbolName(reexport_name);
4133  set_value = false;
4134  reexport_shlib_needs_fixup[sym_idx] = reexport_name;
4135  indirect_symbol_names.insert(
4136  ConstString(symbol_name + ((symbol_name[0] == '_') ? 1 : 0)));
4137  } else
4138  type = eSymbolTypeUndefined;
4139  } break;
4140 
4141  case N_UNDF:
4142  if (symbol_name && symbol_name[0]) {
4143  ConstString undefined_name(symbol_name +
4144  ((symbol_name[0] == '_') ? 1 : 0));
4145  undefined_name_to_desc[undefined_name] = nlist.n_desc;
4146  }
4147  [[fallthrough]];
4148 
4149  case N_PBUD:
4150  type = eSymbolTypeUndefined;
4151  break;
4152 
4153  case N_ABS:
4154  type = eSymbolTypeAbsolute;
4155  break;
4156 
4157  case N_SECT: {
4158  symbol_section = section_info.GetSection(nlist.n_sect, nlist.n_value);
4159 
4160  if (!symbol_section) {
4161  // TODO: warn about this?
4162  add_nlist = false;
4163  break;
4164  }
4165 
4166  if (TEXT_eh_frame_sectID == nlist.n_sect) {
4167  type = eSymbolTypeException;
4168  } else {
4169  uint32_t section_type = symbol_section->Get() & SECTION_TYPE;
4170 
4171  switch (section_type) {
4172  case S_CSTRING_LITERALS:
4173  type = eSymbolTypeData;
4174  break; // section with only literal C strings
4175  case S_4BYTE_LITERALS:
4176  type = eSymbolTypeData;
4177  break; // section with only 4 byte literals
4178  case S_8BYTE_LITERALS:
4179  type = eSymbolTypeData;
4180  break; // section with only 8 byte literals
4181  case S_LITERAL_POINTERS:
4182  type = eSymbolTypeTrampoline;
4183  break; // section with only pointers to literals
4184  case S_NON_LAZY_SYMBOL_POINTERS:
4185  type = eSymbolTypeTrampoline;
4186  break; // section with only non-lazy symbol pointers
4187  case S_LAZY_SYMBOL_POINTERS:
4188  type = eSymbolTypeTrampoline;
4189  break; // section with only lazy symbol pointers
4190  case S_SYMBOL_STUBS:
4191  type = eSymbolTypeTrampoline;
4192  break; // section with only symbol stubs, byte size of stub in
4193  // the reserved2 field
4194  case S_MOD_INIT_FUNC_POINTERS:
4195  type = eSymbolTypeCode;
4196  break; // section with only function pointers for initialization
4197  case S_MOD_TERM_FUNC_POINTERS:
4198  type = eSymbolTypeCode;
4199  break; // section with only function pointers for termination
4200  case S_INTERPOSING:
4201  type = eSymbolTypeTrampoline;
4202  break; // section with only pairs of function pointers for
4203  // interposing
4204  case S_16BYTE_LITERALS:
4205  type = eSymbolTypeData;
4206  break; // section with only 16 byte literals
4207  case S_DTRACE_DOF:
4209  break;
4210  case S_LAZY_DYLIB_SYMBOL_POINTERS:
4211  type = eSymbolTypeTrampoline;
4212  break;
4213  default:
4214  switch (symbol_section->GetType()) {
4216  type = eSymbolTypeCode;
4217  break;
4218  case eSectionTypeData:
4219  case eSectionTypeDataCString: // Inlined C string data
4220  case eSectionTypeDataCStringPointers: // Pointers to C string
4221  // data
4222  case eSectionTypeDataSymbolAddress: // Address of a symbol in
4223  // the symbol table
4224  case eSectionTypeData4:
4225  case eSectionTypeData8:
4226  case eSectionTypeData16:
4227  type = eSymbolTypeData;
4228  break;
4229  default:
4230  break;
4231  }
4232  break;
4233  }
4234 
4235  if (type == eSymbolTypeInvalid) {
4236  const char *symbol_sect_name =
4237  symbol_section->GetName().AsCString();
4238  if (symbol_section->IsDescendant(text_section_sp.get())) {
4239  if (symbol_section->IsClear(S_ATTR_PURE_INSTRUCTIONS |
4240  S_ATTR_SELF_MODIFYING_CODE |
4241  S_ATTR_SOME_INSTRUCTIONS))
4242  type = eSymbolTypeData;
4243  else
4244  type = eSymbolTypeCode;
4245  } else if (symbol_section->IsDescendant(data_section_sp.get()) ||
4246  symbol_section->IsDescendant(
4247  data_dirty_section_sp.get()) ||
4248  symbol_section->IsDescendant(
4249  data_const_section_sp.get())) {
4250  if (symbol_sect_name &&
4251  ::strstr(symbol_sect_name, "__objc") == symbol_sect_name) {
4252  type = eSymbolTypeRuntime;
4253 
4254  if (symbol_name) {
4255  llvm::StringRef symbol_name_ref(symbol_name);
4256  if (symbol_name_ref.startswith("_OBJC_")) {
4257  llvm::StringRef g_objc_v2_prefix_class(
4258  "_OBJC_CLASS_$_");
4259  llvm::StringRef g_objc_v2_prefix_metaclass(
4260  "_OBJC_METACLASS_$_");
4261  llvm::StringRef g_objc_v2_prefix_ivar(
4262  "_OBJC_IVAR_$_");
4263  if (symbol_name_ref.startswith(g_objc_v2_prefix_class)) {
4264  symbol_name_non_abi_mangled = symbol_name + 1;
4265  symbol_name =
4266  symbol_name + g_objc_v2_prefix_class.size();
4267  type = eSymbolTypeObjCClass;
4268  demangled_is_synthesized = true;
4269  } else if (symbol_name_ref.startswith(
4270  g_objc_v2_prefix_metaclass)) {
4271  symbol_name_non_abi_mangled = symbol_name + 1;
4272  symbol_name =
4273  symbol_name + g_objc_v2_prefix_metaclass.size();
4274  type = eSymbolTypeObjCMetaClass;
4275  demangled_is_synthesized = true;
4276  } else if (symbol_name_ref.startswith(
4277  g_objc_v2_prefix_ivar)) {
4278  symbol_name_non_abi_mangled = symbol_name + 1;
4279  symbol_name =
4280  symbol_name + g_objc_v2_prefix_ivar.size();
4281  type = eSymbolTypeObjCIVar;
4282  demangled_is_synthesized = true;
4283  }
4284  }
4285  }
4286  } else if (symbol_sect_name &&
4287  ::strstr(symbol_sect_name, "__gcc_except_tab") ==
4288  symbol_sect_name) {
4289  type = eSymbolTypeException;
4290  } else {
4291  type = eSymbolTypeData;
4292  }
4293  } else if (symbol_sect_name &&
4294  ::strstr(symbol_sect_name, "__IMPORT") ==
4295  symbol_sect_name) {
4296  type = eSymbolTypeTrampoline;
4297  } else if (symbol_section->IsDescendant(objc_section_sp.get())) {
4298  type = eSymbolTypeRuntime;
4299  if (symbol_name && symbol_name[0] == '.') {
4300  llvm::StringRef symbol_name_ref(symbol_name);
4301  llvm::StringRef g_objc_v1_prefix_class(
4302  ".objc_class_name_");
4303  if (symbol_name_ref.startswith(g_objc_v1_prefix_class)) {
4304  symbol_name_non_abi_mangled = symbol_name;
4305  symbol_name = symbol_name + g_objc_v1_prefix_class.size();
4306  type = eSymbolTypeObjCClass;
4307  demangled_is_synthesized = true;
4308  }
4309  }
4310  }
4311  }
4312  }
4313  } break;
4314  }
4315  }
4316 
4317  if (!add_nlist) {
4318  sym[sym_idx].Clear();
4319  return true;
4320  }
4321 
4322  uint64_t symbol_value = nlist.n_value;
4323 
4324  if (symbol_name_non_abi_mangled) {
4325  sym[sym_idx].GetMangled().SetMangledName(
4326  ConstString(symbol_name_non_abi_mangled));
4327  sym[sym_idx].GetMangled().SetDemangledName(ConstString(symbol_name));
4328  } else {
4329  bool symbol_name_is_mangled = false;
4330 
4331  if (symbol_name && symbol_name[0] == '_') {
4332  symbol_name_is_mangled = symbol_name[1] == '_';
4333  symbol_name++; // Skip the leading underscore
4334  }
4335 
4336  if (symbol_name) {
4337  ConstString const_symbol_name(symbol_name);
4338  sym[sym_idx].GetMangled().SetValue(const_symbol_name,
4339  symbol_name_is_mangled);
4340  }
4341  }
4342 
4343  if (is_gsym) {
4344  const char *gsym_name = sym[sym_idx]
4345  .GetMangled()
4346  .GetName(Mangled::ePreferMangled)
4347  .GetCString();
4348  if (gsym_name)
4349  N_GSYM_name_to_sym_idx[gsym_name] = sym_idx;
4350  }
4351 
4352  if (symbol_section) {
4353  const addr_t section_file_addr = symbol_section->GetFileAddress();
4354  if (symbol_byte_size == 0 && function_starts_count > 0) {
4355  addr_t symbol_lookup_file_addr = nlist.n_value;
4356  // Do an exact address match for non-ARM addresses, else get the
4357  // closest since the symbol might be a thumb symbol which has an
4358  // address with bit zero set.
4359  FunctionStarts::Entry *func_start_entry =
4360  function_starts.FindEntry(symbol_lookup_file_addr, !is_arm);
4361  if (is_arm && func_start_entry) {
4362  // Verify that the function start address is the symbol address
4363  // (ARM) or the symbol address + 1 (thumb).
4364  if (func_start_entry->addr != symbol_lookup_file_addr &&
4365  func_start_entry->addr != (symbol_lookup_file_addr + 1)) {
4366  // Not the right entry, NULL it out...
4367  func_start_entry = nullptr;
4368  }
4369  }
4370  if (func_start_entry) {
4371  func_start_entry->data = true;
4372 
4373  addr_t symbol_file_addr = func_start_entry->addr;
4374  if (is_arm)
4375  symbol_file_addr &= THUMB_ADDRESS_BIT_MASK;
4376 
4377  const FunctionStarts::Entry *next_func_start_entry =
4378  function_starts.FindNextEntry(func_start_entry);
4379  const addr_t section_end_file_addr =
4380  section_file_addr + symbol_section->GetByteSize();
4381  if (next_func_start_entry) {
4382  addr_t next_symbol_file_addr = next_func_start_entry->addr;
4383  // Be sure to clear the Thumb address bit when we calculate the
4384  // size from the current and next address
4385  if (is_arm)
4386  next_symbol_file_addr &= THUMB_ADDRESS_BIT_MASK;
4387  symbol_byte_size = std::min<lldb::addr_t>(
4388  next_symbol_file_addr - symbol_file_addr,
4389  section_end_file_addr - symbol_file_addr);
4390  } else {
4391  symbol_byte_size = section_end_file_addr - symbol_file_addr;
4392  }
4393  }
4394  }
4395  symbol_value -= section_file_addr;
4396  }
4397 
4398  if (!is_debug) {
4399  if (type == eSymbolTypeCode) {
4400  // See if we can find a N_FUN entry for any code symbols. If we do
4401  // find a match, and the name matches, then we can merge the two into
4402  // just the function symbol to avoid duplicate entries in the symbol
4403  // table.
4404  std::pair<ValueToSymbolIndexMap::const_iterator,
4405  ValueToSymbolIndexMap::const_iterator>
4406  range;
4407  range = N_FUN_addr_to_sym_idx.equal_range(nlist.n_value);
4408  if (range.first != range.second) {
4409  for (ValueToSymbolIndexMap::const_iterator pos = range.first;
4410  pos != range.second; ++pos) {
4411  if (sym[sym_idx].GetMangled().GetName(Mangled::ePreferMangled) ==
4412  sym[pos->second].GetMangled().GetName(
4413  Mangled::ePreferMangled)) {
4414  m_nlist_idx_to_sym_idx[nlist_idx] = pos->second;
4415  // We just need the flags from the linker symbol, so put these
4416  // flags into the N_FUN flags to avoid duplicate symbols in the
4417  // symbol table.
4418  sym[pos->second].SetExternal(sym[sym_idx].IsExternal());
4419  sym[pos->second].SetFlags(nlist.n_type << 16 | nlist.n_desc);
4420  if (resolver_addresses.find(nlist.n_value) !=
4421  resolver_addresses.end())
4422  sym[pos->second].SetType(eSymbolTypeResolver);
4423  sym[sym_idx].Clear();
4424  return true;
4425  }
4426  }
4427  } else {
4428  if (resolver_addresses.find(nlist.n_value) !=
4429  resolver_addresses.end())
4430  type = eSymbolTypeResolver;
4431  }
4432  } else if (type == eSymbolTypeData || type == eSymbolTypeObjCClass ||
4433  type == eSymbolTypeObjCMetaClass ||
4434  type == eSymbolTypeObjCIVar) {
4435  // See if we can find a N_STSYM entry for any data symbols. If we do
4436  // find a match, and the name matches, then we can merge the two into
4437  // just the Static symbol to avoid duplicate entries in the symbol
4438  // table.
4439  std::pair<ValueToSymbolIndexMap::const_iterator,
4440  ValueToSymbolIndexMap::const_iterator>
4441  range;
4442  range = N_STSYM_addr_to_sym_idx.equal_range(nlist.n_value);
4443  if (range.first != range.second) {
4444  for (ValueToSymbolIndexMap::const_iterator pos = range.first;
4445  pos != range.second; ++pos) {
4446  if (sym[sym_idx].GetMangled().GetName(Mangled::ePreferMangled) ==
4447  sym[pos->second].GetMangled().GetName(
4448  Mangled::ePreferMangled)) {
4449  m_nlist_idx_to_sym_idx[nlist_idx] = pos->second;
4450  // We just need the flags from the linker symbol, so put these
4451  // flags into the N_STSYM flags to avoid duplicate symbols in
4452  // the symbol table.
4453  sym[pos->second].SetExternal(sym[sym_idx].IsExternal());
4454  sym[pos->second].SetFlags(nlist.n_type << 16 | nlist.n_desc);
4455  sym[sym_idx].Clear();
4456  return true;
4457  }
4458  }
4459  } else {
4460  // Combine N_GSYM stab entries with the non stab symbol.
4461  const char *gsym_name = sym[sym_idx]
4462  .GetMangled()
4463  .GetName(Mangled::ePreferMangled)
4464  .GetCString();
4465  if (gsym_name) {
4466  ConstNameToSymbolIndexMap::const_iterator pos =
4467  N_GSYM_name_to_sym_idx.find(gsym_name);
4468  if (pos != N_GSYM_name_to_sym_idx.end()) {
4469  const uint32_t GSYM_sym_idx = pos->second;
4470  m_nlist_idx_to_sym_idx[nlist_idx] = GSYM_sym_idx;
4471  // Copy the address, because often the N_GSYM address has an
4472  // invalid address of zero when the global is a common symbol.
4473  sym[GSYM_sym_idx].GetAddressRef().SetSection(symbol_section);
4474  sym[GSYM_sym_idx].GetAddressRef().SetOffset(symbol_value);
4475  add_symbol_addr(
4476  sym[GSYM_sym_idx].GetAddress().GetFileAddress());
4477  // We just need the flags from the linker symbol, so put these
4478  // flags into the N_GSYM flags to avoid duplicate symbols in
4479  // the symbol table.
4480  sym[GSYM_sym_idx].SetFlags(nlist.n_type << 16 | nlist.n_desc);
4481  sym[sym_idx].Clear();
4482  return true;
4483  }
4484  }
4485  }
4486  }
4487  }
4488 
4489  sym[sym_idx].SetID(nlist_idx);
4490  sym[sym_idx].SetType(type);
4491  if (set_value) {
4492  sym[sym_idx].GetAddressRef().SetSection(symbol_section);
4493  sym[sym_idx].GetAddressRef().SetOffset(symbol_value);
4494  if (symbol_section)
4495  add_symbol_addr(sym[sym_idx].GetAddress().GetFileAddress());
4496  }
4497  sym[sym_idx].SetFlags(nlist.n_type << 16 | nlist.n_desc);
4498  if (nlist.n_desc & N_WEAK_REF)
4499  sym[sym_idx].SetIsWeak(true);
4500 
4501  if (symbol_byte_size > 0)
4502  sym[sym_idx].SetByteSize(symbol_byte_size);
4503 
4504  if (demangled_is_synthesized)
4505  sym[sym_idx].SetDemangledNameIsSynthesized(true);
4506 
4507  ++sym_idx;
4508  return true;
4509  };
4510 
4511  // First parse all the nlists but don't process them yet. See the next
4512  // comment for an explanation why.
4513  std::vector<struct nlist_64> nlists;
4514  nlists.reserve(symtab_load_command.nsyms);
4515  for (; nlist_idx < symtab_load_command.nsyms; ++nlist_idx) {
4516  if (auto nlist =
4517  ParseNList(nlist_data, nlist_data_offset, nlist_byte_size))
4518  nlists.push_back(*nlist);
4519  else
4520  break;
4521  }
4522 
4523  // Now parse all the debug symbols. This is needed to merge non-debug
4524  // symbols in the next step. Non-debug symbols are always coalesced into
4525  // the debug symbol. Doing this in one step would mean that some symbols
4526  // won't be merged.
4527  nlist_idx = 0;
4528  for (auto &nlist : nlists) {
4529  if (!ParseSymbolLambda(nlist, nlist_idx++, DebugSymbols))
4530  break;
4531  }
4532 
4533  // Finally parse all the non debug symbols.
4534  nlist_idx = 0;
4535  for (auto &nlist : nlists) {
4536  if (!ParseSymbolLambda(nlist, nlist_idx++, NonDebugSymbols))
4537  break;
4538  }
4539 
4540  for (const auto &pos : reexport_shlib_needs_fixup) {
4541  const auto undef_pos = undefined_name_to_desc.find(pos.second);
4542  if (undef_pos != undefined_name_to_desc.end()) {
4543  const uint8_t dylib_ordinal =
4544  llvm::MachO::GET_LIBRARY_ORDINAL(undef_pos->second);
4545  if (dylib_ordinal > 0 && dylib_ordinal < dylib_files.GetSize())
4546  sym[pos.first].SetReExportedSymbolSharedLibrary(
4547  dylib_files.GetFileSpecAtIndex(dylib_ordinal - 1));
4548  }
4549  }
4550  }
4551 
4552  // Count how many trie symbols we'll add to the symbol table
4553  int trie_symbol_table_augment_count = 0;
4554  for (auto &e : external_sym_trie_entries) {
4555  if (symbols_added.find(e.entry.address) == symbols_added.end())
4556  trie_symbol_table_augment_count++;
4557  }
4558 
4559  if (num_syms < sym_idx + trie_symbol_table_augment_count) {
4560  num_syms = sym_idx + trie_symbol_table_augment_count;
4561  sym = symtab.Resize(num_syms);
4562  }
4563  uint32_t synthetic_sym_id = symtab_load_command.nsyms;
4564 
4565  // Add symbols from the trie to the symbol table.
4566  for (auto &e : external_sym_trie_entries) {
4567  if (symbols_added.contains(e.entry.address))
4568  continue;
4569 
4570  // Find the section that this trie address is in, use that to annotate
4571  // symbol type as we add the trie address and name to the symbol table.
4572  Address symbol_addr;
4573  if (module_sp->ResolveFileAddress(e.entry.address, symbol_addr)) {
4574  SectionSP symbol_section(symbol_addr.GetSection());
4575  const char *symbol_name = e.entry.name.GetCString();
4576  bool demangled_is_synthesized = false;
4577  SymbolType type =
4578  GetSymbolType(symbol_name, demangled_is_synthesized, text_section_sp,
4579  data_section_sp, data_dirty_section_sp,
4580  data_const_section_sp, symbol_section);
4581 
4582  sym[sym_idx].SetType(type);
4583  if (symbol_section) {
4584  sym[sym_idx].SetID(synthetic_sym_id++);
4585  sym[sym_idx].GetMangled().SetMangledName(ConstString(symbol_name));
4586  if (demangled_is_synthesized)
4587  sym[sym_idx].SetDemangledNameIsSynthesized(true);
4588  sym[sym_idx].SetIsSynthetic(true);
4589  sym[sym_idx].SetExternal(true);
4590  sym[sym_idx].GetAddressRef() = symbol_addr;
4591  add_symbol_addr(symbol_addr.GetFileAddress());
4592  if (e.entry.flags & TRIE_SYMBOL_IS_THUMB)
4594  ++sym_idx;
4595  }
4596  }
4597  }
4598 
4599  if (function_starts_count > 0) {
4600  uint32_t num_synthetic_function_symbols = 0;
4601  for (i = 0; i < function_starts_count; ++i) {
4602  if (symbols_added.find(function_starts.GetEntryRef(i).addr) ==
4603  symbols_added.end())
4604  ++num_synthetic_function_symbols;
4605  }
4606 
4607  if (num_synthetic_function_symbols > 0) {
4608  if (num_syms < sym_idx + num_synthetic_function_symbols) {
4609  num_syms = sym_idx + num_synthetic_function_symbols;
4610  sym = symtab.Resize(num_syms);
4611  }
4612  for (i = 0; i < function_starts_count; ++i) {
4613  const FunctionStarts::Entry *func_start_entry =
4614  function_starts.GetEntryAtIndex(i);
4615  if (symbols_added.find(func_start_entry->addr) == symbols_added.end()) {
4616  addr_t symbol_file_addr = func_start_entry->addr;
4617  uint32_t symbol_flags = 0;
4618  if (func_start_entry->data)
4619  symbol_flags = MACHO_NLIST_ARM_SYMBOL_IS_THUMB;
4620  Address symbol_addr;
4621  if (module_sp->ResolveFileAddress(symbol_file_addr, symbol_addr)) {
4622  SectionSP symbol_section(symbol_addr.GetSection());
4623  uint32_t symbol_byte_size = 0;
4624  if (symbol_section) {
4625  const addr_t section_file_addr = symbol_section->GetFileAddress();
4626  const FunctionStarts::Entry *next_func_start_entry =
4627  function_starts.FindNextEntry(func_start_entry);
4628  const addr_t section_end_file_addr =
4629  section_file_addr + symbol_section->GetByteSize();
4630  if (next_func_start_entry) {
4631  addr_t next_symbol_file_addr = next_func_start_entry->addr;
4632  if (is_arm)
4633  next_symbol_file_addr &= THUMB_ADDRESS_BIT_MASK;
4634  symbol_byte_size = std::min<lldb::addr_t>(
4635  next_symbol_file_addr - symbol_file_addr,
4636  section_end_file_addr - symbol_file_addr);
4637  } else {
4638  symbol_byte_size = section_end_file_addr - symbol_file_addr;
4639  }
4640  sym[sym_idx].SetID(synthetic_sym_id++);
4641  // Don't set the name for any synthetic symbols, the Symbol
4642  // object will generate one if needed when the name is accessed
4643  // via accessors.
4644  sym[sym_idx].GetMangled().SetDemangledName(ConstString());
4645  sym[sym_idx].SetType(eSymbolTypeCode);
4646  sym[sym_idx].SetIsSynthetic(true);
4647  sym[sym_idx].GetAddressRef() = symbol_addr;
4648  add_symbol_addr(symbol_addr.GetFileAddress());
4649  if (symbol_flags)
4650  sym[sym_idx].SetFlags(symbol_flags);
4651  if (symbol_byte_size)
4652  sym[sym_idx].SetByteSize(symbol_byte_size);
4653  ++sym_idx;
4654  }
4655  }
4656  }
4657  }
4658  }
4659  }
4660 
4661  // Trim our symbols down to just what we ended up with after removing any
4662  // symbols.
4663  if (sym_idx < num_syms) {
4664  num_syms = sym_idx;
4665  sym = symtab.Resize(num_syms);
4666  }
4667 
4668  // Now synthesize indirect symbols
4669  if (m_dysymtab.nindirectsyms != 0) {
4670  if (indirect_symbol_index_data.GetByteSize()) {
4671  NListIndexToSymbolIndexMap::const_iterator end_index_pos =
4672  m_nlist_idx_to_sym_idx.end();
4673 
4674  for (uint32_t sect_idx = 1; sect_idx < m_mach_sections.size();
4675  ++sect_idx) {
4676  if ((m_mach_sections[sect_idx].flags & SECTION_TYPE) ==
4677  S_SYMBOL_STUBS) {
4678  uint32_t symbol_stub_byte_size = m_mach_sections[sect_idx].reserved2;
4679  if (symbol_stub_byte_size == 0)
4680  continue;
4681 
4682  const uint32_t num_symbol_stubs =
4683  m_mach_sections[sect_idx].size / symbol_stub_byte_size;
4684 
4685  if (num_symbol_stubs == 0)
4686  continue;
4687 
4688  const uint32_t symbol_stub_index_offset =
4689  m_mach_sections[sect_idx].reserved1;
4690  for (uint32_t stub_idx = 0; stub_idx < num_symbol_stubs; ++stub_idx) {
4691  const uint32_t symbol_stub_index =
4692  symbol_stub_index_offset + stub_idx;
4693  const lldb::addr_t symbol_stub_addr =
4694  m_mach_sections[sect_idx].addr +
4695  (stub_idx * symbol_stub_byte_size);
4696  lldb::offset_t symbol_stub_offset = symbol_stub_index * 4;
4697  if (indirect_symbol_index_data.ValidOffsetForDataOfSize(
4698  symbol_stub_offset, 4)) {
4699  const uint32_t stub_sym_id =
4700  indirect_symbol_index_data.GetU32(&symbol_stub_offset);
4701  if (stub_sym_id & (INDIRECT_SYMBOL_ABS | INDIRECT_SYMBOL_LOCAL))
4702  continue;
4703 
4704  NListIndexToSymbolIndexMap::const_iterator index_pos =
4705  m_nlist_idx_to_sym_idx.find(stub_sym_id);
4706  Symbol *stub_symbol = nullptr;
4707  if (index_pos != end_index_pos) {
4708  // We have a remapping from the original nlist index to a
4709  // current symbol index, so just look this up by index
4710  stub_symbol = symtab.SymbolAtIndex(index_pos->second);
4711  } else {
4712  // We need to lookup a symbol using the original nlist symbol
4713  // index since this index is coming from the S_SYMBOL_STUBS
4714  stub_symbol = symtab.FindSymbolByID(stub_sym_id);
4715  }
4716 
4717  if (stub_symbol) {
4718  Address so_addr(symbol_stub_addr, section_list);
4719 
4720  if (stub_symbol->GetType() == eSymbolTypeUndefined) {
4721  // Change the external symbol into a trampoline that makes
4722  // sense These symbols were N_UNDF N_EXT, and are useless
4723  // to us, so we can re-use them so we don't have to make up
4724  // a synthetic symbol for no good reason.
4725  if (resolver_addresses.find(symbol_stub_addr) ==
4726  resolver_addresses.end())
4727  stub_symbol->SetType(eSymbolTypeTrampoline);
4728  else
4729  stub_symbol->SetType(eSymbolTypeResolver);
4730  stub_symbol->SetExternal(false);
4731  stub_symbol->GetAddressRef() = so_addr;
4732  stub_symbol->SetByteSize(symbol_stub_byte_size);
4733  } else {
4734  // Make a synthetic symbol to describe the trampoline stub
4735  Mangled stub_symbol_mangled_name(stub_symbol->GetMangled());
4736  if (sym_idx >= num_syms) {
4737  sym = symtab.Resize(++num_syms);
4738  stub_symbol = nullptr; // this pointer no longer valid
4739  }
4740  sym[sym_idx].SetID(synthetic_sym_id++);
4741  sym[sym_idx].GetMangled() = stub_symbol_mangled_name;
4742  if (resolver_addresses.find(symbol_stub_addr) ==
4743  resolver_addresses.end())
4744  sym[sym_idx].SetType(eSymbolTypeTrampoline);
4745  else
4746  sym[sym_idx].SetType(eSymbolTypeResolver);
4747  sym[sym_idx].SetIsSynthetic(true);
4748  sym[sym_idx].GetAddressRef() = so_addr;
4749  add_symbol_addr(so_addr.GetFileAddress());
4750  sym[sym_idx].SetByteSize(symbol_stub_byte_size);
4751  ++sym_idx;
4752  }
4753  } else {
4754  if (log)
4755  log->Warning("symbol stub referencing symbol table symbol "
4756  "%u that isn't in our minimal symbol table, "
4757  "fix this!!!",
4758  stub_sym_id);
4759  }
4760  }
4761  }
4762  }
4763  }
4764  }
4765  }
4766 
4767  if (!reexport_trie_entries.empty()) {
4768  for (const auto &e : reexport_trie_entries) {
4769  if (e.entry.import_name) {
4770  // Only add indirect symbols from the Trie entries if we didn't have
4771  // a N_INDR nlist entry for this already
4772  if (indirect_symbol_names.find(e.entry.name) ==
4773  indirect_symbol_names.end()) {
4774  // Make a synthetic symbol to describe re-exported symbol.
4775  if (sym_idx >= num_syms)
4776  sym = symtab.Resize(++num_syms);
4777  sym[sym_idx].SetID(synthetic_sym_id++);
4778  sym[sym_idx].GetMangled() = Mangled(e.entry.name);
4779  sym[sym_idx].SetType(eSymbolTypeReExported);
4780  sym[sym_idx].SetIsSynthetic(true);
4781  sym[sym_idx].SetReExportedSymbolName(e.entry.import_name);
4782  if (e.entry.other > 0 && e.entry.other <= dylib_files.GetSize()) {
4783  sym[sym_idx].SetReExportedSymbolSharedLibrary(
4784  dylib_files.GetFileSpecAtIndex(e.entry.other - 1));
4785  }
4786  ++sym_idx;
4787  }
4788  }
4789  }
4790  }
4791 }
4792 
// NOTE(review): the line carrying this routine's signature (listing line 4793)
// is absent from this extract. The body writes a description of this object
// file to a stream pointer `s` - presumably ObjectFileMachO::Dump; confirm
// against the full source.
4794  ModuleSP module_sp(GetModule());
4795  if (module_sp) {
// Hold the module's recursive mutex for the duration of the dump.
4796  std::lock_guard<std::recursive_mutex> guard(module_sp->GetMutex());
// Header line: this object's address, then a 64/32-bit label chosen from the
// Mach-O magic (either byte order).
4797  s->Printf("%p: ", static_cast<void *>(this));
4798  s->Indent();
4799  if (m_header.magic == MH_MAGIC_64 || m_header.magic == MH_CIGAM_64)
4800  s->PutCString("ObjectFileMachO64");
4801  else
4802  s->PutCString("ObjectFileMachO32");
4803 
// Opens a single quote around the file name; the quote is only closed by the
// "', triple" text emitted in the loop below, so it stays open when all_specs
// ends up empty.
4804  *s << ", file = '" << m_file;
4805  ModuleSpecList all_specs;
4806  ModuleSpec base_spec;
// NOTE(review): listing line 4807 is absent; the dangling arguments on the
// next line presumably end a call that fills all_specs - confirm.
4808  base_spec, all_specs);
// One "triple[i] = <triple>" entry per collected spec. `e` is non-zero whenever
// the loop body runs, so the "[i]" index is always printed.
4809  for (unsigned i = 0, e = all_specs.GetSize(); i != e; ++i) {
4810  *s << "', triple";
4811  if (e)
4812  s->Printf("[%d]", i);
4813  *s << " = ";
4814  *s << all_specs.GetModuleSpecRefAtIndex(i)
4815  .GetArchitecture()
4816  .GetTriple()
4817  .getTriple();
4818  }
4819  *s << "\n";
// Section list and symbol table are dumped only if they exist.
4820  SectionList *sections = GetSectionList();
4821  if (sections)
4822  sections->Dump(s->AsRawOstream(), s->GetIndentLevel(), nullptr, true,
4823  UINT32_MAX);
4824 
4825  if (m_symtab_up)
4826  m_symtab_up->Dump(s, nullptr, eSortOrderNone);
4827  }
4828 }
4829 
4830 UUID ObjectFileMachO::GetUUID(const llvm::MachO::mach_header &header,
4831  const lldb_private::DataExtractor &data,
4832  lldb::offset_t lc_offset) {
4833  uint32_t i;
4834  llvm::MachO::uuid_command load_cmd;
4835 
4836  lldb::offset_t offset = lc_offset;
4837  for (i = 0; i < header.ncmds; ++i) {
4838  const lldb::offset_t cmd_offset = offset;
4839  if (data.GetU32(&offset, &load_cmd, 2) == nullptr)
4840  break;
4841 
4842  if (load_cmd.cmd == LC_UUID) {
4843  const uint8_t *uuid_bytes = data.PeekData(offset, 16);
4844 
4845  if (uuid_bytes) {
4846  // OpenCL on Mac OS X uses the same UUID for each of its object files.
4847  // We pretend these object files have no UUID to prevent crashing.
4848 
4849  const uint8_t opencl_uuid[] = {0x8c, 0x8e, 0xb3, 0x9b, 0x3b, 0xa8,
4850  0x4b, 0x16, 0xb6, 0xa4, 0x27, 0x63,
4851  0xbb, 0x14, 0xf0, 0x0d};
4852 
4853  if (!memcmp(uuid_bytes, opencl_uuid, 16))
4854  return UUID();
4855 
4856  return UUID(uuid_bytes, 16);
4857  }
4858  return UUID();
4859  }
4860  offset = cmd_offset + load_cmd.cmdsize;
4861  }
4862  return UUID();
4863 }
4864 
4865 static llvm::StringRef GetOSName(uint32_t cmd) {
4866  switch (cmd) {
4867  case llvm::MachO::LC_VERSION_MIN_IPHONEOS:
4868  return llvm::Triple::getOSTypeName(llvm::Triple::IOS);
4869  case llvm::MachO::LC_VERSION_MIN_MACOSX:
4870  return llvm::Triple::getOSTypeName(llvm::Triple::MacOSX);
4871  case llvm::MachO::LC_VERSION_MIN_TVOS:
4872  return llvm::Triple::getOSTypeName(llvm::Triple::TvOS);
4873  case llvm::MachO::LC_VERSION_MIN_WATCHOS:
4874  return llvm::Triple::getOSTypeName(llvm::Triple::WatchOS);
4875  default:
4876  llvm_unreachable("unexpected LC_VERSION load command");
4877  }
4878 }
4879 
4880 namespace {
4881 struct OSEnv {
4882  llvm::StringRef os_type;
4883  llvm::StringRef environment;
4884  OSEnv(uint32_t cmd) {
4885  switch (cmd) {
4886  case llvm::MachO::PLATFORM_MACOS:
4887  os_type = llvm::Triple::getOSTypeName(llvm::Triple::MacOSX);
4888  return;
4889  case llvm::MachO::PLATFORM_IOS:
4890  os_type = llvm::Triple::getOSTypeName(llvm::Triple::IOS);
4891  return;
4892  case llvm::MachO::PLATFORM_TVOS:
4893  os_type = llvm::Triple::getOSTypeName(llvm::Triple::TvOS);
4894  return;
4895  case llvm::MachO::PLATFORM_WATCHOS:
4896  os_type = llvm::Triple::getOSTypeName(llvm::Triple::WatchOS);
4897  return;
4898  // TODO: add BridgeOS & DriverKit once in llvm/lib/Support/Triple.cpp
4899  // NEED_BRIDGEOS_TRIPLE
4900  // case llvm::MachO::PLATFORM_BRIDGEOS:
4901  // os_type = llvm::Triple::getOSTypeName(llvm::Triple::BridgeOS);
4902  // return;
4903  // case llvm::MachO::PLATFORM_DRIVERKIT:
4904  // os_type = llvm::Triple::getOSTypeName(llvm::Triple::DriverKit);
4905  // return;
4906  case llvm::MachO::PLATFORM_MACCATALYST:
4907  os_type = llvm::Triple::getOSTypeName(llvm::Triple::IOS);
4908  environment = llvm::Triple::getEnvironmentTypeName(llvm::Triple::MacABI);
4909  return;
4910  case llvm::MachO::PLATFORM_IOSSIMULATOR:
4911  os_type = llvm::Triple::getOSTypeName(llvm::Triple::IOS);
4912  environment =
4913  llvm::Triple::getEnvironmentTypeName(llvm::Triple::Simulator);
4914  return;
4915  case llvm::MachO::PLATFORM_TVOSSIMULATOR:
4916  os_type = llvm::Triple::getOSTypeName(llvm::Triple::TvOS);
4917  environment =
4918  llvm::Triple::getEnvironmentTypeName(llvm::Triple::Simulator);
4919  return;
4920  case llvm::MachO::PLATFORM_WATCHOSSIMULATOR:
4921  os_type = llvm::Triple::getOSTypeName(llvm::Triple::WatchOS);
4922  environment =
4923  llvm::Triple::getEnvironmentTypeName(llvm::Triple::Simulator);
4924  return;
4925  default: {
4926  Log *log(GetLog(LLDBLog::Symbols | LLDBLog::Process));
4927  LLDB_LOGF(log, "unsupported platform in LC_BUILD_VERSION");
4928  }
4929  }
4930  }
4931 };
4932 
// Unpacks a 32-bit packed version word: the upper 16 bits are the major
// version, the next 8 bits the minor version and the low 8 bits the patch
// version.
struct MinOS {
  uint32_t major_version;
  uint32_t minor_version;
  uint32_t patch_version;

  MinOS(uint32_t version) {
    major_version = version >> 16;
    minor_version = (version >> 8) & 0xffu;
    patch_version = version & 0xffu;
  }
};
4939 } // namespace
4940 
4941 void ObjectFileMachO::GetAllArchSpecs(const llvm::MachO::mach_header &header,
4942  const lldb_private::DataExtractor &data,
4943  lldb::offset_t lc_offset,
4944  ModuleSpec &base_spec,
4945  lldb_private::ModuleSpecList &all_specs) {
4946  auto &base_arch = base_spec.GetArchitecture();
4947  base_arch.SetArchitecture(eArchTypeMachO, header.cputype, header.cpusubtype);
4948  if (!base_arch.IsValid())
4949  return;
4950 
4951  bool found_any = false;
4952  auto add_triple = [&](const llvm::Triple &triple) {
4953  auto spec = base_spec;
4954  spec.GetArchitecture().GetTriple() = triple;
4955  if (spec.GetArchitecture().IsValid()) {
4956  spec.GetUUID() = ObjectFileMachO::GetUUID(header, data, lc_offset);
4957  all_specs.Append(spec);
4958  found_any = true;
4959  }
4960  };
4961 
4962  // Set OS to an unspecified unknown or a "*" so it can match any OS
4963  llvm::Triple base_triple = base_arch.GetTriple();
4964  base_triple.setOS(llvm::Triple::UnknownOS);
4965  base_triple.setOSName(llvm::StringRef());
4966 
4967  if (header.filetype == MH_PRELOAD) {
4968  if (header.cputype == CPU_TYPE_ARM) {
4969  // If this is a 32-bit arm binary, and it's a standalone binary, force
4970  // the Vendor to Apple so we don't accidentally pick up the generic
4971  // armv7 ABI at runtime. Apple's armv7 ABI always uses r7 for the
4972  // frame pointer register; most other armv7 ABIs use a combination of
4973  // r7 and r11.
4974  base_triple.setVendor(llvm::Triple::Apple);
4975  } else {
4976  // Set vendor to an unspecified unknown or a "*" so it can match any
4977  // vendor This is required for correct behavior of EFI debugging on
4978  // x86_64
4979  base_triple.setVendor(llvm::Triple::UnknownVendor);
4980  base_triple.setVendorName(llvm::StringRef());
4981  }
4982  return add_triple(base_triple);
4983  }
4984 
4985  llvm::MachO::load_command load_cmd;
4986 
4987  // See if there is an LC_VERSION_MIN_* load command that can give
4988  // us the OS type.
4989  lldb::offset_t offset = lc_offset;
4990  for (uint32_t i = 0; i < header.ncmds; ++i) {
4991  const lldb::offset_t cmd_offset = offset;
4992  if (data.GetU32(&offset, &load_cmd, 2) == nullptr)
4993  break;
4994 
4995  llvm::MachO::version_min_command version_min;
4996  switch (load_cmd.cmd) {
4997  case llvm::MachO::LC_VERSION_MIN_MACOSX:
4998  case llvm::MachO::LC_VERSION_MIN_IPHONEOS:
4999  case llvm::MachO::LC_VERSION_MIN_TVOS:
5000  case llvm::MachO::LC_VERSION_MIN_WATCHOS: {
5001  if (load_cmd.cmdsize != sizeof(version_min))
5002  break;
5003  if (data.ExtractBytes(cmd_offset, sizeof(version_min),
5004  data.GetByteOrder(), &version_min) == 0)
5005  break;
5006  MinOS min_os(version_min.version);
5007  llvm::SmallString<32> os_name;
5008  llvm::raw_svector_ostream os(os_name);
5009  os << GetOSName(load_cmd.cmd) << min_os.major_version << '.'
5010  << min_os.minor_version << '.' << min_os.patch_version;
5011 
5012  auto triple = base_triple;
5013  triple.setOSName(os.str());
5014 
5015  // Disambiguate legacy simulator platforms.
5016  if (load_cmd.cmd != llvm::MachO::LC_VERSION_MIN_MACOSX &&
5017  (base_triple.getArch() == llvm::Triple::x86_64 ||
5018  base_triple.getArch() == llvm::Triple::x86)) {
5019  // The combination of legacy LC_VERSION_MIN load command and
5020  // x86 architecture always indicates a simulator environment.
5021  // The combination of LC_VERSION_MIN and arm architecture only
5022  // appears for native binaries. Back-deploying simulator
5023  // binaries on Apple Silicon Macs use the modern unambigous
5024  // LC_BUILD_VERSION load commands; no special handling required.
5025  triple.setEnvironment(llvm::Triple::Simulator);
5026  }
5027  add_triple(triple);
5028  break;
5029  }
5030  default:
5031  break;
5032  }
5033 
5034  offset = cmd_offset + load_cmd.cmdsize;
5035  }
5036 
5037  // See if there are LC_BUILD_VERSION load commands that can give
5038  // us the OS type.
5039  offset = lc_offset;
5040  for (uint32_t i = 0; i < header.ncmds; ++i) {
5041  const lldb::offset_t cmd_offset = offset;
5042  if (data.GetU32(&offset, &load_cmd, 2) == nullptr)
5043  break;
5044 
5045  do {
5046  if (load_cmd.cmd == llvm::MachO::LC_BUILD_VERSION) {
5047  llvm::MachO::build_version_command build_version;
5048  if (load_cmd.cmdsize < sizeof(build_version)) {
5049  // Malformed load command.
5050  break;
5051  }
5052  if (data.ExtractBytes(cmd_offset, sizeof(build_version),
5053  data.GetByteOrder(), &build_version) == 0)
5054  break;
5055  MinOS min_os(build_version.minos);
5056  OSEnv os_env(build_version.platform);
5057  llvm::SmallString<16> os_name;
5058  llvm::raw_svector_ostream os(os_name);
5059  os << os_env.os_type << min_os.major_version << '.'
5060  << min_os.minor_version << '.' << min_os.patch_version;
5061  auto triple = base_triple;
5062  triple.setOSName(os.str());
5063  os_name.clear();
5064  if (!os_env.environment.empty())
5065  triple.setEnvironmentName(os_env.environment);
5066  add_triple(triple);
5067  }
5068  } while (false);
5069  offset = cmd_offset + load_cmd.cmdsize;
5070  }
5071 
5072  if (!found_any) {
5073  add_triple(base_triple);
5074  }
5075 }
5076 
// NOTE(review): the first line of this definition (listing line 5077, with the
// return type and function name) is absent from this extract. The visible
// part returns an ArchSpec chosen from the specs GetAllArchSpecs derives from
// `header`/`data`: the first one compatible with module_sp's architecture
// when module_sp is set, else the first spec, else a default-constructed
// value when there are none.
// In the visible lines `lc_offset` is not referenced; the load command offset
// passed on is recomputed from header.magic.
5078  ModuleSP module_sp, const llvm::MachO::mach_header &header,
5079  const lldb_private::DataExtractor &data, lldb::offset_t lc_offset) {
5080  ModuleSpecList all_specs;
5081  ModuleSpec base_spec;
5082  GetAllArchSpecs(header, data, MachHeaderSizeFromMagic(header.magic),
5083  base_spec, all_specs);
5084 
5085  // If the object file offers multiple alternative load commands,
5086  // pick the one that matches the module.
5087  if (module_sp) {
5088  const ArchSpec &module_arch = module_sp->GetArchitecture();
5089  for (unsigned i = 0, e = all_specs.GetSize(); i != e; ++i) {
// NOTE(review): listing line 5091 (the initializer of mach_arch) is absent;
// presumably it reads the architecture of the i-th entry of all_specs -
// confirm.
5090  ArchSpec mach_arch =
5092  if (module_arch.IsCompatibleMatch(mach_arch))
5093  return mach_arch;
5094  }
5095  }
5096 
5097  // Return the first arch we found.
5098  if (all_specs.GetSize() == 0)
5099  return {};
5100  return all_specs.GetModuleSpecRefAtIndex(0).GetArchitecture();
5101 }
5102 
// NOTE(review): the signature line (listing line 5103) is absent from this
// extract. The body returns the result of the three-argument GetUUID for this
// object's own header and data while holding the module mutex, or an empty
// UUID when the owning module is gone.
5104  ModuleSP module_sp(GetModule());
5105  if (module_sp) {
5106  std::lock_guard<std::recursive_mutex> guard(module_sp->GetMutex());
// NOTE(review): listing line 5107 is absent; it presumably declares `offset`
// as the position of the first load command - confirm.
5108  return GetUUID(m_header, m_data, offset);
5109  }
5110  return UUID();
5111 }
5112 
// NOTE(review): the signature line (listing line 5113) is absent from this
// extract. The body appends the files named by this image's dylib/dylinker
// load commands to a list called `files` and returns how many entries it
// newly added (`count`).
5114  uint32_t count = 0;
5115  ModuleSP module_sp(GetModule());
5116  if (module_sp) {
5117  std::lock_guard<std::recursive_mutex> guard(module_sp->GetMutex());
5118  llvm::MachO::load_command load_cmd;
// NOTE(review): listing line 5119 is absent; it presumably declares `offset`
// as the position of the first load command - confirm.
// rpath_paths            - raw LC_RPATH values
// rpath_relative_paths   - remainders of "@rpath..." install names
// at_exec_relative_paths - remainders of "@executable_path..." install names
5120  std::vector<std::string> rpath_paths;
5121  std::vector<std::string> rpath_relative_paths;
5122  std::vector<std::string> at_exec_relative_paths;
5123  uint32_t i;
// Pass 1: walk every load command and classify the path it carries.
5124  for (i = 0; i < m_header.ncmds; ++i) {
5125  const uint32_t cmd_offset = offset;
5126  if (m_data.GetU32(&offset, &load_cmd, 2) == nullptr)
5127  break;
5128 
5129  switch (load_cmd.cmd) {
5130  case LC_RPATH:
5131  case LC_LOAD_DYLIB:
5132  case LC_LOAD_WEAK_DYLIB:
5133  case LC_REEXPORT_DYLIB:
5134  case LC_LOAD_DYLINKER:
5135  case LC_LOADFVMLIB:
5136  case LC_LOAD_UPWARD_DYLIB: {
// The next 32-bit word is the path's offset relative to the start of the
// command.
5137  uint32_t name_offset = cmd_offset + m_data.GetU32(&offset);
5138  const char *path = m_data.PeekCStr(name_offset);
5139  if (path) {
5140  if (load_cmd.cmd == LC_RPATH)
5141  rpath_paths.push_back(path);
5142  else {
// '@'-prefixed names are deferred with their prefix stripped; a name with any
// other '@' prefix is not recorded at all. Plain paths go straight to `files`.
5143  if (path[0] == '@') {
5144  if (strncmp(path, "@rpath", strlen("@rpath")) == 0)
5145  rpath_relative_paths.push_back(path + strlen("@rpath"));
5146  else if (strncmp(path, "@executable_path",
5147  strlen("@executable_path")) == 0)
5148  at_exec_relative_paths.push_back(path +
5149  strlen("@executable_path"));
5150  } else {
5151  FileSpec file_spec(path);
5152  if (files.AppendIfUnique(file_spec))
5153  count++;
5154  }
5155  }
5156  }
5157  } break;
5158 
5159  default:
5160  break;
5161  }
5162  offset = cmd_offset + load_cmd.cmdsize;
5163  }
5164 
5165  FileSpec this_file_spec(m_file);
5166  FileSystem::Instance().Resolve(this_file_spec);
5167 
// Pass 2: expand "@rpath" names. Nothing happens here without at least one
// LC_RPATH entry.
5168  if (!rpath_paths.empty()) {
5169  // Fixup all LC_RPATH values to be absolute paths
// Both "@loader_path" and "@executable_path" prefixes are replaced with the
// directory of this object file.
5170  std::string loader_path("@loader_path");
5171  std::string executable_path("@executable_path");
5172  for (auto &rpath : rpath_paths) {
5173  if (llvm::StringRef(rpath).startswith(loader_path)) {
5174  rpath.erase(0, loader_path.size());
5175  rpath.insert(0, this_file_spec.GetDirectory().GetCString());
5176  } else if (llvm::StringRef(rpath).startswith(executable_path)) {
5177  rpath.erase(0, executable_path.size());
5178  rpath.insert(0, this_file_spec.GetDirectory().GetCString());
5179  }
5180  }
5181 
// For each "@rpath" name try the rpaths in order and stop at the first
// candidate that exists on disk and was not already in `files`.
5182  for (const auto &rpath_relative_path : rpath_relative_paths) {
5183  for (const auto &rpath : rpath_paths) {
5184  std::string path = rpath;
5185  path += rpath_relative_path;
5186  // It is OK to resolve this path because we must find a file on disk
5187  // for us to accept it anyway if it is rpath relative.
5188  FileSpec file_spec(path);
5189  FileSystem::Instance().Resolve(file_spec);
5190  if (FileSystem::Instance().Exists(file_spec) &&
5191  files.AppendIfUnique(file_spec)) {
5192  count++;
5193  break;
5194  }
5195  }
5196  }
5197  }
5198 
5199  // We may have @executable_paths but no RPATHS. Figure those out here.
5200  // Only do this if this object file is the executable. We have no way to
5201  // get back to the actual executable otherwise, so we won't get the right
5202  // path.
5203  if (!at_exec_relative_paths.empty() && CalculateType() == eTypeExecutable) {
5204  FileSpec exec_dir = this_file_spec.CopyByRemovingLastPathComponent();
5205  for (const auto &at_exec_relative_path : at_exec_relative_paths) {
5206  FileSpec file_spec =
5207  exec_dir.CopyByAppendingPathComponent(at_exec_relative_path);
5208  if (FileSystem::Instance().Exists(file_spec) &&
5209  files.AppendIfUnique(file_spec))
5210  count++;
5211  }
5212  }
5213  }
5214  return count;
5215 }
5216 
// NOTE(review): the signature line (listing line 5217) is absent from this
// extract. The body computes and returns m_entry_point_address: it looks for
// a start address in LC_UNIXTHREAD/LC_THREAD or LC_MAIN, then (for a dynamic
// loader) the "_dyld_start" symbol, and finally a "start" symbol.
5218  // If the object file is not an executable it can't hold the entry point.
5219  // m_entry_point_address is initialized to an invalid address, so we can just
5220  // return that. If m_entry_point_address is valid it means we've found it
5221  // already, so return the cached value.
5222 
// NOTE(review): listing line 5224 (the second operand of this `||` and the
// opening brace) is absent; per the comment above it presumably tests whether
// m_entry_point_address is already valid - confirm.
5223  if ((!IsExecutable() && !IsDynamicLoader()) ||
5225  return m_entry_point_address;
5226  }
5227 
5228  // Otherwise, look for the UnixThread or Thread command. The data for the
5229  // Thread command is given in /usr/include/mach-o.h, but it is basically:
5230  //
5231  // uint32_t flavor - this is the flavor argument you would pass to
5232  // thread_get_state
5233  // uint32_t count - this is the count of longs in the thread state data
5234  // struct XXX_thread_state state - this is the structure from
5235  // <machine/thread_status.h> corresponding to the flavor.
5236  // <repeat this trio>
5237  //
5238  // So we just keep reading the various register flavors till we find the GPR
5239  // one, then read the PC out of there.
5240  // FIXME: We will need to have a "RegisterContext data provider" class at some
5241  // point that can get all the registers
5242  // out of data in this form & attach them to a given thread. That should
5243  // underlie the MacOS X User process plugin, and we'll also need it for the
5244  // MacOS X Core File process plugin. When we have that we can also use it
5245  // here.
5246  //
5247  // For now we hard-code the offsets and flavors we need:
5248  //
5249  //
5250 
5251  ModuleSP module_sp(GetModule());
5252  if (module_sp) {
5253  std::lock_guard<std::recursive_mutex> guard(module_sp->GetMutex());
5254  llvm::MachO::load_command load_cmd;
// NOTE(review): listing line 5255 is absent; it presumably declares `offset`
// as the position of the first load command - confirm.
5256  uint32_t i;
5257  lldb::addr_t start_address = LLDB_INVALID_ADDRESS;
// `done` is set as soon as a start address has been read; it ends both the
// per-flavor loop and the load command loop.
5258  bool done = false;
5259 
5260  for (i = 0; i < m_header.ncmds; ++i) {
5261  const lldb::offset_t cmd_offset = offset;
5262  if (m_data.GetU32(&offset, &load_cmd, 2) == nullptr)
5263  break;
5264 
5265  switch (load_cmd.cmd) {
5266  case LC_UNIXTHREAD:
5267  case LC_THREAD: {
// Walk the {flavor, count, state} records until the end of this command.
5268  while (offset < cmd_offset + load_cmd.cmdsize) {
5269  uint32_t flavor = m_data.GetU32(&offset);
5270  uint32_t count = m_data.GetU32(&offset);
5271  if (count == 0) {
5272  // We've gotten off somehow, log and exit;
5273  return m_entry_point_address;
5274  }
5275 
5276  switch (m_header.cputype) {
5277  case llvm::MachO::CPU_TYPE_ARM:
5278  if (flavor == 1 ||
5279  flavor == 9) // ARM_THREAD_STATE/ARM_THREAD_STATE32
5280  // from mach/arm/thread_status.h
5281  {
5282  offset += 60; // This is the offset of pc in the GPR thread state
5283  // data structure.
5284  start_address = m_data.GetU32(&offset);
5285  done = true;
5286  }
5287  break;
// NOTE(review): listing lines 5288-5289 are absent; they presumably hold the
// `case` label(s) for the 64-bit ARM CPU type(s) that the flavor-6 check
// below belongs to - confirm.
5290  if (flavor == 6) // ARM_THREAD_STATE64 from mach/arm/thread_status.h
5291  {
5292  offset += 256; // This is the offset of pc in the GPR thread state
5293  // data structure.
5294  start_address = m_data.GetU64(&offset);
5295  done = true;
5296  }
5297  break;
5298  case llvm::MachO::CPU_TYPE_I386:
5299  if (flavor ==
5300  1) // x86_THREAD_STATE32 from mach/i386/thread_status.h
5301  {
5302  offset += 40; // This is the offset of eip in the GPR thread state
5303  // data structure.
5304  start_address = m_data.GetU32(&offset);
5305  done = true;
5306  }
5307  break;
5308  case llvm::MachO::CPU_TYPE_X86_64:
5309  if (flavor ==
5310  4) // x86_THREAD_STATE64 from mach/i386/thread_status.h
5311  {
5312  offset += 16 * 8; // This is the offset of rip in the GPR thread
5313  // state data structure.
5314  start_address = m_data.GetU64(&offset);
5315  done = true;
5316  }
5317  break;
5318  default:
5319  return m_entry_point_address;
5320  }
5321  // Haven't found the GPR flavor yet, skip over the data for this
5322  // flavor:
5323  if (done)
5324  break;
// Skip this record's state data: `count` units of 4 bytes each.
5325  offset += count * 4;
5326  }
5327  } break;
5328  case LC_MAIN: {
// LC_MAIN: the first 64-bit field is added to the file address of the section
// named "__TEXT"; the command is ignored if that section cannot be found.
5329  ConstString text_segment_name("__TEXT");
5330  uint64_t entryoffset = m_data.GetU64(&offset);
5331  SectionSP text_segment_sp =
5332  GetSectionList()->FindSectionByName(text_segment_name);
5333  if (text_segment_sp) {
5334  done = true;
5335  start_address = text_segment_sp->GetFileAddress() + entryoffset;
5336  }
5337  } break;
5338 
5339  default:
5340  break;
5341  }
5342  if (done)
5343  break;
5344 
5345  // Go to the next load command:
5346  offset = cmd_offset + load_cmd.cmdsize;
5347  }
5348 
// No start address from the load commands and this is a dynamic loader: use
// the file address of the "_dyld_start" code symbol when it exists.
5349  if (start_address == LLDB_INVALID_ADDRESS && IsDynamicLoader()) {
5350  if (GetSymtab()) {
5351  Symbol *dyld_start_sym = GetSymtab()->FindFirstSymbolWithNameAndType(
5352  ConstString("_dyld_start"), SymbolType::eSymbolTypeCode,
5353  Symtab::eDebugAny, Symtab::eVisibilityAny);
5354  if (dyld_start_sym && dyld_start_sym->GetAddress().IsValid()) {
5355  start_address = dyld_start_sym->GetAddress().GetFileAddress();
5356  }
5357  }
5358  }
5359 
5360  if (start_address != LLDB_INVALID_ADDRESS) {
5361  // We got the start address from the load commands, so now resolve that
5362  // address in the sections of this ObjectFile:
// NOTE(review): listing lines 5363 and 5365 are absent; presumably 5363
// starts a call that resolves start_address against GetSectionList() into
// m_entry_point_address and 5365 handles failure - confirm.
5364  start_address, GetSectionList())) {
5366  }
5367  } else {
5368  // We couldn't read the UnixThread load command - maybe it wasn't there.
5369  // As a fallback look for the "start" symbol in the main executable.
5370 
// This inner module_sp shadows the one declared at the top of the enclosing
// block.
5371  ModuleSP module_sp(GetModule());
5372 
5373  if (module_sp) {
5374  SymbolContextList contexts;
5375  SymbolContext context;
5376  module_sp->FindSymbolsWithNameAndType(ConstString("start"),
5377  eSymbolTypeCode, contexts);
5378  if (contexts.GetSize()) {
// NOTE(review): listing line 5380 (the statement guarded by this `if`) is
// absent; presumably it stores the first match's address in
// m_entry_point_address - confirm.
5379  if (contexts.GetContextAtIndex(0, context))
5381  }
5382  }
5383  }
5384  }
5385 
5386  return m_entry_point_address;
5387 }
5388 
// NOTE(review): the signature line (listing line 5389) is absent from this
// extract. The body returns an Address that points at offset 0 of the section
// whose name is given by GetSegmentNameTEXT(); the Address is left
// default-constructed when there is no section list or no such section.
5390  lldb_private::Address header_addr;
5391  SectionList *section_list = GetSectionList();
5392  if (section_list) {
5393  SectionSP text_segment_sp(
5394  section_list->FindSectionByName(GetSegmentNameTEXT()));
5395  if (text_segment_sp) {
5396  header_addr.SetSection(text_segment_sp);
5397  header_addr.SetOffset(0);
5398  }
5399  }
5400  return header_addr;
5401 }
5402 
// NOTE(review): several lines of this routine are absent from this extract:
// the signature (listing line 5403), lines 5407-5409 (which open the extra
// scope closed at line 5424 and presumably declare `offset`), and line 5426
// (presumably the return statement). The visible part records one file range
// per LC_THREAD load command in m_thread_context_offsets.
5404  ModuleSP module_sp(GetModule());
5405  if (module_sp) {
5406  std::lock_guard<std::recursive_mutex> guard(module_sp->GetMutex());
5410  FileRangeArray::Entry file_range;
5411  llvm::MachO::thread_command thread_cmd;
5412  for (uint32_t i = 0; i < m_header.ncmds; ++i) {
5413  const uint32_t cmd_offset = offset;
// Only the first two 32-bit words of the command are read into thread_cmd.
5414  if (m_data.GetU32(&offset, &thread_cmd, 2) == nullptr)
5415  break;
5416 
5417  if (thread_cmd.cmd == LC_THREAD) {
// The range starts at the current read offset and spans cmdsize - 8 bytes,
// i.e. the command minus the two words just read.
5418  file_range.SetRangeBase(offset);
5419  file_range.SetByteSize(thread_cmd.cmdsize - 8);
5420  m_thread_context_offsets.Append(file_range);
5421  }
5422  offset = cmd_offset + thread_cmd.cmdsize;
5423  }
5424  }
5425  }
5427 }
5428 
// NOTE(review): the signature line (listing line 5429) is absent from this
// extract. The body returns a std::string: the payload of the first version-1
// LC_NOTE "kern ver str" whose buffer can be allocated, otherwise the contents
// of the last readable LC_IDENT command, otherwise an empty string.
5430  std::string result;
5431  ModuleSP module_sp(GetModule());
5432  if (module_sp) {
5433  std::lock_guard<std::recursive_mutex> guard(module_sp->GetMutex());
5434 
5435  // First, look over the load commands for an LC_NOTE load command with
5436  // data_owner string "kern ver str" & use that if found.
// NOTE(review): listing line 5437 is absent; it presumably declares `offset`
// as the position of the first load command - confirm.
5438  for (uint32_t i = 0; i < m_header.ncmds; ++i) {
5439  const uint32_t cmd_offset = offset;
5440  llvm::MachO::load_command lc = {};
5441  if (m_data.GetU32(&offset, &lc.cmd, 2) == nullptr)
5442  break;
5443  if (lc.cmd == LC_NOTE) {
// The owner name is a 16-byte field; the 17th byte forces NUL termination.
// The return value of CopyData is not checked here.
5444  char data_owner[17];
5445  m_data.CopyData(offset, 16, data_owner);
5446  data_owner[16] = '\0';
5447  offset += 16;
5448  uint64_t fileoff = m_data.GetU64_unchecked(&offset);
5449  uint64_t size = m_data.GetU64_unchecked(&offset);
5450 
5451  // "kern ver str" has a uint32_t version and then a nul terminated
5452  // c-string.
5453  if (strcmp("kern ver str", data_owner) == 0) {
5454  offset = fileoff;
5455  uint32_t version;
5456  if (m_data.GetU32(&offset, &version, 1) != nullptr) {
5457  if (version == 1) {
// NOTE(review): `size` comes from the file. If it is smaller than
// sizeof(uint32_t) the subtraction wraps, and if strsize is 0 the write to
// buf[strsize - 1] lands before the buffer. The CopyData result is not
// checked, and the inner `if (buf)` is always true at that point.
5458  uint32_t strsize = size - sizeof(uint32_t);
5459  char *buf = (char *)malloc(strsize);
5460  if (buf) {
5461  m_data.CopyData(offset, strsize, buf);
5462  buf[strsize - 1] = '\0';
5463  result = buf;
5464  if (buf)
5465  free(buf);
5466  return result;
5467  }
5468  }
5469  }
5470  }
5471  }
5472  offset = cmd_offset + lc.cmdsize;
5473  }
5474 
5475  // Second, make a pass over the load commands looking for an obsolete
5476  // LC_IDENT load command.
5477  offset = MachHeaderSizeFromMagic(m_header.magic);
5478  for (uint32_t i = 0; i < m_header.ncmds; ++i) {
5479  const uint32_t cmd_offset = offset;
5480  llvm::MachO::ident_command ident_command;
5481  if (m_data.GetU32(&offset, &ident_command, 2) == nullptr)
5482  break;
// The loop does not stop at the first match, so a later LC_IDENT overwrites
// `result`. The string is only taken when the full cmdsize bytes were copied,
// and the last copied byte is overwritten with NUL.
5483  if (ident_command.cmd == LC_IDENT && ident_command.cmdsize != 0) {
5484  char *buf = (char *)malloc(ident_command.cmdsize);
5485  if (buf != nullptr && m_data.CopyData(offset, ident_command.cmdsize,
5486  buf) == ident_command.cmdsize) {
5487  buf[ident_command.cmdsize - 1] = '\0';
5488  result = buf;
5489  }
5490  if (buf)
5491  free(buf);
5492  }
5493  offset = cmd_offset + ident_command.cmdsize;
5494  }
5495  }
5496  return result;
5497 }
5498 
// NOTE(review): the signature line (listing line 5499) is absent from this
// extract. The body returns an addr_t mask derived from an LC_NOTE
// "addrable bits" entry; the mask stays 0 when no version-3 entry with a
// non-zero bit count is found.
5500  addr_t mask = 0;
5501  ModuleSP module_sp(GetModule());
5502  if (module_sp) {
5503  std::lock_guard<std::recursive_mutex> guard(module_sp->GetMutex());
// NOTE(review): listing line 5504 is absent; it presumably declares `offset`
// as the position of the first load command - confirm.
5505  for (uint32_t i = 0; i < m_header.ncmds; ++i) {
5506  const uint32_t cmd_offset = offset;
5507  llvm::MachO::load_command lc = {};
5508  if (m_data.GetU32(&offset, &lc.cmd, 2) == nullptr)
5509  break;
5510  if (lc.cmd == LC_NOTE) {
// The owner name is a 16-byte field; the 17th byte forces NUL termination.
5511  char data_owner[17];
5512  m_data.CopyData(offset, 16, data_owner);
5513  data_owner[16] = '\0';
5514  offset += 16;
5515  uint64_t fileoff = m_data.GetU64_unchecked(&offset);
5516 
5517  // "addrable bits" has a uint32_t version and a uint32_t
5518  // number of bits used in addressing.
5519  if (strcmp("addrable bits", data_owner) == 0) {
5520  offset = fileoff;
5521  uint32_t version;
5522  if (m_data.GetU32(&offset, &version, 1) != nullptr) {
// Only payload version 3 is interpreted; the scan ends at the first such
// entry even when its bit count is 0.
5523  if (version == 3) {
5524  uint32_t num_addr_bits = m_data.GetU32_unchecked(&offset);
5525  if (num_addr_bits != 0) {
// Sets every bit at position num_addr_bits and above.
// NOTE(review): num_addr_bits is read from the file; a value of 64 or more
// makes this shift undefined.
5526  mask = ~((1ULL << num_addr_bits) - 1);
5527  }
5528  break;
5529  }
5530  }
5531  }
5532  }
5533  offset = cmd_offset + lc.cmdsize;
5534  }
5535  }
5536  return mask;
5537 }
5538 
// Looks for an LC_NOTE load command owned by "main bin spec" and decodes its
// payload into the output parameters.
//
// On success returns true with:
//   value           - the payload's address field, or its slide field when the
//                     address is LLDB_INVALID_ADDRESS and a slide is present
//   value_is_offset - true only when `value` holds the slide
//   uuid            - built from the payload's sizeof(uuid_t)-byte uuid field
//   type            - mapped from the payload's type field (0..3)
//
// Returns false when there is no module, no usable note is found, or any
// required field cannot be read.
//
// NOTE(review): the first line of the declaration (return type, name and the
// parameter that declares `value`) and the line that declares and initializes
// `offset` are absent from this listing -- confirm against the real source.
5540  bool &value_is_offset,
5541  UUID &uuid,
5542  ObjectFile::BinaryType &type) {
// Reset outputs up front so the failure paths leave them in a known state.
// NOTE(review): `type` is not reset here and the switch below has no default,
// so it keeps the caller's value when the payload type is outside 0..3.
5543  value = LLDB_INVALID_ADDRESS;
5544  value_is_offset = false;
5545  uuid.Clear();
5546  uint32_t log2_pagesize = 0; // not currently passed up to caller
5547  uint32_t platform = 0; // not currently passed up to caller
5548  ModuleSP module_sp(GetModule());
5549  if (module_sp) {
// Hold the module's recursive mutex for the whole scan.
5550  std::lock_guard<std::recursive_mutex> guard(module_sp->GetMutex());
5552  for (uint32_t i = 0; i < m_header.ncmds; ++i) {
// Start of this load command; used at the bottom of the loop to step to the
// next one via lc.cmdsize.
5553  const uint32_t cmd_offset = offset;
5554  llvm::MachO::load_command lc = {};
// Reads two consecutive uint32_t values starting at &lc.cmd (the command and
// its size); stop scanning if they cannot be read.
5555  if (m_data.GetU32(&offset, &lc.cmd, 2) == nullptr)
5556  break;
5557  if (lc.cmd == LC_NOTE) {
// Zero-fill so the 16-byte owner name is always terminated for strcmp.
5558  char data_owner[17];
5559  memset(data_owner, 0, sizeof(data_owner));
5560  m_data.CopyData(offset, 16, data_owner);
// CopyData takes the offset by value, so advance past the name manually.
5561  offset += 16;
// Payload location and length as recorded in the note.
5562  uint64_t fileoff = m_data.GetU64_unchecked(&offset);
5563  uint64_t size = m_data.GetU64_unchecked(&offset);
5564 
5565  // struct main_bin_spec
5566  // {
5567  // uint32_t version; // currently 2
5568  // uint32_t type; // 0 == unspecified, 1 == kernel,
5569  // // 2 == user process,
5570  // // 3 == standalone binary
5571  // uint64_t address; // UINT64_MAX if address not specified
5572  // uint64_t slide; // slide, UINT64_MAX if unspecified
5573  // // 0 if no slide needs to be applied to
5574  // // file address
5575  // uuid_t uuid; // all zero's if uuid not specified
5576  // uint32_t log2_pagesize; // process page size in log base 2,
5577  // // e.g. 4k pages are 12.
5578  // // 0 for unspecified
5579  // uint32_t platform; // The Mach-O platform for this corefile.
5580  // // 0 for unspecified.
5581  // // The values are defined in
5582  // // <mach-o/loader.h>, PLATFORM_*.
5583  // } __attribute((packed));
5584 
// The layout described next is the older version 1 form: it has no slide or
// platform field, which matches the `version > 1` guards on those two reads
// in the code below.
5585  // "main bin spec" (main binary specification) data payload is
5586  // formatted:
5587  // uint32_t version [currently 1]
5588  // uint32_t type [0 == unspecified, 1 == kernel,
5589  // 2 == user process, 3 == firmware ]
5590  // uint64_t address [ UINT64_MAX if address not specified ]
5591  // uuid_t uuid [ all zero's if uuid not specified ]
5592  // uint32_t log2_pagesize [ process page size in log base
5593  // 2, e.g. 4k pages are 12.
5594  // 0 for unspecified ]
5595  // uint32_t unused [ for alignment ]
5596 
// Only decode notes with the expected owner and at least 32 bytes of payload.
5597  if (strcmp("main bin spec", data_owner) == 0 && size >= 32) {
5598  offset = fileoff;
5599  uint32_t version;
// Versions above 2 are skipped; the scan continues with the next command.
5600  if (m_data.GetU32(&offset, &version, 1) != nullptr && version <= 2) {
5601  uint32_t binspec_type = 0;
5602  uuid_t raw_uuid;
5603  memset(raw_uuid, 0, sizeof(uuid_t));
5604 
// From here on a failed read aborts the whole lookup instead of moving on to
// the next load command.
5605  if (!m_data.GetU32(&offset, &binspec_type, 1))
5606  return false;
5607  if (!m_data.GetU64(&offset, &value, 1))
5608  return false;
// The slide field exists only in payloads newer than version 1.
5609  uint64_t slide = LLDB_INVALID_ADDRESS;
5610  if (version > 1 && !m_data.GetU64(&offset, &slide, 1))
5611  return false;
// With no address but a slide present, report the slide and flag it as an
// offset.
5612  if (value == LLDB_INVALID_ADDRESS &&
5613  slide != LLDB_INVALID_ADDRESS) {
5614  value = slide;
5615  value_is_offset = true;
5616  }
5617 
// NOTE(review): CopyData takes `offset` by value and nothing advances it past
// the uuid afterwards, so the two GetU32 reads at the end of this branch
// appear to start at the uuid's first bytes rather than after it. Both results
// land in locals that are not passed up to the caller, so the returned values
// are unaffected -- confirm before relying on log2_pagesize or platform.
5618  if (m_data.CopyData(offset, sizeof(uuid_t), raw_uuid) != 0) {
5619  uuid = UUID(raw_uuid, sizeof(uuid_t));
5620  // convert the "main bin spec" type into our
5621  // ObjectFile::BinaryType enum
5622  switch (binspec_type) {
5623  case 0:
5624  type = eBinaryTypeUnknown;
5625  break;
5626  case 1:
5627  type = eBinaryTypeKernel;
5628  break;
5629  case 2:
5630  type = eBinaryTypeUser;
5631  break;
5632  case 3:
5633  type = eBinaryTypeStandalone;
5634  break;
5635  }
5636  if (!m_data.GetU32(&offset, &log2_pagesize, 1))
5637  return false;
5638  if (version > 1 && !m_data.GetU32(&offset, &platform, 1))
5639  return false;
5640  return true;
5641  }
5642  }
5643  }
5644  }
// Step to the next load command using the size recorded in this one.
5645  offset = cmd_offset + lc.cmdsize;
5646  }
5647  }
5648  return false;
5649 }
5650 
// Builds a register context for `thread` from a thread-context byte range of
// this file, choosing the implementation from m_header.cputype.
// Returns an empty RegisterContextSP when there is no module, when no file
// range is available, or when the CPU type matches none of the cases.
//
// NOTE(review): several lines of this function are absent from this listing:
// the line carrying the function name and its first parameter, the lines
// right after the lock is taken, the initializer of
// `thread_context_file_range`, and the case label(s) that precede the arm64
// branch of the switch. Confirm the details against the real source.
5651 lldb::RegisterContextSP
5653  lldb_private::Thread &thread) {
5654  lldb::RegisterContextSP reg_ctx_sp;
5655 
5656  ModuleSP module_sp(GetModule());
5657  if (module_sp) {
// Hold the module's recursive mutex while the thread context is looked up
// and decoded.
5658  std::lock_guard<std::recursive_mutex> guard(module_sp->GetMutex());
5661 
5662  const FileRangeArray::Entry *thread_context_file_range =
5664  if (thread_context_file_range) {
5665 
// Constructed from m_data with the range's base and byte size, so the
// register context only sees this thread's bytes.
5666  DataExtractor data(m_data, thread_context_file_range->GetRangeBase(),
5667  thread_context_file_range->GetByteSize());
5668 
// One register-context class per CPU type; no default case is visible, so an
// unmatched cputype leaves reg_ctx_sp empty.
5669  switch (m_header.cputype) {
5672  reg_ctx_sp =
5673  std::make_shared<RegisterContextDarwin_arm64_Mach>(thread, data);
5674  break;
5675 
5676  case llvm::MachO::CPU_TYPE_ARM:
5677  reg_ctx_sp =
5678  std::make_shared<RegisterContextDarwin_arm_Mach>(thread, data);
5679  break;
5680 
5681  case llvm::MachO::CPU_TYPE_I386:
5682  reg_ctx_sp =
5683  std::make_shared<RegisterContextDarwin_i386_Mach>(thread, data);
5684  break;
5685 
5686  case llvm::MachO::CPU_TYPE_X86_64:
5687  reg_ctx_sp =
5688  std::make_shared<RegisterContextDarwin_x86_64_Mach>(thread, data);
5689  break;
5690  }
5691  }
5692  }
5693  return reg_ctx_sp;
5694 }
5695 
5697  switch (m_header.filetype) {
5698  case MH_OBJECT: // 0x1u
5699  if (GetAddressByteSize() == 4) {
5700  // 32 bit kexts are just object files, but they do have a valid
5701  // UUID load command.
5702  if (GetUUID()) {
5703  // this checking for the UUID load command is not enough we could
5704  // eventually look for the symbol named "OSKextGetCurrentIdentifier" as
5705  // this is required of kexts
5706  if (m_strata == eStrataInvalid)