LLDB  mainline
ObjectFileMachO.cpp
Go to the documentation of this file.
1 //===-- ObjectFileMachO.cpp -----------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "llvm/ADT/StringRef.h"
10 
15 #include "lldb/Core/Debugger.h"
16 #include "lldb/Core/FileSpecList.h"
17 #include "lldb/Core/Module.h"
18 #include "lldb/Core/ModuleSpec.h"
20 #include "lldb/Core/Progress.h"
21 #include "lldb/Core/Section.h"
22 #include "lldb/Core/StreamFile.h"
23 #include "lldb/Host/Host.h"
25 #include "lldb/Symbol/ObjectFile.h"
28 #include "lldb/Target/Platform.h"
29 #include "lldb/Target/Process.h"
31 #include "lldb/Target/Target.h"
32 #include "lldb/Target/Thread.h"
33 #include "lldb/Target/ThreadList.h"
34 #include "lldb/Utility/ArchSpec.h"
36 #include "lldb/Utility/FileSpec.h"
37 #include "lldb/Utility/Log.h"
38 #include "lldb/Utility/RangeMap.h"
40 #include "lldb/Utility/Status.h"
42 #include "lldb/Utility/Timer.h"
43 #include "lldb/Utility/UUID.h"
44 
45 #include "lldb/Host/SafeMachO.h"
46 
47 #include "llvm/Support/FormatVariadic.h"
48 #include "llvm/Support/MemoryBuffer.h"
49 
50 #include "ObjectFileMachO.h"
51 
52 #if defined(__APPLE__)
53 #include <TargetConditionals.h>
54 // GetLLDBSharedCacheUUID() needs to call dlsym()
55 #include <dlfcn.h>
56 #endif
57 
58 #ifndef __APPLE__
60 #else
61 #include <uuid/uuid.h>
62 #endif
63 
64 #include <memory>
65 
66 #define THUMB_ADDRESS_BIT_MASK 0xfffffffffffffffeull
67 using namespace lldb;
68 using namespace lldb_private;
69 using namespace llvm::MachO;
70 
72 
73 // Some structure definitions needed for parsing the dyld shared cache files
74 // found on iOS devices.
75 
77  char magic[16]; // e.g. "dyld_v0 i386", "dyld_v1 armv7", etc.
78  uint32_t mappingOffset; // file offset to first dyld_cache_mapping_info
79  uint32_t mappingCount; // number of dyld_cache_mapping_info entries
82  uint64_t dyldBaseAddress;
85  uint64_t slideInfoOffset;
86  uint64_t slideInfoSize;
88  uint64_t localSymbolsSize;
89  uint8_t uuid[16]; // v1 and above, also recorded in dyld_all_image_infos v13
90  // and later
91 };
92 
94  uint64_t address;
95  uint64_t size;
96  uint64_t fileOffset;
99 };
100 
108 };
113 };
114 
115 static void PrintRegisterValue(RegisterContext *reg_ctx, const char *name,
116  const char *alt_name, size_t reg_byte_size,
117  Stream &data) {
118  const RegisterInfo *reg_info = reg_ctx->GetRegisterInfoByName(name);
119  if (reg_info == nullptr)
120  reg_info = reg_ctx->GetRegisterInfoByName(alt_name);
121  if (reg_info) {
122  lldb_private::RegisterValue reg_value;
123  if (reg_ctx->ReadRegister(reg_info, reg_value)) {
124  if (reg_info->byte_size >= reg_byte_size)
125  data.Write(reg_value.GetBytes(), reg_byte_size);
126  else {
127  data.Write(reg_value.GetBytes(), reg_info->byte_size);
128  for (size_t i = 0, n = reg_byte_size - reg_info->byte_size; i < n; ++i)
129  data.PutChar(0);
130  }
131  return;
132  }
133  }
134  // Just write zeros if all else fails
135  for (size_t i = 0; i < reg_byte_size; ++i)
136  data.PutChar(0);
137 }
138 
140 public:
142  const DataExtractor &data)
143  : RegisterContextDarwin_x86_64(thread, 0) {
144  SetRegisterDataFrom_LC_THREAD(data);
145  }
146 
147  void InvalidateAllRegisters() override {
148  // Do nothing... registers are always valid...
149  }
150 
152  lldb::offset_t offset = 0;
153  SetError(GPRRegSet, Read, -1);
154  SetError(FPURegSet, Read, -1);
155  SetError(EXCRegSet, Read, -1);
156  bool done = false;
157 
158  while (!done) {
159  int flavor = data.GetU32(&offset);
160  if (flavor == 0)
161  done = true;
162  else {
163  uint32_t i;
164  uint32_t count = data.GetU32(&offset);
165  switch (flavor) {
166  case GPRRegSet:
167  for (i = 0; i < count; ++i)
168  (&gpr.rax)[i] = data.GetU64(&offset);
169  SetError(GPRRegSet, Read, 0);
170  done = true;
171 
172  break;
173  case FPURegSet:
174  // TODO: fill in FPU regs....
175  // SetError (FPURegSet, Read, -1);
176  done = true;
177 
178  break;
179  case EXCRegSet:
180  exc.trapno = data.GetU32(&offset);
181  exc.err = data.GetU32(&offset);
182  exc.faultvaddr = data.GetU64(&offset);
183  SetError(EXCRegSet, Read, 0);
184  done = true;
185  break;
186  case 7:
187  case 8:
188  case 9:
189  // fancy flavors that encapsulate of the above flavors...
190  break;
191 
192  default:
193  done = true;
194  break;
195  }
196  }
197  }
198  }
199 
200  static bool Create_LC_THREAD(Thread *thread, Stream &data) {
201  RegisterContextSP reg_ctx_sp(thread->GetRegisterContext());
202  if (reg_ctx_sp) {
203  RegisterContext *reg_ctx = reg_ctx_sp.get();
204 
205  data.PutHex32(GPRRegSet); // Flavor
206  data.PutHex32(GPRWordCount);
207  PrintRegisterValue(reg_ctx, "rax", nullptr, 8, data);
208  PrintRegisterValue(reg_ctx, "rbx", nullptr, 8, data);
209  PrintRegisterValue(reg_ctx, "rcx", nullptr, 8, data);
210  PrintRegisterValue(reg_ctx, "rdx", nullptr, 8, data);
211  PrintRegisterValue(reg_ctx, "rdi", nullptr, 8, data);
212  PrintRegisterValue(reg_ctx, "rsi", nullptr, 8, data);
213  PrintRegisterValue(reg_ctx, "rbp", nullptr, 8, data);
214  PrintRegisterValue(reg_ctx, "rsp", nullptr, 8, data);
215  PrintRegisterValue(reg_ctx, "r8", nullptr, 8, data);
216  PrintRegisterValue(reg_ctx, "r9", nullptr, 8, data);
217  PrintRegisterValue(reg_ctx, "r10", nullptr, 8, data);
218  PrintRegisterValue(reg_ctx, "r11", nullptr, 8, data);
219  PrintRegisterValue(reg_ctx, "r12", nullptr, 8, data);
220  PrintRegisterValue(reg_ctx, "r13", nullptr, 8, data);
221  PrintRegisterValue(reg_ctx, "r14", nullptr, 8, data);
222  PrintRegisterValue(reg_ctx, "r15", nullptr, 8, data);
223  PrintRegisterValue(reg_ctx, "rip", nullptr, 8, data);
224  PrintRegisterValue(reg_ctx, "rflags", nullptr, 8, data);
225  PrintRegisterValue(reg_ctx, "cs", nullptr, 8, data);
226  PrintRegisterValue(reg_ctx, "fs", nullptr, 8, data);
227  PrintRegisterValue(reg_ctx, "gs", nullptr, 8, data);
228 
229  // // Write out the FPU registers
230  // const size_t fpu_byte_size = sizeof(FPU);
231  // size_t bytes_written = 0;
232  // data.PutHex32 (FPURegSet);
233  // data.PutHex32 (fpu_byte_size/sizeof(uint64_t));
234  // bytes_written += data.PutHex32(0); // uint32_t pad[0]
235  // bytes_written += data.PutHex32(0); // uint32_t pad[1]
236  // bytes_written += WriteRegister (reg_ctx, "fcw", "fctrl", 2,
237  // data); // uint16_t fcw; // "fctrl"
238  // bytes_written += WriteRegister (reg_ctx, "fsw" , "fstat", 2,
239  // data); // uint16_t fsw; // "fstat"
240  // bytes_written += WriteRegister (reg_ctx, "ftw" , "ftag", 1,
241  // data); // uint8_t ftw; // "ftag"
242  // bytes_written += data.PutHex8 (0); // uint8_t pad1;
243  // bytes_written += WriteRegister (reg_ctx, "fop" , NULL, 2,
244  // data); // uint16_t fop; // "fop"
245  // bytes_written += WriteRegister (reg_ctx, "fioff", "ip", 4,
246  // data); // uint32_t ip; // "fioff"
247  // bytes_written += WriteRegister (reg_ctx, "fiseg", NULL, 2,
248  // data); // uint16_t cs; // "fiseg"
249  // bytes_written += data.PutHex16 (0); // uint16_t pad2;
250  // bytes_written += WriteRegister (reg_ctx, "dp", "fooff" , 4,
251  // data); // uint32_t dp; // "fooff"
252  // bytes_written += WriteRegister (reg_ctx, "foseg", NULL, 2,
253  // data); // uint16_t ds; // "foseg"
254  // bytes_written += data.PutHex16 (0); // uint16_t pad3;
255  // bytes_written += WriteRegister (reg_ctx, "mxcsr", NULL, 4,
256  // data); // uint32_t mxcsr;
257  // bytes_written += WriteRegister (reg_ctx, "mxcsrmask", NULL,
258  // 4, data);// uint32_t mxcsrmask;
259  // bytes_written += WriteRegister (reg_ctx, "stmm0", NULL,
260  // sizeof(MMSReg), data);
261  // bytes_written += WriteRegister (reg_ctx, "stmm1", NULL,
262  // sizeof(MMSReg), data);
263  // bytes_written += WriteRegister (reg_ctx, "stmm2", NULL,
264  // sizeof(MMSReg), data);
265  // bytes_written += WriteRegister (reg_ctx, "stmm3", NULL,
266  // sizeof(MMSReg), data);
267  // bytes_written += WriteRegister (reg_ctx, "stmm4", NULL,
268  // sizeof(MMSReg), data);
269  // bytes_written += WriteRegister (reg_ctx, "stmm5", NULL,
270  // sizeof(MMSReg), data);
271  // bytes_written += WriteRegister (reg_ctx, "stmm6", NULL,
272  // sizeof(MMSReg), data);
273  // bytes_written += WriteRegister (reg_ctx, "stmm7", NULL,
274  // sizeof(MMSReg), data);
275  // bytes_written += WriteRegister (reg_ctx, "xmm0" , NULL,
276  // sizeof(XMMReg), data);
277  // bytes_written += WriteRegister (reg_ctx, "xmm1" , NULL,
278  // sizeof(XMMReg), data);
279  // bytes_written += WriteRegister (reg_ctx, "xmm2" , NULL,
280  // sizeof(XMMReg), data);
281  // bytes_written += WriteRegister (reg_ctx, "xmm3" , NULL,
282  // sizeof(XMMReg), data);
283  // bytes_written += WriteRegister (reg_ctx, "xmm4" , NULL,
284  // sizeof(XMMReg), data);
285  // bytes_written += WriteRegister (reg_ctx, "xmm5" , NULL,
286  // sizeof(XMMReg), data);
287  // bytes_written += WriteRegister (reg_ctx, "xmm6" , NULL,
288  // sizeof(XMMReg), data);
289  // bytes_written += WriteRegister (reg_ctx, "xmm7" , NULL,
290  // sizeof(XMMReg), data);
291  // bytes_written += WriteRegister (reg_ctx, "xmm8" , NULL,
292  // sizeof(XMMReg), data);
293  // bytes_written += WriteRegister (reg_ctx, "xmm9" , NULL,
294  // sizeof(XMMReg), data);
295  // bytes_written += WriteRegister (reg_ctx, "xmm10", NULL,
296  // sizeof(XMMReg), data);
297  // bytes_written += WriteRegister (reg_ctx, "xmm11", NULL,
298  // sizeof(XMMReg), data);
299  // bytes_written += WriteRegister (reg_ctx, "xmm12", NULL,
300  // sizeof(XMMReg), data);
301  // bytes_written += WriteRegister (reg_ctx, "xmm13", NULL,
302  // sizeof(XMMReg), data);
303  // bytes_written += WriteRegister (reg_ctx, "xmm14", NULL,
304  // sizeof(XMMReg), data);
305  // bytes_written += WriteRegister (reg_ctx, "xmm15", NULL,
306  // sizeof(XMMReg), data);
307  //
308  // // Fill rest with zeros
309  // for (size_t i=0, n = fpu_byte_size - bytes_written; i<n; ++
310  // i)
311  // data.PutChar(0);
312 
313  // Write out the EXC registers
314  data.PutHex32(EXCRegSet);
315  data.PutHex32(EXCWordCount);
316  PrintRegisterValue(reg_ctx, "trapno", nullptr, 4, data);
317  PrintRegisterValue(reg_ctx, "err", nullptr, 4, data);
318  PrintRegisterValue(reg_ctx, "faultvaddr", nullptr, 8, data);
319  return true;
320  }
321  return false;
322  }
323 
324 protected:
325  int DoReadGPR(lldb::tid_t tid, int flavor, GPR &gpr) override { return 0; }
326 
327  int DoReadFPU(lldb::tid_t tid, int flavor, FPU &fpu) override { return 0; }
328 
329  int DoReadEXC(lldb::tid_t tid, int flavor, EXC &exc) override { return 0; }
330 
331  int DoWriteGPR(lldb::tid_t tid, int flavor, const GPR &gpr) override {
332  return 0;
333  }
334 
335  int DoWriteFPU(lldb::tid_t tid, int flavor, const FPU &fpu) override {
336  return 0;
337  }
338 
339  int DoWriteEXC(lldb::tid_t tid, int flavor, const EXC &exc) override {
340  return 0;
341  }
342 };
343 
345 public:
347  const DataExtractor &data)
348  : RegisterContextDarwin_i386(thread, 0) {
349  SetRegisterDataFrom_LC_THREAD(data);
350  }
351 
352  void InvalidateAllRegisters() override {
353  // Do nothing... registers are always valid...
354  }
355 
357  lldb::offset_t offset = 0;
358  SetError(GPRRegSet, Read, -1);
359  SetError(FPURegSet, Read, -1);
360  SetError(EXCRegSet, Read, -1);
361  bool done = false;
362 
363  while (!done) {
364  int flavor = data.GetU32(&offset);
365  if (flavor == 0)
366  done = true;
367  else {
368  uint32_t i;
369  uint32_t count = data.GetU32(&offset);
370  switch (flavor) {
371  case GPRRegSet:
372  for (i = 0; i < count; ++i)
373  (&gpr.eax)[i] = data.GetU32(&offset);
374  SetError(GPRRegSet, Read, 0);
375  done = true;
376 
377  break;
378  case FPURegSet:
379  // TODO: fill in FPU regs....
380  // SetError (FPURegSet, Read, -1);
381  done = true;
382 
383  break;
384  case EXCRegSet:
385  exc.trapno = data.GetU32(&offset);
386  exc.err = data.GetU32(&offset);
387  exc.faultvaddr = data.GetU32(&offset);
388  SetError(EXCRegSet, Read, 0);
389  done = true;
390  break;
391  case 7:
392  case 8:
393  case 9:
394  // fancy flavors that encapsulate of the above flavors...
395  break;
396 
397  default:
398  done = true;
399  break;
400  }
401  }
402  }
403  }
404 
405  static bool Create_LC_THREAD(Thread *thread, Stream &data) {
406  RegisterContextSP reg_ctx_sp(thread->GetRegisterContext());
407  if (reg_ctx_sp) {
408  RegisterContext *reg_ctx = reg_ctx_sp.get();
409 
410  data.PutHex32(GPRRegSet); // Flavor
411  data.PutHex32(GPRWordCount);
412  PrintRegisterValue(reg_ctx, "eax", nullptr, 4, data);
413  PrintRegisterValue(reg_ctx, "ebx", nullptr, 4, data);
414  PrintRegisterValue(reg_ctx, "ecx", nullptr, 4, data);
415  PrintRegisterValue(reg_ctx, "edx", nullptr, 4, data);
416  PrintRegisterValue(reg_ctx, "edi", nullptr, 4, data);
417  PrintRegisterValue(reg_ctx, "esi", nullptr, 4, data);
418  PrintRegisterValue(reg_ctx, "ebp", nullptr, 4, data);
419  PrintRegisterValue(reg_ctx, "esp", nullptr, 4, data);
420  PrintRegisterValue(reg_ctx, "ss", nullptr, 4, data);
421  PrintRegisterValue(reg_ctx, "eflags", nullptr, 4, data);
422  PrintRegisterValue(reg_ctx, "eip", nullptr, 4, data);
423  PrintRegisterValue(reg_ctx, "cs", nullptr, 4, data);
424  PrintRegisterValue(reg_ctx, "ds", nullptr, 4, data);
425  PrintRegisterValue(reg_ctx, "es", nullptr, 4, data);
426  PrintRegisterValue(reg_ctx, "fs", nullptr, 4, data);
427  PrintRegisterValue(reg_ctx, "gs", nullptr, 4, data);
428 
429  // Write out the EXC registers
430  data.PutHex32(EXCRegSet);
431  data.PutHex32(EXCWordCount);
432  PrintRegisterValue(reg_ctx, "trapno", nullptr, 4, data);
433  PrintRegisterValue(reg_ctx, "err", nullptr, 4, data);
434  PrintRegisterValue(reg_ctx, "faultvaddr", nullptr, 4, data);
435  return true;
436  }
437  return false;
438  }
439 
440 protected:
441  int DoReadGPR(lldb::tid_t tid, int flavor, GPR &gpr) override { return 0; }
442 
443  int DoReadFPU(lldb::tid_t tid, int flavor, FPU &fpu) override { return 0; }
444 
445  int DoReadEXC(lldb::tid_t tid, int flavor, EXC &exc) override { return 0; }
446 
447  int DoWriteGPR(lldb::tid_t tid, int flavor, const GPR &gpr) override {
448  return 0;
449  }
450 
451  int DoWriteFPU(lldb::tid_t tid, int flavor, const FPU &fpu) override {
452  return 0;
453  }
454 
455  int DoWriteEXC(lldb::tid_t tid, int flavor, const EXC &exc) override {
456  return 0;
457  }
458 };
459 
461 public:
463  const DataExtractor &data)
464  : RegisterContextDarwin_arm(thread, 0) {
465  SetRegisterDataFrom_LC_THREAD(data);
466  }
467 
468  void InvalidateAllRegisters() override {
469  // Do nothing... registers are always valid...
470  }
471 
473  lldb::offset_t offset = 0;
474  SetError(GPRRegSet, Read, -1);
475  SetError(FPURegSet, Read, -1);
476  SetError(EXCRegSet, Read, -1);
477  bool done = false;
478 
479  while (!done) {
480  int flavor = data.GetU32(&offset);
481  uint32_t count = data.GetU32(&offset);
482  lldb::offset_t next_thread_state = offset + (count * 4);
483  switch (flavor) {
484  case GPRAltRegSet:
485  case GPRRegSet:
486  // On ARM, the CPSR register is also included in the count but it is
487  // not included in gpr.r so loop until (count-1).
488  for (uint32_t i = 0; i < (count - 1); ++i) {
489  gpr.r[i] = data.GetU32(&offset);
490  }
491  // Save cpsr explicitly.
492  gpr.cpsr = data.GetU32(&offset);
493 
494  SetError(GPRRegSet, Read, 0);
495  offset = next_thread_state;
496  break;
497 
498  case FPURegSet: {
499  uint8_t *fpu_reg_buf = (uint8_t *)&fpu.floats.s[0];
500  const int fpu_reg_buf_size = sizeof(fpu.floats);
501  if (data.ExtractBytes(offset, fpu_reg_buf_size, eByteOrderLittle,
502  fpu_reg_buf) == fpu_reg_buf_size) {
503  offset += fpu_reg_buf_size;
504  fpu.fpscr = data.GetU32(&offset);
505  SetError(FPURegSet, Read, 0);
506  } else {
507  done = true;
508  }
509  }
510  offset = next_thread_state;
511  break;
512 
513  case EXCRegSet:
514  if (count == 3) {
515  exc.exception = data.GetU32(&offset);
516  exc.fsr = data.GetU32(&offset);
517  exc.far = data.GetU32(&offset);
518  SetError(EXCRegSet, Read, 0);
519  }
520  done = true;
521  offset = next_thread_state;
522  break;
523 
524  // Unknown register set flavor, stop trying to parse.
525  default:
526  done = true;
527  }
528  }
529  }
530 
531  static bool Create_LC_THREAD(Thread *thread, Stream &data) {
532  RegisterContextSP reg_ctx_sp(thread->GetRegisterContext());
533  if (reg_ctx_sp) {
534  RegisterContext *reg_ctx = reg_ctx_sp.get();
535 
536  data.PutHex32(GPRRegSet); // Flavor
537  data.PutHex32(GPRWordCount);
538  PrintRegisterValue(reg_ctx, "r0", nullptr, 4, data);
539  PrintRegisterValue(reg_ctx, "r1", nullptr, 4, data);
540  PrintRegisterValue(reg_ctx, "r2", nullptr, 4, data);
541  PrintRegisterValue(reg_ctx, "r3", nullptr, 4, data);
542  PrintRegisterValue(reg_ctx, "r4", nullptr, 4, data);
543  PrintRegisterValue(reg_ctx, "r5", nullptr, 4, data);
544  PrintRegisterValue(reg_ctx, "r6", nullptr, 4, data);
545  PrintRegisterValue(reg_ctx, "r7", nullptr, 4, data);
546  PrintRegisterValue(reg_ctx, "r8", nullptr, 4, data);
547  PrintRegisterValue(reg_ctx, "r9", nullptr, 4, data);
548  PrintRegisterValue(reg_ctx, "r10", nullptr, 4, data);
549  PrintRegisterValue(reg_ctx, "r11", nullptr, 4, data);
550  PrintRegisterValue(reg_ctx, "r12", nullptr, 4, data);
551  PrintRegisterValue(reg_ctx, "sp", nullptr, 4, data);
552  PrintRegisterValue(reg_ctx, "lr", nullptr, 4, data);
553  PrintRegisterValue(reg_ctx, "pc", nullptr, 4, data);
554  PrintRegisterValue(reg_ctx, "cpsr", nullptr, 4, data);
555 
556  // Write out the EXC registers
557  // data.PutHex32 (EXCRegSet);
558  // data.PutHex32 (EXCWordCount);
559  // WriteRegister (reg_ctx, "exception", NULL, 4, data);
560  // WriteRegister (reg_ctx, "fsr", NULL, 4, data);
561  // WriteRegister (reg_ctx, "far", NULL, 4, data);
562  return true;
563  }
564  return false;
565  }
566 
567 protected:
568  int DoReadGPR(lldb::tid_t tid, int flavor, GPR &gpr) override { return -1; }
569 
570  int DoReadFPU(lldb::tid_t tid, int flavor, FPU &fpu) override { return -1; }
571 
572  int DoReadEXC(lldb::tid_t tid, int flavor, EXC &exc) override { return -1; }
573 
574  int DoReadDBG(lldb::tid_t tid, int flavor, DBG &dbg) override { return -1; }
575 
576  int DoWriteGPR(lldb::tid_t tid, int flavor, const GPR &gpr) override {
577  return 0;
578  }
579 
580  int DoWriteFPU(lldb::tid_t tid, int flavor, const FPU &fpu) override {
581  return 0;
582  }
583 
584  int DoWriteEXC(lldb::tid_t tid, int flavor, const EXC &exc) override {
585  return 0;
586  }
587 
588  int DoWriteDBG(lldb::tid_t tid, int flavor, const DBG &dbg) override {
589  return -1;
590  }
591 };
592 
594 public:
596  const DataExtractor &data)
597  : RegisterContextDarwin_arm64(thread, 0) {
598  SetRegisterDataFrom_LC_THREAD(data);
599  }
600 
601  void InvalidateAllRegisters() override {
602  // Do nothing... registers are always valid...
603  }
604 
606  lldb::offset_t offset = 0;
607  SetError(GPRRegSet, Read, -1);
608  SetError(FPURegSet, Read, -1);
609  SetError(EXCRegSet, Read, -1);
610  bool done = false;
611  while (!done) {
612  int flavor = data.GetU32(&offset);
613  uint32_t count = data.GetU32(&offset);
614  lldb::offset_t next_thread_state = offset + (count * 4);
615  switch (flavor) {
616  case GPRRegSet:
617  // x0-x29 + fp + lr + sp + pc (== 33 64-bit registers) plus cpsr (1
618  // 32-bit register)
619  if (count >= (33 * 2) + 1) {
620  for (uint32_t i = 0; i < 29; ++i)
621  gpr.x[i] = data.GetU64(&offset);
622  gpr.fp = data.GetU64(&offset);
623  gpr.lr = data.GetU64(&offset);
624  gpr.sp = data.GetU64(&offset);
625  gpr.pc = data.GetU64(&offset);
626  gpr.cpsr = data.GetU32(&offset);
627  SetError(GPRRegSet, Read, 0);
628  }
629  offset = next_thread_state;
630  break;
631  case FPURegSet: {
632  uint8_t *fpu_reg_buf = (uint8_t *)&fpu.v[0];
633  const int fpu_reg_buf_size = sizeof(fpu);
634  if (fpu_reg_buf_size == count * sizeof(uint32_t) &&
635  data.ExtractBytes(offset, fpu_reg_buf_size, eByteOrderLittle,
636  fpu_reg_buf) == fpu_reg_buf_size) {
637  SetError(FPURegSet, Read, 0);
638  } else {
639  done = true;
640  }
641  }
642  offset = next_thread_state;
643  break;
644  case EXCRegSet:
645  if (count == 4) {
646  exc.far = data.GetU64(&offset);
647  exc.esr = data.GetU32(&offset);
648  exc.exception = data.GetU32(&offset);
649  SetError(EXCRegSet, Read, 0);
650  }
651  offset = next_thread_state;
652  break;
653  default:
654  done = true;
655  break;
656  }
657  }
658  }
659 
660  static bool Create_LC_THREAD(Thread *thread, Stream &data) {
661  RegisterContextSP reg_ctx_sp(thread->GetRegisterContext());
662  if (reg_ctx_sp) {
663  RegisterContext *reg_ctx = reg_ctx_sp.get();
664 
665  data.PutHex32(GPRRegSet); // Flavor
666  data.PutHex32(GPRWordCount);
667  PrintRegisterValue(reg_ctx, "x0", nullptr, 8, data);
668  PrintRegisterValue(reg_ctx, "x1", nullptr, 8, data);
669  PrintRegisterValue(reg_ctx, "x2", nullptr, 8, data);
670  PrintRegisterValue(reg_ctx, "x3", nullptr, 8, data);
671  PrintRegisterValue(reg_ctx, "x4", nullptr, 8, data);
672  PrintRegisterValue(reg_ctx, "x5", nullptr, 8, data);
673  PrintRegisterValue(reg_ctx, "x6", nullptr, 8, data);
674  PrintRegisterValue(reg_ctx, "x7", nullptr, 8, data);
675  PrintRegisterValue(reg_ctx, "x8", nullptr, 8, data);
676  PrintRegisterValue(reg_ctx, "x9", nullptr, 8, data);
677  PrintRegisterValue(reg_ctx, "x10", nullptr, 8, data);
678  PrintRegisterValue(reg_ctx, "x11", nullptr, 8, data);
679  PrintRegisterValue(reg_ctx, "x12", nullptr, 8, data);
680  PrintRegisterValue(reg_ctx, "x13", nullptr, 8, data);
681  PrintRegisterValue(reg_ctx, "x14", nullptr, 8, data);
682  PrintRegisterValue(reg_ctx, "x15", nullptr, 8, data);
683  PrintRegisterValue(reg_ctx, "x16", nullptr, 8, data);
684  PrintRegisterValue(reg_ctx, "x17", nullptr, 8, data);
685  PrintRegisterValue(reg_ctx, "x18", nullptr, 8, data);
686  PrintRegisterValue(reg_ctx, "x19", nullptr, 8, data);
687  PrintRegisterValue(reg_ctx, "x20", nullptr, 8, data);
688  PrintRegisterValue(reg_ctx, "x21", nullptr, 8, data);
689  PrintRegisterValue(reg_ctx, "x22", nullptr, 8, data);
690  PrintRegisterValue(reg_ctx, "x23", nullptr, 8, data);
691  PrintRegisterValue(reg_ctx, "x24", nullptr, 8, data);
692  PrintRegisterValue(reg_ctx, "x25", nullptr, 8, data);
693  PrintRegisterValue(reg_ctx, "x26", nullptr, 8, data);
694  PrintRegisterValue(reg_ctx, "x27", nullptr, 8, data);
695  PrintRegisterValue(reg_ctx, "x28", nullptr, 8, data);
696  PrintRegisterValue(reg_ctx, "fp", nullptr, 8, data);
697  PrintRegisterValue(reg_ctx, "lr", nullptr, 8, data);
698  PrintRegisterValue(reg_ctx, "sp", nullptr, 8, data);
699  PrintRegisterValue(reg_ctx, "pc", nullptr, 8, data);
700  PrintRegisterValue(reg_ctx, "cpsr", nullptr, 4, data);
701 
702  // Write out the EXC registers
703  // data.PutHex32 (EXCRegSet);
704  // data.PutHex32 (EXCWordCount);
705  // WriteRegister (reg_ctx, "far", NULL, 8, data);
706  // WriteRegister (reg_ctx, "esr", NULL, 4, data);
707  // WriteRegister (reg_ctx, "exception", NULL, 4, data);
708  return true;
709  }
710  return false;
711  }
712 
713 protected:
714  int DoReadGPR(lldb::tid_t tid, int flavor, GPR &gpr) override { return -1; }
715 
716  int DoReadFPU(lldb::tid_t tid, int flavor, FPU &fpu) override { return -1; }
717 
718  int DoReadEXC(lldb::tid_t tid, int flavor, EXC &exc) override { return -1; }
719 
720  int DoReadDBG(lldb::tid_t tid, int flavor, DBG &dbg) override { return -1; }
721 
722  int DoWriteGPR(lldb::tid_t tid, int flavor, const GPR &gpr) override {
723  return 0;
724  }
725 
726  int DoWriteFPU(lldb::tid_t tid, int flavor, const FPU &fpu) override {
727  return 0;
728  }
729 
730  int DoWriteEXC(lldb::tid_t tid, int flavor, const EXC &exc) override {
731  return 0;
732  }
733 
734  int DoWriteDBG(lldb::tid_t tid, int flavor, const DBG &dbg) override {
735  return -1;
736  }
737 };
738 
740  switch (magic) {
741  case MH_MAGIC:
742  case MH_CIGAM:
743  return sizeof(struct mach_header);
744 
745  case MH_MAGIC_64:
746  case MH_CIGAM_64:
747  return sizeof(struct mach_header_64);
748  break;
749 
750  default:
751  break;
752  }
753  return 0;
754 }
755 
756 #define MACHO_NLIST_ARM_SYMBOL_IS_THUMB 0x0008
757 
759 
761  PluginManager::RegisterPlugin(
762  GetPluginNameStatic(), GetPluginDescriptionStatic(), CreateInstance,
763  CreateMemoryInstance, GetModuleSpecifications, SaveCore);
764 }
765 
767  PluginManager::UnregisterPlugin(CreateInstance);
768 }
769 
771  static ConstString g_name("mach-o");
772  return g_name;
773 }
774 
776  return "Mach-o object file reader (32 and 64 bit)";
777 }
778 
779 ObjectFile *ObjectFileMachO::CreateInstance(const lldb::ModuleSP &module_sp,
780  DataBufferSP &data_sp,
781  lldb::offset_t data_offset,
782  const FileSpec *file,
783  lldb::offset_t file_offset,
784  lldb::offset_t length) {
785  if (!data_sp) {
786  data_sp = MapFileData(*file, length, file_offset);
787  if (!data_sp)
788  return nullptr;
789  data_offset = 0;
790  }
791 
792  if (!ObjectFileMachO::MagicBytesMatch(data_sp, data_offset, length))
793  return nullptr;
794 
795  // Update the data to contain the entire file if it doesn't already
796  if (data_sp->GetByteSize() < length) {
797  data_sp = MapFileData(*file, length, file_offset);
798  if (!data_sp)
799  return nullptr;
800  data_offset = 0;
801  }
802  auto objfile_up = std::make_unique<ObjectFileMachO>(
803  module_sp, data_sp, data_offset, file, file_offset, length);
804  if (!objfile_up || !objfile_up->ParseHeader())
805  return nullptr;
806 
807  return objfile_up.release();
808 }
809 
811  const lldb::ModuleSP &module_sp, DataBufferSP &data_sp,
812  const ProcessSP &process_sp, lldb::addr_t header_addr) {
813  if (ObjectFileMachO::MagicBytesMatch(data_sp, 0, data_sp->GetByteSize())) {
814  std::unique_ptr<ObjectFile> objfile_up(
815  new ObjectFileMachO(module_sp, data_sp, process_sp, header_addr));
816  if (objfile_up.get() && objfile_up->ParseHeader())
817  return objfile_up.release();
818  }
819  return nullptr;
820 }
821 
823  const lldb_private::FileSpec &file, lldb::DataBufferSP &data_sp,
824  lldb::offset_t data_offset, lldb::offset_t file_offset,
826  const size_t initial_count = specs.GetSize();
827 
828  if (ObjectFileMachO::MagicBytesMatch(data_sp, 0, data_sp->GetByteSize())) {
829  DataExtractor data;
830  data.SetData(data_sp);
831  llvm::MachO::mach_header header;
832  if (ParseHeader(data, &data_offset, header)) {
833  size_t header_and_load_cmds =
834  header.sizeofcmds + MachHeaderSizeFromMagic(header.magic);
835  if (header_and_load_cmds >= data_sp->GetByteSize()) {
836  data_sp = MapFileData(file, header_and_load_cmds, file_offset);
837  data.SetData(data_sp);
838  data_offset = MachHeaderSizeFromMagic(header.magic);
839  }
840  if (data_sp) {
841  ModuleSpec base_spec;
842  base_spec.GetFileSpec() = file;
843  base_spec.SetObjectOffset(file_offset);
844  base_spec.SetObjectSize(length);
845  GetAllArchSpecs(header, data, data_offset, base_spec, specs);
846  }
847  }
848  }
849  return specs.GetSize() - initial_count;
850 }
851 
853  static ConstString g_segment_name_TEXT("__TEXT");
854  return g_segment_name_TEXT;
855 }
856 
858  static ConstString g_segment_name_DATA("__DATA");
859  return g_segment_name_DATA;
860 }
861 
863  static ConstString g_segment_name("__DATA_DIRTY");
864  return g_segment_name;
865 }
866 
868  static ConstString g_segment_name("__DATA_CONST");
869  return g_segment_name;
870 }
871 
873  static ConstString g_segment_name_OBJC("__OBJC");
874  return g_segment_name_OBJC;
875 }
876 
878  static ConstString g_section_name_LINKEDIT("__LINKEDIT");
879  return g_section_name_LINKEDIT;
880 }
881 
883  static ConstString g_section_name("__DWARF");
884  return g_section_name;
885 }
886 
888  static ConstString g_section_name_eh_frame("__eh_frame");
889  return g_section_name_eh_frame;
890 }
891 
892 bool ObjectFileMachO::MagicBytesMatch(DataBufferSP &data_sp,
893  lldb::addr_t data_offset,
894  lldb::addr_t data_length) {
895  DataExtractor data;
896  data.SetData(data_sp, data_offset, data_length);
897  lldb::offset_t offset = 0;
898  uint32_t magic = data.GetU32(&offset);
899  return MachHeaderSizeFromMagic(magic) != 0;
900 }
901 
902 ObjectFileMachO::ObjectFileMachO(const lldb::ModuleSP &module_sp,
903  DataBufferSP &data_sp,
904  lldb::offset_t data_offset,
905  const FileSpec *file,
906  lldb::offset_t file_offset,
907  lldb::offset_t length)
908  : ObjectFile(module_sp, file, file_offset, length, data_sp, data_offset),
909  m_mach_segments(), m_mach_sections(), m_entry_point_address(),
910  m_thread_context_offsets(), m_thread_context_offsets_valid(false),
911  m_reexported_dylibs(), m_allow_assembly_emulation_unwind_plans(true) {
912  ::memset(&m_header, 0, sizeof(m_header));
913  ::memset(&m_dysymtab, 0, sizeof(m_dysymtab));
914 }
915 
916 ObjectFileMachO::ObjectFileMachO(const lldb::ModuleSP &module_sp,
917  lldb::DataBufferSP &header_data_sp,
918  const lldb::ProcessSP &process_sp,
919  lldb::addr_t header_addr)
920  : ObjectFile(module_sp, process_sp, header_addr, header_data_sp),
921  m_mach_segments(), m_mach_sections(), m_entry_point_address(),
922  m_thread_context_offsets(), m_thread_context_offsets_valid(false),
923  m_reexported_dylibs(), m_allow_assembly_emulation_unwind_plans(true) {
924  ::memset(&m_header, 0, sizeof(m_header));
925  ::memset(&m_dysymtab, 0, sizeof(m_dysymtab));
926 }
927 
929  lldb::offset_t *data_offset_ptr,
930  llvm::MachO::mach_header &header) {
932  // Leave magic in the original byte order
933  header.magic = data.GetU32(data_offset_ptr);
934  bool can_parse = false;
935  bool is_64_bit = false;
936  switch (header.magic) {
937  case MH_MAGIC:
939  data.SetAddressByteSize(4);
940  can_parse = true;
941  break;
942 
943  case MH_MAGIC_64:
945  data.SetAddressByteSize(8);
946  can_parse = true;
947  is_64_bit = true;
948  break;
949 
950  case MH_CIGAM:
953  : eByteOrderBig);
954  data.SetAddressByteSize(4);
955  can_parse = true;
956  break;
957 
958  case MH_CIGAM_64:
961  : eByteOrderBig);
962  data.SetAddressByteSize(8);
963  is_64_bit = true;
964  can_parse = true;
965  break;
966 
967  default:
968  break;
969  }
970 
971  if (can_parse) {
972  data.GetU32(data_offset_ptr, &header.cputype, 6);
973  if (is_64_bit)
974  *data_offset_ptr += 4;
975  return true;
976  } else {
977  memset(&header, 0, sizeof(header));
978  }
979  return false;
980 }
981 
// Parses the Mach-O header held in m_data into m_header and settles on an
// architecture for the module. Returns false when the module is gone, when
// the magic is not one of the four recognized values, or when no candidate
// architecture is compatible with the module; returns true otherwise.
983  ModuleSP module_sp(GetModule());
984  if (!module_sp)
985  return false;
986 
// All header state is updated under the module mutex.
987  std::lock_guard<std::recursive_mutex> guard(module_sp->GetMutex());
988  bool can_parse = false;
989  lldb::offset_t offset = 0;
991  // Leave magic in the original byte order
992  m_header.magic = m_data.GetU32(&offset);
// Each recognized magic marks the header as parseable; anything else leaves
// can_parse false.
993  switch (m_header.magic) {
994  case MH_MAGIC:
997  can_parse = true;
998  break;
999 
1000  case MH_MAGIC_64:
1003  can_parse = true;
1004  break;
1005 
1006  case MH_CIGAM:
1009  : eByteOrderBig);
1011  can_parse = true;
1012  break;
1013 
1014  case MH_CIGAM_64:
1017  : eByteOrderBig);
1019  can_parse = true;
1020  break;
1021 
1022  default:
1023  break;
1024  }
1025 
1026  if (can_parse) {
// Read the six 32-bit header fields that follow the magic, starting at
// cputype.
1027  m_data.GetU32(&offset, &m_header.cputype, 6);
1028 
1029  ModuleSpecList all_specs;
1030  ModuleSpec base_spec;
1032  base_spec, all_specs);
1033 
// Walk the candidate specs and stop at the first one that is compatible with
// the module's architecture.
1034  for (unsigned i = 0, e = all_specs.GetSize(); i != e; ++i) {
1035  ArchSpec mach_arch =
1037 
1038  // Check if the module has a required architecture
1039  const ArchSpec &module_arch = module_sp->GetArchitecture();
1040  if (module_arch.IsValid() && !module_arch.IsCompatibleMatch(mach_arch))
1041  continue;
1042 
1043  if (SetModulesArchitecture(mach_arch)) {
// Make sure m_data covers the header plus every load command; if it does
// not, re-read that many bytes from the live process or from the file.
1044  const size_t header_and_lc_size =
1045  m_header.sizeofcmds + MachHeaderSizeFromMagic(m_header.magic);
1046  if (m_data.GetByteSize() < header_and_lc_size) {
1047  DataBufferSP data_sp;
1048  ProcessSP process_sp(m_process_wp.lock());
1049  if (process_sp) {
1050  data_sp = ReadMemory(process_sp, m_memory_addr, header_and_lc_size);
1051  } else {
1052  // Read in only the load command data from the file on disk
1053  data_sp = MapFileData(m_file, header_and_lc_size, m_file_offset);
// NOTE(review): data_sp is dereferenced without a null check here - confirm
// that MapFileData cannot return an empty pointer.
1054  if (data_sp->GetByteSize() != header_and_lc_size)
1055  continue;
1056  }
1057  if (data_sp)
1058  m_data.SetData(data_sp);
1059  }
1060  }
// Reached for the first compatible architecture, whether or not
// SetModulesArchitecture() accepted it.
1061  return true;
1062  }
1063  // None found.
1064  return false;
1065  } else {
// Unrecognized magic: leave a zeroed header behind.
1066  memset(&m_header, 0, sizeof(struct mach_header));
1067  }
1068  return false;
1069 }
1070 
// Reports the byte order currently configured on this object's data
// extractor (m_data).
1072  return m_data.GetByteOrder();
1073 }
1074 
// True when the parsed Mach-O header's filetype is MH_EXECUTE.
1076  return m_header.filetype == MH_EXECUTE;
1077 }
1078 
// True when the parsed Mach-O header's filetype is MH_DYLINKER.
1080  return m_header.filetype == MH_DYLINKER;
1081 }
1082 
// Reports the address byte size currently configured on this object's data
// extractor (m_data).
1084  return m_data.GetAddressByteSize();
1085 }
1086 
// Classifies file_addr by looking up the symbol that contains it. The type
// of the symbol's section is consulted first; if the symbol has no section,
// the symbol's own type decides. Returns AddressClass::eUnknown when there is
// no symbol table or no containing symbol.
1088  Symtab *symtab = GetSymtab();
1089  if (!symtab)
1090  return AddressClass::eUnknown;
1091 
1092  Symbol *symbol = symtab->FindSymbolContainingFileAddress(file_addr);
1093  if (symbol) {
1094  if (symbol->ValueIsAddress()) {
1095  SectionSP section_sp(symbol->GetAddressRef().GetSection());
1096  if (section_sp) {
// First choice: classify by the type of the section the symbol lives in.
1097  const lldb::SectionType section_type = section_sp->GetType();
1098  switch (section_type) {
1099  case eSectionTypeInvalid:
1100  return AddressClass::eUnknown;
1101 
1102  case eSectionTypeCode:
1103  if (m_header.cputype == llvm::MachO::CPU_TYPE_ARM) {
1104  // For ARM we have a bit in the n_desc field of the symbol that
1105  // tells us ARM/Thumb which is bit 0x0008.
1106  if (symbol->GetFlags() & MACHO_NLIST_ARM_SYMBOL_IS_THUMB)
1107  return AddressClass::eCodeAlternateISA;
1108  }
1109  return AddressClass::eCode;
1110 
1111  case eSectionTypeContainer:
1112  return AddressClass::eUnknown;
1113 
1114  case eSectionTypeData:
1118  case eSectionTypeData4:
1119  case eSectionTypeData8:
1120  case eSectionTypeData16:
1122  case eSectionTypeZeroFill:
1125  case eSectionTypeGoSymtab:
1126  return AddressClass::eData;
1127 
1128  case eSectionTypeDebug:
1163  return AddressClass::eDebug;
1164 
1165  case eSectionTypeEHFrame:
1166  case eSectionTypeARMexidx:
1167  case eSectionTypeARMextab:
1169  return AddressClass::eRuntime;
1170 
1176  case eSectionTypeOther:
1177  return AddressClass::eUnknown;
1178  }
1179  }
1180  }
1181 
// Fallback: classify by the symbol's own type.
1182  const SymbolType symbol_type = symbol->GetType();
1183  switch (symbol_type) {
1184  case eSymbolTypeAny:
1185  return AddressClass::eUnknown;
1186  case eSymbolTypeAbsolute:
1187  return AddressClass::eUnknown;
1188 
1189  case eSymbolTypeCode:
1190  case eSymbolTypeTrampoline:
1191  case eSymbolTypeResolver:
1192  if (m_header.cputype == llvm::MachO::CPU_TYPE_ARM) {
1193  // For ARM we have a bit in the n_desc field of the symbol that tells
1194  // us ARM/Thumb which is bit 0x0008.
1195  if (symbol->GetFlags() & MACHO_NLIST_ARM_SYMBOL_IS_THUMB)
1196  return AddressClass::eCodeAlternateISA;
1197  }
1198  return AddressClass::eCode;
1199 
1200  case eSymbolTypeData:
1201  return AddressClass::eData;
1202  case eSymbolTypeRuntime:
1203  return AddressClass::eRuntime;
1204  case eSymbolTypeException:
1205  return AddressClass::eRuntime;
1206  case eSymbolTypeSourceFile:
1207  return AddressClass::eDebug;
1208  case eSymbolTypeHeaderFile:
1209  return AddressClass::eDebug;
1210  case eSymbolTypeObjectFile:
1211  return AddressClass::eDebug;
1213  return AddressClass::eDebug;
1214  case eSymbolTypeBlock:
1215  return AddressClass::eDebug;
1216  case eSymbolTypeLocal:
1217  return AddressClass::eData;
1218  case eSymbolTypeParam:
1219  return AddressClass::eData;
1220  case eSymbolTypeVariable:
1221  return AddressClass::eData;
1223  return AddressClass::eDebug;
1224  case eSymbolTypeLineEntry:
1225  return AddressClass::eDebug;
1226  case eSymbolTypeLineHeader:
1227  return AddressClass::eDebug;
1228  case eSymbolTypeScopeBegin:
1229  return AddressClass::eDebug;
1230  case eSymbolTypeScopeEnd:
1231  return AddressClass::eDebug;
1232  case eSymbolTypeAdditional:
1233  return AddressClass::eUnknown;
1234  case eSymbolTypeCompiler:
1235  return AddressClass::eDebug;
1237  return AddressClass::eDebug;
1238  case eSymbolTypeUndefined:
1239  return AddressClass::eUnknown;
1240  case eSymbolTypeObjCClass:
1241  return AddressClass::eRuntime;
1243  return AddressClass::eRuntime;
1244  case eSymbolTypeObjCIVar:
1245  return AddressClass::eRuntime;
1246  case eSymbolTypeReExported:
1247  return AddressClass::eRuntime;
1248  }
1249  }
// No symbol contains file_addr.
1250  return AddressClass::eUnknown;
1251 }
1252 
// Returns the symbol table, building it on first use. The build happens
// under the module mutex and, while parsing, under the new symbol table's
// own mutex. If the module is gone and no table was built earlier, the
// returned pointer is null.
1254  ModuleSP module_sp(GetModule());
1255  if (module_sp) {
1256  std::lock_guard<std::recursive_mutex> guard(module_sp->GetMutex());
1257  if (m_symtab_up == nullptr) {
1258  m_symtab_up = std::make_unique<Symtab>(this);
1259  std::lock_guard<std::recursive_mutex> symtab_guard(
1260  m_symtab_up->GetMutex());
// Populate the table, then finalize it once parsing is complete.
1261  ParseSymtab();
1262  m_symtab_up->Finalize();
1263  }
1264  }
1265  return m_symtab_up.get();
1266 }
1267 
// Reports whether the LC_DYSYMTAB load command lists at most one local
// symbol. The command is located by scanning the load commands the first
// time this runs (while m_dysymtab.cmd is still 0) and is cached in
// m_dysymtab. Returns false when no usable LC_DYSYMTAB was found.
1269  if (m_dysymtab.cmd == 0) {
1270  ModuleSP module_sp(GetModule());
1271  if (module_sp) {
1273  for (uint32_t i = 0; i < m_header.ncmds; ++i) {
1274  const lldb::offset_t load_cmd_offset = offset;
1275 
// Every load command starts with two 32-bit fields: cmd and cmdsize.
1276  load_command lc;
1277  if (m_data.GetU32(&offset, &lc.cmd, 2) == nullptr)
1278  break;
1279  if (lc.cmd == LC_DYSYMTAB) {
1280  m_dysymtab.cmd = lc.cmd;
1281  m_dysymtab.cmdsize = lc.cmdsize;
// The rest of the struct is read as consecutive 32-bit values starting at
// ilocalsym; the "- 2" accounts for cmd and cmdsize already being set.
1282  if (m_data.GetU32(&offset, &m_dysymtab.ilocalsym,
1283  (sizeof(m_dysymtab) / sizeof(uint32_t)) - 2) ==
1284  nullptr) {
1285  // Clear m_dysymtab if we were unable to read all items from the
1286  // load command
1287  ::memset(&m_dysymtab, 0, sizeof(m_dysymtab));
1288  }
1289  }
// Advance by the command's declared size, not by what was consumed.
1290  offset = load_cmd_offset + lc.cmdsize;
1291  }
1292  }
1293  }
1294  if (m_dysymtab.cmd)
1295  return m_dysymtab.nlocalsym <= 1;
1296  return false;
1297 }
1298 
// Collects one file range (cryptoff, cryptsize) for every LC_ENCRYPTION_INFO
// or LC_ENCRYPTION_INFO_64 load command whose cryptid is non-zero, and
// returns the collection. Scanning stops early if a load command header
// cannot be read.
1300  EncryptedFileRanges result;
1302 
1303  encryption_info_command encryption_cmd;
1304  for (uint32_t i = 0; i < m_header.ncmds; ++i) {
1305  const lldb::offset_t load_cmd_offset = offset;
// Read the two leading 32-bit fields (cmd, cmdsize) of the load command.
1306  if (m_data.GetU32(&offset, &encryption_cmd, 2) == nullptr)
1307  break;
1308 
1309  // LC_ENCRYPTION_INFO and LC_ENCRYPTION_INFO_64 have the same sizes for the
1310  // 3 fields we care about, so treat them the same.
1311  if (encryption_cmd.cmd == LC_ENCRYPTION_INFO ||
1312  encryption_cmd.cmd == LC_ENCRYPTION_INFO_64) {
// Three 32-bit fields are read starting at cryptoff; cryptsize and cryptid
// are used below.
1313  if (m_data.GetU32(&offset, &encryption_cmd.cryptoff, 3)) {
1314  if (encryption_cmd.cryptid != 0) {
1316  entry.SetRangeBase(encryption_cmd.cryptoff);
1317  entry.SetByteSize(encryption_cmd.cryptsize);
1318  result.Append(entry);
1319  }
1320  }
1321  }
// Advance by the command's declared size to reach the next load command.
1322  offset = load_cmd_offset + encryption_cmd.cmdsize;
1323  }
1324 
1325  return result;
1326 }
1327 
1328 void ObjectFileMachO::SanitizeSegmentCommand(segment_command_64 &seg_cmd,
1329  uint32_t cmd_idx) {
1330  if (m_length == 0 || seg_cmd.filesize == 0)
1331  return;
1332 
1333  if ((m_header.flags & MH_DYLIB_IN_CACHE) && !IsInMemory()) {
1334  // In shared cache images, the load commands are relative to the
1335  // shared cache file, and not the the specific image we are
1336  // examining. Let's fix this up so that it looks like a normal
1337  // image.
1338  if (strncmp(seg_cmd.segname, "__TEXT", sizeof(seg_cmd.segname)) == 0)
1339  m_text_address = seg_cmd.vmaddr;
1340  if (strncmp(seg_cmd.segname, "__LINKEDIT", sizeof(seg_cmd.segname)) == 0)
1341  m_linkedit_original_offset = seg_cmd.fileoff;
1342 
1343  seg_cmd.fileoff = seg_cmd.vmaddr - m_text_address;
1344  }
1345 
1346  if (seg_cmd.fileoff > m_length) {
1347  // We have a load command that says it extends past the end of the file.
1348  // This is likely a corrupt file. We don't have any way to return an error
1349  // condition here (this method was likely invoked from something like
1350  // ObjectFile::GetSectionList()), so we just null out the section contents,
1351  // and dump a message to stdout. The most common case here is core file
1352  // debugging with a truncated file.
1353  const char *lc_segment_name =
1354  seg_cmd.cmd == LC_SEGMENT_64 ? "LC_SEGMENT_64" : "LC_SEGMENT";
1355  GetModule()->ReportWarning(
1356  "load command %u %s has a fileoff (0x%" PRIx64
1357  ") that extends beyond the end of the file (0x%" PRIx64
1358  "), ignoring this section",
1359  cmd_idx, lc_segment_name, seg_cmd.fileoff, m_length);
1360 
1361  seg_cmd.fileoff = 0;
1362  seg_cmd.filesize = 0;
1363  }
1364 
1365  if (seg_cmd.fileoff + seg_cmd.filesize > m_length) {
1366  // We have a load command that says it extends past the end of the file.
1367  // This is likely a corrupt file. We don't have any way to return an error
1368  // condition here (this method was likely invoked from something like
1369  // ObjectFile::GetSectionList()), so we just null out the section contents,
1370  // and dump a message to stdout. The most common case here is core file
1371  // debugging with a truncated file.
1372  const char *lc_segment_name =
1373  seg_cmd.cmd == LC_SEGMENT_64 ? "LC_SEGMENT_64" : "LC_SEGMENT";
1374  GetModule()->ReportWarning(
1375  "load command %u %s has a fileoff + filesize (0x%" PRIx64
1376  ") that extends beyond the end of the file (0x%" PRIx64
1377  "), the segment will be truncated to match",
1378  cmd_idx, lc_segment_name, seg_cmd.fileoff + seg_cmd.filesize, m_length);
1379 
1380  // Truncate the length
1381  seg_cmd.filesize = m_length - seg_cmd.fileoff;
1382  }
1383 }
1384 
1385 static uint32_t GetSegmentPermissions(const segment_command_64 &seg_cmd) {
1386  uint32_t result = 0;
1387  if (seg_cmd.initprot & VM_PROT_READ)
1388  result |= ePermissionsReadable;
1389  if (seg_cmd.initprot & VM_PROT_WRITE)
1390  result |= ePermissionsWritable;
1391  if (seg_cmd.initprot & VM_PROT_EXECUTE)
1392  result |= ePermissionsExecutable;
1393  return result;
1394 }
1395 
// Maps a Mach-O section (its flags word and its name) to a section type.
// Order of precedence as written below: the instruction attribute bits, then
// an exact match on well-known section names, then the section-type bits of
// the flags word.
1397  ConstString section_name) {
1398 
// Any section marked as containing instructions is code, whatever its name.
1399  if (flags & (S_ATTR_PURE_INSTRUCTIONS | S_ATTR_SOME_INSTRUCTIONS))
1400  return eSectionTypeCode;
1401 
1402  uint32_t mach_sect_type = flags & SECTION_TYPE;
// Function-local statics, so each ConstString is constructed only once.
1403  static ConstString g_sect_name_objc_data("__objc_data");
1404  static ConstString g_sect_name_objc_msgrefs("__objc_msgrefs");
1405  static ConstString g_sect_name_objc_selrefs("__objc_selrefs");
1406  static ConstString g_sect_name_objc_classrefs("__objc_classrefs");
1407  static ConstString g_sect_name_objc_superrefs("__objc_superrefs");
1408  static ConstString g_sect_name_objc_const("__objc_const");
1409  static ConstString g_sect_name_objc_classlist("__objc_classlist");
1410  static ConstString g_sect_name_cfstring("__cfstring");
1411 
1412  static ConstString g_sect_name_dwarf_debug_abbrev("__debug_abbrev");
1413  static ConstString g_sect_name_dwarf_debug_aranges("__debug_aranges");
1414  static ConstString g_sect_name_dwarf_debug_frame("__debug_frame");
1415  static ConstString g_sect_name_dwarf_debug_info("__debug_info");
1416  static ConstString g_sect_name_dwarf_debug_line("__debug_line");
1417  static ConstString g_sect_name_dwarf_debug_loc("__debug_loc");
1418  static ConstString g_sect_name_dwarf_debug_loclists("__debug_loclists");
1419  static ConstString g_sect_name_dwarf_debug_macinfo("__debug_macinfo");
1420  static ConstString g_sect_name_dwarf_debug_names("__debug_names");
1421  static ConstString g_sect_name_dwarf_debug_pubnames("__debug_pubnames");
1422  static ConstString g_sect_name_dwarf_debug_pubtypes("__debug_pubtypes");
1423  static ConstString g_sect_name_dwarf_debug_ranges("__debug_ranges");
1424  static ConstString g_sect_name_dwarf_debug_str("__debug_str");
1425  static ConstString g_sect_name_dwarf_debug_types("__debug_types");
1426  static ConstString g_sect_name_dwarf_apple_names("__apple_names");
1427  static ConstString g_sect_name_dwarf_apple_types("__apple_types");
// NOTE(review): "__apple_namespac" is exactly 16 characters; presumably the
// name is cut short to fit the fixed-size section name field - confirm.
1428  static ConstString g_sect_name_dwarf_apple_namespaces("__apple_namespac");
1429  static ConstString g_sect_name_dwarf_apple_objc("__apple_objc");
1430  static ConstString g_sect_name_eh_frame("__eh_frame");
1431  static ConstString g_sect_name_compact_unwind("__unwind_info");
1432  static ConstString g_sect_name_text("__text");
1433  static ConstString g_sect_name_data("__data");
1434  static ConstString g_sect_name_go_symtab("__gosymtab");
1435 
// Name-based classification: the first matching name decides the type.
1436  if (section_name == g_sect_name_dwarf_debug_abbrev)
1438  if (section_name == g_sect_name_dwarf_debug_aranges)
1440  if (section_name == g_sect_name_dwarf_debug_frame)
1442  if (section_name == g_sect_name_dwarf_debug_info)
1444  if (section_name == g_sect_name_dwarf_debug_line)
1446  if (section_name == g_sect_name_dwarf_debug_loc)
1448  if (section_name == g_sect_name_dwarf_debug_loclists)
1450  if (section_name == g_sect_name_dwarf_debug_macinfo)
1452  if (section_name == g_sect_name_dwarf_debug_names)
1454  if (section_name == g_sect_name_dwarf_debug_pubnames)
1456  if (section_name == g_sect_name_dwarf_debug_pubtypes)
1458  if (section_name == g_sect_name_dwarf_debug_ranges)
1460  if (section_name == g_sect_name_dwarf_debug_str)
1462  if (section_name == g_sect_name_dwarf_debug_types)
1464  if (section_name == g_sect_name_dwarf_apple_names)
1466  if (section_name == g_sect_name_dwarf_apple_types)
1468  if (section_name == g_sect_name_dwarf_apple_namespaces)
1470  if (section_name == g_sect_name_dwarf_apple_objc)
1472  if (section_name == g_sect_name_objc_selrefs)
1474  if (section_name == g_sect_name_objc_msgrefs)
1476  if (section_name == g_sect_name_eh_frame)
1477  return eSectionTypeEHFrame;
1478  if (section_name == g_sect_name_compact_unwind)
1480  if (section_name == g_sect_name_cfstring)
1482  if (section_name == g_sect_name_go_symtab)
1483  return eSectionTypeGoSymtab;
1484  if (section_name == g_sect_name_objc_data ||
1485  section_name == g_sect_name_objc_classrefs ||
1486  section_name == g_sect_name_objc_superrefs ||
1487  section_name == g_sect_name_objc_const ||
1488  section_name == g_sect_name_objc_classlist) {
1489  return eSectionTypeDataPointers;
1490  }
1491 
// No well-known name matched: fall back to the Mach-O section type bits.
1492  switch (mach_sect_type) {
1493  // TODO: categorize sections by other flags for regular sections
1494  case S_REGULAR:
1495  if (section_name == g_sect_name_text)
1496  return eSectionTypeCode;
1497  if (section_name == g_sect_name_data)
1498  return eSectionTypeData;
1499  return eSectionTypeOther;
1500  case S_ZEROFILL:
1501  return eSectionTypeZeroFill;
1502  case S_CSTRING_LITERALS: // section with only literal C strings
1503  return eSectionTypeDataCString;
1504  case S_4BYTE_LITERALS: // section with only 4 byte literals
1505  return eSectionTypeData4;
1506  case S_8BYTE_LITERALS: // section with only 8 byte literals
1507  return eSectionTypeData8;
1508  case S_LITERAL_POINTERS: // section with only pointers to literals
1509  return eSectionTypeDataPointers;
1510  case S_NON_LAZY_SYMBOL_POINTERS: // section with only non-lazy symbol pointers
1511  return eSectionTypeDataPointers;
1512  case S_LAZY_SYMBOL_POINTERS: // section with only lazy symbol pointers
1513  return eSectionTypeDataPointers;
1514  case S_SYMBOL_STUBS: // section with only symbol stubs, byte size of stub in
1515  // the reserved2 field
1516  return eSectionTypeCode;
1517  case S_MOD_INIT_FUNC_POINTERS: // section with only function pointers for
1518  // initialization
1519  return eSectionTypeDataPointers;
1520  case S_MOD_TERM_FUNC_POINTERS: // section with only function pointers for
1521  // termination
1522  return eSectionTypeDataPointers;
1523  case S_COALESCED:
1524  return eSectionTypeOther;
1525  case S_GB_ZEROFILL:
1526  return eSectionTypeZeroFill;
1527  case S_INTERPOSING: // section with only pairs of function pointers for
1528  // interposing
1529  return eSectionTypeCode;
1530  case S_16BYTE_LITERALS: // section with only 16 byte literals
1531  return eSectionTypeData16;
1532  case S_DTRACE_DOF:
1533  return eSectionTypeDebug;
1534  case S_LAZY_DYLIB_SYMBOL_POINTERS:
1535  return eSectionTypeDataPointers;
1536  default:
1537  return eSectionTypeOther;
1538  }
1539 }
1540 
// Set by ProcessSegmentCommand() when it rewrites a section's file address;
// CreateSections() then calls SectionFileAddressesChanged() on the module.
1546  bool FileAddressesChanged = false;
1547 
1551 };
1552 
// Parses one LC_SEGMENT / LC_SEGMENT_64 load command and the section headers
// that follow it, creating the corresponding Section objects.
//
// load_cmd_  Generic load command header (cmd, cmdsize) already read by the
//            caller.
// offset     Offset in m_data of the first byte after that header.
// cmd_idx    Index of the load command; only forwarded to
//            SanitizeSegmentCommand().
// context    Running state shared across segments: the unified section list,
//            the next segment/section indexes, the encrypted file ranges and
//            the FileAddressesChanged flag.
//
// Returns early, without creating anything, if the segment name or the
// trailing 32-bit fields cannot be read from m_data.
1553 void ObjectFileMachO::ProcessSegmentCommand(const load_command &load_cmd_,
1554  lldb::offset_t offset,
1555  uint32_t cmd_idx,
1556  SegmentParsingContext &context) {
// Only the generic header is copied in; every other field of load_cmd is
// read from m_data below.
1557  segment_command_64 load_cmd;
1558  memcpy(&load_cmd, &load_cmd_, sizeof(load_cmd_));
1559 
1560  if (!m_data.GetU8(&offset, (uint8_t *)load_cmd.segname, 16))
1561  return;
1562 
1563  ModuleSP module_sp = GetModule();
1564  const bool is_core = GetType() == eTypeCoreFile;
1565  const bool is_dsym = (m_header.filetype == MH_DSYM);
1566  bool add_section = true;
1567  bool add_to_unified = true;
// segname is a fixed-size field and may lack a terminator, hence strnlen.
1568  ConstString const_segname(
1569  load_cmd.segname, strnlen(load_cmd.segname, sizeof(load_cmd.segname)));
1570 
1571  SectionSP unified_section_sp(
1572  context.UnifiedList.FindSectionByName(const_segname));
1573  if (is_dsym && unified_section_sp) {
1574  if (const_segname == GetSegmentNameLINKEDIT()) {
1575  // We need to keep the __LINKEDIT segment private to this object file
1576  // only
1577  add_to_unified = false;
1578  } else {
1579  // This is the dSYM file and this section has already been created by the
1580  // object file, no need to create it.
1581  add_section = false;
1582  }
1583  }
1584  load_cmd.vmaddr = m_data.GetAddress(&offset);
1585  load_cmd.vmsize = m_data.GetAddress(&offset);
1586  load_cmd.fileoff = m_data.GetAddress(&offset);
1587  load_cmd.filesize = m_data.GetAddress(&offset);
// Read the four trailing 32-bit fields, starting at maxprot.
1588  if (!m_data.GetU32(&offset, &load_cmd.maxprot, 4))
1589  return;
1590 
// May rewrite fileoff/filesize so the segment stays within the file.
1591  SanitizeSegmentCommand(load_cmd, cmd_idx);
1592 
1593  const uint32_t segment_permissions = GetSegmentPermissions(load_cmd);
1594  const bool segment_is_encrypted =
1595  (load_cmd.flags & SG_PROTECTED_VERSION_1) != 0;
1596 
1597  // Keep a list of mach segments around in case we need to get at data that
1598  // isn't stored in the abstracted Sections.
1599  m_mach_segments.push_back(load_cmd);
1600 
1601  // Use a segment ID of the segment index shifted left by 8 so they never
1602  // conflict with any of the sections.
1603  SectionSP segment_sp;
1604  if (add_section && (const_segname || is_core)) {
1605  segment_sp = std::make_shared<Section>(
1606  module_sp, // Module to which this section belongs
1607  this, // Object file to which this sections belongs
1608  ++context.NextSegmentIdx
1609  << 8, // Section ID is the 1 based segment index
1610  // shifted left by 8 bits as not to collide with any of the 256
1611  // section IDs that are possible
1612  const_segname, // Name of this section
1613  eSectionTypeContainer, // This section is a container of other
1614  // sections.
1615  load_cmd.vmaddr, // File VM address == addresses as they are
1616  // found in the object file
1617  load_cmd.vmsize, // VM size in bytes of this section
1618  load_cmd.fileoff, // Offset to the data for this section in
1619  // the file
1620  load_cmd.filesize, // Size in bytes of this section as found
1621  // in the file
1622  0, // Segments have no alignment information
1623  load_cmd.flags); // Flags for this section
1624 
1625  segment_sp->SetIsEncrypted(segment_is_encrypted);
1626  m_sections_up->AddSection(segment_sp);
1627  segment_sp->SetPermissions(segment_permissions);
1628  if (add_to_unified)
1629  context.UnifiedList.AddSection(segment_sp);
1630  } else if (unified_section_sp) {
// No new segment is created: reuse the one already in the unified list.
1631  if (is_dsym && unified_section_sp->GetFileAddress() != load_cmd.vmaddr) {
1632  // Check to see if the module was read from memory?
1633  if (module_sp->GetObjectFile()->IsInMemory()) {
1634  // We have a module that is in memory and needs to have its file
1635  // address adjusted. We need to do this because when we load a file
1636  // from memory, its addresses will be slid already, yet the addresses
1637  // in the new symbol file will still be unslid. Since everything is
1638  // stored as section offset, this shouldn't cause any problems.
1639 
1640  // Make sure we've parsed the symbol table from the ObjectFile before
1641  // we go around changing its Sections.
1642  module_sp->GetObjectFile()->GetSymtab();
1643  // eh_frame would present the same problems but we parse that on a per-
1644  // function basis as-needed so it's more difficult to remove its use of
1645  // the Sections. Realistically, the environments where this code path
1646  // will be taken will not have eh_frame sections.
1647 
1648  unified_section_sp->SetFileAddress(load_cmd.vmaddr);
1649 
1650  // Notify the module that the section addresses have been changed once
1651  // we're done so any file-address caches can be updated.
1652  context.FileAddressesChanged = true;
1653  }
1654  }
1655  m_sections_up->AddSection(unified_section_sp);
1656  }
1657 
1658  struct section_64 sect64;
1659  ::memset(&sect64, 0, sizeof(sect64));
1660  // Push a section into our mach sections for the section at index zero
1661  // (NO_SECT) if we don't have any mach sections yet...
1662  if (m_mach_sections.empty())
1663  m_mach_sections.push_back(sect64);
1664  uint32_t segment_sect_idx;
// Remember the first section ID this segment will hand out; the dSYM fix-up
// at the end walks the IDs from here to context.NextSectionIdx.
1665  const lldb::user_id_t first_segment_sectID = context.NextSectionIdx + 1;
1666 
// Number of 32-bit fields read starting at sect64.offset: 7 for LC_SEGMENT,
// 8 otherwise.
1667  const uint32_t num_u32s = load_cmd.cmd == LC_SEGMENT ? 7 : 8;
1668  for (segment_sect_idx = 0; segment_sect_idx < load_cmd.nsects;
1669  ++segment_sect_idx) {
// Any short read ends section parsing for this segment.
1670  if (m_data.GetU8(&offset, (uint8_t *)sect64.sectname,
1671  sizeof(sect64.sectname)) == nullptr)
1672  break;
1673  if (m_data.GetU8(&offset, (uint8_t *)sect64.segname,
1674  sizeof(sect64.segname)) == nullptr)
1675  break;
1676  sect64.addr = m_data.GetAddress(&offset);
1677  sect64.size = m_data.GetAddress(&offset);
1678 
1679  if (m_data.GetU32(&offset, &sect64.offset, num_u32s) == nullptr)
1680  break;
1681 
// Same shared-cache adjustment that SanitizeSegmentCommand() applies to the
// segment: make the file offset relative to the image's __TEXT address.
1682  if ((m_header.flags & MH_DYLIB_IN_CACHE) && !IsInMemory()) {
1683  sect64.offset = sect64.addr - m_text_address;
1684  }
1685 
1686  // Keep a list of mach sections around in case we need to get at data that
1687  // isn't stored in the abstracted Sections.
1688  m_mach_sections.push_back(sect64);
1689 
1690  if (add_section) {
1691  ConstString section_name(
1692  sect64.sectname, strnlen(sect64.sectname, sizeof(sect64.sectname)));
1693  if (!const_segname) {
1694  // We have a segment with no name so we need to conjure up segments
1695  // that correspond to the section's segname if there isn't already such
1696  // a section. If there is such a section, we resize the section so that
1697  // it spans all sections. We also mark these sections as fake so
1698  // address matches don't hit if they land in the gaps between the child
1699  // sections.
1700  const_segname.SetTrimmedCStringWithLength(sect64.segname,
1701  sizeof(sect64.segname));
1702  segment_sp = context.UnifiedList.FindSectionByName(const_segname);
1703  if (segment_sp.get()) {
1704  Section *segment = segment_sp.get();
1705  // Grow the section size as needed.
1706  const lldb::addr_t sect64_min_addr = sect64.addr;
1707  const lldb::addr_t sect64_max_addr = sect64_min_addr + sect64.size;
1708  const lldb::addr_t curr_seg_byte_size = segment->GetByteSize();
1709  const lldb::addr_t curr_seg_min_addr = segment->GetFileAddress();
1710  const lldb::addr_t curr_seg_max_addr =
1711  curr_seg_min_addr + curr_seg_byte_size;
1712  if (sect64_min_addr >= curr_seg_min_addr) {
1713  const lldb::addr_t new_seg_byte_size =
1714  sect64_max_addr - curr_seg_min_addr;
1715  // Only grow the section size if needed
1716  if (new_seg_byte_size > curr_seg_byte_size)
1717  segment->SetByteSize(new_seg_byte_size);
1718  } else {
1719  // We need to change the base address of the segment and adjust the
1720  // child section offsets for all existing children.
1721  const lldb::addr_t slide_amount =
1722  sect64_min_addr - curr_seg_min_addr;
1723  segment->Slide(slide_amount, false);
1724  segment->GetChildren().Slide(-slide_amount, false);
1725  segment->SetByteSize(curr_seg_max_addr - sect64_min_addr);
1726  }
1727 
1728  // Grow the file range of the segment as needed.
1729  if (sect64.offset) {
1730  const lldb::addr_t segment_min_file_offset =
1731  segment->GetFileOffset();
1732  const lldb::addr_t segment_max_file_offset =
1733  segment_min_file_offset + segment->GetFileSize();
1734 
1735  const lldb::addr_t section_min_file_offset = sect64.offset;
1736  const lldb::addr_t section_max_file_offset =
1737  section_min_file_offset + sect64.size;
1738  const lldb::addr_t new_file_offset =
1739  std::min(section_min_file_offset, segment_min_file_offset);
1740  const lldb::addr_t new_file_size =
1741  std::max(section_max_file_offset, segment_max_file_offset) -
1742  new_file_offset;
1743  segment->SetFileOffset(new_file_offset);
1744  segment->SetFileSize(new_file_size);
1745  }
1746  } else {
1747  // Create a fake section for the section's named segment
1748  segment_sp = std::make_shared<Section>(
1749  segment_sp, // Parent section
1750  module_sp, // Module to which this section belongs
1751  this, // Object file to which this section belongs
1752  ++context.NextSegmentIdx
1753  << 8, // Section ID is the 1 based segment index
1754  // shifted left by 8 bits as not to
1755  // collide with any of the 256 section IDs
1756  // that are possible
1757  const_segname, // Name of this section
1758  eSectionTypeContainer, // This section is a container of
1759  // other sections.
1760  sect64.addr, // File VM address == addresses as they are
1761  // found in the object file
1762  sect64.size, // VM size in bytes of this section
1763  sect64.offset, // Offset to the data for this section in
1764  // the file
1765  sect64.offset ? sect64.size : 0, // Size in bytes of
1766  // this section as
1767  // found in the file
1768  sect64.align,
1769  load_cmd.flags); // Flags for this section
1770  segment_sp->SetIsFake(true);
1771  segment_sp->SetPermissions(segment_permissions);
1772  m_sections_up->AddSection(segment_sp);
1773  if (add_to_unified)
1774  context.UnifiedList.AddSection(segment_sp);
1775  segment_sp->SetIsEncrypted(segment_is_encrypted);
1776  }
1777  }
1778  assert(segment_sp.get());
1779 
1780  lldb::SectionType sect_type = GetSectionType(sect64.flags, section_name);
1781 
// The child section's address is stored relative to its parent segment; a
// file offset of 0 is treated as "no bytes in the file".
1782  SectionSP section_sp(new Section(
1783  segment_sp, module_sp, this, ++context.NextSectionIdx, section_name,
1784  sect_type, sect64.addr - segment_sp->GetFileAddress(), sect64.size,
1785  sect64.offset, sect64.offset == 0 ? 0 : sect64.size, sect64.align,
1786  sect64.flags));
1787  // Set the section to be encrypted to match the segment
1788 
// When the segment itself is not flagged as encrypted, consult the
// encrypted file ranges gathered from the encryption load commands.
1789  bool section_is_encrypted = false;
1790  if (!segment_is_encrypted && load_cmd.filesize != 0)
1791  section_is_encrypted = context.EncryptedRanges.FindEntryThatContains(
1792  sect64.offset) != nullptr;
1793 
1794  section_sp->SetIsEncrypted(segment_is_encrypted || section_is_encrypted);
1795  section_sp->SetPermissions(segment_permissions);
1796  segment_sp->GetChildren().AddSection(section_sp);
1797 
// Fake segments are looked up again for every section, so forget the
// current one before the next iteration.
1798  if (segment_sp->IsFake()) {
1799  segment_sp.reset();
1800  const_segname.Clear();
1801  }
1802  }
1803  }
// dSYM only: give zero-sized sections of this segment a size - the distance
// to the next section, or the segment's vmsize when there is no next one.
1804  if (segment_sp && is_dsym) {
1805  if (first_segment_sectID <= context.NextSectionIdx) {
1806  lldb::user_id_t sect_uid;
1807  for (sect_uid = first_segment_sectID; sect_uid <= context.NextSectionIdx;
1808  ++sect_uid) {
1809  SectionSP curr_section_sp(
1810  segment_sp->GetChildren().FindSectionByID(sect_uid));
1811  SectionSP next_section_sp;
1812  if (sect_uid + 1 <= context.NextSectionIdx)
1813  next_section_sp =
1814  segment_sp->GetChildren().FindSectionByID(sect_uid + 1);
1815 
1816  if (curr_section_sp.get()) {
1817  if (curr_section_sp->GetByteSize() == 0) {
1818  if (next_section_sp.get() != nullptr)
1819  curr_section_sp->SetByteSize(next_section_sp->GetFileAddress() -
1820  curr_section_sp->GetFileAddress());
1821  else
1822  curr_section_sp->SetByteSize(load_cmd.vmsize);
1823  }
1824  }
1825  }
1826  }
1827  }
1828 }
1829 
1830 void ObjectFileMachO::ProcessDysymtabCommand(const load_command &load_cmd,
1831  lldb::offset_t offset) {
1832  m_dysymtab.cmd = load_cmd.cmd;
1833  m_dysymtab.cmdsize = load_cmd.cmdsize;
1834  m_data.GetU32(&offset, &m_dysymtab.ilocalsym,
1835  (sizeof(m_dysymtab) / sizeof(uint32_t)) - 2);
1836 }
1837 
// Builds this object file's section list (m_sections_up) by walking the load
// commands once. Segment commands create sections (also registering them in
// unified_section_list as appropriate) and LC_DYSYMTAB is recorded along the
// way. Does nothing if the section list already exists.
1838 void ObjectFileMachO::CreateSections(SectionList &unified_section_list) {
1839  if (m_sections_up)
1840  return;
1841 
1842  m_sections_up = std::make_unique<SectionList>();
1843 
1845  // bool dump_sections = false;
1846  ModuleSP module_sp(GetModule());
1847 
// Load commands start right after the Mach-O header.
1848  offset = MachHeaderSizeFromMagic(m_header.magic);
1849 
1850  SegmentParsingContext context(GetEncryptedFileRanges(), unified_section_list);
1851  struct load_command load_cmd;
1852  for (uint32_t i = 0; i < m_header.ncmds; ++i) {
1853  const lldb::offset_t load_cmd_offset = offset;
// Read the two leading 32-bit fields (cmd, cmdsize); stop on a short read.
1854  if (m_data.GetU32(&offset, &load_cmd, 2) == nullptr)
1855  break;
1856 
1857  if (load_cmd.cmd == LC_SEGMENT || load_cmd.cmd == LC_SEGMENT_64)
1858  ProcessSegmentCommand(load_cmd, offset, i, context);
1859  else if (load_cmd.cmd == LC_DYSYMTAB)
1860  ProcessDysymtabCommand(load_cmd, offset);
1861 
// Advance by the command's declared size, not by what the handlers consumed.
1862  offset = load_cmd_offset + load_cmd.cmdsize;
1863  }
1864 
// Tell the module if any section's file address was rewritten while parsing.
1865  if (context.FileAddressesChanged && module_sp)
1866  module_sp->SectionFileAddressesChanged();
1867 }
1868 
1870 public:
1872  : m_section_list(section_list), m_section_infos() {
1873  // Get the number of sections down to a depth of 1 to include all segments
1874  // and their sections, but no other sections that may be added for debug
1875  // map or
1876  m_section_infos.resize(section_list->GetNumSections(1));
1877  }
1878 
// Returns the section with ID n_sect if file_addr falls inside it, or if the
// section is empty and starts exactly at file_addr. n_sect == 0 yields an
// empty SectionSP. Lookups are cached per n_sect in m_section_infos; a
// section that cannot be found is reported through Host::SystemLog.
1879  SectionSP GetSection(uint8_t n_sect, addr_t file_addr) {
1880  if (n_sect == 0)
1881  return SectionSP();
1882  if (n_sect < m_section_infos.size()) {
// First request for this index: look the section up and cache its range.
1883  if (!m_section_infos[n_sect].section_sp) {
1884  SectionSP section_sp(m_section_list->FindSectionByID(n_sect));
1885  m_section_infos[n_sect].section_sp = section_sp;
1886  if (section_sp) {
1887  m_section_infos[n_sect].vm_range.SetBaseAddress(
1888  section_sp->GetFileAddress());
1889  m_section_infos[n_sect].vm_range.SetByteSize(
1890  section_sp->GetByteSize());
1891  } else {
// Use the first section's object file (when there is one) to name the file
// in the error message.
1892  std::string filename = "<unknown>";
1893  SectionSP first_section_sp(m_section_list->GetSectionAtIndex(0));
1894  if (first_section_sp)
1895  filename = first_section_sp->GetObjectFile()->GetFileSpec().GetPath();
1896 
1897  Host::SystemLog(Host::eSystemLogError,
1898  "error: unable to find section %d for a symbol in %s, corrupt file?\n",
1899  n_sect,
1900  filename.c_str());
1901  }
1902  }
1903  if (m_section_infos[n_sect].vm_range.Contains(file_addr)) {
1904  // Symbol is in section.
1905  return m_section_infos[n_sect].section_sp;
1906  } else if (m_section_infos[n_sect].vm_range.GetByteSize() == 0 &&
1907  m_section_infos[n_sect].vm_range.GetBaseAddress() ==
1908  file_addr) {
1909  // Symbol is in section with zero size, but has the same start address
1910  // as the section. This can happen with linker symbols (symbols that
1911  // start with the letter 'l' or 'L').
1912  return m_section_infos[n_sect].section_sp;
1913  }
1914  }
1916  }
1917 
1918 protected:
// Cache entry for one section index: the section found for it and the
// address range recorded from that section.
1919  struct SectionInfo {
1921 
1923  SectionSP section_sp;
1924  };
// One entry per section index, sized in the constructor.
1926  std::vector<SectionInfo> m_section_infos;
1927 };
1928 
1929 #define TRIE_SYMBOL_IS_THUMB (1ULL << 63)
// One decoded terminal entry of the export trie.
1930 struct TrieEntry {
// Debug helper: prints address, flags and other as 16-digit hex, then the
// name, followed by the import name when one is set.
1931  void Dump() const {
1932  printf("0x%16.16llx 0x%16.16llx 0x%16.16llx \"%s\"",
1933  static_cast<unsigned long long>(address),
1934  static_cast<unsigned long long>(flags),
1935  static_cast<unsigned long long>(other), name.GetCString());
1936  if (import_name)
1937  printf(" -> \"%s\"\n", import_name.GetCString());
1938  else
1939  printf("\n");
1940  }
1943  uint64_t flags =
1944  0; // EXPORT_SYMBOL_FLAGS_REEXPORT, EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER,
1945  // TRIE_SYMBOL_IS_THUMB
// Extra per-entry value; the trie parser stores the dylib ordinal of a
// re-export or the ULEB value that follows a stub-and-resolver address here.
1946  uint64_t other = 0;
1948 };
1949 
1953 
1955 
// Debug helper: prints the index and node offset, then the wrapped entry.
1956  void Dump(uint32_t idx) const {
1957  printf("[%3u] 0x%16.16llx: ", idx,
1958  static_cast<unsigned long long>(nodeOffset));
1959  entry.Dump();
1960  }
1961 
// Orders entries by their node offset only.
1962  bool operator<(const TrieEntryWithOffset &other) const {
1963  return (nodeOffset < other.nodeOffset);
1964  }
1965 };
1966 
// Recursively walks one node of the export trie held in `data`, starting at
// `offset`, and collects the terminal entries it finds.
// NOTE(review): the first line of this signature (return type, function name
// and the `data` / `offset` parameters used below) is missing from this
// listing; confirm it against the full file.
//
//   is_arm              when set, resolver addresses are masked with
//                       THUMB_ADDRESS_BIT_MASK before being recorded.
//   text_seg_base_addr  added to decoded addresses unless it equals
//                       LLDB_INVALID_ADDRESS.
//   nameSlices          stack of edge labels from the root to this node; their
//                       concatenation forms the symbol name.
//   resolver_addresses  receives the rebased resolver value of every
//                       non-reexport entry flagged STUB_AND_RESOLVER.
//   reexports           receives re-export entries that carry a non-empty
//                       import name.
//   ext_symbols         receives entries whose decoded flags word is zero.
//
// Returns false when a child edge label cannot be read or a child subtree
// fails; returns true otherwise, including when `offset` is not valid.
// NOTE(review): nothing here tracks visited nodes, so child offsets that point
// back at an earlier node would recurse without bound -- confirm the trie data
// is trusted or bounded elsewhere.
1968  const bool is_arm, addr_t text_seg_base_addr,
1969  std::vector<llvm::StringRef> &nameSlices,
1970  std::set<lldb::addr_t> &resolver_addresses,
1971  std::vector<TrieEntryWithOffset> &reexports,
1972  std::vector<TrieEntryWithOffset> &ext_symbols) {
// An out-of-range node offset is treated as "nothing to parse", not an error.
1973  if (!data.ValidOffset(offset))
1974  return true;
1975 
1976  // Terminal node -- end of a branch, possibly add this to
1977  // the symbol table or resolver table.
1978  const uint64_t terminalSize = data.GetULEB128(&offset);
// The child table begins right after the terminal payload, whether or not the
// payload is consumed in full below.
1979  lldb::offset_t children_offset = offset + terminalSize;
1980  if (terminalSize != 0) {
// `offset` here points at the start of the terminal payload.
1981  TrieEntryWithOffset e(offset);
1982  e.entry.flags = data.GetULEB128(&offset);
1983  const char *import_name = nullptr;
1984  if (e.entry.flags & EXPORT_SYMBOL_FLAGS_REEXPORT) {
// Re-exports carry no address: just a dylib ordinal and an import name.
1985  e.entry.address = 0;
1986  e.entry.other = data.GetULEB128(&offset); // dylib ordinal
1987  import_name = data.GetCStr(&offset);
1988  } else {
// Regular export: decoded address, rebased when a base address is supplied.
1989  e.entry.address = data.GetULEB128(&offset);
1990  if (text_seg_base_addr != LLDB_INVALID_ADDRESS)
1991  e.entry.address += text_seg_base_addr;
1992  if (e.entry.flags & EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER) {
// `other` keeps the raw decoded value; the rebased (and, on ARM, masked) copy
// goes into resolver_addresses.
1993  e.entry.other = data.GetULEB128(&offset);
1994  uint64_t resolver_addr = e.entry.other;
1995  if (text_seg_base_addr != LLDB_INVALID_ADDRESS)
1996  resolver_addr += text_seg_base_addr;
1997  if (is_arm)
1998  resolver_addr &= THUMB_ADDRESS_BIT_MASK;
1999  resolver_addresses.insert(resolver_addr);
2000  } else
2001  e.entry.other = 0;
2002  }
2003  bool add_this_entry = false;
2004  if (Flags(e.entry.flags).Test(EXPORT_SYMBOL_FLAGS_REEXPORT) &&
2005  import_name && import_name[0]) {
2006  // add symbols that are reexport symbols with a valid import name.
2007  add_this_entry = true;
2008  } else if (e.entry.flags == 0 &&
2009  (import_name == nullptr || import_name[0] == '\0')) {
2010  // add externally visible symbols, in case the nlist record has
2011  // been stripped/omitted.
2012  add_this_entry = true;
2013  }
2014  if (add_this_entry) {
// Rebuild the full symbol name from the edge labels accumulated on the way
// down to this node.
2015  std::string name;
2016  if (!nameSlices.empty()) {
2017  for (auto name_slice : nameSlices)
2018  name.append(name_slice.data(), name_slice.size());
2019  }
// Names of length 0 or 1 leave e.entry.name unset.
2020  if (name.size() > 1) {
2021  // Skip the leading '_'
2022  e.entry.name.SetCStringWithLength(name.c_str() + 1, name.size() - 1);
2023  }
// import_name is non-null here only on the re-export path, where it was already
// checked to be non-empty, so skipping one character stays inside the string.
2024  if (import_name) {
2025  // Skip the leading '_'
2026  e.entry.import_name.SetCString(import_name + 1);
2027  }
2028  if (Flags(e.entry.flags).Test(EXPORT_SYMBOL_FLAGS_REEXPORT)) {
2029  reexports.push_back(e);
2030  } else {
// NOTE(review): the body of this branch is not present in this listing;
// confirm what is done for odd addresses on ARM against the full file.
2031  if (is_arm && (e.entry.address & 1)) {
2034  }
2035  ext_symbols.push_back(e);
2036  }
2037  }
2038  }
2039 
// Child table: a one-byte count, then per child an edge label (C string) and a
// ULEB128 node offset.
2040  const uint8_t childrenCount = data.GetU8(&children_offset);
2041  for (uint8_t i = 0; i < childrenCount; ++i) {
2042  const char *cstr = data.GetCStr(&children_offset);
2043  if (cstr)
2044  nameSlices.push_back(llvm::StringRef(cstr));
2045  else
2046  return false; // Corrupt data
2047  lldb::offset_t childNodeOffset = data.GetULEB128(&children_offset);
// A zero child offset is skipped rather than followed.
2048  if (childNodeOffset) {
2049  if (!ParseTrieEntries(data, childNodeOffset, is_arm, text_seg_base_addr,
2050  nameSlices, resolver_addresses, reexports,
2051  ext_symbols)) {
// On failure the label pushed above is left on nameSlices.
2052  return false;
2053  }
2054  }
// Drop this child's label before moving on to the next sibling.
2055  nameSlices.pop_back();
2056  }
2057  return true;
2058 }
2059 
// Classifies a symbol from the section it lives in, and for Objective-C
// metadata symbols also trims the well-known prefix off `symbol_name`.
//
//   symbol_name               in/out; advanced past "OBJC_CLASS_$_",
//                             "OBJC_METACLASS_$_" or "OBJC_IVAR_$_" when one of
//                             those prefixes matches. May be null.
//   demangled_is_synthesized  set to true only when such a prefix was trimmed;
//                             otherwise left as the caller passed it.
//   text_section_sp,
//   data_section_sp,
//   data_dirty_section_sp,
//   data_const_section_sp     sections that `symbol_section` is tested against
//                             with IsDescendant().
//   symbol_section            section containing the symbol; dereferenced
//                             unconditionally, so it must not be null.
//
// Returns the resulting SymbolType; when no rule matches, `type` keeps its
// initial value.
// NOTE(review): the declaration and initial value of `type` are missing from
// this listing; confirm them against the full file.
2060 static SymbolType GetSymbolType(const char *&symbol_name,
2061  bool &demangled_is_synthesized,
2062  const SectionSP &text_section_sp,
2063  const SectionSP &data_section_sp,
2064  const SectionSP &data_dirty_section_sp,
2065  const SectionSP &data_const_section_sp,
2066  const SectionSP &symbol_section) {
2068 
2069  const char *symbol_sect_name = symbol_section->GetName().AsCString();
2070  if (symbol_section->IsDescendant(text_section_sp.get())) {
// Under the text section: code unless the IsClear() test on the three
// instruction attributes succeeds, in which case it is data.
2071  if (symbol_section->IsClear(S_ATTR_PURE_INSTRUCTIONS |
2072  S_ATTR_SELF_MODIFYING_CODE |
2073  S_ATTR_SOME_INSTRUCTIONS))
2074  type = eSymbolTypeData;
2075  else
2076  type = eSymbolTypeCode;
2077  } else if (symbol_section->IsDescendant(data_section_sp.get()) ||
2078  symbol_section->IsDescendant(data_dirty_section_sp.get()) ||
2079  symbol_section->IsDescendant(data_const_section_sp.get())) {
// strstr() returning the start of the string is a "begins with" test.
2080  if (symbol_sect_name &&
2081  ::strstr(symbol_sect_name, "__objc") == symbol_sect_name) {
2082  type = eSymbolTypeRuntime;
2083 
2084  if (symbol_name) {
2085  llvm::StringRef symbol_name_ref(symbol_name);
// Cheap pre-filter so the three longer prefix compares run only for OBJC_ names.
2086  if (symbol_name_ref.startswith("OBJC_")) {
2087  static const llvm::StringRef g_objc_v2_prefix_class("OBJC_CLASS_$_");
2088  static const llvm::StringRef g_objc_v2_prefix_metaclass(
2089  "OBJC_METACLASS_$_");
2090  static const llvm::StringRef g_objc_v2_prefix_ivar("OBJC_IVAR_$_");
2091  if (symbol_name_ref.startswith(g_objc_v2_prefix_class)) {
2092  symbol_name = symbol_name + g_objc_v2_prefix_class.size();
2093  type = eSymbolTypeObjCClass;
2094  demangled_is_synthesized = true;
2095  } else if (symbol_name_ref.startswith(g_objc_v2_prefix_metaclass)) {
2096  symbol_name = symbol_name + g_objc_v2_prefix_metaclass.size();
2097  type = eSymbolTypeObjCMetaClass;
2098  demangled_is_synthesized = true;
2099  } else if (symbol_name_ref.startswith(g_objc_v2_prefix_ivar)) {
2100  symbol_name = symbol_name + g_objc_v2_prefix_ivar.size();
2101  type = eSymbolTypeObjCIVar;
2102  demangled_is_synthesized = true;
2103  }
2104  }
2105  }
2106  } else if (symbol_sect_name &&
2107  ::strstr(symbol_sect_name, "__gcc_except_tab") ==
2108  symbol_sect_name) {
2109  type = eSymbolTypeException;
2110  } else {
2111  type = eSymbolTypeData;
2112  }
// Outside the text and data sections, only sections whose name begins with
// "__IMPORT" are classified, as trampolines.
2113  } else if (symbol_sect_name &&
2114  ::strstr(symbol_sect_name, "__IMPORT") == symbol_sect_name) {
2115  type = eSymbolTypeTrampoline;
2116  }
2117  return type;
2118 }
2119 
2120 // Read the UUID out of a dyld_shared_cache file on-disk.
// Maps the first sizeof(lldb_copy_dyld_cache_header_v1) bytes of
// `dyld_shared_cache`, checks that the file starts with "dyld_v", and if so
// reads the uuid field at its offset within that header struct.
// Returns a default-constructed UUID when the file cannot be mapped or the
// magic prefix does not match.
// NOTE(review): the first line of this signature (return type, function name
// and the `dyld_shared_cache` parameter) and the declaration of `log` are
// missing from this listing; confirm them against the full file.
2122  const ByteOrder byte_order,
2123  const uint32_t addr_byte_size) {
2124  UUID dsc_uuid;
2125  DataBufferSP DscData = MapFileData(
2126  dyld_shared_cache, sizeof(struct lldb_copy_dyld_cache_header_v1), 0);
2127  if (!DscData)
2128  return dsc_uuid;
2129  DataExtractor dsc_header_data(DscData, byte_order, addr_byte_size);
2130 
// Only the first six bytes of the magic are compared, so any "dyld_v<N> <arch>"
// variant is accepted.
2131  char version_str[7];
2132  lldb::offset_t offset = 0;
// NOTE(review): the GetData() result is handed to memcpy unchecked; confirm it
// cannot be null when the mapped file is shorter than six bytes.
2133  memcpy(version_str, dsc_header_data.GetData(&offset, 6), 6);
2134  version_str[6] = '\0';
2135  if (strcmp(version_str, "dyld_v") == 0) {
2136  offset = offsetof(struct lldb_copy_dyld_cache_header_v1, uuid);
2137  dsc_uuid = UUID::fromOptionalData(
2138  dsc_header_data.GetData(&offset, sizeof(uuid_t)), sizeof(uuid_t));
2139  }
// Log only when logging is enabled and a valid UUID was actually read.
2141  if (log && dsc_uuid.IsValid()) {
2142  LLDB_LOGF(log, "Shared cache %s has UUID %s",
2143  dyld_shared_cache.GetPath().c_str(),
2144  dsc_uuid.GetAsString().c_str());
2145  }
2146  return dsc_uuid;
2147 }
2148 
2149 static llvm::Optional<struct nlist_64>
2150 ParseNList(DataExtractor &nlist_data, lldb::offset_t &nlist_data_offset,
2151  size_t nlist_byte_size) {
2152  struct nlist_64 nlist;
2153  if (!nlist_data.ValidOffsetForDataOfSize(nlist_data_offset, nlist_byte_size))
2154  return {};
2155  nlist.n_strx = nlist_data.GetU32_unchecked(&nlist_data_offset);
2156  nlist.n_type = nlist_data.GetU8_unchecked(&nlist_data_offset);
2157  nlist.n_sect = nlist_data.GetU8_unchecked(&nlist_data_offset);
2158  nlist.n_desc = nlist_data.GetU16_unchecked(&nlist_data_offset);
2159  nlist.n_value = nlist_data.GetAddress_unchecked(&nlist_data_offset);
2160  return nlist;
2161 }
2162 
// Readable names for a true/false flag that distinguishes debug from non-debug
// symbols. NOTE(review): no use of these enumerators is visible in this part
// of the file; confirm where they are consumed.
2163 enum { DebugSymbols = true, NonDebugSymbols = false };
2164 
2166  LLDB_SCOPED_TIMERF("ObjectFileMachO::ParseSymtab () module = %s",
2167  m_file.GetFilename().AsCString(""));
2168  ModuleSP module_sp(GetModule());
2169  if (!module_sp)
2170  return 0;
2171 
2172  Progress progress(llvm::formatv("Parsing symbol table for {0}",
2173  m_file.GetFilename().AsCString("<Unknown>")));
2174 
2175  struct symtab_command symtab_load_command = {0, 0, 0, 0, 0, 0};
2176  struct linkedit_data_command function_starts_load_command = {0, 0, 0, 0};
2177  struct dyld_info_command dyld_info = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2178  // The data element of type bool indicates that this entry is thumb
2179  // code.
2180  typedef AddressDataArray<lldb::addr_t, bool, 100> FunctionStarts;
2181 
2182  // Record the address of every function/data that we add to the symtab.
2183  // We add symbols to the table in the order of most information (nlist
2184  // records) to least (function starts), and avoid duplicating symbols
2185  // via this set.
2186  std::set<addr_t> symbols_added;
2187  FunctionStarts function_starts;
2189  uint32_t i;
2190  FileSpecList dylib_files;
2192  llvm::StringRef g_objc_v2_prefix_class("_OBJC_CLASS_$_");
2193  llvm::StringRef g_objc_v2_prefix_metaclass("_OBJC_METACLASS_$_");
2194  llvm::StringRef g_objc_v2_prefix_ivar("_OBJC_IVAR_$_");
2195 
2196  for (i = 0; i < m_header.ncmds; ++i) {
2197  const lldb::offset_t cmd_offset = offset;
2198  // Read in the load command and load command size
2199  struct load_command lc;
2200  if (m_data.GetU32(&offset, &lc, 2) == nullptr)
2201  break;
2202  // Watch for the symbol table load command
2203  switch (lc.cmd) {
2204  case LC_SYMTAB:
2205  symtab_load_command.cmd = lc.cmd;
2206  symtab_load_command.cmdsize = lc.cmdsize;
2207  // Read in the rest of the symtab load command
2208  if (m_data.GetU32(&offset, &symtab_load_command.symoff, 4) ==
2209  nullptr) // fill in symoff, nsyms, stroff, strsize fields
2210  return 0;
2211  break;
2212 
2213  case LC_DYLD_INFO:
2214  case LC_DYLD_INFO_ONLY:
2215  if (m_data.GetU32(&offset, &dyld_info.rebase_off, 10)) {
2216  dyld_info.cmd = lc.cmd;
2217  dyld_info.cmdsize = lc.cmdsize;
2218  } else {
2219  memset(&dyld_info, 0, sizeof(dyld_info));
2220  }
2221  break;
2222 
2223  case LC_LOAD_DYLIB:
2224  case LC_LOAD_WEAK_DYLIB:
2225  case LC_REEXPORT_DYLIB:
2226  case LC_LOADFVMLIB:
2227  case LC_LOAD_UPWARD_DYLIB: {
2228  uint32_t name_offset = cmd_offset + m_data.GetU32(&offset);
2229  const char *path = m_data.PeekCStr(name_offset);
2230  if (path) {
2231  FileSpec file_spec(path);
2232  // Strip the path if there is @rpath, @executable, etc so we just use
2233  // the basename
2234  if (path[0] == '@')
2235  file_spec.GetDirectory().Clear();
2236 
2237  if (lc.cmd == LC_REEXPORT_DYLIB) {
2238  m_reexported_dylibs.AppendIfUnique(file_spec);
2239  }
2240 
2241  dylib_files.Append(file_spec);
2242  }
2243  } break;
2244 
2245  case LC_FUNCTION_STARTS:
2246  function_starts_load_command.cmd = lc.cmd;
2247  function_starts_load_command.cmdsize = lc.cmdsize;
2248  if (m_data.GetU32(&offset, &function_starts_load_command.dataoff, 2) ==
2249  nullptr) // fill in symoff, nsyms, stroff, strsize fields
2250  memset(&function_starts_load_command, 0,
2251  sizeof(function_starts_load_command));
2252  break;
2253 
2254  default:
2255  break;
2256  }
2257  offset = cmd_offset + lc.cmdsize;
2258  }
2259 
2260  if (!symtab_load_command.cmd)
2261  return 0;
2262 
2263  Symtab *symtab = m_symtab_up.get();
2264  SectionList *section_list = GetSectionList();
2265  if (section_list == nullptr)
2266  return 0;
2267 
2268  const uint32_t addr_byte_size = m_data.GetAddressByteSize();
2269  const ByteOrder byte_order = m_data.GetByteOrder();
2270  bool bit_width_32 = addr_byte_size == 4;
2271  const size_t nlist_byte_size =
2272  bit_width_32 ? sizeof(struct nlist) : sizeof(struct nlist_64);
2273 
2274  DataExtractor nlist_data(nullptr, 0, byte_order, addr_byte_size);
2275  DataExtractor strtab_data(nullptr, 0, byte_order, addr_byte_size);
2276  DataExtractor function_starts_data(nullptr, 0, byte_order, addr_byte_size);
2277  DataExtractor indirect_symbol_index_data(nullptr, 0, byte_order,
2278  addr_byte_size);
2279  DataExtractor dyld_trie_data(nullptr, 0, byte_order, addr_byte_size);
2280 
2281  const addr_t nlist_data_byte_size =
2282  symtab_load_command.nsyms * nlist_byte_size;
2283  const addr_t strtab_data_byte_size = symtab_load_command.strsize;
2284  addr_t strtab_addr = LLDB_INVALID_ADDRESS;
2285 
2286  ProcessSP process_sp(m_process_wp.lock());
2287  Process *process = process_sp.get();
2288 
2289  uint32_t memory_module_load_level = eMemoryModuleLoadLevelComplete;
2290  bool is_shared_cache_image = m_header.flags & MH_DYLIB_IN_CACHE;
2291  bool is_local_shared_cache_image = is_shared_cache_image && !IsInMemory();
2292  SectionSP linkedit_section_sp(
2293  section_list->FindSectionByName(GetSegmentNameLINKEDIT()));
2294 
2295  if (process && m_header.filetype != llvm::MachO::MH_OBJECT &&
2296  !is_local_shared_cache_image) {
2297  Target &target = process->GetTarget();
2298 
2299  memory_module_load_level = target.GetMemoryModuleLoadLevel();
2300 
2301  // Reading mach file from memory in a process or core file...
2302 
2303  if (linkedit_section_sp) {
2304  addr_t linkedit_load_addr =
2305  linkedit_section_sp->GetLoadBaseAddress(&target);
2306  if (linkedit_load_addr == LLDB_INVALID_ADDRESS) {
2307  // We might be trying to access the symbol table before the
2308  // __LINKEDIT's load address has been set in the target. We can't
2309  // fail to read the symbol table, so calculate the right address
2310  // manually
2311  linkedit_load_addr = CalculateSectionLoadAddressForMemoryImage(
2312  m_memory_addr, GetMachHeaderSection(), linkedit_section_sp.get());
2313  }
2314 
2315  const addr_t linkedit_file_offset = linkedit_section_sp->GetFileOffset();
2316  const addr_t symoff_addr = linkedit_load_addr +
2317  symtab_load_command.symoff -
2318  linkedit_file_offset;
2319  strtab_addr = linkedit_load_addr + symtab_load_command.stroff -
2320  linkedit_file_offset;
2321 
2322  // Always load dyld - the dynamic linker - from memory if we didn't
2323  // find a binary anywhere else. lldb will not register
2324  // dylib/framework/bundle loads/unloads if we don't have the dyld
2325  // symbols, we force dyld to load from memory despite the user's
2326  // target.memory-module-load-level setting.
2327  if (memory_module_load_level == eMemoryModuleLoadLevelComplete ||
2328  m_header.filetype == llvm::MachO::MH_DYLINKER) {
2329  DataBufferSP nlist_data_sp(
2330  ReadMemory(process_sp, symoff_addr, nlist_data_byte_size));
2331  if (nlist_data_sp)
2332  nlist_data.SetData(nlist_data_sp, 0, nlist_data_sp->GetByteSize());
2333  if (m_dysymtab.nindirectsyms != 0) {
2334  const addr_t indirect_syms_addr = linkedit_load_addr +
2335  m_dysymtab.indirectsymoff -
2336  linkedit_file_offset;
2337  DataBufferSP indirect_syms_data_sp(ReadMemory(
2338  process_sp, indirect_syms_addr, m_dysymtab.nindirectsyms * 4));
2339  if (indirect_syms_data_sp)
2340  indirect_symbol_index_data.SetData(
2341  indirect_syms_data_sp, 0,
2342  indirect_syms_data_sp->GetByteSize());
2343  // If this binary is outside the shared cache,
2344  // cache the string table.
2345  // Binaries in the shared cache all share a giant string table,
2346  // and we can't share the string tables across multiple
2347  // ObjectFileMachO's, so we'd end up re-reading this mega-strtab
2348  // for every binary in the shared cache - it would be a big perf
2349  // problem. For binaries outside the shared cache, it's faster to
2350  // read the entire strtab at once instead of piece-by-piece as we
2351  // process the nlist records.
2352  if (!is_shared_cache_image) {
2353  DataBufferSP strtab_data_sp(
2354  ReadMemory(process_sp, strtab_addr, strtab_data_byte_size));
2355  if (strtab_data_sp) {
2356  strtab_data.SetData(strtab_data_sp, 0,
2357  strtab_data_sp->GetByteSize());
2358  }
2359  }
2360  }
2361  if (memory_module_load_level >= eMemoryModuleLoadLevelPartial) {
2362  if (function_starts_load_command.cmd) {
2363  const addr_t func_start_addr =
2364  linkedit_load_addr + function_starts_load_command.dataoff -
2365  linkedit_file_offset;
2366  DataBufferSP func_start_data_sp(
2367  ReadMemory(process_sp, func_start_addr,
2368  function_starts_load_command.datasize));
2369  if (func_start_data_sp)
2370  function_starts_data.SetData(func_start_data_sp, 0,
2371  func_start_data_sp->GetByteSize());
2372  }
2373  }
2374  }
2375  }
2376  } else {
2377  if (is_local_shared_cache_image) {
2378  // The load commands in shared cache images are relative to the
2379  // beginning of the shared cache, not the library image. The
2380  // data we get handed when creating the ObjectFileMachO starts
2381  // at the beginning of a specific library and spans to the end
2382  // of the cache to be able to reach the shared LINKEDIT
2383  // segments. We need to convert the load command offsets to be
2384  // relative to the beginning of our specific image.
2385  lldb::addr_t linkedit_offset = linkedit_section_sp->GetFileOffset();
2386  lldb::offset_t linkedit_slide =
2387  linkedit_offset - m_linkedit_original_offset;
2388  symtab_load_command.symoff += linkedit_slide;
2389  symtab_load_command.stroff += linkedit_slide;
2390  dyld_info.export_off += linkedit_slide;
2391  m_dysymtab.indirectsymoff += linkedit_slide;
2392  function_starts_load_command.dataoff += linkedit_slide;
2393  }
2394 
2395  nlist_data.SetData(m_data, symtab_load_command.symoff,
2396  nlist_data_byte_size);
2397  strtab_data.SetData(m_data, symtab_load_command.stroff,
2398  strtab_data_byte_size);
2399 
2400  if (dyld_info.export_size > 0) {
2401  dyld_trie_data.SetData(m_data, dyld_info.export_off,
2402  dyld_info.export_size);
2403  }
2404 
2405  if (m_dysymtab.nindirectsyms != 0) {
2406  indirect_symbol_index_data.SetData(m_data, m_dysymtab.indirectsymoff,
2407  m_dysymtab.nindirectsyms * 4);
2408  }
2409  if (function_starts_load_command.cmd) {
2410  function_starts_data.SetData(m_data, function_starts_load_command.dataoff,
2411  function_starts_load_command.datasize);
2412  }
2413  }
2414 
2415  const bool have_strtab_data = strtab_data.GetByteSize() > 0;
2416 
2417  ConstString g_segment_name_TEXT = GetSegmentNameTEXT();
2418  ConstString g_segment_name_DATA = GetSegmentNameDATA();
2419  ConstString g_segment_name_DATA_DIRTY = GetSegmentNameDATA_DIRTY();
2420  ConstString g_segment_name_DATA_CONST = GetSegmentNameDATA_CONST();
2421  ConstString g_segment_name_OBJC = GetSegmentNameOBJC();
2422  ConstString g_section_name_eh_frame = GetSectionNameEHFrame();
2423  SectionSP text_section_sp(
2424  section_list->FindSectionByName(g_segment_name_TEXT));
2425  SectionSP data_section_sp(
2426  section_list->FindSectionByName(g_segment_name_DATA));
2427  SectionSP data_dirty_section_sp(
2428  section_list->FindSectionByName(g_segment_name_DATA_DIRTY));
2429  SectionSP data_const_section_sp(
2430  section_list->FindSectionByName(g_segment_name_DATA_CONST));
2431  SectionSP objc_section_sp(
2432  section_list->FindSectionByName(g_segment_name_OBJC));
2433  SectionSP eh_frame_section_sp;
2434  if (text_section_sp.get())
2435  eh_frame_section_sp = text_section_sp->GetChildren().FindSectionByName(
2436  g_section_name_eh_frame);
2437  else
2438  eh_frame_section_sp =
2439  section_list->FindSectionByName(g_section_name_eh_frame);
2440 
2441  const bool is_arm = (m_header.cputype == llvm::MachO::CPU_TYPE_ARM);
2442  const bool always_thumb = GetArchitecture().IsAlwaysThumbInstructions();
2443 
2444  // lldb works best if it knows the start address of all functions in a
2445  // module. Linker symbols or debug info are normally the best source of
2446  // information for start addr / size but they may be stripped in a released
2447  // binary. Two additional sources of information exist in Mach-O binaries:
2448  // LC_FUNCTION_STARTS - a list of ULEB128 encoded offsets of each
2449  // function's start address in the
2450  // binary, relative to the text section.
2451  // eh_frame - the eh_frame FDEs have the start addr & size of
2452  // each function
2453  // LC_FUNCTION_STARTS is the fastest source to read in, and is present on
2454  // all modern binaries.
2455  // Binaries built to run on older releases may need to use eh_frame
2456  // information.
2457 
2458  if (text_section_sp && function_starts_data.GetByteSize()) {
2459  FunctionStarts::Entry function_start_entry;
2460  function_start_entry.data = false;
2461  lldb::offset_t function_start_offset = 0;
2462  function_start_entry.addr = text_section_sp->GetFileAddress();
2463  uint64_t delta;
2464  while ((delta = function_starts_data.GetULEB128(&function_start_offset)) >
2465  0) {
2466  // Now append the current entry
2467  function_start_entry.addr += delta;
2468  if (is_arm) {
2469  if (function_start_entry.addr & 1) {
2470  function_start_entry.addr &= THUMB_ADDRESS_BIT_MASK;
2471  function_start_entry.data = true;
2472  } else if (always_thumb) {
2473  function_start_entry.data = true;
2474  }
2475  }
2476  function_starts.Append(function_start_entry);
2477  }
2478  } else {
2479  // If m_type is eTypeDebugInfo, then this is a dSYM - it will have the
2480  // load command claiming an eh_frame but it doesn't actually have the
2481  // eh_frame content. And if we have a dSYM, we don't need to do any of
2482  // this fill-in-the-missing-symbols work anyway - the debug info should
2483  // give us all the functions in the module.
2484  if (text_section_sp.get() && eh_frame_section_sp.get() &&
2485  m_type != eTypeDebugInfo) {
2486  DWARFCallFrameInfo eh_frame(*this, eh_frame_section_sp,
2487  DWARFCallFrameInfo::EH);
2489  eh_frame.GetFunctionAddressAndSizeVector(functions);
2490  addr_t text_base_addr = text_section_sp->GetFileAddress();
2491  size_t count = functions.GetSize();
2492  for (size_t i = 0; i < count; ++i) {
2494  functions.GetEntryAtIndex(i);
2495  if (func) {
2496  FunctionStarts::Entry function_start_entry;
2497  function_start_entry.addr = func->base - text_base_addr;
2498  if (is_arm) {
2499  if (function_start_entry.addr & 1) {
2500  function_start_entry.addr &= THUMB_ADDRESS_BIT_MASK;
2501  function_start_entry.data = true;
2502  } else if (always_thumb) {
2503  function_start_entry.data = true;
2504  }
2505  }
2506  function_starts.Append(function_start_entry);
2507  }
2508  }
2509  }
2510  }
2511 
2512  const size_t function_starts_count = function_starts.GetSize();
2513 
2514  // For user process binaries (executables, dylibs, frameworks, bundles), if
2515  // we don't have LC_FUNCTION_STARTS/eh_frame section in this binary, we're
2516  // going to assume the binary has been stripped. Don't allow assembly
2517  // language instruction emulation because we don't know proper function
2518  // start boundaries.
2519  //
2520  // For all other types of binaries (kernels, stand-alone bare board
2521  // binaries, kexts), they may not have LC_FUNCTION_STARTS / eh_frame
2522  // sections - we should not make any assumptions about them based on that.
2523  if (function_starts_count == 0 && CalculateStrata() == eStrataUser) {
2525  Log *unwind_or_symbol_log(lldb_private::GetLogIfAnyCategoriesSet(
2527 
2528  if (unwind_or_symbol_log)
2529  module_sp->LogMessage(
2530  unwind_or_symbol_log,
2531  "no LC_FUNCTION_STARTS, will not allow assembly profiled unwinds");
2532  }
2533 
2534  const user_id_t TEXT_eh_frame_sectID = eh_frame_section_sp.get()
2535  ? eh_frame_section_sp->GetID()
2536  : static_cast<user_id_t>(NO_SECT);
2537 
2538  lldb::offset_t nlist_data_offset = 0;
2539 
2540  uint32_t N_SO_index = UINT32_MAX;
2541 
2542  MachSymtabSectionInfo section_info(section_list);
2543  std::vector<uint32_t> N_FUN_indexes;
2544  std::vector<uint32_t> N_NSYM_indexes;
2545  std::vector<uint32_t> N_INCL_indexes;
2546  std::vector<uint32_t> N_BRAC_indexes;
2547  std::vector<uint32_t> N_COMM_indexes;
2548  typedef std::multimap<uint64_t, uint32_t> ValueToSymbolIndexMap;
2549  typedef llvm::DenseMap<uint32_t, uint32_t> NListIndexToSymbolIndexMap;
2550  typedef llvm::DenseMap<const char *, uint32_t> ConstNameToSymbolIndexMap;
2551  ValueToSymbolIndexMap N_FUN_addr_to_sym_idx;
2552  ValueToSymbolIndexMap N_STSYM_addr_to_sym_idx;
2553  ConstNameToSymbolIndexMap N_GSYM_name_to_sym_idx;
2554  // Any symbols that get merged into another will get an entry in this map
2555  // so we know
2556  NListIndexToSymbolIndexMap m_nlist_idx_to_sym_idx;
2557  uint32_t nlist_idx = 0;
2558  Symbol *symbol_ptr = nullptr;
2559 
2560  uint32_t sym_idx = 0;
2561  Symbol *sym = nullptr;
2562  size_t num_syms = 0;
2563  std::string memory_symbol_name;
2564  uint32_t unmapped_local_symbols_found = 0;
2565 
2566  std::vector<TrieEntryWithOffset> reexport_trie_entries;
2567  std::vector<TrieEntryWithOffset> external_sym_trie_entries;
2568  std::set<lldb::addr_t> resolver_addresses;
2569 
2570  if (dyld_trie_data.GetByteSize() > 0) {
2571  ConstString text_segment_name("__TEXT");
2572  SectionSP text_segment_sp =
2573  GetSectionList()->FindSectionByName(text_segment_name);
2574  lldb::addr_t text_segment_file_addr = LLDB_INVALID_ADDRESS;
2575  if (text_segment_sp)
2576  text_segment_file_addr = text_segment_sp->GetFileAddress();
2577  std::vector<llvm::StringRef> nameSlices;
2578  ParseTrieEntries(dyld_trie_data, 0, is_arm, text_segment_file_addr,
2579  nameSlices, resolver_addresses, reexport_trie_entries,
2580  external_sym_trie_entries);
2581  }
2582 
2583  typedef std::set<ConstString> IndirectSymbols;
2584  IndirectSymbols indirect_symbol_names;
2585 
2586 #if TARGET_OS_IPHONE
2587 
2588  // Some recent builds of the dyld_shared_cache (hereafter: DSC) have been
2589  // optimized by moving LOCAL symbols out of the memory mapped portion of
2590  // the DSC. The symbol information has all been retained, but it isn't
2591  // available in the normal nlist data. However, there *are* duplicate
2592  // entries of *some*
2593  // LOCAL symbols in the normal nlist data. To handle this situation
2594  // correctly, we must first attempt
2595  // to parse any DSC unmapped symbol information. If we find any, we set a
2596  // flag that tells the normal nlist parser to ignore all LOCAL symbols.
2597 
2598  if (m_header.flags & MH_DYLIB_IN_CACHE) {
2599  // Before we can start mapping the DSC, we need to make certain the
2600  // target process is actually using the cache we can find.
2601 
2602  // Next we need to determine the correct path for the dyld shared cache.
2603 
2604  ArchSpec header_arch = GetArchitecture();
2605  char dsc_path[PATH_MAX];
2606  char dsc_path_development[PATH_MAX];
2607 
2608  snprintf(
2609  dsc_path, sizeof(dsc_path), "%s%s%s",
2610  "/System/Library/Caches/com.apple.dyld/", /* IPHONE_DYLD_SHARED_CACHE_DIR
2611  */
2612  "dyld_shared_cache_", /* DYLD_SHARED_CACHE_BASE_NAME */
2613  header_arch.GetArchitectureName());
2614 
2615  snprintf(
2616  dsc_path_development, sizeof(dsc_path), "%s%s%s%s",
2617  "/System/Library/Caches/com.apple.dyld/", /* IPHONE_DYLD_SHARED_CACHE_DIR
2618  */
2619  "dyld_shared_cache_", /* DYLD_SHARED_CACHE_BASE_NAME */
2620  header_arch.GetArchitectureName(), ".development");
2621 
2622  FileSpec dsc_nondevelopment_filespec(dsc_path);
2623  FileSpec dsc_development_filespec(dsc_path_development);
2624  FileSpec dsc_filespec;
2625 
2626  UUID dsc_uuid;
2627  UUID process_shared_cache_uuid;
2628  addr_t process_shared_cache_base_addr;
2629 
2630  if (process) {
2631  GetProcessSharedCacheUUID(process, process_shared_cache_base_addr,
2632  process_shared_cache_uuid);
2633  }
2634 
2635  // First see if we can find an exact match for the inferior process
2636  // shared cache UUID in the development or non-development shared caches
2637  // on disk.
2638  if (process_shared_cache_uuid.IsValid()) {
2639  if (FileSystem::Instance().Exists(dsc_development_filespec)) {
2640  UUID dsc_development_uuid = GetSharedCacheUUID(
2641  dsc_development_filespec, byte_order, addr_byte_size);
2642  if (dsc_development_uuid.IsValid() &&
2643  dsc_development_uuid == process_shared_cache_uuid) {
2644  dsc_filespec = dsc_development_filespec;
2645  dsc_uuid = dsc_development_uuid;
2646  }
2647  }
2648  if (!dsc_uuid.IsValid() &&
2649  FileSystem::Instance().Exists(dsc_nondevelopment_filespec)) {
2650  UUID dsc_nondevelopment_uuid = GetSharedCacheUUID(
2651  dsc_nondevelopment_filespec, byte_order, addr_byte_size);
2652  if (dsc_nondevelopment_uuid.IsValid() &&
2653  dsc_nondevelopment_uuid == process_shared_cache_uuid) {
2654  dsc_filespec = dsc_nondevelopment_filespec;
2655  dsc_uuid = dsc_nondevelopment_uuid;
2656  }
2657  }
2658  }
2659 
2660  // Failing a UUID match, prefer the development dyld_shared cache if both
2661  // are present.
2662  if (!FileSystem::Instance().Exists(dsc_filespec)) {
2663  if (FileSystem::Instance().Exists(dsc_development_filespec)) {
2664  dsc_filespec = dsc_development_filespec;
2665  } else {
2666  dsc_filespec = dsc_nondevelopment_filespec;
2667  }
2668  }
2669 
2670  /* The dyld_cache_header has a pointer to the
2671  dyld_cache_local_symbols_info structure (localSymbolsOffset).
2672  The dyld_cache_local_symbols_info structure gives us three things:
2673  1. The start and count of the nlist records in the dyld_shared_cache
2674  file
2675  2. The start and size of the strings for these nlist records
2676  3. The start and count of dyld_cache_local_symbols_entry entries
2677 
2678  There is one dyld_cache_local_symbols_entry per dylib/framework in the
2679  dyld shared cache.
2680  The "dylibOffset" field is the Mach-O header of this dylib/framework in
2681  the dyld shared cache.
2682  The dyld_cache_local_symbols_entry also lists the start of this
2683  dylib/framework's nlist records
2684  and the count of how many nlist records there are for this
2685  dylib/framework.
2686  */
2687 
2688  // Process the dyld shared cache header to find the unmapped symbols
2689 
2690  DataBufferSP dsc_data_sp = MapFileData(
2691  dsc_filespec, sizeof(struct lldb_copy_dyld_cache_header_v1), 0);
2692  if (!dsc_uuid.IsValid()) {
2693  dsc_uuid = GetSharedCacheUUID(dsc_filespec, byte_order, addr_byte_size);
2694  }
2695  if (dsc_data_sp) {
2696  DataExtractor dsc_header_data(dsc_data_sp, byte_order, addr_byte_size);
2697 
2698  bool uuid_match = true;
2699  if (dsc_uuid.IsValid() && process) {
2700  if (process_shared_cache_uuid.IsValid() &&
2701  dsc_uuid != process_shared_cache_uuid) {
2702  // The on-disk dyld_shared_cache file is not the same as the one in
2703  // this process' memory, don't use it.
2704  uuid_match = false;
2705  ModuleSP module_sp(GetModule());
2706  if (module_sp)
2707  module_sp->ReportWarning("process shared cache does not match "
2708  "on-disk dyld_shared_cache file, some "
2709  "symbol names will be missing.");
2710  }
2711  }
2712 
2713  offset = offsetof(struct lldb_copy_dyld_cache_header_v1, mappingOffset);
2714 
2715  uint32_t mappingOffset = dsc_header_data.GetU32(&offset);
2716 
2717  // If the mappingOffset points to a location inside the header, we've
2718  // opened an old dyld shared cache, and should not proceed further.
2719  if (uuid_match &&
2720  mappingOffset >= sizeof(struct lldb_copy_dyld_cache_header_v1)) {
2721 
2722  DataBufferSP dsc_mapping_info_data_sp = MapFileData(
2723  dsc_filespec, sizeof(struct lldb_copy_dyld_cache_mapping_info),
2724  mappingOffset);
2725 
2726  DataExtractor dsc_mapping_info_data(dsc_mapping_info_data_sp,
2727  byte_order, addr_byte_size);
2728  offset = 0;
2729 
2730  // The File addresses (from the in-memory Mach-O load commands) for
2731  // the shared libraries in the shared library cache need to be
2732  // adjusted by an offset to match up with the dylibOffset identifying
2733  // field in the dyld_cache_local_symbol_entry's. This offset is
2734  // recorded in mapping_offset_value.
2735  const uint64_t mapping_offset_value =
2736  dsc_mapping_info_data.GetU64(&offset);
2737 
2738  offset =
2739  offsetof(struct lldb_copy_dyld_cache_header_v1, localSymbolsOffset);
2740  uint64_t localSymbolsOffset = dsc_header_data.GetU64(&offset);
2741  uint64_t localSymbolsSize = dsc_header_data.GetU64(&offset);
2742 
2743  if (localSymbolsOffset && localSymbolsSize) {
2744  // Map the local symbols
2745  DataBufferSP dsc_local_symbols_data_sp =
2746  MapFileData(dsc_filespec, localSymbolsSize, localSymbolsOffset);
2747 
2748  if (dsc_local_symbols_data_sp) {
2749  DataExtractor dsc_local_symbols_data(dsc_local_symbols_data_sp,
2750  byte_order, addr_byte_size);
2751 
2752  offset = 0;
2753 
2754  typedef llvm::DenseMap<ConstString, uint16_t> UndefinedNameToDescMap;
2755  typedef llvm::DenseMap<uint32_t, ConstString> SymbolIndexToName;
2756  UndefinedNameToDescMap undefined_name_to_desc;
2757  SymbolIndexToName reexport_shlib_needs_fixup;
2758 
2759  // Read the local_symbols_infos struct in one shot
2760  struct lldb_copy_dyld_cache_local_symbols_info local_symbols_info;
2761  dsc_local_symbols_data.GetU32(&offset,
2762  &local_symbols_info.nlistOffset, 6);
2763 
2764  SectionSP text_section_sp(
2765  section_list->FindSectionByName(GetSegmentNameTEXT()));
2766 
2767  uint32_t header_file_offset =
2768  (text_section_sp->GetFileAddress() - mapping_offset_value);
2769 
2770  offset = local_symbols_info.entriesOffset;
2771  for (uint32_t entry_index = 0;
2772  entry_index < local_symbols_info.entriesCount; entry_index++) {
2774  local_symbols_entry;
2775  local_symbols_entry.dylibOffset =
2776  dsc_local_symbols_data.GetU32(&offset);
2777  local_symbols_entry.nlistStartIndex =
2778  dsc_local_symbols_data.GetU32(&offset);
2779  local_symbols_entry.nlistCount =
2780  dsc_local_symbols_data.GetU32(&offset);
2781 
2782  if (header_file_offset == local_symbols_entry.dylibOffset) {
2783  unmapped_local_symbols_found = local_symbols_entry.nlistCount;
2784 
2785  // The normal nlist code cannot correctly size the Symbols
2786  // array, we need to allocate it here.
2787  sym = symtab->Resize(
2788  symtab_load_command.nsyms + m_dysymtab.nindirectsyms +
2789  unmapped_local_symbols_found - m_dysymtab.nlocalsym);
2790  num_syms = symtab->GetNumSymbols();
2791 
2792  nlist_data_offset =
2793  local_symbols_info.nlistOffset +
2794  (nlist_byte_size * local_symbols_entry.nlistStartIndex);
2795  uint32_t string_table_offset = local_symbols_info.stringsOffset;
2796 
2797  for (uint32_t nlist_index = 0;
2798  nlist_index < local_symbols_entry.nlistCount;
2799  nlist_index++) {
2800  /////////////////////////////
2801  {
2802  llvm::Optional<struct nlist_64> nlist_maybe =
2803  ParseNList(dsc_local_symbols_data, nlist_data_offset,
2804  nlist_byte_size);
2805  if (!nlist_maybe)
2806  break;
2807  struct nlist_64 nlist = *nlist_maybe;
2808 
2810  const char *symbol_name = dsc_local_symbols_data.PeekCStr(
2811  string_table_offset + nlist.n_strx);
2812 
2813  if (symbol_name == NULL) {
2814  // No symbol should be NULL, even the symbols with no
2815  // string values should have an offset zero which
2816  // points to an empty C-string
2817  Host::SystemLog(
2818  Host::eSystemLogError,
2819  "error: DSC unmapped local symbol[%u] has invalid "
2820  "string table offset 0x%x in %s, ignoring symbol\n",
2821  entry_index, nlist.n_strx,
2822  module_sp->GetFileSpec().GetPath().c_str());
2823  continue;
2824  }
2825  if (symbol_name[0] == '\0')
2826  symbol_name = NULL;
2827 
2828  const char *symbol_name_non_abi_mangled = NULL;
2829 
2830  SectionSP symbol_section;
2831  uint32_t symbol_byte_size = 0;
2832  bool add_nlist = true;
2833  bool is_debug = ((nlist.n_type & N_STAB) != 0);
2834  bool demangled_is_synthesized = false;
2835  bool is_gsym = false;
2836  bool set_value = true;
2837 
2838  assert(sym_idx < num_syms);
2839 
2840  sym[sym_idx].SetDebug(is_debug);
2841 
2842  if (is_debug) {
2843  switch (nlist.n_type) {
2844  case N_GSYM:
2845  // global symbol: name,,NO_SECT,type,0
2846  // Sometimes the N_GSYM value contains the address.
2847 
2848  // FIXME: In the .o files, we have a GSYM and a debug
2849  // symbol for all the ObjC data. They
2850  // have the same address, but we want to ensure that
2851  // we always find only the real symbol, 'cause we
2852  // don't currently correctly attribute the
2853  // GSYM one to the ObjCClass/Ivar/MetaClass
2854  // symbol type. This is a temporary hack to make
2855  // sure the ObjectiveC symbols get treated correctly.
2856  // To do this right, we should coalesce all the GSYM
2857  // & global symbols that have the same address.
2858 
2859  is_gsym = true;
2860  sym[sym_idx].SetExternal(true);
2861 
2862  if (symbol_name && symbol_name[0] == '_' &&
2863  symbol_name[1] == 'O') {
2864  llvm::StringRef symbol_name_ref(symbol_name);
2865  if (symbol_name_ref.startswith(
2866  g_objc_v2_prefix_class)) {
2867  symbol_name_non_abi_mangled = symbol_name + 1;
2868  symbol_name =
2869  symbol_name + g_objc_v2_prefix_class.size();
2870  type = eSymbolTypeObjCClass;
2871  demangled_is_synthesized = true;
2872 
2873  } else if (symbol_name_ref.startswith(
2874  g_objc_v2_prefix_metaclass)) {
2875  symbol_name_non_abi_mangled = symbol_name + 1;
2876  symbol_name =
2877  symbol_name + g_objc_v2_prefix_metaclass.size();
2878  type = eSymbolTypeObjCMetaClass;
2879  demangled_is_synthesized = true;
2880  } else if (symbol_name_ref.startswith(
2881  g_objc_v2_prefix_ivar)) {
2882  symbol_name_non_abi_mangled = symbol_name + 1;
2883  symbol_name =
2884  symbol_name + g_objc_v2_prefix_ivar.size();
2885  type = eSymbolTypeObjCIVar;
2886  demangled_is_synthesized = true;
2887  }
2888  } else {
2889  if (nlist.n_value != 0)
2890  symbol_section = section_info.GetSection(
2891  nlist.n_sect, nlist.n_value);
2892  type = eSymbolTypeData;
2893  }
2894  break;
2895 
2896  case N_FNAME:
2897  // procedure name (f77 kludge): name,,NO_SECT,0,0
2898  type = eSymbolTypeCompiler;
2899  break;
2900 
2901  case N_FUN:
2902  // procedure: name,,n_sect,linenumber,address
2903  if (symbol_name) {
2904  type = eSymbolTypeCode;
2905  symbol_section = section_info.GetSection(
2906  nlist.n_sect, nlist.n_value);
2907 
2908  N_FUN_addr_to_sym_idx.insert(
2909  std::make_pair(nlist.n_value, sym_idx));
2910  // We use the current number of symbols in the
2911  // symbol table in lieu of using nlist_idx in case
2912  // we ever start trimming entries out
2913  N_FUN_indexes.push_back(sym_idx);
2914  } else {
2915  type = eSymbolTypeCompiler;
2916 
2917  if (!N_FUN_indexes.empty()) {
2918  // Copy the size of the function into the
2919  // original
2920  // STAB entry so we don't have
2921  // to hunt for it later
2922  symtab->SymbolAtIndex(N_FUN_indexes.back())
2923  ->SetByteSize(nlist.n_value);
2924  N_FUN_indexes.pop_back();
2925  // We don't really need the end function STAB as
2926  // it contains the size which we already placed
2927  // with the original symbol, so don't add it if
2928  // we want a minimal symbol table
2929  add_nlist = false;
2930  }
2931  }
2932  break;
2933 
2934  case N_STSYM:
2935  // static symbol: name,,n_sect,type,address
2936  N_STSYM_addr_to_sym_idx.insert(
2937  std::make_pair(nlist.n_value, sym_idx));
2938  symbol_section = section_info.GetSection(nlist.n_sect,
2939  nlist.n_value);
2940  if (symbol_name && symbol_name[0]) {
2941  type = ObjectFile::GetSymbolTypeFromName(
2942  symbol_name + 1, eSymbolTypeData);
2943  }
2944  break;
2945 
2946  case N_LCSYM:
2947  // .lcomm symbol: name,,n_sect,type,address
2948  symbol_section = section_info.GetSection(nlist.n_sect,
2949  nlist.n_value);
2950  type = eSymbolTypeCommonBlock;
2951  break;
2952 
2953  case N_BNSYM:
2954  // We use the current number of symbols in the symbol
2955  // table in lieu of using nlist_idx in case we ever
2956  // start trimming entries out. Skip these if we want
2957  // minimal symbol tables
2958  add_nlist = false;
2959  break;
2960 
2961  case N_ENSYM:
2962  // Set the size of the N_BNSYM to the terminating
2963  // index of this N_ENSYM so that we can always skip
2964  // the entire symbol if we need to navigate more
2965  // quickly at the source level when parsing STABS
2966  // Skip these if we want minimal symbol tables
2967  add_nlist = false;
2968  break;
2969 
2970  case N_OPT:
2971  // emitted with gcc2_compiled and in gcc source
2972  type = eSymbolTypeCompiler;
2973  break;
2974 
2975  case N_RSYM:
2976  // register sym: name,,NO_SECT,type,register
2977  type = eSymbolTypeVariable;
2978  break;
2979 
2980  case N_SLINE:
2981  // src line: 0,,n_sect,linenumber,address
2982  symbol_section = section_info.GetSection(nlist.n_sect,
2983  nlist.n_value);
2984  type = eSymbolTypeLineEntry;
2985  break;
2986 
2987  case N_SSYM:
2988  // structure elt: name,,NO_SECT,type,struct_offset
2989  type = eSymbolTypeVariableType;
2990  break;
2991 
2992  case N_SO:
2993  // source file name
2994  type = eSymbolTypeSourceFile;
2995  if (symbol_name == NULL) {
2996  add_nlist = false;
2997  if (N_SO_index != UINT32_MAX) {
2998  // Set the size of the N_SO to the terminating
2999  // index of this N_SO so that we can always skip
3000  // the entire N_SO if we need to navigate more
3001  // quickly at the source level when parsing STABS
3002  symbol_ptr = symtab->SymbolAtIndex(N_SO_index);
3003  symbol_ptr->SetByteSize(sym_idx);
3004  symbol_ptr->SetSizeIsSibling(true);
3005  }
3006  N_NSYM_indexes.clear();
3007  N_INCL_indexes.clear();
3008  N_BRAC_indexes.clear();
3009  N_COMM_indexes.clear();
3010  N_FUN_indexes.clear();
3011  N_SO_index = UINT32_MAX;
3012  } else {
3013  // We use the current number of symbols in the
3014  // symbol table in lieu of using nlist_idx in case
3015  // we ever start trimming entries out
3016  const bool N_SO_has_full_path = symbol_name[0] == '/';
3017  if (N_SO_has_full_path) {
3018  if ((N_SO_index == sym_idx - 1) &&
3019  ((sym_idx - 1) < num_syms)) {
3020  // We have two consecutive N_SO entries where
3021  // the first contains a directory and the
3022  // second contains a full path.
3023  sym[sym_idx - 1].GetMangled().SetValue(
3024  ConstString(symbol_name), false);
3025  m_nlist_idx_to_sym_idx[nlist_idx] = sym_idx - 1;
3026  add_nlist = false;
3027  } else {
3028  // This is the first entry in a N_SO that
3029  // contains a directory or
3030  // a full path to the source file
3031  N_SO_index = sym_idx;
3032  }
3033  } else if ((N_SO_index == sym_idx - 1) &&
3034  ((sym_idx - 1) < num_syms)) {
3035  // This is usually the second N_SO entry that
3036  // contains just the filename, so here we combine
3037  // it with the first one if we are minimizing the
3038  // symbol table
3039  const char *so_path = sym[sym_idx - 1]
3040  .GetMangled()
3041  .GetDemangledName()
3042  .AsCString();
3043  if (so_path && so_path[0]) {
3044  std::string full_so_path(so_path);
3045  const size_t double_slash_pos =
3046  full_so_path.find("//");
3047  if (double_slash_pos != std::string::npos) {
3048  // The linker has been generating bad N_SO
3049  // entries with doubled up paths
3050  // in the format "%s%s" where the first
3051  // string is the DW_AT_comp_dir, and the
3052  // second is the directory for the source
3053  // file so you end up with a path that looks
3054  // like "/tmp/src//tmp/src/"
3055  FileSpec so_dir(so_path);
3056  if (!FileSystem::Instance().Exists(so_dir)) {
3057  so_dir.SetFile(
3058  &full_so_path[double_slash_pos + 1],
3059  FileSpec::Style::native);
3060  if (FileSystem::Instance().Exists(so_dir)) {
3061  // Trim off the incorrect path
3062  full_so_path.erase(0, double_slash_pos + 1);
3063  }
3064  }
3065  }
3066  if (*full_so_path.rbegin() != '/')
3067  full_so_path += '/';
3068  full_so_path += symbol_name;
3069  sym[sym_idx - 1].GetMangled().SetValue(
3070  ConstString(full_so_path.c_str()), false);
3071  add_nlist = false;
3072  m_nlist_idx_to_sym_idx[nlist_idx] = sym_idx - 1;
3073  }
3074  } else {
3075  // This could be a relative path to a N_SO
3076  N_SO_index = sym_idx;
3077  }
3078  }
3079  break;
3080 
3081  case N_OSO:
3082  // object file name: name,,0,0,st_mtime
3083  type = eSymbolTypeObjectFile;
3084  break;
3085 
3086  case N_LSYM:
3087  // local sym: name,,NO_SECT,type,offset
3088  type = eSymbolTypeLocal;
3089  break;
3090 
3091  // INCL scopes
3092  case N_BINCL:
3093  // include file beginning: name,,NO_SECT,0,sum. We use
3094  // the current number of symbols in the symbol table
3095  // in lieu of using nlist_idx in case we ever start
3096  // trimming entries out
3097  N_INCL_indexes.push_back(sym_idx);
3098  type = eSymbolTypeScopeBegin;
3099  break;
3100 
3101  case N_EINCL:
3102  // include file end: name,,NO_SECT,0,0
3103  // Set the size of the N_BINCL to the terminating
3104  // index of this N_EINCL so that we can always skip
3105  // the entire symbol if we need to navigate more
3106  // quickly at the source level when parsing STABS
3107  if (!N_INCL_indexes.empty()) {
3108  symbol_ptr =
3109  symtab->SymbolAtIndex(N_INCL_indexes.back());
3110  symbol_ptr->SetByteSize(sym_idx + 1);
3111  symbol_ptr->SetSizeIsSibling(true);
3112  N_INCL_indexes.pop_back();
3113  }
3114  type = eSymbolTypeScopeEnd;
3115  break;
3116 
3117  case N_SOL:
3118  // #included file name: name,,n_sect,0,address
3119  type = eSymbolTypeHeaderFile;
3120 
3121  // We currently don't use the header files on darwin
3122  add_nlist = false;
3123  break;
3124 
3125  case N_PARAMS:
3126  // compiler parameters: name,,NO_SECT,0,0
3127  type = eSymbolTypeCompiler;
3128  break;
3129 
3130  case N_VERSION:
3131  // compiler version: name,,NO_SECT,0,0
3132  type = eSymbolTypeCompiler;
3133  break;
3134 
3135  case N_OLEVEL:
3136  // compiler -O level: name,,NO_SECT,0,0
3137  type = eSymbolTypeCompiler;
3138  break;
3139 
3140  case N_PSYM:
3141  // parameter: name,,NO_SECT,type,offset
3142  type = eSymbolTypeVariable;
3143  break;
3144 
3145  case N_ENTRY:
3146  // alternate entry: name,,n_sect,linenumber,address
3147  symbol_section = section_info.GetSection(nlist.n_sect,
3148  nlist.n_value);
3149  type = eSymbolTypeLineEntry;
3150  break;
3151 
3152  // Left and Right Braces
3153  case N_LBRAC:
3154  // left bracket: 0,,NO_SECT,nesting level,address. We
3155  // use the current number of symbols in the symbol
3156  // table in lieu of using nlist_idx in case we ever
3157  // start trimming entries out
3158  symbol_section = section_info.GetSection(nlist.n_sect,
3159  nlist.n_value);
3160  N_BRAC_indexes.push_back(sym_idx);
3161  type = eSymbolTypeScopeBegin;
3162  break;
3163 
3164  case N_RBRAC:
3165  // right bracket: 0,,NO_SECT,nesting level,address
3166  // Set the size of the N_LBRAC to the terminating
3167  // index of this N_RBRAC so that we can always skip
3168  // the entire symbol if we need to navigate more
3169  // quickly at the source level when parsing STABS
3170  symbol_section = section_info.GetSection(nlist.n_sect,
3171  nlist.n_value);
3172  if (!N_BRAC_indexes.empty()) {
3173  symbol_ptr =
3174  symtab->SymbolAtIndex(N_BRAC_indexes.back());
3175  symbol_ptr->SetByteSize(sym_idx + 1);
3176  symbol_ptr->SetSizeIsSibling(true);
3177  N_BRAC_indexes.pop_back();
3178  }
3179  type = eSymbolTypeScopeEnd;
3180  break;
3181 
3182  case N_EXCL:
3183  // deleted include file: name,,NO_SECT,0,sum
3184  type = eSymbolTypeHeaderFile;
3185  break;
3186 
3187  // COMM scopes
3188  case N_BCOMM:
3189  // begin common: name,,NO_SECT,0,0
3190  // We use the current number of symbols in the symbol
3191  // table in lieu of using nlist_idx in case we ever
3192  // start trimming entries out
3193  type = eSymbolTypeScopeBegin;
3194  N_COMM_indexes.push_back(sym_idx);
3195  break;
3196 
3197  case N_ECOML:
3198  // end common (local name): 0,,n_sect,0,address
3199  symbol_section = section_info.GetSection(nlist.n_sect,
3200  nlist.n_value);
3201  // Fall through
3202 
3203  case N_ECOMM:
3204  // end common: name,,n_sect,0,0
3205  // Set the size of the N_BCOMM to the terminating
3206  // index of this N_ECOMM/N_ECOML so that we can
3207  // always skip the entire symbol if we need to
3208  // navigate more quickly at the source level when
3209  // parsing STABS
3210  if (!N_COMM_indexes.empty()) {
3211  symbol_ptr =
3212  symtab->SymbolAtIndex(N_COMM_indexes.back());
3213  symbol_ptr->SetByteSize(sym_idx + 1);
3214  symbol_ptr->SetSizeIsSibling(true);
3215  N_COMM_indexes.pop_back();
3216  }
3217  type = eSymbolTypeScopeEnd;
3218  break;
3219 
3220  case N_LENG:
3221  // second stab entry with length information
3222  type = eSymbolTypeAdditional;
3223  break;
3224 
3225  default:
3226  break;
3227  }
3228  } else {
3229  // uint8_t n_pext = N_PEXT & nlist.n_type;
3230  uint8_t n_type = N_TYPE & nlist.n_type;
3231  sym[sym_idx].SetExternal((N_EXT & nlist.n_type) != 0);
3232 
3233  switch (n_type) {
3234  case N_INDR: {
3235  const char *reexport_name_cstr =
3236  strtab_data.PeekCStr(nlist.n_value);
3237  if (reexport_name_cstr && reexport_name_cstr[0]) {
3238  type = eSymbolTypeReExported;
3239  ConstString reexport_name(
3240  reexport_name_cstr +
3241  ((reexport_name_cstr[0] == '_') ? 1 : 0));
3242  sym[sym_idx].SetReExportedSymbolName(reexport_name);
3243  set_value = false;
3244  reexport_shlib_needs_fixup[sym_idx] = reexport_name;
3245  indirect_symbol_names.insert(ConstString(
3246  symbol_name + ((symbol_name[0] == '_') ? 1 : 0)));
3247  } else
3248  type = eSymbolTypeUndefined;
3249  } break;
3250 
3251  case N_UNDF:
3252  if (symbol_name && symbol_name[0]) {
3253  ConstString undefined_name(
3254  symbol_name + ((symbol_name[0] == '_') ? 1 : 0));
3255  undefined_name_to_desc[undefined_name] = nlist.n_desc;
3256  }
3257  // Fall through
3258  case N_PBUD:
3259  type = eSymbolTypeUndefined;
3260  break;
3261 
3262  case N_ABS:
3263  type = eSymbolTypeAbsolute;
3264  break;
3265 
3266  case N_SECT: {
3267  symbol_section = section_info.GetSection(nlist.n_sect,
3268  nlist.n_value);
3269 
3270  if (symbol_section == NULL) {
3271  // TODO: warn about this?
3272  add_nlist = false;
3273  break;
3274  }
3275 
3276  if (TEXT_eh_frame_sectID == nlist.n_sect) {
3277  type = eSymbolTypeException;
3278  } else {
3279  uint32_t section_type =
3280  symbol_section->Get() & SECTION_TYPE;
3281 
3282  switch (section_type) {
3283  case S_CSTRING_LITERALS:
3284  type = eSymbolTypeData;
3285  break; // section with only literal C strings
3286  case S_4BYTE_LITERALS:
3287  type = eSymbolTypeData;
3288  break; // section with only 4 byte literals
3289  case S_8BYTE_LITERALS:
3290  type = eSymbolTypeData;
3291  break; // section with only 8 byte literals
3292  case S_LITERAL_POINTERS:
3293  type = eSymbolTypeTrampoline;
3294  break; // section with only pointers to literals
3295  case S_NON_LAZY_SYMBOL_POINTERS:
3296  type = eSymbolTypeTrampoline;
3297  break; // section with only non-lazy symbol
3298  // pointers
3299  case S_LAZY_SYMBOL_POINTERS:
3300  type = eSymbolTypeTrampoline;
3301  break; // section with only lazy symbol pointers
3302  case S_SYMBOL_STUBS:
3303  type = eSymbolTypeTrampoline;
3304  break; // section with only symbol stubs, byte
3305  // size of stub in the reserved2 field
3306  case S_MOD_INIT_FUNC_POINTERS:
3307  type = eSymbolTypeCode;
3308  break; // section with only function pointers for
3309  // initialization
3310  case S_MOD_TERM_FUNC_POINTERS:
3311  type = eSymbolTypeCode;
3312  break; // section with only function pointers for
3313  // termination
3314  case S_INTERPOSING:
3315  type = eSymbolTypeTrampoline;
3316  break; // section with only pairs of function
3317  // pointers for interposing
3318  case S_16BYTE_LITERALS:
3319  type = eSymbolTypeData;
3320  break; // section with only 16 byte literals
3321  case S_DTRACE_DOF:
3323  break;
3324  case S_LAZY_DYLIB_SYMBOL_POINTERS:
3325  type = eSymbolTypeTrampoline;
3326  break;
3327  default:
3328  switch (symbol_section->GetType()) {
3330  type = eSymbolTypeCode;
3331  break;
3332  case eSectionTypeData:
3333  case eSectionTypeDataCString: // Inlined C string
3334  // data
3335  case eSectionTypeDataCStringPointers: // Pointers
3336  // to C
3337  // string
3338  // data
3339  case eSectionTypeDataSymbolAddress: // Address of
3340  // a symbol in
3341  // the symbol
3342  // table
3343  case eSectionTypeData4:
3344  case eSectionTypeData8:
3345  case eSectionTypeData16:
3346  type = eSymbolTypeData;
3347  break;
3348  default:
3349  break;
3350  }
3351  break;
3352  }
3353 
3354  if (type == eSymbolTypeInvalid) {
3355  const char *symbol_sect_name =
3356  symbol_section->GetName().AsCString();
3357  if (symbol_section->IsDescendant(
3358  text_section_sp.get())) {
3359  if (symbol_section->IsClear(
3360  S_ATTR_PURE_INSTRUCTIONS |
3361  S_ATTR_SELF_MODIFYING_CODE |
3362  S_ATTR_SOME_INSTRUCTIONS))
3363  type = eSymbolTypeData;
3364  else
3365  type = eSymbolTypeCode;
3366  } else if (symbol_section->IsDescendant(
3367  data_section_sp.get()) ||
3368  symbol_section->IsDescendant(
3369  data_dirty_section_sp.get()) ||
3370  symbol_section->IsDescendant(
3371  data_const_section_sp.get())) {
3372  if (symbol_sect_name &&
3373  ::strstr(symbol_sect_name, "__objc") ==
3374  symbol_sect_name) {
3375  type = eSymbolTypeRuntime;
3376 
3377  if (symbol_name) {
3378  llvm::StringRef symbol_name_ref(symbol_name);
3379  if (symbol_name_ref.startswith("_OBJC_")) {
3380  llvm::StringRef
3381  g_objc_v2_prefix_class(
3382  "_OBJC_CLASS_$_");
3383  llvm::StringRef
3384  g_objc_v2_prefix_metaclass(
3385  "_OBJC_METACLASS_$_");
3386  llvm::StringRef
3387  g_objc_v2_prefix_ivar("_OBJC_IVAR_$_");
3388  if (symbol_name_ref.startswith(
3389  g_objc_v2_prefix_class)) {
3390  symbol_name_non_abi_mangled =
3391  symbol_name + 1;
3392  symbol_name =
3393  symbol_name +
3394  g_objc_v2_prefix_class.size();
3395  type = eSymbolTypeObjCClass;
3396  demangled_is_synthesized = true;
3397  } else if (
3398  symbol_name_ref.startswith(
3399  g_objc_v2_prefix_metaclass)) {
3400  symbol_name_non_abi_mangled =
3401  symbol_name + 1;
3402  symbol_name =
3403  symbol_name +
3404  g_objc_v2_prefix_metaclass.size();
3405  type = eSymbolTypeObjCMetaClass;
3406  demangled_is_synthesized = true;
3407  } else if (symbol_name_ref.startswith(
3408  g_objc_v2_prefix_ivar)) {
3409  symbol_name_non_abi_mangled =
3410  symbol_name + 1;
3411  symbol_name =
3412  symbol_name +
3413  g_objc_v2_prefix_ivar.size();
3414  type = eSymbolTypeObjCIVar;
3415  demangled_is_synthesized = true;
3416  }
3417  }
3418  }
3419  } else if (symbol_sect_name &&
3420  ::strstr(symbol_sect_name,
3421  "__gcc_except_tab") ==
3422  symbol_sect_name) {
3423  type = eSymbolTypeException;
3424  } else {
3425  type = eSymbolTypeData;
3426  }
3427  } else if (symbol_sect_name &&
3428  ::strstr(symbol_sect_name, "__IMPORT") ==
3429  symbol_sect_name) {
3430  type = eSymbolTypeTrampoline;
3431  } else if (symbol_section->IsDescendant(
3432  objc_section_sp.get())) {
3433  type = eSymbolTypeRuntime;
3434  if (symbol_name && symbol_name[0] == '.') {
3435  llvm::StringRef symbol_name_ref(symbol_name);
3436  llvm::StringRef
3437  g_objc_v1_prefix_class(".objc_class_name_");
3438  if (symbol_name_ref.startswith(
3439  g_objc_v1_prefix_class)) {
3440  symbol_name_non_abi_mangled = symbol_name;
3441  symbol_name = symbol_name +
3442  g_objc_v1_prefix_class.size();
3443  type = eSymbolTypeObjCClass;
3444  demangled_is_synthesized = true;
3445  }
3446  }
3447  }
3448  }
3449  }
3450  } break;
3451  }
3452  }
3453 
3454  if (add_nlist) {
3455  uint64_t symbol_value = nlist.n_value;
3456  if (symbol_name_non_abi_mangled) {
3457  sym[sym_idx].GetMangled().SetMangledName(
3458  ConstString(symbol_name_non_abi_mangled));
3459  sym[sym_idx].GetMangled().SetDemangledName(
3460  ConstString(symbol_name));
3461  } else {
3462  bool symbol_name_is_mangled = false;
3463 
3464  if (symbol_name && symbol_name[0] == '_') {
3465  symbol_name_is_mangled = symbol_name[1] == '_';
3466  symbol_name++; // Skip the leading underscore
3467  }
3468 
3469  if (symbol_name) {
3470  ConstString const_symbol_name(symbol_name);
3471  sym[sym_idx].GetMangled().SetValue(
3472  const_symbol_name, symbol_name_is_mangled);
3473  if (is_gsym && is_debug) {
3474  const char *gsym_name =
3475  sym[sym_idx]
3476  .GetMangled()
3477  .GetName(Mangled::ePreferMangled)
3478  .GetCString();
3479  if (gsym_name)
3480  N_GSYM_name_to_sym_idx[gsym_name] = sym_idx;
3481  }
3482  }
3483  }
3484  if (symbol_section) {
3485  const addr_t section_file_addr =
3486  symbol_section->GetFileAddress();
3487  if (symbol_byte_size == 0 &&
3488  function_starts_count > 0) {
3489  addr_t symbol_lookup_file_addr = nlist.n_value;
3490  // Do an exact address match for non-ARM addresses,
3491  // else get the closest since the symbol might be a
3492  // thumb symbol which has an address with bit zero
3493  // set
3494  FunctionStarts::Entry *func_start_entry =
3495  function_starts.FindEntry(symbol_lookup_file_addr,
3496  !is_arm);
3497  if (is_arm && func_start_entry) {
3498  // Verify that the function start address is the
3499  // symbol address (ARM) or the symbol address + 1
3500  // (thumb)
3501  if (func_start_entry->addr !=
3502  symbol_lookup_file_addr &&
3503  func_start_entry->addr !=
3504  (symbol_lookup_file_addr + 1)) {
3505  // Not the right entry, NULL it out...
3506  func_start_entry = NULL;
3507  }
3508  }
3509  if (func_start_entry) {
3510  func_start_entry->data = true;
3511 
3512  addr_t symbol_file_addr = func_start_entry->addr;
3513  uint32_t symbol_flags = 0;
3514  if (is_arm) {
3515  if (symbol_file_addr & 1)
3516  symbol_flags = MACHO_NLIST_ARM_SYMBOL_IS_THUMB;
3517  symbol_file_addr &= THUMB_ADDRESS_BIT_MASK;
3518  }
3519 
3520  const FunctionStarts::Entry *next_func_start_entry =
3521  function_starts.FindNextEntry(func_start_entry);
3522  const addr_t section_end_file_addr =
3523  section_file_addr +
3524  symbol_section->GetByteSize();
3525  if (next_func_start_entry) {
3526  addr_t next_symbol_file_addr =
3527  next_func_start_entry->addr;
3528  // Be sure to clear the Thumb address bit when
3529  // we calculate the size from the current and
3530  // next address
3531  if (is_arm)
3532  next_symbol_file_addr &= THUMB_ADDRESS_BIT_MASK;
3533  symbol_byte_size = std::min<lldb::addr_t>(
3534  next_symbol_file_addr - symbol_file_addr,
3535  section_end_file_addr - symbol_file_addr);
3536  } else {
3537  symbol_byte_size =
3538  section_end_file_addr - symbol_file_addr;
3539  }
3540  }
3541  }
3542  symbol_value -= section_file_addr;
3543  }
3544 
3545  if (is_debug == false) {
3546  if (type == eSymbolTypeCode) {
3547  // See if we can find a N_FUN entry for any code
3548  // symbols. If we do find a match, and the name
3549  // matches, then we can merge the two into just the
3550  // function symbol to avoid duplicate entries in
3551  // the symbol table
3552  auto range =
3553  N_FUN_addr_to_sym_idx.equal_range(nlist.n_value);
3554  if (range.first != range.second) {
3555  bool found_it = false;
3556  for (auto pos = range.first; pos != range.second;
3557  ++pos) {
3558  if (sym[sym_idx].GetMangled().GetName(
3559  Mangled::ePreferMangled) ==
3560  sym[pos->second].GetMangled().GetName(
3561  Mangled::ePreferMangled)) {
3562  m_nlist_idx_to_sym_idx[nlist_idx] = pos->second;
3563  // We just need the flags from the linker
3564  // symbol, so put these flags
3565  // into the N_FUN flags to avoid duplicate
3566  // symbols in the symbol table
3567  sym[pos->second].SetExternal(
3568  sym[sym_idx].IsExternal());
3569  sym[pos->second].SetFlags(nlist.n_type << 16 |
3570  nlist.n_desc);
3571  if (resolver_addresses.find(nlist.n_value) !=
3572  resolver_addresses.end())
3573  sym[pos->second].SetType(eSymbolTypeResolver);
3574  sym[sym_idx].Clear();
3575  found_it = true;
3576  break;
3577  }
3578  }
3579  if (found_it)
3580  continue;
3581  } else {
3582  if (resolver_addresses.find(nlist.n_value) !=
3583  resolver_addresses.end())
3584  type = eSymbolTypeResolver;
3585  }
3586  } else if (type == eSymbolTypeData ||
3587  type == eSymbolTypeObjCClass ||
3588  type == eSymbolTypeObjCMetaClass ||
3589  type == eSymbolTypeObjCIVar) {
3590  // See if we can find a N_STSYM entry for any data
3591  // symbols. If we do find a match, and the name
3592  // matches, then we can merge the two into just the
3593  // Static symbol to avoid duplicate entries in the
3594  // symbol table
3595  auto range = N_STSYM_addr_to_sym_idx.equal_range(
3596  nlist.n_value);
3597  if (range.first != range.second) {
3598  bool found_it = false;
3599  for (auto pos = range.first; pos != range.second;
3600  ++pos) {
3601  if (sym[sym_idx].GetMangled().GetName(
3602  Mangled::ePreferMangled) ==
3603  sym[pos->second].GetMangled().GetName(
3604  Mangled::ePreferMangled)) {
3605  m_nlist_idx_to_sym_idx[nlist_idx] = pos->second;
3606  // We just need the flags from the linker
3607  // symbol, so put these flags
3608  // into the N_STSYM flags to avoid duplicate
3609  // symbols in the symbol table
3610  sym[pos->second].SetExternal(
3611  sym[sym_idx].IsExternal());
3612  sym[pos->second].SetFlags(nlist.n_type << 16 |
3613  nlist.n_desc);
3614  sym[sym_idx].Clear();
3615  found_it = true;
3616  break;
3617  }
3618  }
3619  if (found_it)
3620  continue;
3621  } else {
3622  const char *gsym_name =
3623  sym[sym_idx]
3624  .GetMangled()
3625  .GetName(Mangled::ePreferMangled)
3626  .GetCString();
3627  if (gsym_name) {
3628  // Combine N_GSYM stab entries with the non
3629  // stab symbol
3630  ConstNameToSymbolIndexMap::const_iterator pos =
3631  N_GSYM_name_to_sym_idx.find(gsym_name);
3632  if (pos != N_GSYM_name_to_sym_idx.end()) {
3633  const uint32_t GSYM_sym_idx = pos->second;
3634  m_nlist_idx_to_sym_idx[nlist_idx] =
3635  GSYM_sym_idx;
3636  // Copy the address, because often the N_GSYM
3637  // address has an invalid address of zero
3638  // when the global is a common symbol
3639  sym[GSYM_sym_idx].GetAddressRef().SetSection(
3640  symbol_section);
3641  sym[GSYM_sym_idx].GetAddressRef().SetOffset(
3642  symbol_value);
3643  symbols_added.insert(sym[GSYM_sym_idx]
3644  .GetAddress()
3645  .GetFileAddress());
3646  // We just need the flags from the linker
3647  // symbol, so put these flags
3648  // into the N_GSYM flags to avoid duplicate
3649  // symbols in the symbol table
3650  sym[GSYM_sym_idx].SetFlags(nlist.n_type << 16 |
3651  nlist.n_desc);
3652  sym[sym_idx].Clear();
3653  continue;
3654  }
3655  }
3656  }
3657  }
3658  }
3659 
3660  sym[sym_idx].SetID(nlist_idx);
3661  sym[sym_idx].SetType(type);
3662  if (set_value) {
3663  sym[sym_idx].GetAddressRef().SetSection(symbol_section);
3664  sym[sym_idx].GetAddressRef().SetOffset(symbol_value);
3665  symbols_added.insert(
3666  sym[sym_idx].GetAddress().GetFileAddress());
3667  }
3668  sym[sym_idx].SetFlags(nlist.n_type << 16 | nlist.n_desc);
3669 
3670  if (symbol_byte_size > 0)
3671  sym[sym_idx].SetByteSize(symbol_byte_size);
3672 
3673  if (demangled_is_synthesized)
3674  sym[sym_idx].SetDemangledNameIsSynthesized(true);
3675  ++sym_idx;
3676  } else {
3677  sym[sym_idx].Clear();
3678  }
3679  }
3680  /////////////////////////////
3681  }
3682  break; // No more entries to consider
3683  }
3684  }
3685 
3686  for (const auto &pos : reexport_shlib_needs_fixup) {
3687  const auto undef_pos = undefined_name_to_desc.find(pos.second);
3688  if (undef_pos != undefined_name_to_desc.end()) {
3689  const uint8_t dylib_ordinal =
3690  llvm::MachO::GET_LIBRARY_ORDINAL(undef_pos->second);
3691  if (dylib_ordinal > 0 && dylib_ordinal < dylib_files.GetSize())
3692  sym[pos.first].SetReExportedSymbolSharedLibrary(
3693  dylib_files.GetFileSpecAtIndex(dylib_ordinal - 1));
3694  }
3695  }
3696  }
3697  }
3698  }
3699  }
3700  }
3701 
3702  // Must reset this in case it was mutated above!
3703  nlist_data_offset = 0;
3704 #endif
3705 
3706  if (nlist_data.GetByteSize() > 0) {
3707 
3708  // If the sym array was not created while parsing the DSC unmapped
3709  // symbols, create it now.
3710  if (sym == nullptr) {
3711  sym =
3712  symtab->Resize(symtab_load_command.nsyms + m_dysymtab.nindirectsyms);
3713  num_syms = symtab->GetNumSymbols();
3714  }
3715 
3716  if (unmapped_local_symbols_found) {
3717  assert(m_dysymtab.ilocalsym == 0);
3718  nlist_data_offset += (m_dysymtab.nlocalsym * nlist_byte_size);
3719  nlist_idx = m_dysymtab.nlocalsym;
3720  } else {
3721  nlist_idx = 0;
3722  }
3723 
3724  typedef llvm::DenseMap<ConstString, uint16_t> UndefinedNameToDescMap;
3725  typedef llvm::DenseMap<uint32_t, ConstString> SymbolIndexToName;
3726  UndefinedNameToDescMap undefined_name_to_desc;
3727  SymbolIndexToName reexport_shlib_needs_fixup;
3728 
3729  // Symtab parsing is a huge mess. Everything is entangled and the code
3730  // requires access to a ridiculous amount of variables. LLDB depends
3731  // heavily on the proper merging of symbols and to get that right we need
3732  // to make sure we have parsed all the debug symbols first. Therefore we
3733  // invoke the lambda twice, once to parse only the debug symbols and then
3734  // once more to parse the remaining symbols.
3735  auto ParseSymbolLambda = [&](struct nlist_64 &nlist, uint32_t nlist_idx,
3736  bool debug_only) {
3737  const bool is_debug = ((nlist.n_type & N_STAB) != 0);
3738  if (is_debug != debug_only)
3739  return true;
3740 
3741  const char *symbol_name_non_abi_mangled = nullptr;
3742  const char *symbol_name = nullptr;
3743 
3744  if (have_strtab_data) {
3745  symbol_name = strtab_data.PeekCStr(nlist.n_strx);
3746 
3747  if (symbol_name == nullptr) {
3748  // No symbol should be NULL, even the symbols with no string values
3749  // should have an offset zero which points to an empty C-string
3750  Host::SystemLog(Host::eSystemLogError,
3751  "error: symbol[%u] has invalid string table offset "
3752  "0x%x in %s, ignoring symbol\n",
3753  nlist_idx, nlist.n_strx,
3754  module_sp->GetFileSpec().GetPath().c_str());
3755  return true;
3756  }
3757  if (symbol_name[0] == '\0')
3758  symbol_name = nullptr;
3759  } else {
3760  const addr_t str_addr = strtab_addr + nlist.n_strx;
3761  Status str_error;
3762  if (process->ReadCStringFromMemory(str_addr, memory_symbol_name,
3763  str_error))
3764  symbol_name = memory_symbol_name.c_str();
3765  }
3766 
3768  SectionSP symbol_section;
3769  lldb::addr_t symbol_byte_size = 0;
3770  bool add_nlist = true;
3771  bool is_gsym = false;
3772  bool demangled_is_synthesized = false;
3773  bool set_value = true;
3774 
3775  assert(sym_idx < num_syms);
3776  sym[sym_idx].SetDebug(is_debug);
3777 
3778  if (is_debug) {
3779  switch (nlist.n_type) {
3780  case N_GSYM:
3781  // global symbol: name,,NO_SECT,type,0
3782  // Sometimes the N_GSYM value contains the address.
3783 
3784  // FIXME: In the .o files, we have a GSYM and a debug symbol for all
3785  // the ObjC data. They
3786  // have the same address, but we want to ensure that we always find
3787  // only the real symbol, 'cause we don't currently correctly
3788  // attribute the GSYM one to the ObjCClass/Ivar/MetaClass symbol
3789  // type. This is a temporary hack to make sure the ObjectiveC
3790  // symbols get treated correctly. To do this right, we should
3791  // coalesce all the GSYM & global symbols that have the same
3792  // address.
3793  is_gsym = true;
3794  sym[sym_idx].SetExternal(true);
3795 
3796  if (symbol_name && symbol_name[0] == '_' && symbol_name[1] == 'O') {
3797  llvm::StringRef symbol_name_ref(symbol_name);
3798  if (symbol_name_ref.startswith(g_objc_v2_prefix_class)) {
3799  symbol_name_non_abi_mangled = symbol_name + 1;
3800  symbol_name = symbol_name + g_objc_v2_prefix_class.size();
3801  type = eSymbolTypeObjCClass;
3802  demangled_is_synthesized = true;
3803 
3804  } else if (symbol_name_ref.startswith(g_objc_v2_prefix_metaclass)) {
3805  symbol_name_non_abi_mangled = symbol_name + 1;
3806  symbol_name = symbol_name + g_objc_v2_prefix_metaclass.size();
3807  type = eSymbolTypeObjCMetaClass;
3808  demangled_is_synthesized = true;
3809  } else if (symbol_name_ref.startswith(g_objc_v2_prefix_ivar)) {
3810  symbol_name_non_abi_mangled = symbol_name + 1;
3811  symbol_name = symbol_name + g_objc_v2_prefix_ivar.size();
3812  type = eSymbolTypeObjCIVar;
3813  demangled_is_synthesized = true;
3814  }
3815  } else {
3816  if (nlist.n_value != 0)
3817  symbol_section =
3818  section_info.GetSection(nlist.n_sect, nlist.n_value);
3819  type = eSymbolTypeData;
3820  }
3821  break;
3822 
3823  case N_FNAME:
3824  // procedure name (f77 kludge): name,,NO_SECT,0,0
3825  type = eSymbolTypeCompiler;
3826  break;
3827 
3828  case N_FUN:
3829  // procedure: name,,n_sect,linenumber,address
3830  if (symbol_name) {
3831  type = eSymbolTypeCode;
3832  symbol_section =
3833  section_info.GetSection(nlist.n_sect, nlist.n_value);
3834 
3835  N_FUN_addr_to_sym_idx.insert(
3836  std::make_pair(nlist.n_value, sym_idx));
3837  // We use the current number of symbols in the symbol table in
3838  // lieu of using nlist_idx in case we ever start trimming entries
3839  // out
3840  N_FUN_indexes.push_back(sym_idx);
3841  } else {
3842  type = eSymbolTypeCompiler;
3843 
3844  if (!N_FUN_indexes.empty()) {
3845  // Copy the size of the function into the original STAB entry
3846  // so we don't have to hunt for it later
3847  symtab->SymbolAtIndex(N_FUN_indexes.back())
3848  ->SetByteSize(nlist.n_value);
3849  N_FUN_indexes.pop_back();
3850  // We don't really need the end function STAB as it contains
3851  // the size which we already placed with the original symbol,
3852  // so don't add it if we want a minimal symbol table
3853  add_nlist = false;
3854  }
3855  }
3856  break;
3857 
3858  case N_STSYM:
3859  // static symbol: name,,n_sect,type,address
3860  N_STSYM_addr_to_sym_idx.insert(
3861  std::make_pair(nlist.n_value, sym_idx));
3862  symbol_section = section_info.GetSection(nlist.n_sect, nlist.n_value);
3863  if (symbol_name && symbol_name[0]) {
3864  type = ObjectFile::GetSymbolTypeFromName(symbol_name + 1,
3865  eSymbolTypeData);
3866  }
3867  break;
3868 
3869  case N_LCSYM:
3870  // .lcomm symbol: name,,n_sect,type,address
3871  symbol_section = section_info.GetSection(nlist.n_sect, nlist.n_value);
3872  type = eSymbolTypeCommonBlock;
3873  break;
3874 
3875  case N_BNSYM:
3876  // We use the current number of symbols in the symbol table in lieu
3877  // of using nlist_idx in case we ever start trimming entries out
3878  // Skip these if we want minimal symbol tables
3879  add_nlist = false;
3880  break;
3881 
3882  case N_ENSYM:
3883  // Set the size of the N_BNSYM to the terminating index of this
3884  // N_ENSYM so that we can always skip the entire symbol if we need
3885  // to navigate more quickly at the source level when parsing STABS
3886  // Skip these if we want minimal symbol tables
3887  add_nlist = false;
3888  break;
3889 
3890  case N_OPT:
3891  // emitted with gcc2_compiled and in gcc source
3892  type = eSymbolTypeCompiler;
3893  break;
3894 
3895  case N_RSYM:
3896  // register sym: name,,NO_SECT,type,register
3897  type = eSymbolTypeVariable;
3898  break;
3899 
3900  case N_SLINE:
3901  // src line: 0,,n_sect,linenumber,address
3902  symbol_section = section_info.GetSection(nlist.n_sect, nlist.n_value);
3903  type = eSymbolTypeLineEntry;
3904  break;
3905 
3906  case N_SSYM:
3907  // structure elt: name,,NO_SECT,type,struct_offset
3908  type = eSymbolTypeVariableType;
3909  break;
3910 
3911  case N_SO:
3912  // source file name
3913  type = eSymbolTypeSourceFile;
3914  if (symbol_name == nullptr) {
3915  add_nlist = false;
3916  if (N_SO_index != UINT32_MAX) {
3917  // Set the size of the N_SO to the terminating index of this
3918  // N_SO so that we can always skip the entire N_SO if we need
3919  // to navigate more quickly at the source level when parsing
3920  // STABS
3921  symbol_ptr = symtab->SymbolAtIndex(N_SO_index);
3922  symbol_ptr->SetByteSize(sym_idx);
3923  symbol_ptr->SetSizeIsSibling(true);
3924  }
3925  N_NSYM_indexes.clear();
3926  N_INCL_indexes.clear();
3927  N_BRAC_indexes.clear();
3928  N_COMM_indexes.clear();
3929  N_FUN_indexes.clear();
3930  N_SO_index = UINT32_MAX;
3931  } else {
3932  // We use the current number of symbols in the symbol table in
3933  // lieu of using nlist_idx in case we ever start trimming entries
3934  // out
3935  const bool N_SO_has_full_path = symbol_name[0] == '/';
3936  if (N_SO_has_full_path) {
3937  if ((N_SO_index == sym_idx - 1) && ((sym_idx - 1) < num_syms)) {
3938  // We have two consecutive N_SO entries where the first
3939  // contains a directory and the second contains a full path.
3940  sym[sym_idx - 1].GetMangled().SetValue(ConstString(symbol_name),
3941  false);
3942  m_nlist_idx_to_sym_idx[nlist_idx] = sym_idx - 1;
3943  add_nlist = false;
3944  } else {
3945  // This is the first entry in a N_SO that contains a
3946  // directory or a full path to the source file
3947  N_SO_index = sym_idx;
3948  }
3949  } else if ((N_SO_index == sym_idx - 1) &&
3950  ((sym_idx - 1) < num_syms)) {
3951  // This is usually the second N_SO entry that contains just the
3952  // filename, so here we combine it with the first one if we are
3953  // minimizing the symbol table
3954  const char *so_path =
3955  sym[sym_idx - 1].GetMangled().GetDemangledName().AsCString();
3956  if (so_path && so_path[0]) {
3957  std::string full_so_path(so_path);
3958  const size_t double_slash_pos = full_so_path.find("//");
3959  if (double_slash_pos != std::string::npos) {
3960  // The linker has been generating bad N_SO entries with
3961  // doubled up paths in the format "%s%s" where the first
3962  // string in the DW_AT_comp_dir, and the second is the
3963  // directory for the source file so you end up with a path
3964  // that looks like "/tmp/src//tmp/src/"
3965  FileSpec so_dir(so_path);
3966  if (!FileSystem::Instance().Exists(so_dir)) {
3967  so_dir.SetFile(&full_so_path[double_slash_pos + 1],
3968  FileSpec::Style::native);
3969  if (FileSystem::Instance().Exists(so_dir)) {
3970  // Trim off the incorrect path
3971  full_so_path.erase(0, double_slash_pos + 1);
3972  }
3973  }
3974  }
3975  if (*full_so_path.rbegin() != '/')
3976  full_so_path += '/';
3977  full_so_path += symbol_name;
3978  sym[sym_idx - 1].GetMangled().SetValue(
3979  ConstString(full_so_path.c_str()), false);
3980  add_nlist = false;
3981  m_nlist_idx_to_sym_idx[nlist_idx] = sym_idx - 1;
3982  }
3983  } else {
3984  // This could be a relative path to a N_SO
3985  N_SO_index = sym_idx;
3986  }
3987  }
3988  break;
3989 
3990  case N_OSO:
3991  // object file name: name,,0,0,st_mtime
3992  type = eSymbolTypeObjectFile;
3993  break;
3994 
3995  case N_LSYM:
3996  // local sym: name,,NO_SECT,type,offset
3997  type = eSymbolTypeLocal;
3998  break;
3999 
4000  // INCL scopes
4001  case N_BINCL:
4002  // include file beginning: name,,NO_SECT,0,sum We use the current
4003  // number of symbols in the symbol table in lieu of using nlist_idx
4004  // in case we ever start trimming entries out
4005  N_INCL_indexes.push_back(sym_idx);
4006  type = eSymbolTypeScopeBegin;
4007  break;
4008 
4009  case N_EINCL:
4010  // include file end: name,,NO_SECT,0,0
4011  // Set the size of the N_BINCL to the terminating index of this
4012  // N_EINCL so that we can always skip the entire symbol if we need
4013  // to navigate more quickly at the source level when parsing STABS
4014  if (!N_INCL_indexes.empty()) {
4015  symbol_ptr = symtab->SymbolAtIndex(N_INCL_indexes.back());
4016  symbol_ptr->SetByteSize(sym_idx + 1);
4017  symbol_ptr->SetSizeIsSibling(true);
4018  N_INCL_indexes.pop_back();
4019  }
4020  type = eSymbolTypeScopeEnd;
4021  break;
4022 
4023  case N_SOL:
4024  // #included file name: name,,n_sect,0,address
4025  type = eSymbolTypeHeaderFile;
4026 
4027  // We currently don't use the header files on darwin
4028  add_nlist = false;
4029  break;
4030 
4031  case N_PARAMS:
4032  // compiler parameters: name,,NO_SECT,0,0
4033  type = eSymbolTypeCompiler;
4034  break;
4035 
4036  case N_VERSION:
4037  // compiler version: name,,NO_SECT,0,0
4038  type = eSymbolTypeCompiler;
4039  break;
4040 
4041  case N_OLEVEL:
4042  // compiler -O level: name,,NO_SECT,0,0
4043  type = eSymbolTypeCompiler;
4044  break;
4045 
4046  case N_PSYM:
4047  // parameter: name,,NO_SECT,type,offset
4048  type = eSymbolTypeVariable;
4049  break;
4050 
4051  case N_ENTRY:
4052  // alternate entry: name,,n_sect,linenumber,address
4053  symbol_section = section_info.GetSection(nlist.n_sect, nlist.n_value);
4054  type = eSymbolTypeLineEntry;
4055  break;
4056 
4057  // Left and Right Braces
4058  case N_LBRAC:
4059  // left bracket: 0,,NO_SECT,nesting level,address We use the
4060  // current number of symbols in the symbol table in lieu of using
4061  // nlist_idx in case we ever start trimming entries out
4062  symbol_section = section_info.GetSection(nlist.n_sect, nlist.n_value);
4063  N_BRAC_indexes.push_back(sym_idx);
4064  type = eSymbolTypeScopeBegin;
4065  break;
4066 
4067  case N_RBRAC:
4068  // right bracket: 0,,NO_SECT,nesting level,address Set the size of
4069  // the N_LBRAC to the terminating index of this N_RBRAC so that we
4070  // can always skip the entire symbol if we need to navigate more
4071  // quickly at the source level when parsing STABS
4072  symbol_section = section_info.GetSection(nlist.n_sect, nlist.n_value);
4073  if (!N_BRAC_indexes.empty()) {
4074  symbol_ptr = symtab->SymbolAtIndex(N_BRAC_indexes.back());
4075  symbol_ptr->SetByteSize(sym_idx + 1);
4076  symbol_ptr->SetSizeIsSibling(true);
4077  N_BRAC_indexes.pop_back();
4078  }
4079  type = eSymbolTypeScopeEnd;
4080  break;
4081 
4082  case N_EXCL:
4083  // deleted include file: name,,NO_SECT,0,sum
4084  type = eSymbolTypeHeaderFile;
4085  break;
4086 
4087  // COMM scopes
4088  case N_BCOMM:
4089  // begin common: name,,NO_SECT,0,0
4090  // We use the current number of symbols in the symbol table in lieu
4091  // of using nlist_idx in case we ever start trimming entries out
4092  type = eSymbolTypeScopeBegin;
4093  N_COMM_indexes.push_back(sym_idx);
4094  break;
4095 
4096  case N_ECOML:
4097  // end common (local name): 0,,n_sect,0,address
4098  symbol_section = section_info.GetSection(nlist.n_sect, nlist.n_value);
4099  LLVM_FALLTHROUGH;
4100 
4101  case N_ECOMM:
4102  // end common: name,,n_sect,0,0
4103  // Set the size of the N_BCOMM to the terminating index of this
4104  // N_ECOMM/N_ECOML so that we can always skip the entire symbol if
4105  // we need to navigate more quickly at the source level when
4106  // parsing STABS
4107  if (!N_COMM_indexes.empty()) {
4108  symbol_ptr = symtab->SymbolAtIndex(N_COMM_indexes.back());
4109  symbol_ptr->SetByteSize(sym_idx + 1);
4110  symbol_ptr->SetSizeIsSibling(true);
4111  N_COMM_indexes.pop_back();
4112  }
4113  type = eSymbolTypeScopeEnd;
4114  break;
4115 
4116  case N_LENG:
4117  // second stab entry with length information
4118  type = eSymbolTypeAdditional;
4119  break;
4120 
4121  default:
4122  break;
4123  }
4124  } else {
4125  uint8_t n_type = N_TYPE & nlist.n_type;
4126  sym[sym_idx].SetExternal((N_EXT & nlist.n_type) != 0);
4127 
4128  switch (n_type) {
4129  case N_INDR: {
4130  const char *reexport_name_cstr = strtab_data.PeekCStr(nlist.n_value);
4131  if (reexport_name_cstr && reexport_name_cstr[0]) {
4132  type = eSymbolTypeReExported;
4133  ConstString reexport_name(reexport_name_cstr +
4134  ((reexport_name_cstr[0] == '_') ? 1 : 0));
4135  sym[sym_idx].SetReExportedSymbolName(reexport_name);
4136  set_value = false;
4137  reexport_shlib_needs_fixup[sym_idx] = reexport_name;
4138  indirect_symbol_names.insert(
4139  ConstString(symbol_name + ((symbol_name[0] == '_') ? 1 : 0)));
4140  } else
4141  type = eSymbolTypeUndefined;
4142  } break;
4143 
4144  case N_UNDF:
4145  if (symbol_name && symbol_name[0]) {
4146  ConstString undefined_name(symbol_name +
4147  ((symbol_name[0] == '_') ? 1 : 0));
4148  undefined_name_to_desc[undefined_name] = nlist.n_desc;
4149  }
4150  LLVM_FALLTHROUGH;
4151 
4152  case N_PBUD:
4153  type = eSymbolTypeUndefined;
4154  break;
4155 
4156  case N_ABS:
4157  type = eSymbolTypeAbsolute;
4158  break;
4159 
4160  case N_SECT: {
4161  symbol_section = section_info.GetSection(nlist.n_sect, nlist.n_value);
4162 
4163  if (!symbol_section) {
4164  // TODO: warn about this?
4165  add_nlist = false;
4166  break;
4167  }
4168 
4169  if (TEXT_eh_frame_sectID == nlist.n_sect) {
4170  type = eSymbolTypeException;
4171  } else {
4172  uint32_t section_type = symbol_section->Get() & SECTION_TYPE;
4173 
4174  switch (section_type) {
4175  case S_CSTRING_LITERALS:
4176  type = eSymbolTypeData;
4177  break; // section with only literal C strings
4178  case S_4BYTE_LITERALS:
4179  type = eSymbolTypeData;
4180  break; // section with only 4 byte literals
4181  case S_8BYTE_LITERALS:
4182  type = eSymbolTypeData;
4183  break; // section with only 8 byte literals
4184  case S_LITERAL_POINTERS:
4185  type = eSymbolTypeTrampoline;
4186  break; // section with only pointers to literals
4187  case S_NON_LAZY_SYMBOL_POINTERS:
4188  type = eSymbolTypeTrampoline;
4189  break; // section with only non-lazy symbol pointers
4190  case S_LAZY_SYMBOL_POINTERS:
4191  type = eSymbolTypeTrampoline;
4192  break; // section with only lazy symbol pointers
4193  case S_SYMBOL_STUBS:
4194  type = eSymbolTypeTrampoline;
4195  break; // section with only symbol stubs, byte size of stub in
4196  // the reserved2 field
4197  case S_MOD_INIT_FUNC_POINTERS:
4198  type = eSymbolTypeCode;
4199  break; // section with only function pointers for initialization
4200  case S_MOD_TERM_FUNC_POINTERS:
4201  type = eSymbolTypeCode;
4202  break; // section with only function pointers for termination
4203  case S_INTERPOSING:
4204  type = eSymbolTypeTrampoline;
4205  break; // section with only pairs of function pointers for
4206  // interposing
4207  case S_16BYTE_LITERALS:
4208  type = eSymbolTypeData;
4209  break; // section with only 16 byte literals
4210  case S_DTRACE_DOF:
4212  break;
4213  case S_LAZY_DYLIB_SYMBOL_POINTERS:
4214  type = eSymbolTypeTrampoline;
4215  break;
4216  default:
4217  switch (symbol_section->GetType()) {
4219  type = eSymbolTypeCode;
4220  break;
4221  case eSectionTypeData:
4222  case eSectionTypeDataCString: // Inlined C string data
4223  case eSectionTypeDataCStringPointers: // Pointers to C string
4224  // data
4225  case eSectionTypeDataSymbolAddress: // Address of a symbol in
4226  // the symbol table
4227  case eSectionTypeData4:
4228  case eSectionTypeData8:
4229  case eSectionTypeData16:
4230  type = eSymbolTypeData;
4231  break;
4232  default:
4233  break;
4234  }
4235  break;
4236  }
4237 
4238  if (type == eSymbolTypeInvalid) {
4239  const char *symbol_sect_name =
4240  symbol_section->GetName().AsCString();
4241  if (symbol_section->IsDescendant(text_section_sp.get())) {
4242  if (symbol_section->IsClear(S_ATTR_PURE_INSTRUCTIONS |
4243  S_ATTR_SELF_MODIFYING_CODE |
4244  S_ATTR_SOME_INSTRUCTIONS))
4245  type = eSymbolTypeData;
4246  else
4247  type = eSymbolTypeCode;
4248  } else if (symbol_section->IsDescendant(data_section_sp.get()) ||
4249  symbol_section->IsDescendant(
4250  data_dirty_section_sp.get()) ||
4251  symbol_section->IsDescendant(
4252  data_const_section_sp.get())) {
4253  if (symbol_sect_name &&
4254  ::strstr(symbol_sect_name, "__objc") == symbol_sect_name) {
4255  type = eSymbolTypeRuntime;
4256 
4257  if (symbol_name) {
4258  llvm::StringRef symbol_name_ref(symbol_name);
4259  if (symbol_name_ref.startswith("_OBJC_")) {
4260  llvm::StringRef g_objc_v2_prefix_class(
4261  "_OBJC_CLASS_$_");
4262  llvm::StringRef g_objc_v2_prefix_metaclass(
4263  "_OBJC_METACLASS_$_");
4264  llvm::StringRef g_objc_v2_prefix_ivar(
4265  "_OBJC_IVAR_$_");
4266  if (symbol_name_ref.startswith(g_objc_v2_prefix_class)) {
4267  symbol_name_non_abi_mangled = symbol_name + 1;
4268  symbol_name =
4269  symbol_name + g_objc_v2_prefix_class.size();
4270  type = eSymbolTypeObjCClass;
4271  demangled_is_synthesized = true;
4272  } else if (symbol_name_ref.startswith(
4273  g_objc_v2_prefix_metaclass)) {
4274  symbol_name_non_abi_mangled = symbol_name + 1;
4275  symbol_name =
4276  symbol_name + g_objc_v2_prefix_metaclass.size();
4277  type = eSymbolTypeObjCMetaClass;
4278  demangled_is_synthesized = true;
4279  } else if (symbol_name_ref.startswith(
4280  g_objc_v2_prefix_ivar)) {
4281  symbol_name_non_abi_mangled = symbol_name + 1;
4282  symbol_name =
4283  symbol_name + g_objc_v2_prefix_ivar.size();
4284  type = eSymbolTypeObjCIVar;
4285  demangled_is_synthesized = true;
4286  }
4287  }
4288  }
4289  } else if (symbol_sect_name &&
4290  ::strstr(symbol_sect_name, "__gcc_except_tab") ==
4291  symbol_sect_name) {
4292  type = eSymbolTypeException;
4293  } else {
4294  type = eSymbolTypeData;
4295  }
4296  } else if (symbol_sect_name &&
4297  ::strstr(symbol_sect_name, "__IMPORT") ==
4298  symbol_sect_name) {
4299  type = eSymbolTypeTrampoline;
4300  } else if (symbol_section->IsDescendant(objc_section_sp.get())) {
4301  type = eSymbolTypeRuntime;
4302  if (symbol_name && symbol_name[0] == '.') {
4303  llvm::StringRef symbol_name_ref(symbol_name);
4304  llvm::StringRef g_objc_v1_prefix_class(
4305  ".objc_class_name_");
4306  if (symbol_name_ref.startswith(g_objc_v1_prefix_class)) {
4307  symbol_name_non_abi_mangled = symbol_name;
4308  symbol_name = symbol_name + g_objc_v1_prefix_class.size();
4309  type = eSymbolTypeObjCClass;
4310  demangled_is_synthesized = true;
4311  }
4312  }
4313  }
4314  }
4315  }
4316  } break;
4317  }
4318  }
4319 
4320  if (!add_nlist) {
4321  sym[sym_idx].Clear();
4322  return true;
4323  }
4324 
4325  uint64_t symbol_value = nlist.n_value;
4326 
4327  if (symbol_name_non_abi_mangled) {
4328  sym[sym_idx].GetMangled().SetMangledName(
4329  ConstString(symbol_name_non_abi_mangled));
4330  sym[sym_idx].GetMangled().SetDemangledName(ConstString(symbol_name));
4331  } else {
4332  bool symbol_name_is_mangled = false;
4333 
4334  if (symbol_name && symbol_name[0] == '_') {
4335  symbol_name_is_mangled = symbol_name[1] == '_';
4336  symbol_name++; // Skip the leading underscore
4337  }
4338 
4339  if (symbol_name) {
4340  ConstString const_symbol_name(symbol_name);
4341  sym[sym_idx].GetMangled().SetValue(const_symbol_name,
4342  symbol_name_is_mangled);
4343  }
4344  }
4345 
4346  if (is_gsym) {
4347  const char *gsym_name = sym[sym_idx]
4348  .GetMangled()
4349  .GetName(Mangled::ePreferMangled)
4350  .GetCString();
4351  if (gsym_name)
4352  N_GSYM_name_to_sym_idx[gsym_name] = sym_idx;
4353  }
4354 
4355  if (symbol_section) {
4356  const addr_t section_file_addr = symbol_section->GetFileAddress();
4357  if (symbol_byte_size == 0 && function_starts_count > 0) {
4358  addr_t symbol_lookup_file_addr = nlist.n_value;
4359  // Do an exact address match for non-ARM addresses, else get the
4360  // closest since the symbol might be a thumb symbol which has an
4361  // address with bit zero set.
4362  FunctionStarts::Entry *func_start_entry =
4363  function_starts.FindEntry(symbol_lookup_file_addr, !is_arm);
4364  if (is_arm && func_start_entry) {
4365  // Verify that the function start address is the symbol address
4366  // (ARM) or the symbol address + 1 (thumb).
4367  if (func_start_entry->addr != symbol_lookup_file_addr &&
4368  func_start_entry->addr != (symbol_lookup_file_addr + 1)) {
4369  // Not the right entry, NULL it out...
4370  func_start_entry = nullptr;
4371  }
4372  }
4373  if (func_start_entry) {
4374  func_start_entry->data = true;
4375 
4376  addr_t symbol_file_addr = func_start_entry->addr;
4377  if (is_arm)
4378  symbol_file_addr &= THUMB_ADDRESS_BIT_MASK;
4379 
4380  const FunctionStarts::Entry *next_func_start_entry =
4381  function_starts.FindNextEntry(func_start_entry);
4382  const addr_t section_end_file_addr =
4383  section_file_addr + symbol_section->GetByteSize();
4384  if (next_func_start_entry) {
4385  addr_t next_symbol_file_addr = next_func_start_entry->addr;
4386  // Be sure the clear the Thumb address bit when we calculate the
4387  // size from the current and next address
4388  if (is_arm)
4389  next_symbol_file_addr &= THUMB_ADDRESS_BIT_MASK;
4390  symbol_byte_size = std::min<lldb::addr_t>(
4391  next_symbol_file_addr - symbol_file_addr,
4392  section_end_file_addr - symbol_file_addr);
4393  } else {
4394  symbol_byte_size = section_end_file_addr - symbol_file_addr;
4395  }
4396  }
4397  }
4398  symbol_value -= section_file_addr;
4399  }
4400 
4401  if (!is_debug) {
4402  if (type == eSymbolTypeCode) {
4403  // See if we can find a N_FUN entry for any code symbols. If we do
4404  // find a match, and the name matches, then we can merge the two into
4405  // just the function symbol to avoid duplicate entries in the symbol
4406  // table.
4407  std::pair<ValueToSymbolIndexMap::const_iterator,
4408  ValueToSymbolIndexMap::const_iterator>
4409  range;
4410  range = N_FUN_addr_to_sym_idx.equal_range(nlist.n_value);
4411  if (range.first != range.second) {
4412  for (ValueToSymbolIndexMap::const_iterator pos = range.first;
4413  pos != range.second; ++pos) {
4414  if (sym[sym_idx].GetMangled().GetName(Mangled::ePreferMangled) ==
4415  sym[pos->second].GetMangled().GetName(
4416  Mangled::ePreferMangled)) {
4417  m_nlist_idx_to_sym_idx[nlist_idx] = pos->second;
4418  // We just need the flags from the linker symbol, so put these
4419  // flags into the N_FUN flags to avoid duplicate symbols in the
4420  // symbol table.
4421  sym[pos->second].SetExternal(sym[sym_idx].IsExternal());
4422  sym[pos->second].SetFlags(nlist.n_type << 16 | nlist.n_desc);
4423  if (resolver_addresses.find(nlist.n_value) !=
4424  resolver_addresses.end())
4425  sym[pos->second].SetType(eSymbolTypeResolver);
4426  sym[sym_idx].Clear();
4427  return true;
4428  }
4429  }
4430  } else {
4431  if (resolver_addresses.find(nlist.n_value) !=
4432  resolver_addresses.end())
4433  type = eSymbolTypeResolver;
4434  }
4435  } else if (type == eSymbolTypeData || type == eSymbolTypeObjCClass ||
4436  type == eSymbolTypeObjCMetaClass ||
4437  type == eSymbolTypeObjCIVar) {
4438  // See if we can find a N_STSYM entry for any data symbols. If we do
4439  // find a match, and the name matches, then we can merge the two into
4440  // just the Static symbol to avoid duplicate entries in the symbol
4441  // table.
4442  std::pair<ValueToSymbolIndexMap::const_iterator,
4443  ValueToSymbolIndexMap::const_iterator>
4444  range;
4445  range = N_STSYM_addr_to_sym_idx.equal_range(nlist.n_value);
4446  if (range.first != range.second) {
4447  for (ValueToSymbolIndexMap::const_iterator pos = range.first;
4448  pos != range.second; ++pos) {
4449  if (sym[sym_idx].GetMangled().GetName(Mangled::ePreferMangled) ==
4450  sym[pos->second].GetMangled().GetName(
4451  Mangled::ePreferMangled)) {
4452  m_nlist_idx_to_sym_idx[nlist_idx] = pos->second;
4453  // We just need the flags from the linker symbol, so put these
4454  // flags into the N_STSYM flags to avoid duplicate symbols in
4455  // the symbol table.
4456  sym[pos->second].SetExternal(sym[sym_idx].IsExternal());
4457  sym[pos->second].SetFlags(nlist.n_type << 16 | nlist.n_desc);
4458  sym[sym_idx].Clear();
4459  return true;
4460  }
4461  }
4462  } else {
4463  // Combine N_GSYM stab entries with the non stab symbol.
4464  const char *gsym_name = sym[sym_idx]
4465  .GetMangled()
4466  .GetName(Mangled::ePreferMangled)
4467  .GetCString();
4468  if (gsym_name) {
4469  ConstNameToSymbolIndexMap::const_iterator pos =
4470  N_GSYM_name_to_sym_idx.find(gsym_name);
4471  if (pos != N_GSYM_name_to_sym_idx.end()) {
4472  const uint32_t GSYM_sym_idx = pos->second;
4473  m_nlist_idx_to_sym_idx[nlist_idx] = GSYM_sym_idx;
4474  // Copy the address, because often the N_GSYM address has an
4475  // invalid address of zero when the global is a common symbol.
4476  sym[GSYM_sym_idx].GetAddressRef().SetSection(symbol_section);
4477  sym[GSYM_sym_idx].GetAddressRef().SetOffset(symbol_value);
4478  symbols_added.insert(
4479  sym[GSYM_sym_idx].GetAddress().GetFileAddress());
4480  // We just need the flags from the linker symbol, so put these
4481  // flags into the N_GSYM flags to avoid duplicate symbols in
4482  // the symbol table.
4483  sym[GSYM_sym_idx].SetFlags(nlist.n_type << 16 | nlist.n_desc);
4484  sym[sym_idx].Clear();
4485  return true;
4486  }
4487  }
4488  }
4489  }
4490  }
4491 
4492  sym[sym_idx].SetID(nlist_idx);
4493  sym[sym_idx].SetType(type);
4494  if (set_value) {
4495  sym[sym_idx].GetAddressRef().SetSection(symbol_section);
4496  sym[sym_idx].GetAddressRef().SetOffset(symbol_value);
4497  symbols_added.insert(sym[sym_idx].GetAddress().GetFileAddress());
4498  }
4499  sym[sym_idx].SetFlags(nlist.n_type << 16 | nlist.n_desc);
4500  if (nlist.n_desc & N_WEAK_REF)
4501  sym[sym_idx].SetIsWeak(true);
4502 
4503  if (symbol_byte_size > 0)
4504  sym[sym_idx].SetByteSize(symbol_byte_size);
4505 
4506  if (demangled_is_synthesized)
4507  sym[sym_idx].SetDemangledNameIsSynthesized(true);
4508 
4509  ++sym_idx;
4510  return true;
4511  };
4512 
4513  // First parse all the nlists but don't process them yet. See the next
4514  // comment for an explanation why.
4515  std::vector<struct nlist_64> nlists;
4516  nlists.reserve(symtab_load_command.nsyms);
4517  for (; nlist_idx < symtab_load_command.nsyms; ++nlist_idx) {
4518  if (auto nlist =
4519  ParseNList(nlist_data, nlist_data_offset, nlist_byte_size))
4520  nlists.push_back(*nlist);
4521  else
4522  break;
4523  }
4524 
4525  // Now parse all the debug symbols. This is needed to merge non-debug
4526  // symbols in the next step. Non-debug symbols are always coalesced into
4527  // the debug symbol. Doing this in one step would mean that some symbols
4528  // won't be merged.
4529  nlist_idx = 0;
4530  for (auto &nlist : nlists) {
4531  if (!ParseSymbolLambda(nlist, nlist_idx++, DebugSymbols))
4532  break;
4533  }
4534 
4535  // Finally parse all the non debug symbols.
4536  nlist_idx = 0;
4537  for (auto &nlist : nlists) {
4538  if (!ParseSymbolLambda(nlist, nlist_idx++, NonDebugSymbols))
4539  break;
4540  }
4541 
4542  for (const auto &pos : reexport_shlib_needs_fixup) {
4543  const auto undef_pos = undefined_name_to_desc.find(pos.second);
4544  if (undef_pos != undefined_name_to_desc.end()) {
4545  const uint8_t dylib_ordinal =
4546  llvm::MachO::GET_LIBRARY_ORDINAL(undef_pos->second);
4547  if (dylib_ordinal > 0 && dylib_ordinal < dylib_files.GetSize())
4548  sym[pos.first].SetReExportedSymbolSharedLibrary(
4549  dylib_files.GetFileSpecAtIndex(dylib_ordinal - 1));
4550  }
4551  }
4552  }
4553 
4554  // Count how many trie symbols we'll add to the symbol table
4555  int trie_symbol_table_augment_count = 0;
4556  for (auto &e : external_sym_trie_entries) {
4557  if (symbols_added.find(e.entry.address) == symbols_added.end())
4558  trie_symbol_table_augment_count++;
4559  }
4560 
4561  if (num_syms < sym_idx + trie_symbol_table_augment_count) {
4562  num_syms = sym_idx + trie_symbol_table_augment_count;
4563  sym = symtab->Resize(num_syms);
4564  }
4565  uint32_t synthetic_sym_id = symtab_load_command.nsyms;
4566 
4567  // Add symbols from the trie to the symbol table.
4568  for (auto &e : external_sym_trie_entries) {
4569  if (symbols_added.find(e.entry.address) != symbols_added.end())
4570  continue;
4571 
4572  // Find the section that this trie address is in, use that to annotate
4573  // symbol type as we add the trie address and name to the symbol table.
4574  Address symbol_addr;
4575  if (module_sp->ResolveFileAddress(e.entry.address, symbol_addr)) {
4576  SectionSP symbol_section(symbol_addr.GetSection());
4577  const char *symbol_name = e.entry.name.GetCString();
4578  bool demangled_is_synthesized = false;
4579  SymbolType type =
4580  GetSymbolType(symbol_name, demangled_is_synthesized, text_section_sp,
4581  data_section_sp, data_dirty_section_sp,
4582  data_const_section_sp, symbol_section);
4583 
4584  sym[sym_idx].SetType(type);
4585  if (symbol_section) {
4586  sym[sym_idx].SetID(synthetic_sym_id++);
4587  sym[sym_idx].GetMangled().SetMangledName(ConstString(symbol_name));
4588  if (demangled_is_synthesized)
4589  sym[sym_idx].SetDemangledNameIsSynthesized(true);
4590  sym[sym_idx].SetIsSynthetic(true);
4591  sym[sym_idx].SetExternal(true);
4592  sym[sym_idx].GetAddressRef() = symbol_addr;
4593  symbols_added.insert(symbol_addr.GetFileAddress());
4594  if (e.entry.flags & TRIE_SYMBOL_IS_THUMB)
4596  ++sym_idx;
4597  }
4598  }
4599  }
4600 
4601  if (function_starts_count > 0) {
4602  uint32_t num_synthetic_function_symbols = 0;
4603  for (i = 0; i < function_starts_count; ++i) {
4604  if (symbols_added.find(function_starts.GetEntryRef(i).addr) ==
4605  symbols_added.end())
4606  ++num_synthetic_function_symbols;
4607  }
4608 
4609  if (num_synthetic_function_symbols > 0) {
4610  if (num_syms < sym_idx + num_synthetic_function_symbols) {
4611  num_syms = sym_idx + num_synthetic_function_symbols;
4612  sym = symtab->Resize(num_syms);
4613  }
4614  for (i = 0; i < function_starts_count; ++i) {
4615  const FunctionStarts::Entry *func_start_entry =
4616  function_starts.GetEntryAtIndex(i);
4617  if (symbols_added.find(func_start_entry->addr) == symbols_added.end()) {
4618  addr_t symbol_file_addr = func_start_entry->addr;
4619  uint32_t symbol_flags = 0;
4620  if (func_start_entry->data)
4621  symbol_flags = MACHO_NLIST_ARM_SYMBOL_IS_THUMB;
4622  Address symbol_addr;
4623  if (module_sp->ResolveFileAddress(symbol_file_addr, symbol_addr)) {
4624  SectionSP symbol_section(symbol_addr.GetSection());
4625  uint32_t symbol_byte_size = 0;
4626  if (symbol_section) {
4627  const addr_t section_file_addr = symbol_section->GetFileAddress();
4628  const FunctionStarts::Entry *next_func_start_entry =
4629  function_starts.FindNextEntry(func_start_entry);
4630  const addr_t section_end_file_addr =
4631  section_file_addr + symbol_section->GetByteSize();
4632  if (next_func_start_entry) {
4633  addr_t next_symbol_file_addr = next_func_start_entry->addr;
4634  if (is_arm)
4635  next_symbol_file_addr &= THUMB_ADDRESS_BIT_MASK;
4636  symbol_byte_size = std::min<lldb::addr_t>(
4637  next_symbol_file_addr - symbol_file_addr,
4638  section_end_file_addr - symbol_file_addr);
4639  } else {
4640  symbol_byte_size = section_end_file_addr - symbol_file_addr;
4641  }
4642  sym[sym_idx].SetID(synthetic_sym_id++);
4643  sym[sym_idx].GetMangled().SetDemangledName(
4645  sym[sym_idx].SetType(eSymbolTypeCode);
4646  sym[sym_idx].SetIsSynthetic(true);
4647  sym[sym_idx].GetAddressRef() = symbol_addr;
4648  symbols_added.insert(symbol_addr.GetFileAddress());
4649  if (symbol_flags)
4650  sym[sym_idx].SetFlags(symbol_flags);
4651  if (symbol_byte_size)
4652  sym[sym_idx].SetByteSize(symbol_byte_size);
4653  ++sym_idx;
4654  }
4655  }
4656  }
4657  }
4658  }
4659  }
4660 
4661  // Trim our symbols down to just what we ended up with after removing any
4662  // symbols.
4663  if (sym_idx < num_syms) {
4664  num_syms = sym_idx;
4665  sym = symtab->Resize(num_syms);
4666  }
4667 
4668  // Now synthesize indirect symbols
4669  if (m_dysymtab.nindirectsyms != 0) {
4670  if (indirect_symbol_index_data.GetByteSize()) {
4671  NListIndexToSymbolIndexMap::const_iterator end_index_pos =
4672  m_nlist_idx_to_sym_idx.end();
4673 
4674  for (uint32_t sect_idx = 1; sect_idx < m_mach_sections.size();
4675  ++sect_idx) {
4676  if ((m_mach_sections[sect_idx].flags & SECTION_TYPE) ==
4677  S_SYMBOL_STUBS) {
4678  uint32_t symbol_stub_byte_size = m_mach_sections[sect_idx].reserved2;
4679  if (symbol_stub_byte_size == 0)
4680  continue;
4681 
4682  const uint32_t num_symbol_stubs =
4683  m_mach_sections[sect_idx].size / symbol_stub_byte_size;
4684 
4685  if (num_symbol_stubs == 0)
4686  continue;
4687 
4688  const uint32_t symbol_stub_index_offset =
4689  m_mach_sections[sect_idx].reserved1;
4690  for (uint32_t stub_idx = 0; stub_idx < num_symbol_stubs; ++stub_idx) {
4691  const uint32_t symbol_stub_index =
4692  symbol_stub_index_offset + stub_idx;
4693  const lldb::addr_t symbol_stub_addr =
4694  m_mach_sections[sect_idx].addr +
4695  (stub_idx * symbol_stub_byte_size);
4696  lldb::offset_t symbol_stub_offset = symbol_stub_index * 4;
4697  if (indirect_symbol_index_data.ValidOffsetForDataOfSize(
4698  symbol_stub_offset, 4)) {
4699  const uint32_t stub_sym_id =
4700  indirect_symbol_index_data.GetU32(&symbol_stub_offset);
4701  if (stub_sym_id & (INDIRECT_SYMBOL_ABS | INDIRECT_SYMBOL_LOCAL))
4702  continue;
4703 
4704  NListIndexToSymbolIndexMap::const_iterator index_pos =
4705  m_nlist_idx_to_sym_idx.find(stub_sym_id);
4706  Symbol *stub_symbol = nullptr;
4707  if (index_pos != end_index_pos) {
4708  // We have a remapping from the original nlist index to a
4709  // current symbol index, so just look this up by index
4710  stub_symbol = symtab->SymbolAtIndex(index_pos->second);
4711  } else {
4712  // We need to lookup a symbol using the original nlist symbol
4713  // index since this index is coming from the S_SYMBOL_STUBS
4714  stub_symbol = symtab->FindSymbolByID(stub_sym_id);
4715  }
4716 
4717  if (stub_symbol) {
4718  Address so_addr(symbol_stub_addr, section_list);
4719 
4720  if (stub_symbol->GetType() == eSymbolTypeUndefined) {
4721  // Change the external symbol into a trampoline that makes
4722  // sense These symbols were N_UNDF N_EXT, and are useless
4723  // to us, so we can re-use them so we don't have to make up
4724  // a synthetic symbol for no good reason.
4725  if (resolver_addresses.find(symbol_stub_addr) ==
4726  resolver_addresses.end())
4727  stub_symbol->SetType(eSymbolTypeTrampoline);
4728  else
4729  stub_symbol->SetType(eSymbolTypeResolver);
4730  stub_symbol->SetExternal(false);
4731  stub_symbol->GetAddressRef() = so_addr;
4732  stub_symbol->SetByteSize(symbol_stub_byte_size);
4733  } else {
4734  // Make a synthetic symbol to describe the trampoline stub
4735  Mangled stub_symbol_mangled_name(stub_symbol->GetMangled());
4736  if (sym_idx >= num_syms) {
4737  sym = symtab->Resize(++num_syms);
4738  stub_symbol = nullptr; // this pointer no longer valid
4739  }
4740  sym[sym_idx].SetID(synthetic_sym_id++);
4741  sym[sym_idx].GetMangled() = stub_symbol_mangled_name;
4742  if (resolver_addresses.find(symbol_stub_addr) ==
4743  resolver_addresses.end())
4744  sym[sym_idx].SetType(eSymbolTypeTrampoline);
4745  else
4746  sym[sym_idx].SetType(eSymbolTypeResolver);
4747  sym[sym_idx].SetIsSynthetic(true);
4748  sym[sym_idx].GetAddressRef() = so_addr;
4749  symbols_added.insert(so_addr.GetFileAddress());
4750  sym[sym_idx].SetByteSize(symbol_stub_byte_size);
4751  ++sym_idx;
4752  }
4753  } else {
4754  if (log)
4755  log->Warning("symbol stub referencing symbol table symbol "
4756  "%u that isn't in our minimal symbol table, "
4757  "fix this!!!",
4758  stub_sym_id);
4759  }
4760  }
4761  }
4762  }
4763  }
4764  }
4765  }
4766 
4767  if (!reexport_trie_entries.empty()) {
4768  for (const auto &e : reexport_trie_entries) {
4769  if (e.entry.import_name) {
4770  // Only add indirect symbols from the Trie entries if we didn't have
4771  // a N_INDR nlist entry for this already
4772  if (indirect_symbol_names.find(e.entry.name) ==
4773  indirect_symbol_names.end()) {
4774  // Make a synthetic symbol to describe re-exported symbol.
4775  if (sym_idx >= num_syms)
4776  sym = symtab->Resize(++num_syms);
4777  sym[sym_idx].SetID(synthetic_sym_id++);
4778  sym[sym_idx].GetMangled() = Mangled(e.entry.name);
4779  sym[sym_idx].SetType(eSymbolTypeReExported);
4780  sym[sym_idx].SetIsSynthetic(true);
4781  sym[sym_idx].SetReExportedSymbolName(e.entry.import_name);
4782  if (e.entry.other > 0 && e.entry.other <= dylib_files.GetSize()) {
4783  sym[sym_idx].SetReExportedSymbolSharedLibrary(
4784  dylib_files.GetFileSpecAtIndex(e.entry.other - 1));
4785  }
4786  ++sym_idx;
4787  }
4788  }
4789  }
4790  }
4791 
4792  // StreamFile s(stdout, false);
4793  // s.Printf ("Symbol table before CalculateSymbolSizes():\n");
4794  // symtab->Dump(&s, NULL, eSortOrderNone);
4795  // Set symbol byte sizes correctly since mach-o nlist entries don't have
4796  // sizes
4797  symtab->CalculateSymbolSizes();
4798 
4799  // s.Printf ("Symbol table after CalculateSymbolSizes():\n");
4800  // symtab->Dump(&s, NULL, eSortOrderNone);
4801 
4802  return symtab->GetNumSymbols();
4803 }
4804 
// Writes a textual description of this object file to the stream `s`: the
// object's address, a 32/64-bit label chosen from m_header.magic, the file,
// one triple per entry in all_specs, then the section list and (when
// m_symtab_up is set) the symbol table. Nothing is written when GetModule()
// yields no module. The module mutex is held for the whole dump.
// NOTE(review): the signature line of this definition is not visible in this
// listing.
4806  ModuleSP module_sp(GetModule());
4807  if (module_sp) {
4808  std::lock_guard<std::recursive_mutex> guard(module_sp->GetMutex());
4809  s->Printf("%p: ", static_cast<void *>(this));
4810  s->Indent();
// Only the two 64-bit magic values select the "64" label; any other magic
// falls through to the "32" label.
4811  if (m_header.magic == MH_MAGIC_64 || m_header.magic == MH_CIGAM_64)
4812  s->PutCString("ObjectFileMachO64");
4813  else
4814  s->PutCString("ObjectFileMachO32");
4815 
// The opening quote written here is closed by the "', triple" text emitted
// inside the loop below.
4816  *s << ", file = '" << m_file;
4817  ModuleSpecList all_specs;
4818  ModuleSpec base_spec;
// NOTE(review): the call that populates all_specs begins on a line that is
// missing from this listing; only its trailing arguments are visible.
4820  base_spec, all_specs);
4821  for (unsigned i = 0, e = all_specs.GetSize(); i != e; ++i) {
4822  *s << "', triple";
// `e` is the loop bound, so it is always non-zero inside the loop body and the
// index is printed for every entry.
4823  if (e)
4824  s->Printf("[%d]", i);
4825  *s << " = ";
4826  *s << all_specs.GetModuleSpecRefAtIndex(i)
4827  .GetArchitecture()
4828  .GetTriple()
4829  .getTriple();
4830  }
4831  *s << "\n";
4832  SectionList *sections = GetSectionList();
4833  if (sections)
4834  sections->Dump(s->AsRawOstream(), s->GetIndentLevel(), nullptr, true,
4835  UINT32_MAX);
4836 
4837  if (m_symtab_up)
4838  m_symtab_up->Dump(s, nullptr, eSortOrderNone);
4839  }
4840 }
4841 
4842 UUID ObjectFileMachO::GetUUID(const llvm::MachO::mach_header &header,
4843  const lldb_private::DataExtractor &data,
4844  lldb::offset_t lc_offset) {
4845  uint32_t i;
4846  struct uuid_command load_cmd;
4847 
4848  lldb::offset_t offset = lc_offset;
4849  for (i = 0; i < header.ncmds; ++i) {
4850  const lldb::offset_t cmd_offset = offset;
4851  if (data.GetU32(&offset, &load_cmd, 2) == nullptr)
4852  break;
4853 
4854  if (load_cmd.cmd == LC_UUID) {
4855  const uint8_t *uuid_bytes = data.PeekData(offset, 16);
4856 
4857  if (uuid_bytes) {
4858  // OpenCL on Mac OS X uses the same UUID for each of its object files.
4859  // We pretend these object files have no UUID to prevent crashing.
4860 
4861  const uint8_t opencl_uuid[] = {0x8c, 0x8e, 0xb3, 0x9b, 0x3b, 0xa8,
4862  0x4b, 0x16, 0xb6, 0xa4, 0x27, 0x63,
4863  0xbb, 0x14, 0xf0, 0x0d};
4864 
4865  if (!memcmp(uuid_bytes, opencl_uuid, 16))
4866  return UUID();
4867 
4868  return UUID::fromOptionalData(uuid_bytes, 16);
4869  }
4870  return UUID();
4871  }
4872  offset = cmd_offset + load_cmd.cmdsize;
4873  }
4874  return UUID();
4875 }
4876 
4877 static llvm::StringRef GetOSName(uint32_t cmd) {
4878  switch (cmd) {
4879  case llvm::MachO::LC_VERSION_MIN_IPHONEOS:
4880  return llvm::Triple::getOSTypeName(llvm::Triple::IOS);
4881  case llvm::MachO::LC_VERSION_MIN_MACOSX:
4882  return llvm::Triple::getOSTypeName(llvm::Triple::MacOSX);
4883  case llvm::MachO::LC_VERSION_MIN_TVOS:
4884  return llvm::Triple::getOSTypeName(llvm::Triple::TvOS);
4885  case llvm::MachO::LC_VERSION_MIN_WATCHOS:
4886  return llvm::Triple::getOSTypeName(llvm::Triple::WatchOS);
4887  default:
4888  llvm_unreachable("unexpected LC_VERSION load command");
4889  }
4890 }
4891 
4892 namespace {
// Translates a platform value (the constructor is passed
// build_version.platform by its caller in this file) into the OS name and,
// for Mac Catalyst and the simulators, the environment name that llvm::Triple
// uses. Both members stay empty for a platform that is not listed.
4893 struct OSEnv {
// OS type name from llvm::Triple::getOSTypeName(); empty if unsupported.
4894  llvm::StringRef os_type;
// Environment name from llvm::Triple::getEnvironmentTypeName(); set only for
// Mac Catalyst and the simulator platforms, empty otherwise.
4895  llvm::StringRef environment;
4896  OSEnv(uint32_t cmd) {
4897  switch (cmd) {
4898  case llvm::MachO::PLATFORM_MACOS:
4899  os_type = llvm::Triple::getOSTypeName(llvm::Triple::MacOSX);
4900  return;
4901  case llvm::MachO::PLATFORM_IOS:
4902  os_type = llvm::Triple::getOSTypeName(llvm::Triple::IOS);
4903  return;
4904  case llvm::MachO::PLATFORM_TVOS:
4905  os_type = llvm::Triple::getOSTypeName(llvm::Triple::TvOS);
4906  return;
4907  case llvm::MachO::PLATFORM_WATCHOS:
4908  os_type = llvm::Triple::getOSTypeName(llvm::Triple::WatchOS);
4909  return;
4910  // NEED_BRIDGEOS_TRIPLE case llvm::MachO::PLATFORM_BRIDGEOS:
4911  // NEED_BRIDGEOS_TRIPLE os_type =
4912  // llvm::Triple::getOSTypeName(llvm::Triple::BridgeOS);
4913  // NEED_BRIDGEOS_TRIPLE return;
// Mac Catalyst is reported as the IOS OS type with the MacABI environment.
4914  case llvm::MachO::PLATFORM_MACCATALYST:
4915  os_type = llvm::Triple::getOSTypeName(llvm::Triple::IOS);
4916  environment = llvm::Triple::getEnvironmentTypeName(llvm::Triple::MacABI);
4917  return;
// Each simulator platform keeps its device OS type and adds the Simulator
// environment.
4918  case llvm::MachO::PLATFORM_IOSSIMULATOR:
4919  os_type = llvm::Triple::getOSTypeName(llvm::Triple::IOS);
4920  environment =
4921  llvm::Triple::getEnvironmentTypeName(llvm::Triple::Simulator);
4922  return;
4923  case llvm::MachO::PLATFORM_TVOSSIMULATOR:
4924  os_type = llvm::Triple::getOSTypeName(llvm::Triple::TvOS);
4925  environment =
4926  llvm::Triple::getEnvironmentTypeName(llvm::Triple::Simulator);
4927  return;
4928  case llvm::MachO::PLATFORM_WATCHOSSIMULATOR:
4929  os_type = llvm::Triple::getOSTypeName(llvm::Triple::WatchOS);
4930  environment =
4931  llvm::Triple::getEnvironmentTypeName(llvm::Triple::Simulator);
4932  return;
// Any other platform is only logged; os_type and environment stay empty.
// NOTE(review): the declaration of `log` is on lines missing from this
// listing.
4933  default: {
4936  LLDB_LOGF(log, "unsupported platform in LC_BUILD_VERSION");
4937  }
4938  }
4939  }
4940 };
4941 
/// Splits a packed 32-bit version word into its three components: the upper
/// 16 bits are the major version, the next 8 bits the minor version, and the
/// low 8 bits the patch version.
struct MinOS {
  uint32_t major_version, minor_version, patch_version;

  MinOS(uint32_t version) {
    major_version = version >> 16;
    minor_version = (version >> 8) & 0xffu;
    patch_version = version & 0xffu;
  }
};
4948 } // namespace
4949 
4950 void ObjectFileMachO::GetAllArchSpecs(const llvm::MachO::mach_header &header,
4951  const lldb_private::DataExtractor &data,
4952  lldb::offset_t lc_offset,
4953  ModuleSpec &base_spec,
4954  lldb_private::ModuleSpecList &all_specs) {
4955  auto &base_arch = base_spec.GetArchitecture();
4956  base_arch.SetArchitecture(eArchTypeMachO, header.cputype, header.cpusubtype);
4957  if (!base_arch.IsValid())
4958  return;
4959 
4960  bool found_any = false;
4961  auto add_triple = [&](const llvm::Triple &triple) {
4962  auto spec = base_spec;
4963  spec.GetArchitecture().GetTriple() = triple;
4964  if (spec.GetArchitecture().IsValid()) {
4965  spec.GetUUID() = ObjectFileMachO::GetUUID(header, data, lc_offset);
4966  all_specs.Append(spec);
4967  found_any = true;
4968  }
4969  };
4970 
4971  // Set OS to an unspecified unknown or a "*" so it can match any OS
4972  llvm::Triple base_triple = base_arch.GetTriple();
4973  base_triple.setOS(llvm::Triple::UnknownOS);
4974  base_triple.setOSName(llvm::StringRef());
4975 
4976  if (header.filetype == MH_PRELOAD) {
4977  if (header.cputype == CPU_TYPE_ARM) {
4978  // If this is a 32-bit arm binary, and it's a standalone binary, force
4979  // the Vendor to Apple so we don't accidentally pick up the generic
4980  // armv7 ABI at runtime. Apple's armv7 ABI always uses r7 for the
4981  // frame pointer register; most other armv7 ABIs use a combination of
4982  // r7 and r11.
4983  base_triple.setVendor(llvm::Triple::Apple);
4984  } else {
4985  // Set vendor to an unspecified unknown or a "*" so it can match any
4986  // vendor This is required for correct behavior of EFI debugging on
4987  // x86_64
4988  base_triple.setVendor(llvm::Triple::UnknownVendor);
4989  base_triple.setVendorName(llvm::StringRef());
4990  }
4991  return add_triple(base_triple);
4992  }
4993 
4994  struct load_command load_cmd;
4995 
4996  // See if there is an LC_VERSION_MIN_* load command that can give
4997  // us the OS type.
4998  lldb::offset_t offset = lc_offset;
4999  for (uint32_t i = 0; i < header.ncmds; ++i) {
5000  const lldb::offset_t cmd_offset = offset;
5001  if (data.GetU32(&offset, &load_cmd, 2) == NULL)
5002  break;
5003 
5004  struct version_min_command version_min;
5005  switch (load_cmd.cmd) {
5006  case llvm::MachO::LC_VERSION_MIN_MACOSX:
5007  case llvm::MachO::LC_VERSION_MIN_IPHONEOS:
5008  case llvm::MachO::LC_VERSION_MIN_TVOS:
5009  case llvm::MachO::LC_VERSION_MIN_WATCHOS: {
5010  if (load_cmd.cmdsize != sizeof(version_min))
5011  break;
5012  if (data.ExtractBytes(cmd_offset, sizeof(version_min),
5013  data.GetByteOrder(), &version_min) == 0)
5014  break;
5015  MinOS min_os(version_min.version);
5016  llvm::SmallString<32> os_name;
5017  llvm::raw_svector_ostream os(os_name);
5018  os << GetOSName(load_cmd.cmd) << min_os.major_version << '.'
5019  << min_os.minor_version << '.' << min_os.patch_version;
5020 
5021  auto triple = base_triple;
5022  triple.setOSName(os.str());
5023 
5024  // Disambiguate legacy simulator platforms.
5025  if (load_cmd.cmd != llvm::MachO::LC_VERSION_MIN_MACOSX &&
5026  (base_triple.getArch() == llvm::Triple::x86_64 ||
5027  base_triple.getArch() == llvm::Triple::x86)) {
5028  // The combination of legacy LC_VERSION_MIN load command and
5029  // x86 architecture always indicates a simulator environment.
5030  // The combination of LC_VERSION_MIN and arm architecture only
5031  // appears for native binaries. Back-deploying simulator
5032  // binaries on Apple Silicon Macs use the modern unambigous
5033  // LC_BUILD_VERSION load commands; no special handling required.
5034  triple.setEnvironment(llvm::Triple::Simulator);
5035  }
5036  add_triple(triple);
5037  break;
5038  }
5039  default:
5040  break;
5041  }
5042 
5043  offset = cmd_offset + load_cmd.cmdsize;
5044  }
5045 
5046  // See if there are LC_BUILD_VERSION load commands that can give
5047  // us the OS type.
5048  offset = lc_offset;
5049  for (uint32_t i = 0; i < header.ncmds; ++i) {
5050  const lldb::offset_t cmd_offset = offset;
5051  if (data.GetU32(&offset, &load_cmd, 2) == NULL)
5052  break;
5053 
5054  do {
5055  if (load_cmd.cmd == llvm::MachO::LC_BUILD_VERSION) {
5056  struct build_version_command build_version;
5057  if (load_cmd.cmdsize < sizeof(build_version)) {
5058  // Malformed load command.
5059  break;
5060  }
5061  if (data.ExtractBytes(cmd_offset, sizeof(build_version),
5062  data.GetByteOrder(), &build_version) == 0)
5063  break;
5064  MinOS min_os(build_version.minos);
5065  OSEnv os_env(build_version.platform);
5066  llvm::SmallString<16> os_name;
5067  llvm::raw_svector_ostream os(os_name);
5068  os << os_env.os_type << min_os.major_version << '.'
5069  << min_os.minor_version << '.' << min_os.patch_version;
5070  auto triple = base_triple;
5071  triple.setOSName(os.str());
5072  os_name.clear();
5073  if (!os_env.environment.empty())
5074  triple.setEnvironmentName(os_env.environment);
5075  add_triple(triple);
5076  }
5077  } while (0);
5078  offset = cmd_offset + load_cmd.cmdsize;
5079  }
5080 
5081  if (!found_any) {
5082  if (header.filetype == MH_KEXT_BUNDLE) {
5083  base_triple.setVendor(llvm::Triple::Apple);
5084  add_triple(base_triple);
5085  } else {
5086  // We didn't find a LC_VERSION_MIN load command and this isn't a KEXT
5087  // so lets not say our Vendor is Apple, leave it as an unspecified
5088  // unknown.
5089  base_triple.setVendor(llvm::Triple::UnknownVendor);
5090  base_triple.setVendorName(llvm::StringRef());
5091  add_triple(base_triple);
5092  }
5093  }
5094 }
5095 
// Picks a single ArchSpec for the image: gathers every spec via
// GetAllArchSpecs(), returns the first one that module_sp's architecture
// accepts through IsCompatibleMatch(), otherwise the first spec found, or a
// default-constructed value when there are none.
// NOTE(review): the first line of this signature is missing from this
// listing. `lc_offset` is not used in the visible lines;
// MachHeaderSizeFromMagic(header.magic) is passed instead. Confirm that is
// intentional.
5097  ModuleSP module_sp, const llvm::MachO::mach_header &header,
5098  const lldb_private::DataExtractor &data, lldb::offset_t lc_offset) {
5099  ModuleSpecList all_specs;
5100  ModuleSpec base_spec;
5101  GetAllArchSpecs(header, data, MachHeaderSizeFromMagic(header.magic),
5102  base_spec, all_specs);
5103 
5104  // If the object file offers multiple alternative load commands,
5105  // pick the one that matches the module.
5106  if (module_sp) {
5107  const ArchSpec &module_arch = module_sp->GetArchitecture();
5108  for (unsigned i = 0, e = all_specs.GetSize(); i != e; ++i) {
// NOTE(review): the initializer of mach_arch is on a line missing from this
// listing.
5109  ArchSpec mach_arch =
5111  if (module_arch.IsCompatibleMatch(mach_arch))
5112  return mach_arch;
5113  }
5114  }
5115 
5116  // Return the first arch we found.
5117  if (all_specs.GetSize() == 0)
5118  return {};
5119  return all_specs.GetModuleSpecRefAtIndex(0).GetArchitecture();
5120 }
5121 
// Returns the UUID for this object file by forwarding m_header and m_data to
// the three-argument GetUUID() while holding the module mutex; returns an
// empty UUID when GetModule() yields no module.
// NOTE(review): the signature line and the declaration of `offset` are on
// lines missing from this listing.
5123  ModuleSP module_sp(GetModule());
5124  if (module_sp) {
5125  std::lock_guard<std::recursive_mutex> guard(module_sp->GetMutex());
5127  return GetUUID(m_header, m_data, offset);
5128  }
5129  return UUID();
5130 }
5131 
// Collects the files this image names in its dylib/dylinker/rpath load
// commands into `files` and returns how many entries were newly appended.
//   - Paths that do not start with '@' are appended as-is (if unique).
//   - "@rpath..." paths are tried against every LC_RPATH value; the first
//     combination that exists on disk and is unique is appended.
//   - "@executable_path..." paths are resolved against this file's directory,
//     but only when CalculateType() reports eTypeExecutable.
//   - Any other '@'-prefixed path is ignored.
// NOTE(review): the signature line and the declaration of `offset` are on
// lines missing from this listing.
5133  uint32_t count = 0;
5134  ModuleSP module_sp(GetModule());
5135  if (module_sp) {
5136  std::lock_guard<std::recursive_mutex> guard(module_sp->GetMutex());
5137  struct load_command load_cmd;
// rpath_paths holds LC_RPATH values; the other two hold the remainder of a
// path after its "@rpath" / "@executable_path" prefix has been removed.
5139  std::vector<std::string> rpath_paths;
5140  std::vector<std::string> rpath_relative_paths;
5141  std::vector<std::string> at_exec_relative_paths;
5142  uint32_t i;
5143  for (i = 0; i < m_header.ncmds; ++i) {
5144  const uint32_t cmd_offset = offset;
5145  if (m_data.GetU32(&offset, &load_cmd, 2) == nullptr)
5146  break;
5147 
5148  switch (load_cmd.cmd) {
5149  case LC_RPATH:
5150  case LC_LOAD_DYLIB:
5151  case LC_LOAD_WEAK_DYLIB:
5152  case LC_REEXPORT_DYLIB:
5153  case LC_LOAD_DYLINKER:
5154  case LC_LOADFVMLIB:
5155  case LC_LOAD_UPWARD_DYLIB: {
// The word after the command header is the path's offset relative to the start
// of the command.
5156  uint32_t name_offset = cmd_offset + m_data.GetU32(&offset);
5157  const char *path = m_data.PeekCStr(name_offset);
5158  if (path) {
5159  if (load_cmd.cmd == LC_RPATH)
5160  rpath_paths.push_back(path);
5161  else {
5162  if (path[0] == '@') {
5163  if (strncmp(path, "@rpath", strlen("@rpath")) == 0)
5164  rpath_relative_paths.push_back(path + strlen("@rpath"));
5165  else if (strncmp(path, "@executable_path",
5166  strlen("@executable_path")) == 0)
5167  at_exec_relative_paths.push_back(path +
5168  strlen("@executable_path"));
5169  } else {
5170  FileSpec file_spec(path);
5171  if (files.AppendIfUnique(file_spec))
5172  count++;
5173  }
5174  }
5175  }
5176  } break;
5177 
5178  default:
5179  break;
5180  }
5181  offset = cmd_offset + load_cmd.cmdsize;
5182  }
5183 
5184  FileSpec this_file_spec(m_file);
5185  FileSystem::Instance().Resolve(this_file_spec);
5186 
5187  if (!rpath_paths.empty()) {
5188  // Fixup all LC_RPATH values to be absolute paths
5189  std::string loader_path("@loader_path");
5190  std::string executable_path("@executable_path");
// Both prefixes are replaced with this file's own directory.
// NOTE(review): GetDirectory().GetCString() is passed straight to
// std::string::insert; confirm it cannot be null for a file without a
// directory component.
5191  for (auto &rpath : rpath_paths) {
5192  if (llvm::StringRef(rpath).startswith(loader_path)) {
5193  rpath.erase(0, loader_path.size());
5194  rpath.insert(0, this_file_spec.GetDirectory().GetCString());
5195  } else if (llvm::StringRef(rpath).startswith(executable_path)) {
5196  rpath.erase(0, executable_path.size());
5197  rpath.insert(0, this_file_spec.GetDirectory().GetCString());
5198  }
5199  }
5200 
// For each rpath-relative path, stop at the first rpath that yields an
// existing file that is not already in `files`.
5201  for (const auto &rpath_relative_path : rpath_relative_paths) {
5202  for (const auto &rpath : rpath_paths) {
5203  std::string path = rpath;
5204  path += rpath_relative_path;
5205  // It is OK to resolve this path because we must find a file on disk
5206  // for us to accept it anyway if it is rpath relative.
5207  FileSpec file_spec(path);
5208  FileSystem::Instance().Resolve(file_spec);
5209  if (FileSystem::Instance().Exists(file_spec) &&
5210  files.AppendIfUnique(file_spec)) {
5211  count++;
5212  break;
5213  }
5214  }
5215  }
5216  }
5217 
5218  // We may have @executable_paths but no RPATHS. Figure those out here.
5219  // Only do this if this object file is the executable. We have no way to
5220  // get back to the actual executable otherwise, so we won't get the right
5221  // path.
5222  if (!at_exec_relative_paths.empty() && CalculateType() == eTypeExecutable) {
5223  FileSpec exec_dir = this_file_spec.CopyByRemovingLastPathComponent();
5224  for (const auto &at_exec_relative_path : at_exec_relative_paths) {
5225  FileSpec file_spec =
5226  exec_dir.CopyByAppendingPathComponent(at_exec_relative_path);
5227  if (FileSystem::Instance().Exists(file_spec) &&
5228  files.AppendIfUnique(file_spec))
5229  count++;
5230  }
5231  }
5232  }
5233  return count;
5234 }
5235 
// Returns m_entry_point_address, computing it on first use. The start address
// is taken from the program counter stored in an LC_UNIXTHREAD/LC_THREAD
// register state, or from LC_MAIN's entry offset added to the __TEXT segment's
// file address. For a dynamic loader with no such command, the "_dyld_start"
// symbol is used. If no start address is found at all, the "start" code symbol
// of the module is looked up instead.
// NOTE(review): several lines of this definition are missing from this
// listing: the signature, part of the first condition, the declaration of
// `offset`, the case labels before the `flavor == 6` test, and the statements
// that store into m_entry_point_address.
5237  // If the object file is not an executable it can't hold the entry point.
5238  // m_entry_point_address is initialized to an invalid address, so we can just
5239  // return that. If m_entry_point_address is valid it means we've found it
5240  // already, so return the cached value.
5241 
5242  if ((!IsExecutable() && !IsDynamicLoader()) ||
5244  return m_entry_point_address;
5245  }
5246 
5247  // Otherwise, look for the UnixThread or Thread command. The data for the
5248  // Thread command is given in /usr/include/mach-o.h, but it is basically:
5249  //
5250  // uint32_t flavor - this is the flavor argument you would pass to
5251  // thread_get_state
5252  // uint32_t count - this is the count of longs in the thread state data
5253  // struct XXX_thread_state state - this is the structure from
5254  // <machine/thread_status.h> corresponding to the flavor.
5255  // <repeat this trio>
5256  //
5257  // So we just keep reading the various register flavors till we find the GPR
5258  // one, then read the PC out of there.
5259  // FIXME: We will need to have a "RegisterContext data provider" class at some
5260  // point that can get all the registers
5261  // out of data in this form & attach them to a given thread. That should
5262  // underlie the MacOS X User process plugin, and we'll also need it for the
5263  // MacOS X Core File process plugin. When we have that we can also use it
5264  // here.
5265  //
5266  // For now we hard-code the offsets and flavors we need:
5267  //
5268  //
5269 
5270  ModuleSP module_sp(GetModule());
5271  if (module_sp) {
5272  std::lock_guard<std::recursive_mutex> guard(module_sp->GetMutex());
5273  struct load_command load_cmd;
5275  uint32_t i;
5276  lldb::addr_t start_address = LLDB_INVALID_ADDRESS;
// Set once a start address has been read; ends both the flavor scan and the
// load-command scan.
5277  bool done = false;
5278 
5279  for (i = 0; i < m_header.ncmds; ++i) {
5280  const lldb::offset_t cmd_offset = offset;
5281  if (m_data.GetU32(&offset, &load_cmd, 2) == nullptr)
5282  break;
5283 
5284  switch (load_cmd.cmd) {
5285  case LC_UNIXTHREAD:
5286  case LC_THREAD: {
// Walk the (flavor, count, state) records until the end of this command.
5287  while (offset < cmd_offset + load_cmd.cmdsize) {
5288  uint32_t flavor = m_data.GetU32(&offset);
5289  uint32_t count = m_data.GetU32(&offset);
5290  if (count == 0) {
5291  // We've gotten off somehow, log and exit;
5292  return m_entry_point_address;
5293  }
5294 
5295  switch (m_header.cputype) {
5296  case llvm::MachO::CPU_TYPE_ARM:
5297  if (flavor == 1 ||
5298  flavor == 9) // ARM_THREAD_STATE/ARM_THREAD_STATE32
5299  // from mach/arm/thread_status.h
5300  {
5301  offset += 60; // This is the offset of pc in the GPR thread state
5302  // data structure.
5303  start_address = m_data.GetU32(&offset);
5304  done = true;
5305  }
5306  break;
// NOTE(review): the case label(s) that select this branch are on lines missing
// from this listing.
5309  if (flavor == 6) // ARM_THREAD_STATE64 from mach/arm/thread_status.h
5310  {
5311  offset += 256; // This is the offset of pc in the GPR thread state
5312  // data structure.
5313  start_address = m_data.GetU64(&offset);
5314  done = true;
5315  }
5316  break;
5317  case llvm::MachO::CPU_TYPE_I386:
5318  if (flavor ==
5319  1) // x86_THREAD_STATE32 from mach/i386/thread_status.h
5320  {
5321  offset += 40; // This is the offset of eip in the GPR thread state
5322  // data structure.
5323  start_address = m_data.GetU32(&offset);
5324  done = true;
5325  }
5326  break;
5327  case llvm::MachO::CPU_TYPE_X86_64:
5328  if (flavor ==
5329  4) // x86_THREAD_STATE64 from mach/i386/thread_status.h
5330  {
5331  offset += 16 * 8; // This is the offset of rip in the GPR thread
5332  // state data structure.
5333  start_address = m_data.GetU64(&offset);
5334  done = true;
5335  }
5336  break;
// Any other CPU type gives up and returns the cached (possibly invalid) value.
5337  default:
5338  return m_entry_point_address;
5339  }
5340  // Haven't found the GPR flavor yet, skip over the data for this
5341  // flavor:
5342  if (done)
5343  break;
// The state is skipped as `count` 4-byte units.
5344  offset += count * 4;
5345  }
5346  } break;
5347  case LC_MAIN: {
5348  ConstString text_segment_name("__TEXT");
// The value read here is added to the __TEXT segment's file address below.
5349  uint64_t entryoffset = m_data.GetU64(&offset);
5350  SectionSP text_segment_sp =
5351  GetSectionList()->FindSectionByName(text_segment_name);
5352  if (text_segment_sp) {
5353  done = true;
5354  start_address = text_segment_sp->GetFileAddress() + entryoffset;
5355  }
5356  } break;
5357 
5358  default:
5359  break;
5360  }
5361  if (done)
5362  break;
5363 
5364  // Go to the next load command:
5365  offset = cmd_offset + load_cmd.cmdsize;
5366  }
5367 
// A dynamic loader with no thread/main command falls back to the address of
// its "_dyld_start" code symbol.
5368  if (start_address == LLDB_INVALID_ADDRESS && IsDynamicLoader()) {
5369  if (GetSymtab()) {
5370  Symbol *dyld_start_sym = GetSymtab()->FindFirstSymbolWithNameAndType(
5371  ConstString("_dyld_start"), SymbolType::eSymbolTypeCode,
5372  Symtab::eDebugAny, Symtab::eVisibilityAny);
5373  if (dyld_start_sym && dyld_start_sym->GetAddress().IsValid()) {
5374  start_address = dyld_start_sym->GetAddress().GetFileAddress();
5375  }
5376  }
5377  }
5378 
5379  if (start_address != LLDB_INVALID_ADDRESS) {
5380  // We got the start address from the load commands, so now resolve that
5381  // address in the sections of this ObjectFile:
// NOTE(review): the call that takes these arguments and the body of this `if`
// are on lines missing from this listing.
5383  start_address, GetSectionList())) {
5385  }
5386  } else {
5387  // We couldn't read the UnixThread load command - maybe it wasn't there.
5388  // As a fallback look for the "start" symbol in the main executable.
5389 
5390  ModuleSP module_sp(GetModule());
5391 
5392  if (module_sp) {
5393  SymbolContextList contexts;
5394  SymbolContext context;
5395  module_sp->FindSymbolsWithNameAndType(ConstString("start"),
5396  eSymbolTypeCode, contexts);
5397  if (contexts.GetSize()) {
// NOTE(review): the statement controlled by this `if` is on a line missing
// from this listing.
5398  if (contexts.GetContextAtIndex(0, context))
5400  }
5401  }
5402  }
5403  }
5404 
5405  return m_entry_point_address;
5406 }
5407 
// Returns an Address at offset 0 of the section found by looking up
// GetSegmentNameTEXT() in the section list. The returned Address is left
// default-constructed when there is no section list or no such section.
// NOTE(review): the signature line is missing from this listing.
5409  lldb_private::Address header_addr;
5410  SectionList *section_list = GetSectionList();
5411  if (section_list) {
5412  SectionSP text_segment_sp(
5413  section_list->FindSectionByName(GetSegmentNameTEXT()));
5414  if (text_segment_sp) {
5415  header_addr.SetSection(text_segment_sp);
5416  header_addr.SetOffset(0);
5417  }
5418  }
5419  return header_addr;
5420 }
5421 
// Walks the load commands under the module mutex and, for every LC_THREAD
// command, appends a file range to m_thread_context_offsets covering the
// command's payload.
// NOTE(review): the signature, the lines between the lock and the loop
// (including the declaration of `offset` and the opening of one more block),
// and the final statement before the closing brace are missing from this
// listing.
5423  ModuleSP module_sp(GetModule());
5424  if (module_sp) {
5425  std::lock_guard<std::recursive_mutex> guard(module_sp->GetMutex());
5429  FileRangeArray::Entry file_range;
5430  thread_command thread_cmd;
5431  for (uint32_t i = 0; i < m_header.ncmds; ++i) {
5432  const uint32_t cmd_offset = offset;
5433  if (m_data.GetU32(&offset, &thread_cmd, 2) == nullptr)
5434  break;
5435 
5436  if (thread_cmd.cmd == LC_THREAD) {
// `offset` now points just past the two 32-bit words read above, so the range
// starts there and is 8 bytes shorter than cmdsize.
5437  file_range.SetRangeBase(offset);
5438  file_range.SetByteSize(thread_cmd.cmdsize - 8);
5439  m_thread_context_offsets.Append(file_range);
5440  }
5441  offset = cmd_offset + thread_cmd.cmdsize;
5442  }
5443  }
5444  }
5446 }
5447 
5449  std::string result;
5450  ModuleSP module_sp(GetModule());
5451  if (module_sp) {
5452  std::lock_guard<std::recursive_mutex> guard(module_sp->GetMutex());
5453 
5454  // First, look over the load commands for an LC_NOTE load command with
5455  // data_owner string "kern ver str" & use that if found.
5457  for (uint32_t i = 0; i < m_header.ncmds; ++i) {
5458  const uint32_t cmd_offset = offset;
5459  load_command lc;
5460  if (m_data.GetU32(&offset, &lc.cmd, 2) == nullptr)
5461  break;
5462  if (lc.cmd == LC_NOTE) {
5463  char data_owner[17];
5464  m_data.CopyData(offset, 16, data_owner);
5465  data_owner[16] = '\0';
5466  offset += 16;
5467  uint64_t fileoff = m_data.GetU64_unchecked(&offset);
5468  uint64_t size = m_data.GetU64_unchecked(&offset);
5469 
5470  // "kern ver str" has a uint32_t version and then a nul terminated
5471  // c-string.
5472  if (strcmp("kern ver str", data_owner) == 0) {
5473  offset = fileoff;
5474  uint32_t version;
5475  if (m_data.GetU32(&offset, &version, 1) != nullptr) {
5476  if (version == 1) {
5477  uint32_t strsize = size - sizeof(uint32_t);
5478  char *buf = (char *)malloc(strsize);
5479  if (buf) {
5480  m_data.CopyData(offset, strsize, buf);
5481  buf[strsize - 1] = '\0';
5482  result = buf;
5483  if (buf)
5484  free(buf);
5485  return result;
5486  }
5487  }
5488  }
5489  }
5490  }
5491  offset = cmd_offset + lc.cmdsize;
5492  }
5493 
5494  // Second, make a pass over the load commands looking for an obsolete
5495  // LC_IDENT load command.
5496  offset = MachHeaderSizeFromMagic(m_header.magic);
5497  for (uint32_t i = 0; i < m_header.ncmds; ++i) {
5498  const uint32_t cmd_offset = offset;
5499  struct ident_command ident_command;
5500  if (m_data.GetU32(&offset, &ident_command, 2) == nullptr)
5501  break;
5502  if (ident_command.cmd == LC_IDENT && ident_command.cmdsize != 0) {
5503  char *buf = (char *)malloc(ident_command.cmdsize);
5504  if (buf != nullptr && m_data.CopyData(offset, ident_command.cmdsize,
5505  buf) == ident_command.cmdsize) {
5506  buf[ident_command.cmdsize - 1] = '\0';
5507  result = buf;
5508  }
5509  if (buf)
5510  free(buf);
5511  }
5512  offset = cmd_offset + ident_command.cmdsize;
5513  }
5514  }
5515  return result;
5516 }
5517 
// Looks for an LC_NOTE load command whose data_owner is "main bin spec" and,
// for a version-1 payload of at least 32 bytes, fills in `address`, `uuid`
// and `type` from it and returns true. `address` and `uuid` are reset on
// entry; false is returned when no such note can be read.
// NOTE(review): the first line of this signature and the declaration of
// `offset` are on lines missing from this listing.
5519  ObjectFile::BinaryType &type) {
5520  address = LLDB_INVALID_ADDRESS;
5521  uuid.Clear();
5522  ModuleSP module_sp(GetModule());
5523  if (module_sp) {
5524  std::lock_guard<std::recursive_mutex> guard(module_sp->GetMutex());
5526  for (uint32_t i = 0; i < m_header.ncmds; ++i) {
5527  const uint32_t cmd_offset = offset;
5528  load_command lc;
5529  if (m_data.GetU32(&offset, &lc.cmd, 2) == nullptr)
5530  break;
5531  if (lc.cmd == LC_NOTE) {
// Zeroed first, so the 16 copied bytes are always followed by a terminator.
5532  char data_owner[17];
5533  memset(data_owner, 0, sizeof(data_owner));
5534  m_data.CopyData(offset, 16, data_owner);
5535  offset += 16;
5536  uint64_t fileoff = m_data.GetU64_unchecked(&offset);
5537  uint64_t size = m_data.GetU64_unchecked(&offset);
5538 
5539  // "main bin spec" (main binary specification) data payload is
5540  // formatted:
5541  // uint32_t version [currently 1]
5542  // uint32_t type [0 == unspecified, 1 == kernel,
5543  // 2 == user process, 3 == firmware ]
5544  // uint64_t address [ UINT64_MAX if address not specified ]
5545  // uuid_t uuid [ all zero's if uuid not specified ]
5546  // uint32_t log2_pagesize [ process page size in log base
5547  // 2, e.g. 4k pages are 12.
5548  // 0 for unspecified ]
5549  // uint32_t unused [ for alignment ]
5550 
5551  if (strcmp("main bin spec", data_owner) == 0 && size >= 32) {
// The payload is read from the note's file offset, not from the load command.
5552  offset = fileoff;
5553  uint32_t version;
5554  if (m_data.GetU32(&offset, &version, 1) != nullptr && version == 1) {
5555  uint32_t binspec_type = 0;
5556  uuid_t raw_uuid;
5557  memset(raw_uuid, 0, sizeof(uuid_t));
5558 
5559  if (m_data.GetU32(&offset, &binspec_type, 1) &&
5560  m_data.GetU64(&offset, &address, 1) &&
5561  m_data.CopyData(offset, sizeof(uuid_t), raw_uuid) != 0) {
5562  uuid = UUID::fromOptionalData(raw_uuid, sizeof(uuid_t));
5563  // convert the "main bin spec" type into our
5564  // ObjectFile::BinaryType enum
// NOTE(review): there is no default case, so for any other binspec_type value
// `type` keeps whatever the caller passed in while true is still returned.
5565  switch (binspec_type) {
5566  case 0:
5567  type = eBinaryTypeUnknown;
5568  break;
5569  case 1:
5570  type = eBinaryTypeKernel;
5571  break;
5572  case 2:
5573  type = eBinaryTypeUser;
5574  break;
5575  case 3:
5576  type = eBinaryTypeStandalone;
5577  break;
5578  }
5579  return true;
5580  }
5581  }
5582  }
5583  }
5584  offset = cmd_offset + lc.cmdsize;
5585  }
5586  }
5587  return false;
5588 }
5589 
// Creates the register context for `thread` from a thread-context range of
// this file's data.
//
// The RegisterContextDarwin_*_Mach class is picked from m_header.cputype (the
// arm64, arm, i386 and x86_64 variants appear below) and is constructed with
// a DataExtractor built from m_data using the range's base and byte size.
// The returned pointer is left default-constructed when there is no module,
// when thread_context_file_range is null, or when the cputype matches none of
// the cases.
//
// NOTE(review): the numbering embedded in this listing skips 5591, 5598-5599,
// 5602 and 5609-5610. The start of the signature, whatever runs right after
// the lock is taken, the initializer of thread_context_file_range and the
// case label(s) guarding the arm64 branch are therefore not visible here.
5590 lldb::RegisterContextSP
5592  lldb_private::Thread &thread) {
5593  lldb::RegisterContextSP reg_ctx_sp;
5594 
5595  ModuleSP module_sp(GetModule());
5596  if (module_sp) {
// Hold the module's recursive mutex for the rest of this scope.
5597  std::lock_guard<std::recursive_mutex> guard(module_sp->GetMutex());
5600 
5601  const FileRangeArray::Entry *thread_context_file_range =
5603  if (thread_context_file_range) {
5604 
// Restrict the extractor to the bytes of this one thread context.
5605  DataExtractor data(m_data, thread_context_file_range->GetRangeBase(),
5606  thread_context_file_range->GetByteSize());
5607 
// No default case: an unrecognized cputype leaves reg_ctx_sp unset.
5608  switch (m_header.cputype) {
5611  reg_ctx_sp =
5612  std::make_shared<RegisterContextDarwin_arm64_Mach>(thread, data);
5613  break;
5614 
5615  case llvm::MachO::CPU_TYPE_ARM:
5616  reg_ctx_sp =
5617  std::make_shared<RegisterContextDarwin_arm_Mach>(thread, data);
5618  break;
5619 
5620  case llvm::MachO::CPU_TYPE_I386:
5621  reg_ctx_sp =
5622  std::make_shared<RegisterContextDarwin_i386_Mach>(thread, data);
5623  break;
5624 
5625  case llvm::MachO::CPU_TYPE_X86_64:
5626  reg_ctx_sp =
5627  std::make_shared<RegisterContextDarwin_x86_64_Mach>(thread, data);
5628  break;
5629  }
5630  }
5631  }
5632  return reg_ctx_sp;
5633 }
5634 
5636  switch (m_header.filetype) {
5637  case MH_OBJECT: // 0x1u
5638  if (GetAddressByteSize() == 4) {
5639  // 32 bit kexts are just object files, but they do have a valid
5640  // UUID load command.
5641  if (GetUUID()) {
5642  // this checking for the UUID load command is not enough we could
5643  // eventually look for the symbol named "OSKextGetCurrentIdentifier" as
5644  // this is required of kexts
5645  if (m_strata == eStrataInvalid)
5647  return eTypeSharedLibrary;
5648  }
5649  }
5650  return eTypeObjectFile;
5651 
5652  case MH_EXECUTE:
5653  return eTypeExecutable;