LLDB mainline
ObjectFileMachO.cpp
Go to the documentation of this file.
1//===-- ObjectFileMachO.cpp -----------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "llvm/ADT/ScopeExit.h"
10#include "llvm/ADT/StringRef.h"
11
16#include "lldb/Core/Debugger.h"
17#include "lldb/Core/Module.h"
20#include "lldb/Core/Progress.h"
21#include "lldb/Core/Section.h"
22#include "lldb/Host/Host.h"
28#include "lldb/Target/Process.h"
30#include "lldb/Target/Target.h"
31#include "lldb/Target/Thread.h"
38#include "lldb/Utility/Log.h"
41#include "lldb/Utility/Status.h"
43#include "lldb/Utility/Timer.h"
44#include "lldb/Utility/UUID.h"
45
46#include "lldb/Host/SafeMachO.h"
47
48#include "llvm/ADT/DenseSet.h"
49#include "llvm/Support/FormatVariadic.h"
50#include "llvm/Support/MemoryBuffer.h"
51
52#include "ObjectFileMachO.h"
53
54#if defined(__APPLE__)
55#include <TargetConditionals.h>
56// GetLLDBSharedCacheUUID() needs to call dlsym()
57#include <dlfcn.h>
58#include <mach/mach_init.h>
59#include <mach/vm_map.h>
60#include <lldb/Host/SafeMachO.h>
61#endif
62
63#ifndef __APPLE__
65#else
66#include <uuid/uuid.h>
67#endif
68
69#include <bitset>
70#include <memory>
71#include <optional>
72
73// Unfortunately the signpost header pulls in the system MachO header, too.
74#ifdef CPU_TYPE_ARM
75#undef CPU_TYPE_ARM
76#endif
77#ifdef CPU_TYPE_ARM64
78#undef CPU_TYPE_ARM64
79#endif
80#ifdef CPU_TYPE_ARM64_32
81#undef CPU_TYPE_ARM64_32
82#endif
83#ifdef CPU_TYPE_I386
84#undef CPU_TYPE_I386
85#endif
86#ifdef CPU_TYPE_X86_64
87#undef CPU_TYPE_X86_64
88#endif
89#ifdef MH_DYLINKER
90#undef MH_DYLINKER
91#endif
92#ifdef MH_OBJECT
93#undef MH_OBJECT
94#endif
95#ifdef LC_VERSION_MIN_MACOSX
96#undef LC_VERSION_MIN_MACOSX
97#endif
98#ifdef LC_VERSION_MIN_IPHONEOS
99#undef LC_VERSION_MIN_IPHONEOS
100#endif
101#ifdef LC_VERSION_MIN_TVOS
102#undef LC_VERSION_MIN_TVOS
103#endif
104#ifdef LC_VERSION_MIN_WATCHOS
105#undef LC_VERSION_MIN_WATCHOS
106#endif
107#ifdef LC_BUILD_VERSION
108#undef LC_BUILD_VERSION
109#endif
110#ifdef PLATFORM_MACOS
111#undef PLATFORM_MACOS
112#endif
113#ifdef PLATFORM_MACCATALYST
114#undef PLATFORM_MACCATALYST
115#endif
116#ifdef PLATFORM_IOS
117#undef PLATFORM_IOS
118#endif
119#ifdef PLATFORM_IOSSIMULATOR
120#undef PLATFORM_IOSSIMULATOR
121#endif
122#ifdef PLATFORM_TVOS
123#undef PLATFORM_TVOS
124#endif
125#ifdef PLATFORM_TVOSSIMULATOR
126#undef PLATFORM_TVOSSIMULATOR
127#endif
128#ifdef PLATFORM_WATCHOS
129#undef PLATFORM_WATCHOS
130#endif
131#ifdef PLATFORM_WATCHOSSIMULATOR
132#undef PLATFORM_WATCHOSSIMULATOR
133#endif
134
135#define THUMB_ADDRESS_BIT_MASK 0xfffffffffffffffeull
136using namespace lldb;
137using namespace lldb_private;
138using namespace llvm::MachO;
139
141
/// Append the raw bytes of one register to \p data, using exactly
/// \p reg_byte_size bytes of output.
///
/// The register is looked up by \p name first and, if that lookup yields
/// nullptr, by \p alt_name. When the register is at least \p reg_byte_size
/// bytes wide, only the first \p reg_byte_size bytes are written; when it is
/// narrower, the whole value is written and then padded with zero bytes. If
/// the register cannot be found or read, \p reg_byte_size zero bytes are
/// written instead.
///
/// NOTE(review): reg_value is used below but no declaration is visible in
/// this listing -- presumably a declaration line was dropped; confirm against
/// the upstream source.
142static void PrintRegisterValue(RegisterContext *reg_ctx, const char *name,
143 const char *alt_name, size_t reg_byte_size,
144 Stream &data) {
145 const RegisterInfo *reg_info = reg_ctx->GetRegisterInfoByName(name);
146 if (reg_info == nullptr)
147 reg_info = reg_ctx->GetRegisterInfoByName(alt_name);
148 if (reg_info) {
150 if (reg_ctx->ReadRegister(reg_info, reg_value)) {
// Wide-enough register: emit only the requested number of bytes.
151 if (reg_info->byte_size >= reg_byte_size)
152 data.Write(reg_value.GetBytes(), reg_byte_size);
153 else {
// Narrow register: emit all of it, then zero-fill up to reg_byte_size.
154 data.Write(reg_value.GetBytes(), reg_info->byte_size);
155 for (size_t i = 0, n = reg_byte_size - reg_info->byte_size; i < n; ++i)
156 data.PutChar(0);
157 }
158 return;
159 }
160 }
161 // Just write zeros if all else fails
162 for (size_t i = 0; i < reg_byte_size; ++i)
163 data.PutChar(0);
164}
165
167public:
169 const DataExtractor &data)
170 : RegisterContextDarwin_x86_64(thread, 0) {
172 }
173
174 void InvalidateAllRegisters() override {
175 // Do nothing... registers are always valid...
176 }
177
// Walks the thread-state records in `data`. Each record starts with a 32-bit
// flavor; a flavor of 0 ends parsing, otherwise a 32-bit count follows.
// All three register sets are marked with error -1 before parsing starts.
// NOTE(review): the signature and a few statements of this function are not
// visible in this listing; the comments describe only the visible lines.
179 lldb::offset_t offset = 0;
180 SetError(GPRRegSet, Read, -1);
181 SetError(FPURegSet, Read, -1);
182 SetError(EXCRegSet, Read, -1);
183 bool done = false;
184
185 while (!done) {
186 int flavor = data.GetU32(&offset);
187 if (flavor == 0)
188 done = true;
189 else {
190 uint32_t i;
191 uint32_t count = data.GetU32(&offset);
192 switch (flavor) {
193 case GPRRegSet:
// Copies `count` 64-bit values into consecutive slots starting at gpr.rax.
// NOTE(review): `count` is read from the data and is not bounded in the
// visible code -- confirm it cannot exceed the number of GPR slots.
194 for (i = 0; i < count; ++i)
195 (&gpr.rax)[i] = data.GetU64(&offset);
// Parsing stops after the first GPR record.
197 done = true;
198
199 break;
200 case FPURegSet:
201 // TODO: fill in FPU regs....
202 // SetError (FPURegSet, Read, -1);
203 done = true;
204
205 break;
206 case EXCRegSet:
// Two 32-bit fields followed by one 64-bit field.
207 exc.trapno = data.GetU32(&offset);
208 exc.err = data.GetU32(&offset);
209 exc.faultvaddr = data.GetU64(&offset);
211 done = true;
212 break;
213 case 7:
214 case 8:
215 case 9:
216 // fancy flavors that encapsulate the above flavors...
// No payload is consumed here; the loop simply reads the next flavor/count.
217 break;
218
219 default:
// Unknown flavor: stop parsing.
220 done = true;
221 break;
222 }
223 }
224 }
225 }
226
/// Serialize the register context of \p thread into \p data.
///
/// Writes the GPR flavor followed by 21 eight-byte register values (rax
/// through gs, in the order listed below), then the EXC flavor followed by
/// trapno and err (4 bytes each) and faultvaddr (8 bytes). Registers that
/// cannot be read are written as zeros by PrintRegisterValue.
///
/// \return true if the thread has a register context, false otherwise.
///
/// NOTE(review): the visible lines write no count word after either flavor;
/// confirm against upstream whether a line is missing from this listing.
227 static bool Create_LC_THREAD(Thread *thread, Stream &data) {
228 RegisterContextSP reg_ctx_sp(thread->GetRegisterContext());
229 if (reg_ctx_sp) {
230 RegisterContext *reg_ctx = reg_ctx_sp.get();
231
232 data.PutHex32(GPRRegSet); // Flavor
234 PrintRegisterValue(reg_ctx, "rax", nullptr, 8, data);
235 PrintRegisterValue(reg_ctx, "rbx", nullptr, 8, data);
236 PrintRegisterValue(reg_ctx, "rcx", nullptr, 8, data);
237 PrintRegisterValue(reg_ctx, "rdx", nullptr, 8, data);
238 PrintRegisterValue(reg_ctx, "rdi", nullptr, 8, data);
239 PrintRegisterValue(reg_ctx, "rsi", nullptr, 8, data);
240 PrintRegisterValue(reg_ctx, "rbp", nullptr, 8, data);
241 PrintRegisterValue(reg_ctx, "rsp", nullptr, 8, data);
242 PrintRegisterValue(reg_ctx, "r8", nullptr, 8, data);
243 PrintRegisterValue(reg_ctx, "r9", nullptr, 8, data);
244 PrintRegisterValue(reg_ctx, "r10", nullptr, 8, data);
245 PrintRegisterValue(reg_ctx, "r11", nullptr, 8, data);
246 PrintRegisterValue(reg_ctx, "r12", nullptr, 8, data);
247 PrintRegisterValue(reg_ctx, "r13", nullptr, 8, data);
248 PrintRegisterValue(reg_ctx, "r14", nullptr, 8, data);
249 PrintRegisterValue(reg_ctx, "r15", nullptr, 8, data);
250 PrintRegisterValue(reg_ctx, "rip", nullptr, 8, data);
251 PrintRegisterValue(reg_ctx, "rflags", nullptr, 8, data);
252 PrintRegisterValue(reg_ctx, "cs", nullptr, 8, data);
253 PrintRegisterValue(reg_ctx, "fs", nullptr, 8, data);
254 PrintRegisterValue(reg_ctx, "gs", nullptr, 8, data);
255
// The FPU register set is not emitted; the disabled code below is kept for
// reference only.
256 // // Write out the FPU registers
257 // const size_t fpu_byte_size = sizeof(FPU);
258 // size_t bytes_written = 0;
259 // data.PutHex32 (FPURegSet);
260 // data.PutHex32 (fpu_byte_size/sizeof(uint64_t));
261 // bytes_written += data.PutHex32(0); // uint32_t pad[0]
262 // bytes_written += data.PutHex32(0); // uint32_t pad[1]
263 // bytes_written += WriteRegister (reg_ctx, "fcw", "fctrl", 2,
264 // data); // uint16_t fcw; // "fctrl"
265 // bytes_written += WriteRegister (reg_ctx, "fsw" , "fstat", 2,
266 // data); // uint16_t fsw; // "fstat"
267 // bytes_written += WriteRegister (reg_ctx, "ftw" , "ftag", 1,
268 // data); // uint8_t ftw; // "ftag"
269 // bytes_written += data.PutHex8 (0); // uint8_t pad1;
270 // bytes_written += WriteRegister (reg_ctx, "fop" , NULL, 2,
271 // data); // uint16_t fop; // "fop"
272 // bytes_written += WriteRegister (reg_ctx, "fioff", "ip", 4,
273 // data); // uint32_t ip; // "fioff"
274 // bytes_written += WriteRegister (reg_ctx, "fiseg", NULL, 2,
275 // data); // uint16_t cs; // "fiseg"
276 // bytes_written += data.PutHex16 (0); // uint16_t pad2;
277 // bytes_written += WriteRegister (reg_ctx, "dp", "fooff" , 4,
278 // data); // uint32_t dp; // "fooff"
279 // bytes_written += WriteRegister (reg_ctx, "foseg", NULL, 2,
280 // data); // uint16_t ds; // "foseg"
281 // bytes_written += data.PutHex16 (0); // uint16_t pad3;
282 // bytes_written += WriteRegister (reg_ctx, "mxcsr", NULL, 4,
283 // data); // uint32_t mxcsr;
284 // bytes_written += WriteRegister (reg_ctx, "mxcsrmask", NULL,
285 // 4, data);// uint32_t mxcsrmask;
286 // bytes_written += WriteRegister (reg_ctx, "stmm0", NULL,
287 // sizeof(MMSReg), data);
288 // bytes_written += WriteRegister (reg_ctx, "stmm1", NULL,
289 // sizeof(MMSReg), data);
290 // bytes_written += WriteRegister (reg_ctx, "stmm2", NULL,
291 // sizeof(MMSReg), data);
292 // bytes_written += WriteRegister (reg_ctx, "stmm3", NULL,
293 // sizeof(MMSReg), data);
294 // bytes_written += WriteRegister (reg_ctx, "stmm4", NULL,
295 // sizeof(MMSReg), data);
296 // bytes_written += WriteRegister (reg_ctx, "stmm5", NULL,
297 // sizeof(MMSReg), data);
298 // bytes_written += WriteRegister (reg_ctx, "stmm6", NULL,
299 // sizeof(MMSReg), data);
300 // bytes_written += WriteRegister (reg_ctx, "stmm7", NULL,
301 // sizeof(MMSReg), data);
302 // bytes_written += WriteRegister (reg_ctx, "xmm0" , NULL,
303 // sizeof(XMMReg), data);
304 // bytes_written += WriteRegister (reg_ctx, "xmm1" , NULL,
305 // sizeof(XMMReg), data);
306 // bytes_written += WriteRegister (reg_ctx, "xmm2" , NULL,
307 // sizeof(XMMReg), data);
308 // bytes_written += WriteRegister (reg_ctx, "xmm3" , NULL,
309 // sizeof(XMMReg), data);
310 // bytes_written += WriteRegister (reg_ctx, "xmm4" , NULL,
311 // sizeof(XMMReg), data);
312 // bytes_written += WriteRegister (reg_ctx, "xmm5" , NULL,
313 // sizeof(XMMReg), data);
314 // bytes_written += WriteRegister (reg_ctx, "xmm6" , NULL,
315 // sizeof(XMMReg), data);
316 // bytes_written += WriteRegister (reg_ctx, "xmm7" , NULL,
317 // sizeof(XMMReg), data);
318 // bytes_written += WriteRegister (reg_ctx, "xmm8" , NULL,
319 // sizeof(XMMReg), data);
320 // bytes_written += WriteRegister (reg_ctx, "xmm9" , NULL,
321 // sizeof(XMMReg), data);
322 // bytes_written += WriteRegister (reg_ctx, "xmm10", NULL,
323 // sizeof(XMMReg), data);
324 // bytes_written += WriteRegister (reg_ctx, "xmm11", NULL,
325 // sizeof(XMMReg), data);
326 // bytes_written += WriteRegister (reg_ctx, "xmm12", NULL,
327 // sizeof(XMMReg), data);
328 // bytes_written += WriteRegister (reg_ctx, "xmm13", NULL,
329 // sizeof(XMMReg), data);
330 // bytes_written += WriteRegister (reg_ctx, "xmm14", NULL,
331 // sizeof(XMMReg), data);
332 // bytes_written += WriteRegister (reg_ctx, "xmm15", NULL,
333 // sizeof(XMMReg), data);
334 //
335 // // Fill rest with zeros
336 // for (size_t i=0, n = fpu_byte_size - bytes_written; i<n; ++
337 // i)
338 // data.PutChar(0);
339
340 // Write out the EXC registers
341 data.PutHex32(EXCRegSet);
343 PrintRegisterValue(reg_ctx, "trapno", nullptr, 4, data);
344 PrintRegisterValue(reg_ctx, "err", nullptr, 4, data);
345 PrintRegisterValue(reg_ctx, "faultvaddr", nullptr, 8, data);
346 return true;
347 }
// No register context available for this thread.
348 return false;
349 }
350
351protected:
352 int DoReadGPR(lldb::tid_t tid, int flavor, GPR &gpr) override { return 0; }
353
354 int DoReadFPU(lldb::tid_t tid, int flavor, FPU &fpu) override { return 0; }
355
356 int DoReadEXC(lldb::tid_t tid, int flavor, EXC &exc) override { return 0; }
357
358 int DoWriteGPR(lldb::tid_t tid, int flavor, const GPR &gpr) override {
359 return 0;
360 }
361
362 int DoWriteFPU(lldb::tid_t tid, int flavor, const FPU &fpu) override {
363 return 0;
364 }
365
366 int DoWriteEXC(lldb::tid_t tid, int flavor, const EXC &exc) override {
367 return 0;
368 }
369};
370
372public:
374 const DataExtractor &data)
375 : RegisterContextDarwin_i386(thread, 0) {
377 }
378
379 void InvalidateAllRegisters() override {
380 // Do nothing... registers are always valid...
381 }
382
// Walks the thread-state records in `data`. Each record starts with a 32-bit
// flavor; a flavor of 0 ends parsing, otherwise a 32-bit count follows.
// All three register sets are marked with error -1 before parsing starts.
// NOTE(review): the signature and a few statements of this function are not
// visible in this listing; the comments describe only the visible lines.
384 lldb::offset_t offset = 0;
385 SetError(GPRRegSet, Read, -1);
386 SetError(FPURegSet, Read, -1);
387 SetError(EXCRegSet, Read, -1);
388 bool done = false;
389
390 while (!done) {
391 int flavor = data.GetU32(&offset);
392 if (flavor == 0)
393 done = true;
394 else {
395 uint32_t i;
396 uint32_t count = data.GetU32(&offset);
397 switch (flavor) {
398 case GPRRegSet:
// Copies `count` 32-bit values into consecutive slots starting at gpr.eax.
// NOTE(review): `count` is read from the data and is not bounded in the
// visible code -- confirm it cannot exceed the number of GPR slots.
399 for (i = 0; i < count; ++i)
400 (&gpr.eax)[i] = data.GetU32(&offset);
// Parsing stops after the first GPR record.
402 done = true;
403
404 break;
405 case FPURegSet:
406 // TODO: fill in FPU regs....
407 // SetError (FPURegSet, Read, -1);
408 done = true;
409
410 break;
411 case EXCRegSet:
// Three consecutive 32-bit fields.
412 exc.trapno = data.GetU32(&offset);
413 exc.err = data.GetU32(&offset);
414 exc.faultvaddr = data.GetU32(&offset);
416 done = true;
417 break;
418 case 7:
419 case 8:
420 case 9:
421 // fancy flavors that encapsulate the above flavors...
// No payload is consumed here; the loop simply reads the next flavor/count.
422 break;
423
424 default:
// Unknown flavor: stop parsing.
425 done = true;
426 break;
427 }
428 }
429 }
430 }
431
/// Serialize the register context of \p thread into \p data.
///
/// Writes the GPR flavor followed by 16 four-byte register values (eax
/// through gs, in the order listed below), then the EXC flavor followed by
/// trapno, err and faultvaddr (4 bytes each). Registers that cannot be read
/// are written as zeros by PrintRegisterValue.
///
/// \return true if the thread has a register context, false otherwise.
///
/// NOTE(review): the visible lines write no count word after either flavor;
/// confirm against upstream whether a line is missing from this listing.
432 static bool Create_LC_THREAD(Thread *thread, Stream &data) {
433 RegisterContextSP reg_ctx_sp(thread->GetRegisterContext());
434 if (reg_ctx_sp) {
435 RegisterContext *reg_ctx = reg_ctx_sp.get();
436
437 data.PutHex32(GPRRegSet); // Flavor
439 PrintRegisterValue(reg_ctx, "eax", nullptr, 4, data);
440 PrintRegisterValue(reg_ctx, "ebx", nullptr, 4, data);
441 PrintRegisterValue(reg_ctx, "ecx", nullptr, 4, data);
442 PrintRegisterValue(reg_ctx, "edx", nullptr, 4, data);
443 PrintRegisterValue(reg_ctx, "edi", nullptr, 4, data);
444 PrintRegisterValue(reg_ctx, "esi", nullptr, 4, data);
445 PrintRegisterValue(reg_ctx, "ebp", nullptr, 4, data);
446 PrintRegisterValue(reg_ctx, "esp", nullptr, 4, data);
447 PrintRegisterValue(reg_ctx, "ss", nullptr, 4, data);
448 PrintRegisterValue(reg_ctx, "eflags", nullptr, 4, data);
449 PrintRegisterValue(reg_ctx, "eip", nullptr, 4, data);
450 PrintRegisterValue(reg_ctx, "cs", nullptr, 4, data);
451 PrintRegisterValue(reg_ctx, "ds", nullptr, 4, data);
452 PrintRegisterValue(reg_ctx, "es", nullptr, 4, data);
453 PrintRegisterValue(reg_ctx, "fs", nullptr, 4, data);
454 PrintRegisterValue(reg_ctx, "gs", nullptr, 4, data);
455
456 // Write out the EXC registers
457 data.PutHex32(EXCRegSet);
459 PrintRegisterValue(reg_ctx, "trapno", nullptr, 4, data);
460 PrintRegisterValue(reg_ctx, "err", nullptr, 4, data);
461 PrintRegisterValue(reg_ctx, "faultvaddr", nullptr, 4, data);
462 return true;
463 }
// No register context available for this thread.
464 return false;
465 }
466
467protected:
468 int DoReadGPR(lldb::tid_t tid, int flavor, GPR &gpr) override { return 0; }
469
470 int DoReadFPU(lldb::tid_t tid, int flavor, FPU &fpu) override { return 0; }
471
472 int DoReadEXC(lldb::tid_t tid, int flavor, EXC &exc) override { return 0; }
473
474 int DoWriteGPR(lldb::tid_t tid, int flavor, const GPR &gpr) override {
475 return 0;
476 }
477
478 int DoWriteFPU(lldb::tid_t tid, int flavor, const FPU &fpu) override {
479 return 0;
480 }
481
482 int DoWriteEXC(lldb::tid_t tid, int flavor, const EXC &exc) override {
483 return 0;
484 }
485};
486
488public:
490 const DataExtractor &data)
491 : RegisterContextDarwin_arm(thread, 0) {
493 }
494
495 void InvalidateAllRegisters() override {
496 // Do nothing... registers are always valid...
497 }
498
// Walks the thread-state records in `data`. Each record is a 32-bit flavor
// and a 32-bit count followed by `count` 32-bit words of payload. All three
// register sets are marked with error -1 before parsing starts.
// NOTE(review): the signature and a few statements of this function are not
// visible in this listing; the comments describe only the visible lines.
500 lldb::offset_t offset = 0;
501 SetError(GPRRegSet, Read, -1);
502 SetError(FPURegSet, Read, -1);
503 SetError(EXCRegSet, Read, -1);
504 bool done = false;
505
506 while (!done) {
507 int flavor = data.GetU32(&offset);
508 uint32_t count = data.GetU32(&offset);
// Start of the following record; used to resynchronize after each case.
// NOTE(review): `count * 4` is computed in 32 bits before being added to
// offset -- confirm wraparound on a very large count is not a concern.
509 lldb::offset_t next_thread_state = offset + (count * 4);
510 switch (flavor) {
511 case GPRAltRegSet:
512 case GPRRegSet: {
513 // r0-r15, plus CPSR
514 uint32_t gpr_buf_count = (sizeof(gpr.r) / sizeof(gpr.r[0])) + 1;
// The payload is only consumed when its word count matches exactly.
515 if (count == gpr_buf_count) {
516 for (uint32_t i = 0; i < (count - 1); ++i) {
517 gpr.r[i] = data.GetU32(&offset);
518 }
519 gpr.cpsr = data.GetU32(&offset);
520
522 }
523 }
524 offset = next_thread_state;
525 break;
526
527 case FPURegSet: {
// Copies sizeof(fpu.floats) bytes into fpu.floats, then reads fpscr as one
// more 32-bit word. A short read stops parsing.
528 uint8_t *fpu_reg_buf = (uint8_t *)&fpu.floats;
529 const int fpu_reg_buf_size = sizeof(fpu.floats);
530 if (data.ExtractBytes(offset, fpu_reg_buf_size, eByteOrderLittle,
531 fpu_reg_buf) == fpu_reg_buf_size) {
532 offset += fpu_reg_buf_size;
533 fpu.fpscr = data.GetU32(&offset);
535 } else {
536 done = true;
537 }
538 }
539 offset = next_thread_state;
540 break;
541
542 case EXCRegSet:
// Exactly three 32-bit words are expected.
543 if (count == 3) {
544 exc.exception = data.GetU32(&offset);
545 exc.fsr = data.GetU32(&offset);
546 exc.far = data.GetU32(&offset);
548 }
// Parsing always stops after an EXC record, whether or not it was consumed.
549 done = true;
550 offset = next_thread_state;
551 break;
552
553 // Unknown register set flavor, stop trying to parse.
554 default:
555 done = true;
556 }
557 }
558 }
559
/// Serialize the register context of \p thread into \p data.
///
/// Writes the GPR flavor followed by 17 four-byte register values: r0-r12,
/// sp, lr, pc and cpsr. Registers that cannot be read are written as zeros
/// by PrintRegisterValue. The EXC register set is not emitted (the code for
/// it is commented out below).
///
/// \return true if the thread has a register context, false otherwise.
///
/// NOTE(review): the visible lines write no count word after the flavor;
/// confirm against upstream whether a line is missing from this listing.
560 static bool Create_LC_THREAD(Thread *thread, Stream &data) {
561 RegisterContextSP reg_ctx_sp(thread->GetRegisterContext());
562 if (reg_ctx_sp) {
563 RegisterContext *reg_ctx = reg_ctx_sp.get();
564
565 data.PutHex32(GPRRegSet); // Flavor
567 PrintRegisterValue(reg_ctx, "r0", nullptr, 4, data);
568 PrintRegisterValue(reg_ctx, "r1", nullptr, 4, data);
569 PrintRegisterValue(reg_ctx, "r2", nullptr, 4, data);
570 PrintRegisterValue(reg_ctx, "r3", nullptr, 4, data);
571 PrintRegisterValue(reg_ctx, "r4", nullptr, 4, data);
572 PrintRegisterValue(reg_ctx, "r5", nullptr, 4, data);
573 PrintRegisterValue(reg_ctx, "r6", nullptr, 4, data);
574 PrintRegisterValue(reg_ctx, "r7", nullptr, 4, data);
575 PrintRegisterValue(reg_ctx, "r8", nullptr, 4, data);
576 PrintRegisterValue(reg_ctx, "r9", nullptr, 4, data);
577 PrintRegisterValue(reg_ctx, "r10", nullptr, 4, data);
578 PrintRegisterValue(reg_ctx, "r11", nullptr, 4, data);
579 PrintRegisterValue(reg_ctx, "r12", nullptr, 4, data);
580 PrintRegisterValue(reg_ctx, "sp", nullptr, 4, data);
581 PrintRegisterValue(reg_ctx, "lr", nullptr, 4, data);
582 PrintRegisterValue(reg_ctx, "pc", nullptr, 4, data);
583 PrintRegisterValue(reg_ctx, "cpsr", nullptr, 4, data);
584
585 // Write out the EXC registers
586 // data.PutHex32 (EXCRegSet);
587 // data.PutHex32 (EXCWordCount);
588 // WriteRegister (reg_ctx, "exception", NULL, 4, data);
589 // WriteRegister (reg_ctx, "fsr", NULL, 4, data);
590 // WriteRegister (reg_ctx, "far", NULL, 4, data);
591 return true;
592 }
// No register context available for this thread.
593 return false;
594 }
595
596protected:
597 int DoReadGPR(lldb::tid_t tid, int flavor, GPR &gpr) override { return -1; }
598
599 int DoReadFPU(lldb::tid_t tid, int flavor, FPU &fpu) override { return -1; }
600
601 int DoReadEXC(lldb::tid_t tid, int flavor, EXC &exc) override { return -1; }
602
603 int DoReadDBG(lldb::tid_t tid, int flavor, DBG &dbg) override { return -1; }
604
605 int DoWriteGPR(lldb::tid_t tid, int flavor, const GPR &gpr) override {
606 return 0;
607 }
608
609 int DoWriteFPU(lldb::tid_t tid, int flavor, const FPU &fpu) override {
610 return 0;
611 }
612
613 int DoWriteEXC(lldb::tid_t tid, int flavor, const EXC &exc) override {
614 return 0;
615 }
616
617 int DoWriteDBG(lldb::tid_t tid, int flavor, const DBG &dbg) override {
618 return -1;
619 }
620};
621
623public:
625 const DataExtractor &data)
626 : RegisterContextDarwin_arm64(thread, 0) {
628 }
629
630 void InvalidateAllRegisters() override {
631 // Do nothing... registers are always valid...
632 }
633
// Walks the thread-state records in `data`. Each record is a 32-bit flavor
// and a 32-bit count followed by `count` 32-bit words of payload. All three
// register sets are marked with error -1 before parsing starts.
// NOTE(review): the signature and a few statements of this function are not
// visible in this listing; the comments describe only the visible lines.
635 lldb::offset_t offset = 0;
636 SetError(GPRRegSet, Read, -1);
637 SetError(FPURegSet, Read, -1);
638 SetError(EXCRegSet, Read, -1);
639 bool done = false;
640 while (!done) {
641 int flavor = data.GetU32(&offset);
642 uint32_t count = data.GetU32(&offset);
// Start of the following record; used to resynchronize after each case.
643 lldb::offset_t next_thread_state = offset + (count * 4);
644 switch (flavor) {
645 case GPRRegSet:
646 // x0-x28 + fp + lr + sp + pc (== 33 64-bit registers) plus cpsr (1
647 // 32-bit register)
648 if (count >= (33 * 2) + 1) {
649 for (uint32_t i = 0; i < 29; ++i)
650 gpr.x[i] = data.GetU64(&offset);
651 gpr.fp = data.GetU64(&offset);
652 gpr.lr = data.GetU64(&offset);
653 gpr.sp = data.GetU64(&offset);
654 gpr.pc = data.GetU64(&offset);
655 gpr.cpsr = data.GetU32(&offset);
657 }
658 offset = next_thread_state;
659 break;
660 case FPURegSet: {
// The payload must be exactly sizeof(fpu) bytes and fully extractable into
// the buffer starting at fpu.v[0]; otherwise parsing stops.
661 uint8_t *fpu_reg_buf = (uint8_t *)&fpu.v[0];
662 const int fpu_reg_buf_size = sizeof(fpu);
663 if (fpu_reg_buf_size == count * sizeof(uint32_t) &&
664 data.ExtractBytes(offset, fpu_reg_buf_size, eByteOrderLittle,
665 fpu_reg_buf) == fpu_reg_buf_size) {
667 } else {
668 done = true;
669 }
670 }
671 offset = next_thread_state;
672 break;
673 case EXCRegSet:
// Four words: one 64-bit far, then 32-bit esr and exception.
674 if (count == 4) {
675 exc.far = data.GetU64(&offset);
676 exc.esr = data.GetU32(&offset);
677 exc.exception = data.GetU32(&offset);
679 }
680 offset = next_thread_state;
681 break;
682 default:
// Unknown flavor: stop parsing.
683 done = true;
684 break;
685 }
686 }
687 }
688
/// Serialize the register context of \p thread into \p data.
///
/// Writes the GPR flavor followed by x0-x28, fp, lr, sp and pc (8 bytes
/// each), cpsr (4 bytes) and a 4-byte zero pad, then the EXC flavor followed
/// by far (8 bytes), esr and exception (4 bytes each). Registers that cannot
/// be read are written as zeros by PrintRegisterValue.
///
/// \return true if the thread has a register context, false otherwise.
///
/// NOTE(review): the visible lines write no count word after either flavor;
/// confirm against upstream whether a line is missing from this listing.
689 static bool Create_LC_THREAD(Thread *thread, Stream &data) {
690 RegisterContextSP reg_ctx_sp(thread->GetRegisterContext());
691 if (reg_ctx_sp) {
692 RegisterContext *reg_ctx = reg_ctx_sp.get();
693
694 data.PutHex32(GPRRegSet); // Flavor
696 PrintRegisterValue(reg_ctx, "x0", nullptr, 8, data);
697 PrintRegisterValue(reg_ctx, "x1", nullptr, 8, data);
698 PrintRegisterValue(reg_ctx, "x2", nullptr, 8, data);
699 PrintRegisterValue(reg_ctx, "x3", nullptr, 8, data);
700 PrintRegisterValue(reg_ctx, "x4", nullptr, 8, data);
701 PrintRegisterValue(reg_ctx, "x5", nullptr, 8, data);
702 PrintRegisterValue(reg_ctx, "x6", nullptr, 8, data);
703 PrintRegisterValue(reg_ctx, "x7", nullptr, 8, data);
704 PrintRegisterValue(reg_ctx, "x8", nullptr, 8, data);
705 PrintRegisterValue(reg_ctx, "x9", nullptr, 8, data);
706 PrintRegisterValue(reg_ctx, "x10", nullptr, 8, data);
707 PrintRegisterValue(reg_ctx, "x11", nullptr, 8, data);
708 PrintRegisterValue(reg_ctx, "x12", nullptr, 8, data);
709 PrintRegisterValue(reg_ctx, "x13", nullptr, 8, data);
710 PrintRegisterValue(reg_ctx, "x14", nullptr, 8, data);
711 PrintRegisterValue(reg_ctx, "x15", nullptr, 8, data);
712 PrintRegisterValue(reg_ctx, "x16", nullptr, 8, data);
713 PrintRegisterValue(reg_ctx, "x17", nullptr, 8, data);
714 PrintRegisterValue(reg_ctx, "x18", nullptr, 8, data);
715 PrintRegisterValue(reg_ctx, "x19", nullptr, 8, data);
716 PrintRegisterValue(reg_ctx, "x20", nullptr, 8, data);
717 PrintRegisterValue(reg_ctx, "x21", nullptr, 8, data);
718 PrintRegisterValue(reg_ctx, "x22", nullptr, 8, data);
719 PrintRegisterValue(reg_ctx, "x23", nullptr, 8, data);
720 PrintRegisterValue(reg_ctx, "x24", nullptr, 8, data);
721 PrintRegisterValue(reg_ctx, "x25", nullptr, 8, data);
722 PrintRegisterValue(reg_ctx, "x26", nullptr, 8, data);
723 PrintRegisterValue(reg_ctx, "x27", nullptr, 8, data);
724 PrintRegisterValue(reg_ctx, "x28", nullptr, 8, data);
725 PrintRegisterValue(reg_ctx, "fp", nullptr, 8, data);
726 PrintRegisterValue(reg_ctx, "lr", nullptr, 8, data);
727 PrintRegisterValue(reg_ctx, "sp", nullptr, 8, data);
728 PrintRegisterValue(reg_ctx, "pc", nullptr, 8, data);
729 PrintRegisterValue(reg_ctx, "cpsr", nullptr, 4, data);
730 data.PutHex32(0); // uint32_t pad at the end
731
732 // Write out the EXC registers
733 data.PutHex32(EXCRegSet);
735 PrintRegisterValue(reg_ctx, "far", nullptr, 8, data);
736 PrintRegisterValue(reg_ctx, "esr", nullptr, 4, data);
737 PrintRegisterValue(reg_ctx, "exception", nullptr, 4, data);
738 return true;
739 }
// No register context available for this thread.
740 return false;
741 }
742
743protected:
744 int DoReadGPR(lldb::tid_t tid, int flavor, GPR &gpr) override { return -1; }
745
746 int DoReadFPU(lldb::tid_t tid, int flavor, FPU &fpu) override { return -1; }
747
748 int DoReadEXC(lldb::tid_t tid, int flavor, EXC &exc) override { return -1; }
749
750 int DoReadDBG(lldb::tid_t tid, int flavor, DBG &dbg) override { return -1; }
751
752 int DoWriteGPR(lldb::tid_t tid, int flavor, const GPR &gpr) override {
753 return 0;
754 }
755
756 int DoWriteFPU(lldb::tid_t tid, int flavor, const FPU &fpu) override {
757 return 0;
758 }
759
760 int DoWriteEXC(lldb::tid_t tid, int flavor, const EXC &exc) override {
761 return 0;
762 }
763
764 int DoWriteDBG(lldb::tid_t tid, int flavor, const DBG &dbg) override {
765 return -1;
766 }
767};
768
769static uint32_t MachHeaderSizeFromMagic(uint32_t magic) {
770 switch (magic) {
771 case MH_MAGIC:
772 case MH_CIGAM:
773 return sizeof(struct llvm::MachO::mach_header);
774
775 case MH_MAGIC_64:
776 case MH_CIGAM_64:
777 return sizeof(struct llvm::MachO::mach_header_64);
778 break;
779
780 default:
781 break;
782 }
783 return 0;
784}
785
786#define MACHO_NLIST_ARM_SYMBOL_IS_THUMB 0x0008
787
789
794}
795
798}
799
801 DataBufferSP data_sp,
802 lldb::offset_t data_offset,
803 const FileSpec *file,
804 lldb::offset_t file_offset,
805 lldb::offset_t length) {
// Builds an ObjectFileMachO for the given file/data and returns it as a raw
// pointer (released from a unique_ptr), or nullptr when the data cannot be
// mapped, the magic bytes do not match, or the header fails to parse.
// NOTE(review): `file` is dereferenced below without a null check -- confirm
// callers always pass a valid pointer when data_sp is empty or too small.
806 if (!data_sp) {
// No data supplied: map it from the file, so the buffer starts at offset 0.
807 data_sp = MapFileData(*file, length, file_offset);
808 if (!data_sp)
809 return nullptr;
810 data_offset = 0;
811 }
812
813 if (!ObjectFileMachO::MagicBytesMatch(data_sp, data_offset, length))
814 return nullptr;
815
816 // Update the data to contain the entire file if it doesn't already
817 if (data_sp->GetByteSize() < length) {
818 data_sp = MapFileData(*file, length, file_offset);
819 if (!data_sp)
820 return nullptr;
821 data_offset = 0;
822 }
823 auto objfile_up = std::make_unique<ObjectFileMachO>(
824 module_sp, data_sp, data_offset, file, file_offset, length);
825 if (!objfile_up || !objfile_up->ParseHeader())
826 return nullptr;
827
// Ownership passes to the caller.
828 return objfile_up.release();
829}
830
832 const lldb::ModuleSP &module_sp, WritableDataBufferSP data_sp,
833 const ProcessSP &process_sp, lldb::addr_t header_addr) {
// Builds an ObjectFileMachO from header bytes in `data_sp` together with a
// process and header address. Returns the new object (ownership passes to the
// caller) only when the magic bytes match and ParseHeader() succeeds;
// otherwise returns nullptr.
// NOTE(review): data_sp is dereferenced without a null check -- confirm
// callers never pass an empty pointer.
834 if (ObjectFileMachO::MagicBytesMatch(data_sp, 0, data_sp->GetByteSize())) {
835 std::unique_ptr<ObjectFile> objfile_up(
836 new ObjectFileMachO(module_sp, data_sp, process_sp, header_addr));
837 if (objfile_up.get() && objfile_up->ParseHeader())
838 return objfile_up.release();
839 }
840 return nullptr;
841}
842
844 const lldb_private::FileSpec &file, lldb::DataBufferSP &data_sp,
845 lldb::offset_t data_offset, lldb::offset_t file_offset,
847 const size_t initial_count = specs.GetSize();
848
// Appends module specs for the Mach-O image in `data_sp` to `specs` and
// returns how many were added (0 when the magic or header does not parse).
// NOTE(review): part of the signature is not visible in this listing.
849 if (ObjectFileMachO::MagicBytesMatch(data_sp, 0, data_sp->GetByteSize())) {
850 DataExtractor data;
851 data.SetData(data_sp);
852 llvm::MachO::mach_header header;
853 if (ParseHeader(data, &data_offset, header)) {
854 size_t header_and_load_cmds =
855 header.sizeofcmds + MachHeaderSizeFromMagic(header.magic);
// The current buffer does not extend past the header plus load commands:
// re-map that many bytes from the file and restart just after the header.
856 if (header_and_load_cmds >= data_sp->GetByteSize()) {
857 data_sp = MapFileData(file, header_and_load_cmds, file_offset);
858 data.SetData(data_sp);
859 data_offset = MachHeaderSizeFromMagic(header.magic);
860 }
// data_sp may have been replaced by the re-map above, so it is checked here.
861 if (data_sp) {
862 ModuleSpec base_spec;
863 base_spec.GetFileSpec() = file;
864 base_spec.SetObjectOffset(file_offset);
865 base_spec.SetObjectSize(length);
866 GetAllArchSpecs(header, data, data_offset, base_spec, specs);
867 }
868 }
869 }
870 return specs.GetSize() - initial_count;
871}
872
874 static ConstString g_segment_name_TEXT("__TEXT");
875 return g_segment_name_TEXT;
876}
877
879 static ConstString g_segment_name_DATA("__DATA");
880 return g_segment_name_DATA;
881}
882
884 static ConstString g_segment_name("__DATA_DIRTY");
885 return g_segment_name;
886}
887
889 static ConstString g_segment_name("__DATA_CONST");
890 return g_segment_name;
891}
892
894 static ConstString g_segment_name_OBJC("__OBJC");
895 return g_segment_name_OBJC;
896}
897
899 static ConstString g_section_name_LINKEDIT("__LINKEDIT");
900 return g_section_name_LINKEDIT;
901}
902
904 static ConstString g_section_name("__DWARF");
905 return g_section_name;
906}
907
909 static ConstString g_section_name_eh_frame("__eh_frame");
910 return g_section_name_eh_frame;
911}
912
914 lldb::addr_t data_offset,
915 lldb::addr_t data_length) {
// Returns true when the bytes at data_offset begin with a recognized Mach-O
// magic and the header's filetype field is not MH_FILESET.
916 DataExtractor data;
917 data.SetData(data_sp, data_offset, data_length);
918 lldb::offset_t offset = 0;
919 uint32_t magic = data.GetU32(&offset);
920
// Skip two 32-bit header fields to reach filetype.
921 offset += 4; // cputype
922 offset += 4; // cpusubtype
923 uint32_t filetype = data.GetU32(&offset);
924
925 // A fileset has a Mach-O header but is not an
926 // individual file and must be handled via an
927 // ObjectContainer plugin.
928 if (filetype == llvm::MachO::MH_FILESET)
929 return false;
930
// A non-zero header size means the magic was one of the known values.
931 return MachHeaderSizeFromMagic(magic) != 0;
932}
933
935 DataBufferSP data_sp,
936 lldb::offset_t data_offset,
937 const FileSpec *file,
938 lldb::offset_t file_offset,
939 lldb::offset_t length)
940 : ObjectFile(module_sp, file, file_offset, length, data_sp, data_offset),
941 m_mach_sections(), m_entry_point_address(), m_thread_context_offsets(),
942 m_thread_context_offsets_valid(false), m_reexported_dylibs(),
943 m_allow_assembly_emulation_unwind_plans(true) {
944 ::memset(&m_header, 0, sizeof(m_header));
945 ::memset(&m_dysymtab, 0, sizeof(m_dysymtab));
946}
947
949 lldb::WritableDataBufferSP header_data_sp,
950 const lldb::ProcessSP &process_sp,
951 lldb::addr_t header_addr)
952 : ObjectFile(module_sp, process_sp, header_addr, header_data_sp),
953 m_mach_sections(), m_entry_point_address(), m_thread_context_offsets(),
954 m_thread_context_offsets_valid(false), m_reexported_dylibs(),
955 m_allow_assembly_emulation_unwind_plans(true) {
956 ::memset(&m_header, 0, sizeof(m_header));
957 ::memset(&m_dysymtab, 0, sizeof(m_dysymtab));
958}
959
961 lldb::offset_t *data_offset_ptr,
962 llvm::MachO::mach_header &header) {
// Reads a Mach-O header from `data` at *data_offset_ptr into `header`,
// advancing the offset. Returns true when the magic is recognized; otherwise
// zeroes `header` and returns false.
// NOTE(review): several lines of this function (including the byte-order
// setup in each case) are not visible in this listing.
964 // Leave magic in the original byte order
965 header.magic = data.GetU32(data_offset_ptr);
966 bool can_parse = false;
967 bool is_64_bit = false;
968 switch (header.magic) {
969 case MH_MAGIC:
971 data.SetAddressByteSize(4);
972 can_parse = true;
973 break;
974
975 case MH_MAGIC_64:
977 data.SetAddressByteSize(8);
978 can_parse = true;
979 is_64_bit = true;
980 break;
981
982 case MH_CIGAM:
985 : eByteOrderBig);
986 data.SetAddressByteSize(4);
987 can_parse = true;
988 break;
989
990 case MH_CIGAM_64:
993 : eByteOrderBig);
994 data.SetAddressByteSize(8);
995 is_64_bit = true;
996 can_parse = true;
997 break;
998
999 default:
1000 break;
1001 }
1002
1003 if (can_parse) {
// Read six consecutive 32-bit fields, starting at header.cputype.
1004 data.GetU32(data_offset_ptr, &header.cputype, 6);
// 64-bit headers carry one extra 32-bit word (presumably the reserved field
// of mach_header_64 -- confirm); skip it.
1005 if (is_64_bit)
1006 *data_offset_ptr += 4;
1007 return true;
1008 } else {
1009 memset(&header, 0, sizeof(header));
1010 }
1011 return false;
1012}
1013
// Parses the Mach-O header at the start of m_data into m_header while holding
// the module's mutex, then picks an architecture from the specs found for it.
// Returns false when there is no module, the magic is unrecognized (m_header
// is zeroed in that case) or no compatible architecture is found.
// NOTE(review): the signature and several statements of this function are
// not visible in this listing; the comments describe only the visible lines.
1015 ModuleSP module_sp(GetModule());
1016 if (!module_sp)
1017 return false;
1018
1019 std::lock_guard<std::recursive_mutex> guard(module_sp->GetMutex());
1020 bool can_parse = false;
1021 lldb::offset_t offset = 0;
1023 // Leave magic in the original byte order
1024 m_header.magic = m_data.GetU32(&offset);
1025 switch (m_header.magic) {
1026 case MH_MAGIC:
1029 can_parse = true;
1030 break;
1031
1032 case MH_MAGIC_64:
1035 can_parse = true;
1036 break;
1037
1038 case MH_CIGAM:
1041 : eByteOrderBig);
1043 can_parse = true;
1044 break;
1045
1046 case MH_CIGAM_64:
1049 : eByteOrderBig);
1051 can_parse = true;
1052 break;
1053
1054 default:
1055 break;
1056 }
1057
1058 if (can_parse) {
// Read six consecutive 32-bit fields, starting at m_header.cputype.
1059 m_data.GetU32(&offset, &m_header.cputype, 6);
1060
1061 ModuleSpecList all_specs;
1062 ModuleSpec base_spec;
1064 base_spec, all_specs);
1065
1066 for (unsigned i = 0, e = all_specs.GetSize(); i != e; ++i) {
1067 ArchSpec mach_arch =
1069
1070 // Check if the module has a required architecture
1071 const ArchSpec &module_arch = module_sp->GetArchitecture();
1072 if (module_arch.IsValid() && !module_arch.IsCompatibleMatch(mach_arch))
1073 continue;
1074
1075 if (SetModulesArchitecture(mach_arch)) {
1076 const size_t header_and_lc_size =
1077 m_header.sizeofcmds + MachHeaderSizeFromMagic(m_header.magic);
// Make sure m_data covers the header plus all load commands, reading from
// process memory when a process is attached and from the file otherwise.
1078 if (m_data.GetByteSize() < header_and_lc_size) {
1079 DataBufferSP data_sp;
1080 ProcessSP process_sp(m_process_wp.lock());
1081 if (process_sp) {
1082 data_sp = ReadMemory(process_sp, m_memory_addr, header_and_lc_size);
1083 } else {
1084 // Read in only the load command data from the file on disk
// NOTE(review): the MapFileData result is dereferenced without a null check
// on the next line -- confirm it cannot be empty here.
1085 data_sp = MapFileData(m_file, header_and_lc_size, m_file_offset);
1086 if (data_sp->GetByteSize() != header_and_lc_size)
1087 continue;
1088 }
1089 if (data_sp)
1090 m_data.SetData(data_sp);
1091 }
1092 }
// The first architecture that passes the compatibility check ends the search.
1093 return true;
1094 }
1095 // None found.
1096 return false;
1097 } else {
1098 memset(&m_header, 0, sizeof(struct llvm::MachO::mach_header));
1099 }
1100 return false;
1101}
1102
1104 return m_data.GetByteOrder();
1105}
1106
1108 return m_header.filetype == MH_EXECUTE;
1109}
1110
1112 return m_header.filetype == MH_DYLINKER;
1113}
1114
1116 return m_header.flags & MH_DYLIB_IN_CACHE;
1117}
1118
1120 return m_header.filetype == MH_KEXT_BUNDLE;
1121}
1122
1124 return m_data.GetAddressByteSize();
1125}
1126
1128 Symtab *symtab = GetSymtab();
1129 if (!symtab)
1130 return AddressClass::eUnknown;
1131
1132 Symbol *symbol = symtab->FindSymbolContainingFileAddress(file_addr);
1133 if (symbol) {
1134 if (symbol->ValueIsAddress()) {
1135 SectionSP section_sp(symbol->GetAddressRef().GetSection());
1136 if (section_sp) {
1137 const lldb::SectionType section_type = section_sp->GetType();
1138 switch (section_type) {
1140 return AddressClass::eUnknown;
1141
1142 case eSectionTypeCode:
1143 if (m_header.cputype == llvm::MachO::CPU_TYPE_ARM) {
1144 // For ARM we have a bit in the n_desc field of the symbol that
1145 // tells us ARM/Thumb which is bit 0x0008.
1147 return AddressClass::eCodeAlternateISA;
1148 }
1149 return AddressClass::eCode;
1150
1152 return AddressClass::eUnknown;
1153
1154 case eSectionTypeData:
1158 case eSectionTypeData4:
1159 case eSectionTypeData8:
1160 case eSectionTypeData16:
1166 return AddressClass::eData;
1167
1168 case eSectionTypeDebug:
1203 case eSectionTypeCTF:
1205 return AddressClass::eDebug;
1206
1211 return AddressClass::eRuntime;
1212
1218 case eSectionTypeOther:
1219 return AddressClass::eUnknown;
1220 }
1221 }
1222 }
1223
1224 const SymbolType symbol_type = symbol->GetType();
1225 switch (symbol_type) {
1226 case eSymbolTypeAny:
1227 return AddressClass::eUnknown;
1229 return AddressClass::eUnknown;
1230
1231 case eSymbolTypeCode:
1234 if (m_header.cputype == llvm::MachO::CPU_TYPE_ARM) {
1235 // For ARM we have a bit in the n_desc field of the symbol that tells
1236 // us ARM/Thumb which is bit 0x0008.
1238 return AddressClass::eCodeAlternateISA;
1239 }
1240 return AddressClass::eCode;
1241
1242 case eSymbolTypeData:
1243 return AddressClass::eData;
1244 case eSymbolTypeRuntime:
1245 return AddressClass::eRuntime;
1247 return AddressClass::eRuntime;
1249 return AddressClass::eDebug;
1251 return AddressClass::eDebug;
1253 return AddressClass::eDebug;
1255 return AddressClass::eDebug;
1256 case eSymbolTypeBlock:
1257 return AddressClass::eDebug;
1258 case eSymbolTypeLocal:
1259 return AddressClass::eData;
1260 case eSymbolTypeParam:
1261 return AddressClass::eData;
1263 return AddressClass::eData;
1265 return AddressClass::eDebug;
1267 return AddressClass::eDebug;
1269 return AddressClass::eDebug;
1271 return AddressClass::eDebug;
1273 return AddressClass::eDebug;
1275 return AddressClass::eUnknown;
1277 return AddressClass::eDebug;
1279 return AddressClass::eDebug;
1281 return AddressClass::eUnknown;
1283 return AddressClass::eRuntime;
1285 return AddressClass::eRuntime;
1287 return AddressClass::eRuntime;
1289 return AddressClass::eRuntime;
1290 }
1291 }
1292 return AddressClass::eUnknown;
1293}
1294
1296 if (m_dysymtab.cmd == 0) {
1297 ModuleSP module_sp(GetModule());
1298 if (module_sp) {
1300 for (uint32_t i = 0; i < m_header.ncmds; ++i) {
1301 const lldb::offset_t load_cmd_offset = offset;
1302
1303 llvm::MachO::load_command lc = {};
1304 if (m_data.GetU32(&offset, &lc.cmd, 2) == nullptr)
1305 break;
1306 if (lc.cmd == LC_DYSYMTAB) {
1307 m_dysymtab.cmd = lc.cmd;
1308 m_dysymtab.cmdsize = lc.cmdsize;
1309 if (m_data.GetU32(&offset, &m_dysymtab.ilocalsym,
1310 (sizeof(m_dysymtab) / sizeof(uint32_t)) - 2) ==
1311 nullptr) {
1312 // Clear m_dysymtab if we were unable to read all items from the
1313 // load command
1314 ::memset(&m_dysymtab, 0, sizeof(m_dysymtab));
1315 }
1316 }
1317 offset = load_cmd_offset + lc.cmdsize;
1318 }
1319 }
1320 }
1321 if (m_dysymtab.cmd)
1322 return m_dysymtab.nlocalsym <= 1;
1323 return false;
1324}
1325
1327 EncryptedFileRanges result;
1329
1330 llvm::MachO::encryption_info_command encryption_cmd;
1331 for (uint32_t i = 0; i < m_header.ncmds; ++i) {
1332 const lldb::offset_t load_cmd_offset = offset;
1333 if (m_data.GetU32(&offset, &encryption_cmd, 2) == nullptr)
1334 break;
1335
1336 // LC_ENCRYPTION_INFO and LC_ENCRYPTION_INFO_64 have the same sizes for the
1337 // 3 fields we care about, so treat them the same.
1338 if (encryption_cmd.cmd == LC_ENCRYPTION_INFO ||
1339 encryption_cmd.cmd == LC_ENCRYPTION_INFO_64) {
1340 if (m_data.GetU32(&offset, &encryption_cmd.cryptoff, 3)) {
1341 if (encryption_cmd.cryptid != 0) {
1343 entry.SetRangeBase(encryption_cmd.cryptoff);
1344 entry.SetByteSize(encryption_cmd.cryptsize);
1345 result.Append(entry);
1346 }
1347 }
1348 }
1349 offset = load_cmd_offset + encryption_cmd.cmdsize;
1350 }
1351
1352 return result;
1353}
1354
1356 llvm::MachO::segment_command_64 &seg_cmd, uint32_t cmd_idx) {
1357 if (m_length == 0 || seg_cmd.filesize == 0)
1358 return;
1359
1360 if (IsSharedCacheBinary() && !IsInMemory()) {
1361 // In shared cache images, the load commands are relative to the
1362 // shared cache file, and not the specific image we are
1363 // examining. Let's fix this up so that it looks like a normal
1364 // image.
1365 if (strncmp(seg_cmd.segname, GetSegmentNameTEXT().GetCString(),
1366 sizeof(seg_cmd.segname)) == 0)
1367 m_text_address = seg_cmd.vmaddr;
1368 if (strncmp(seg_cmd.segname, GetSegmentNameLINKEDIT().GetCString(),
1369 sizeof(seg_cmd.segname)) == 0)
1370 m_linkedit_original_offset = seg_cmd.fileoff;
1371
1372 seg_cmd.fileoff = seg_cmd.vmaddr - m_text_address;
1373 }
1374
1375 if (seg_cmd.fileoff > m_length) {
1376 // We have a load command that says it extends past the end of the file.
1377 // This is likely a corrupt file. We don't have any way to return an error
1378 // condition here (this method was likely invoked from something like
1379 // ObjectFile::GetSectionList()), so we just null out the section contents,
1380 // and dump a message to stdout. The most common case here is core file
1381 // debugging with a truncated file.
1382 const char *lc_segment_name =
1383 seg_cmd.cmd == LC_SEGMENT_64 ? "LC_SEGMENT_64" : "LC_SEGMENT";
1384 GetModule()->ReportWarning(
1385 "load command {0} {1} has a fileoff ({2:x16}) that extends beyond "
1386 "the end of the file ({3:x16}), ignoring this section",
1387 cmd_idx, lc_segment_name, seg_cmd.fileoff, m_length);
1388
1389 seg_cmd.fileoff = 0;
1390 seg_cmd.filesize = 0;
1391 }
1392
1393 if (seg_cmd.fileoff + seg_cmd.filesize > m_length) {
1394 // We have a load command that says it extends past the end of the file.
1395 // This is likely a corrupt file. We don't have any way to return an error
1396 // condition here (this method was likely invoked from something like
1397 // ObjectFile::GetSectionList()), so we just null out the section contents,
1398 // and dump a message to stdout. The most common case here is core file
1399 // debugging with a truncated file.
1400 const char *lc_segment_name =
1401 seg_cmd.cmd == LC_SEGMENT_64 ? "LC_SEGMENT_64" : "LC_SEGMENT";
1402 GetModule()->ReportWarning(
1403 "load command {0} {1} has a fileoff + filesize ({2:x16}) that "
1404 "extends beyond the end of the file ({4:x16}), the segment will be "
1405 "truncated to match",
1406 cmd_idx, lc_segment_name, seg_cmd.fileoff + seg_cmd.filesize, m_length);
1407
1408 // Truncate the length
1409 seg_cmd.filesize = m_length - seg_cmd.fileoff;
1410 }
1411}
1412
1413static uint32_t
1414GetSegmentPermissions(const llvm::MachO::segment_command_64 &seg_cmd) {
1415 uint32_t result = 0;
1416 if (seg_cmd.initprot & VM_PROT_READ)
1417 result |= ePermissionsReadable;
1418 if (seg_cmd.initprot & VM_PROT_WRITE)
1419 result |= ePermissionsWritable;
1420 if (seg_cmd.initprot & VM_PROT_EXECUTE)
1421 result |= ePermissionsExecutable;
1422 return result;
1423}
1424
1425static lldb::SectionType GetSectionType(uint32_t flags,
1426 ConstString section_name) {
1427
1428 if (flags & (S_ATTR_PURE_INSTRUCTIONS | S_ATTR_SOME_INSTRUCTIONS))
1429 return eSectionTypeCode;
1430
1431 uint32_t mach_sect_type = flags & SECTION_TYPE;
1432 static ConstString g_sect_name_objc_data("__objc_data");
1433 static ConstString g_sect_name_objc_msgrefs("__objc_msgrefs");
1434 static ConstString g_sect_name_objc_selrefs("__objc_selrefs");
1435 static ConstString g_sect_name_objc_classrefs("__objc_classrefs");
1436 static ConstString g_sect_name_objc_superrefs("__objc_superrefs");
1437 static ConstString g_sect_name_objc_const("__objc_const");
1438 static ConstString g_sect_name_objc_classlist("__objc_classlist");
1439 static ConstString g_sect_name_cfstring("__cfstring");
1440
1441 static ConstString g_sect_name_dwarf_debug_abbrev("__debug_abbrev");
1442 static ConstString g_sect_name_dwarf_debug_abbrev_dwo("__debug_abbrev.dwo");
1443 static ConstString g_sect_name_dwarf_debug_addr("__debug_addr");
1444 static ConstString g_sect_name_dwarf_debug_aranges("__debug_aranges");
1445 static ConstString g_sect_name_dwarf_debug_cu_index("__debug_cu_index");
1446 static ConstString g_sect_name_dwarf_debug_frame("__debug_frame");
1447 static ConstString g_sect_name_dwarf_debug_info("__debug_info");
1448 static ConstString g_sect_name_dwarf_debug_info_dwo("__debug_info.dwo");
1449 static ConstString g_sect_name_dwarf_debug_line("__debug_line");
1450 static ConstString g_sect_name_dwarf_debug_line_dwo("__debug_line.dwo");
1451 static ConstString g_sect_name_dwarf_debug_line_str("__debug_line_str");
1452 static ConstString g_sect_name_dwarf_debug_loc("__debug_loc");
1453 static ConstString g_sect_name_dwarf_debug_loclists("__debug_loclists");
1454 static ConstString g_sect_name_dwarf_debug_loclists_dwo("__debug_loclists.dwo");
1455 static ConstString g_sect_name_dwarf_debug_macinfo("__debug_macinfo");
1456 static ConstString g_sect_name_dwarf_debug_macro("__debug_macro");
1457 static ConstString g_sect_name_dwarf_debug_macro_dwo("__debug_macro.dwo");
1458 static ConstString g_sect_name_dwarf_debug_names("__debug_names");
1459 static ConstString g_sect_name_dwarf_debug_pubnames("__debug_pubnames");
1460 static ConstString g_sect_name_dwarf_debug_pubtypes("__debug_pubtypes");
1461 static ConstString g_sect_name_dwarf_debug_ranges("__debug_ranges");
1462 static ConstString g_sect_name_dwarf_debug_rnglists("__debug_rnglists");
1463 static ConstString g_sect_name_dwarf_debug_str("__debug_str");
1464 static ConstString g_sect_name_dwarf_debug_str_dwo("__debug_str.dwo");
1465 static ConstString g_sect_name_dwarf_debug_str_offs("__debug_str_offs");
1466 static ConstString g_sect_name_dwarf_debug_str_offs_dwo("__debug_str_offs.dwo");
1467 static ConstString g_sect_name_dwarf_debug_tu_index("__debug_tu_index");
1468 static ConstString g_sect_name_dwarf_debug_types("__debug_types");
1469 static ConstString g_sect_name_dwarf_apple_names("__apple_names");
1470 static ConstString g_sect_name_dwarf_apple_types("__apple_types");
1471 static ConstString g_sect_name_dwarf_apple_namespaces("__apple_namespac");
1472 static ConstString g_sect_name_dwarf_apple_objc("__apple_objc");
1473 static ConstString g_sect_name_eh_frame("__eh_frame");
1474 static ConstString g_sect_name_compact_unwind("__unwind_info");
1475 static ConstString g_sect_name_text("__text");
1476 static ConstString g_sect_name_data("__data");
1477 static ConstString g_sect_name_go_symtab("__gosymtab");
1478 static ConstString g_sect_name_ctf("__ctf");
1479 static ConstString g_sect_name_swift_ast("__swift_ast");
1480
1481 if (section_name == g_sect_name_dwarf_debug_abbrev)
1483 if (section_name == g_sect_name_dwarf_debug_abbrev_dwo)
1485 if (section_name == g_sect_name_dwarf_debug_addr)
1487 if (section_name == g_sect_name_dwarf_debug_aranges)
1489 if (section_name == g_sect_name_dwarf_debug_cu_index)
1491 if (section_name == g_sect_name_dwarf_debug_frame)
1493 if (section_name == g_sect_name_dwarf_debug_info)
1495 if (section_name == g_sect_name_dwarf_debug_info_dwo)
1497 if (section_name == g_sect_name_dwarf_debug_line)
1499 if (section_name == g_sect_name_dwarf_debug_line_dwo)
1500 return eSectionTypeDWARFDebugLine; // Same as debug_line.
1501 if (section_name == g_sect_name_dwarf_debug_line_str)
1503 if (section_name == g_sect_name_dwarf_debug_loc)
1505 if (section_name == g_sect_name_dwarf_debug_loclists)
1507 if (section_name == g_sect_name_dwarf_debug_loclists_dwo)
1509 if (section_name == g_sect_name_dwarf_debug_macinfo)
1511 if (section_name == g_sect_name_dwarf_debug_macro)
1513 if (section_name == g_sect_name_dwarf_debug_macro_dwo)
1514 return eSectionTypeDWARFDebugMacInfo; // Same as debug_macro.
1515 if (section_name == g_sect_name_dwarf_debug_names)
1517 if (section_name == g_sect_name_dwarf_debug_pubnames)
1519 if (section_name == g_sect_name_dwarf_debug_pubtypes)
1521 if (section_name == g_sect_name_dwarf_debug_ranges)
1523 if (section_name == g_sect_name_dwarf_debug_rnglists)
1525 if (section_name == g_sect_name_dwarf_debug_str)
1527 if (section_name == g_sect_name_dwarf_debug_str_dwo)
1529 if (section_name == g_sect_name_dwarf_debug_str_offs)
1531 if (section_name == g_sect_name_dwarf_debug_str_offs_dwo)
1533 if (section_name == g_sect_name_dwarf_debug_tu_index)
1535 if (section_name == g_sect_name_dwarf_debug_types)
1537 if (section_name == g_sect_name_dwarf_apple_names)
1539 if (section_name == g_sect_name_dwarf_apple_types)
1541 if (section_name == g_sect_name_dwarf_apple_namespaces)
1543 if (section_name == g_sect_name_dwarf_apple_objc)
1545 if (section_name == g_sect_name_objc_selrefs)
1547 if (section_name == g_sect_name_objc_msgrefs)
1549 if (section_name == g_sect_name_eh_frame)
1550 return eSectionTypeEHFrame;
1551 if (section_name == g_sect_name_compact_unwind)
1553 if (section_name == g_sect_name_cfstring)
1555 if (section_name == g_sect_name_go_symtab)
1556 return eSectionTypeGoSymtab;
1557 if (section_name == g_sect_name_ctf)
1558 return eSectionTypeCTF;
1559 if (section_name == g_sect_name_swift_ast)
1561 if (section_name == g_sect_name_objc_data ||
1562 section_name == g_sect_name_objc_classrefs ||
1563 section_name == g_sect_name_objc_superrefs ||
1564 section_name == g_sect_name_objc_const ||
1565 section_name == g_sect_name_objc_classlist) {
1567 }
1568
1569 switch (mach_sect_type) {
1570 // TODO: categorize sections by other flags for regular sections
1571 case S_REGULAR:
1572 if (section_name == g_sect_name_text)
1573 return eSectionTypeCode;
1574 if (section_name == g_sect_name_data)
1575 return eSectionTypeData;
1576 return eSectionTypeOther;
1577 case S_ZEROFILL:
1578 return eSectionTypeZeroFill;
1579 case S_CSTRING_LITERALS: // section with only literal C strings
1581 case S_4BYTE_LITERALS: // section with only 4 byte literals
1582 return eSectionTypeData4;
1583 case S_8BYTE_LITERALS: // section with only 8 byte literals
1584 return eSectionTypeData8;
1585 case S_LITERAL_POINTERS: // section with only pointers to literals
1587 case S_NON_LAZY_SYMBOL_POINTERS: // section with only non-lazy symbol pointers
1589 case S_LAZY_SYMBOL_POINTERS: // section with only lazy symbol pointers
1591 case S_SYMBOL_STUBS: // section with only symbol stubs, byte size of stub in
1592 // the reserved2 field
1593 return eSectionTypeCode;
1594 case S_MOD_INIT_FUNC_POINTERS: // section with only function pointers for
1595 // initialization
1597 case S_MOD_TERM_FUNC_POINTERS: // section with only function pointers for
1598 // termination
1600 case S_COALESCED:
1601 return eSectionTypeOther;
1602 case S_GB_ZEROFILL:
1603 return eSectionTypeZeroFill;
1604 case S_INTERPOSING: // section with only pairs of function pointers for
1605 // interposing
1606 return eSectionTypeCode;
1607 case S_16BYTE_LITERALS: // section with only 16 byte literals
1608 return eSectionTypeData16;
1609 case S_DTRACE_DOF:
1610 return eSectionTypeDebug;
1611 case S_LAZY_DYLIB_SYMBOL_POINTERS:
1613 default:
1614 return eSectionTypeOther;
1615 }
1616}
1617
1621 uint32_t NextSegmentIdx = 0;
1622 uint32_t NextSectionIdx = 0;
1624
1628};
1629
1631 const llvm::MachO::load_command &load_cmd_, lldb::offset_t offset,
1632 uint32_t cmd_idx, SegmentParsingContext &context) {
1633 llvm::MachO::segment_command_64 load_cmd;
1634 memcpy(&load_cmd, &load_cmd_, sizeof(load_cmd_));
1635
1636 if (!m_data.GetU8(&offset, (uint8_t *)load_cmd.segname, 16))
1637 return;
1638
1639 ModuleSP module_sp = GetModule();
1640 const bool is_core = GetType() == eTypeCoreFile;
1641 const bool is_dsym = (m_header.filetype == MH_DSYM);
1642 bool add_section = true;
1643 bool add_to_unified = true;
1644 ConstString const_segname(
1645 load_cmd.segname, strnlen(load_cmd.segname, sizeof(load_cmd.segname)));
1646
1647 SectionSP unified_section_sp(
1648 context.UnifiedList.FindSectionByName(const_segname));
1649 if (is_dsym && unified_section_sp) {
1650 if (const_segname == GetSegmentNameLINKEDIT()) {
1651 // We need to keep the __LINKEDIT segment private to this object file
1652 // only
1653 add_to_unified = false;
1654 } else {
1655 // This is the dSYM file and this section has already been created by the
1656 // object file, no need to create it.
1657 add_section = false;
1658 }
1659 }
1660 load_cmd.vmaddr = m_data.GetAddress(&offset);
1661 load_cmd.vmsize = m_data.GetAddress(&offset);
1662 load_cmd.fileoff = m_data.GetAddress(&offset);
1663 load_cmd.filesize = m_data.GetAddress(&offset);
1664 if (!m_data.GetU32(&offset, &load_cmd.maxprot, 4))
1665 return;
1666
1667 SanitizeSegmentCommand(load_cmd, cmd_idx);
1668
1669 const uint32_t segment_permissions = GetSegmentPermissions(load_cmd);
1670 const bool segment_is_encrypted =
1671 (load_cmd.flags & SG_PROTECTED_VERSION_1) != 0;
1672
1673 // Use a segment ID of the segment index shifted left by 8 so they never
1674 // conflict with any of the sections.
1675 SectionSP segment_sp;
1676 if (add_section && (const_segname || is_core)) {
1677 segment_sp = std::make_shared<Section>(
1678 module_sp, // Module to which this section belongs
1679 this, // Object file to which this sections belongs
1680 ++context.NextSegmentIdx
1681 << 8, // Section ID is the 1 based segment index
1682 // shifted right by 8 bits as not to collide with any of the 256
1683 // section IDs that are possible
1684 const_segname, // Name of this section
1685 eSectionTypeContainer, // This section is a container of other
1686 // sections.
1687 load_cmd.vmaddr, // File VM address == addresses as they are
1688 // found in the object file
1689 load_cmd.vmsize, // VM size in bytes of this section
1690 load_cmd.fileoff, // Offset to the data for this section in
1691 // the file
1692 load_cmd.filesize, // Size in bytes of this section as found
1693 // in the file
1694 0, // Segments have no alignment information
1695 load_cmd.flags); // Flags for this section
1696
1697 segment_sp->SetIsEncrypted(segment_is_encrypted);
1698 m_sections_up->AddSection(segment_sp);
1699 segment_sp->SetPermissions(segment_permissions);
1700 if (add_to_unified)
1701 context.UnifiedList.AddSection(segment_sp);
1702 } else if (unified_section_sp) {
1703 // If this is a dSYM and the file addresses in the dSYM differ from the
1704 // file addresses in the ObjectFile, we must use the file base address for
1705 // the Section from the dSYM for the DWARF to resolve correctly.
1706 // This only happens with binaries in the shared cache in practice;
1707 // normally a mismatch like this would give a binary & dSYM that do not
1708 // match UUIDs. When a binary is included in the shared cache, its
1709 // segments are rearranged to optimize the shared cache, so its file
1710 // addresses will differ from what the ObjectFile had originally,
1711 // and what the dSYM has.
1712 if (is_dsym && unified_section_sp->GetFileAddress() != load_cmd.vmaddr) {
1713 Log *log = GetLog(LLDBLog::Symbols);
1714 if (log) {
1715 log->Printf(
1716 "Installing dSYM's %s segment file address over ObjectFile's "
1717 "so symbol table/debug info resolves correctly for %s",
1718 const_segname.AsCString(),
1719 module_sp->GetFileSpec().GetFilename().AsCString());
1720 }
1721
1722 // Make sure we've parsed the symbol table from the ObjectFile before
1723 // we go around changing its Sections.
1724 module_sp->GetObjectFile()->GetSymtab();
1725 // eh_frame would present the same problems but we parse that on a per-
1726 // function basis as-needed so it's more difficult to remove its use of
1727 // the Sections. Realistically, the environments where this code path
1728 // will be taken will not have eh_frame sections.
1729
1730 unified_section_sp->SetFileAddress(load_cmd.vmaddr);
1731
1732 // Notify the module that the section addresses have been changed once
1733 // we're done so any file-address caches can be updated.
1734 context.FileAddressesChanged = true;
1735 }
1736 m_sections_up->AddSection(unified_section_sp);
1737 }
1738
1739 llvm::MachO::section_64 sect64;
1740 ::memset(&sect64, 0, sizeof(sect64));
1741 // Push a section into our mach sections for the section at index zero
1742 // (NO_SECT) if we don't have any mach sections yet...
1743 if (m_mach_sections.empty())
1744 m_mach_sections.push_back(sect64);
1745 uint32_t segment_sect_idx;
1746 const lldb::user_id_t first_segment_sectID = context.NextSectionIdx + 1;
1747
1748 const uint32_t num_u32s = load_cmd.cmd == LC_SEGMENT ? 7 : 8;
1749 for (segment_sect_idx = 0; segment_sect_idx < load_cmd.nsects;
1750 ++segment_sect_idx) {
1751 if (m_data.GetU8(&offset, (uint8_t *)sect64.sectname,
1752 sizeof(sect64.sectname)) == nullptr)
1753 break;
1754 if (m_data.GetU8(&offset, (uint8_t *)sect64.segname,
1755 sizeof(sect64.segname)) == nullptr)
1756 break;
1757 sect64.addr = m_data.GetAddress(&offset);
1758 sect64.size = m_data.GetAddress(&offset);
1759
1760 if (m_data.GetU32(&offset, &sect64.offset, num_u32s) == nullptr)
1761 break;
1762
1763 if (IsSharedCacheBinary() && !IsInMemory()) {
1764 sect64.offset = sect64.addr - m_text_address;
1765 }
1766
1767 // Keep a list of mach sections around in case we need to get at data that
1768 // isn't stored in the abstracted Sections.
1769 m_mach_sections.push_back(sect64);
1770
1771 if (add_section) {
1772 ConstString section_name(
1773 sect64.sectname, strnlen(sect64.sectname, sizeof(sect64.sectname)));
1774 if (!const_segname) {
1775 // We have a segment with no name so we need to conjure up segments
1776 // that correspond to the section's segname if there isn't already such
1777 // a section. If there is such a section, we resize the section so that
1778 // it spans all sections. We also mark these sections as fake so
1779 // address matches don't hit if they land in the gaps between the child
1780 // sections.
1781 const_segname.SetTrimmedCStringWithLength(sect64.segname,
1782 sizeof(sect64.segname));
1783 segment_sp = context.UnifiedList.FindSectionByName(const_segname);
1784 if (segment_sp.get()) {
1785 Section *segment = segment_sp.get();
1786 // Grow the section size as needed.
1787 const lldb::addr_t sect64_min_addr = sect64.addr;
1788 const lldb::addr_t sect64_max_addr = sect64_min_addr + sect64.size;
1789 const lldb::addr_t curr_seg_byte_size = segment->GetByteSize();
1790 const lldb::addr_t curr_seg_min_addr = segment->GetFileAddress();
1791 const lldb::addr_t curr_seg_max_addr =
1792 curr_seg_min_addr + curr_seg_byte_size;
1793 if (sect64_min_addr >= curr_seg_min_addr) {
1794 const lldb::addr_t new_seg_byte_size =
1795 sect64_max_addr - curr_seg_min_addr;
1796 // Only grow the section size if needed
1797 if (new_seg_byte_size > curr_seg_byte_size)
1798 segment->SetByteSize(new_seg_byte_size);
1799 } else {
1800 // We need to change the base address of the segment and adjust the
1801 // child section offsets for all existing children.
1802 const lldb::addr_t slide_amount =
1803 sect64_min_addr - curr_seg_min_addr;
1804 segment->Slide(slide_amount, false);
1805 segment->GetChildren().Slide(-slide_amount, false);
1806 segment->SetByteSize(curr_seg_max_addr - sect64_min_addr);
1807 }
1808
1809 // Grow the section size as needed.
1810 if (sect64.offset) {
1811 const lldb::addr_t segment_min_file_offset =
1812 segment->GetFileOffset();
1813 const lldb::addr_t segment_max_file_offset =
1814 segment_min_file_offset + segment->GetFileSize();
1815
1816 const lldb::addr_t section_min_file_offset = sect64.offset;
1817 const lldb::addr_t section_max_file_offset =
1818 section_min_file_offset + sect64.size;
1819 const lldb::addr_t new_file_offset =
1820 std::min(section_min_file_offset, segment_min_file_offset);
1821 const lldb::addr_t new_file_size =
1822 std::max(section_max_file_offset, segment_max_file_offset) -
1823 new_file_offset;
1824 segment->SetFileOffset(new_file_offset);
1825 segment->SetFileSize(new_file_size);
1826 }
1827 } else {
1828 // Create a fake section for the section's named segment
1829 segment_sp = std::make_shared<Section>(
1830 segment_sp, // Parent section
1831 module_sp, // Module to which this section belongs
1832 this, // Object file to which this section belongs
1833 ++context.NextSegmentIdx
1834 << 8, // Section ID is the 1 based segment index
1835 // shifted right by 8 bits as not to
1836 // collide with any of the 256 section IDs
1837 // that are possible
1838 const_segname, // Name of this section
1839 eSectionTypeContainer, // This section is a container of
1840 // other sections.
1841 sect64.addr, // File VM address == addresses as they are
1842 // found in the object file
1843 sect64.size, // VM size in bytes of this section
1844 sect64.offset, // Offset to the data for this section in
1845 // the file
1846 sect64.offset ? sect64.size : 0, // Size in bytes of
1847 // this section as
1848 // found in the file
1849 sect64.align,
1850 load_cmd.flags); // Flags for this section
1851 segment_sp->SetIsFake(true);
1852 segment_sp->SetPermissions(segment_permissions);
1853 m_sections_up->AddSection(segment_sp);
1854 if (add_to_unified)
1855 context.UnifiedList.AddSection(segment_sp);
1856 segment_sp->SetIsEncrypted(segment_is_encrypted);
1857 }
1858 }
1859 assert(segment_sp.get());
1860
1861 lldb::SectionType sect_type = GetSectionType(sect64.flags, section_name);
1862
1863 SectionSP section_sp(new Section(
1864 segment_sp, module_sp, this, ++context.NextSectionIdx, section_name,
1865 sect_type, sect64.addr - segment_sp->GetFileAddress(), sect64.size,
1866 sect64.offset, sect64.offset == 0 ? 0 : sect64.size, sect64.align,
1867 sect64.flags));
1868 // Set the section to be encrypted to match the segment
1869
1870 bool section_is_encrypted = false;
1871 if (!segment_is_encrypted && load_cmd.filesize != 0)
1872 section_is_encrypted = context.EncryptedRanges.FindEntryThatContains(
1873 sect64.offset) != nullptr;
1874
1875 section_sp->SetIsEncrypted(segment_is_encrypted || section_is_encrypted);
1876 section_sp->SetPermissions(segment_permissions);
1877 segment_sp->GetChildren().AddSection(section_sp);
1878
1879 if (segment_sp->IsFake()) {
1880 segment_sp.reset();
1881 const_segname.Clear();
1882 }
1883 }
1884 }
1885 if (segment_sp && is_dsym) {
1886 if (first_segment_sectID <= context.NextSectionIdx) {
1887 lldb::user_id_t sect_uid;
1888 for (sect_uid = first_segment_sectID; sect_uid <= context.NextSectionIdx;
1889 ++sect_uid) {
1890 SectionSP curr_section_sp(
1891 segment_sp->GetChildren().FindSectionByID(sect_uid));
1892 SectionSP next_section_sp;
1893 if (sect_uid + 1 <= context.NextSectionIdx)
1894 next_section_sp =
1895 segment_sp->GetChildren().FindSectionByID(sect_uid + 1);
1896
1897 if (curr_section_sp.get()) {
1898 if (curr_section_sp->GetByteSize() == 0) {
1899 if (next_section_sp.get() != nullptr)
1900 curr_section_sp->SetByteSize(next_section_sp->GetFileAddress() -
1901 curr_section_sp->GetFileAddress());
1902 else
1903 curr_section_sp->SetByteSize(load_cmd.vmsize);
1904 }
1905 }
1906 }
1907 }
1908 }
1909}
1910
1912 const llvm::MachO::load_command &load_cmd, lldb::offset_t offset) {
1913 m_dysymtab.cmd = load_cmd.cmd;
1914 m_dysymtab.cmdsize = load_cmd.cmdsize;
1915 m_data.GetU32(&offset, &m_dysymtab.ilocalsym,
1916 (sizeof(m_dysymtab) / sizeof(uint32_t)) - 2);
1917}
1918
1920 if (m_sections_up)
1921 return;
1922
1923 m_sections_up = std::make_unique<SectionList>();
1924
1926 // bool dump_sections = false;
1927 ModuleSP module_sp(GetModule());
1928
1929 offset = MachHeaderSizeFromMagic(m_header.magic);
1930
1931 SegmentParsingContext context(GetEncryptedFileRanges(), unified_section_list);
1932 llvm::MachO::load_command load_cmd;
1933 for (uint32_t i = 0; i < m_header.ncmds; ++i) {
1934 const lldb::offset_t load_cmd_offset = offset;
1935 if (m_data.GetU32(&offset, &load_cmd, 2) == nullptr)
1936 break;
1937
1938 if (load_cmd.cmd == LC_SEGMENT || load_cmd.cmd == LC_SEGMENT_64)
1939 ProcessSegmentCommand(load_cmd, offset, i, context);
1940 else if (load_cmd.cmd == LC_DYSYMTAB)
1941 ProcessDysymtabCommand(load_cmd, offset);
1942
1943 offset = load_cmd_offset + load_cmd.cmdsize;
1944 }
1945
1946 if (context.FileAddressesChanged && module_sp)
1947 module_sp->SectionFileAddressesChanged();
1948}
1949
1951public:
1953 : m_section_list(section_list), m_section_infos() {
1954 // Get the number of sections down to a depth of 1 to include all segments
1955 // and their sections, but no other sections that may be added for debug
1956 // map or
1957 m_section_infos.resize(section_list->GetNumSections(1));
1958 }
1959
1960 SectionSP GetSection(uint8_t n_sect, addr_t file_addr) {
1961 if (n_sect == 0)
1962 return SectionSP();
1963 if (n_sect < m_section_infos.size()) {
1964 if (!m_section_infos[n_sect].section_sp) {
1965 SectionSP section_sp(m_section_list->FindSectionByID(n_sect));
1966 m_section_infos[n_sect].section_sp = section_sp;
1967 if (section_sp) {
1968 m_section_infos[n_sect].vm_range.SetBaseAddress(
1969 section_sp->GetFileAddress());
1970 m_section_infos[n_sect].vm_range.SetByteSize(
1971 section_sp->GetByteSize());
1972 } else {
1973 std::string filename = "<unknown>";
1974 SectionSP first_section_sp(m_section_list->GetSectionAtIndex(0));
1975 if (first_section_sp)
1976 filename = first_section_sp->GetObjectFile()->GetFileSpec().GetPath();
1977
1979 llvm::formatv("unable to find section {0} for a symbol in "
1980 "{1}, corrupt file?",
1981 n_sect, filename));
1982 }
1983 }
1984 if (m_section_infos[n_sect].vm_range.Contains(file_addr)) {
1985 // Symbol is in section.
1986 return m_section_infos[n_sect].section_sp;
1987 } else if (m_section_infos[n_sect].vm_range.GetByteSize() == 0 &&
1988 m_section_infos[n_sect].vm_range.GetBaseAddress() ==
1989 file_addr) {
1990 // Symbol is in section with zero size, but has the same start address
1991 // as the section. This can happen with linker symbols (symbols that
1992 // start with the letter 'l' or 'L'.
1993 return m_section_infos[n_sect].section_sp;
1994 }
1995 }
1997 }
1998
1999protected:
2002
2005 };
2007 std::vector<SectionInfo> m_section_infos;
2008};
2009
2010#define TRIE_SYMBOL_IS_THUMB (1ULL << 63)
2012 void Dump() const {
2013 printf("0x%16.16llx 0x%16.16llx 0x%16.16llx \"%s\"",
2014 static_cast<unsigned long long>(address),
2015 static_cast<unsigned long long>(flags),
2016 static_cast<unsigned long long>(other), name.GetCString());
2017 if (import_name)
2018 printf(" -> \"%s\"\n", import_name.GetCString());
2019 else
2020 printf("\n");
2021 }
2024 uint64_t flags =
2025 0; // EXPORT_SYMBOL_FLAGS_REEXPORT, EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER,
2026 // TRIE_SYMBOL_IS_THUMB
2027 uint64_t other = 0;
2029};
2030
2034
2036
2037 void Dump(uint32_t idx) const {
2038 printf("[%3u] 0x%16.16llx: ", idx,
2039 static_cast<unsigned long long>(nodeOffset));
2040 entry.Dump();
2041 }
2042
2043 bool operator<(const TrieEntryWithOffset &other) const {
2044 return (nodeOffset < other.nodeOffset);
2045 }
2046};
2047
// Recursively walk the export trie held in `data`, starting at the node at
// `offset`, and collect the entries found at terminal nodes.
//
// NOTE(review): the first line of this function's signature (return type,
// name and leading parameters) is not present in this listing. The recursive
// call near the end shows the function is invoked as
// ParseTrieEntries(data, offset, is_arm, text_seg_base_addr, ...).
//
// Parameters visible here:
//   is_arm             - when true, resolver addresses are masked with
//                        THUMB_ADDRESS_BIT_MASK before being recorded.
//   text_seg_base_addr - added to every address read from the trie unless it
//                        equals LLDB_INVALID_ADDRESS.
//   nameSlices         - stack of edge labels from the root to the current
//                        node; they are concatenated to form a symbol name.
//   resolver_addresses - receives the rebased (and, on ARM, masked) second
//                        value read for each
//                        EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER entry.
//   reexports          - receives re-export entries that carry a non-empty
//                        import name.
//   ext_symbols        - receives entries whose flags are exactly zero.
//
// Returns false when a child edge label cannot be read or a recursive call
// fails. Returns true otherwise, including when `offset` is not a valid
// offset in `data`.
2049 const bool is_arm, addr_t text_seg_base_addr,
2050 std::vector<llvm::StringRef> &nameSlices,
2051 std::set<lldb::addr_t> &resolver_addresses,
2052 std::vector<TrieEntryWithOffset> &reexports,
2053 std::vector<TrieEntryWithOffset> &ext_symbols) {
// An out-of-range node offset is treated as "nothing to parse", not an error.
2054 if (!data.ValidOffset(offset))
2055 return true;
2056
2057 // Terminal node -- end of a branch, possibly add this to
2058 // the symbol table or resolver table.
// The node begins with a ULEB128 payload size; the child table starts right
// after that payload, so its offset is computed before the payload is read.
2059 const uint64_t terminalSize = data.GetULEB128(&offset);
2060 lldb::offset_t children_offset = offset + terminalSize;
2061 if (terminalSize != 0) {
// The entry is tagged with the offset just past the size field.
2062 TrieEntryWithOffset e(offset);
2063 e.entry.flags = data.GetULEB128(&offset);
2064 const char *import_name = nullptr;
2065 if (e.entry.flags & EXPORT_SYMBOL_FLAGS_REEXPORT) {
// Re-export payload: dylib ordinal followed by the imported symbol's name.
2066 e.entry.address = 0;
2067 e.entry.other = data.GetULEB128(&offset); // dylib ordinal
2068 import_name = data.GetCStr(&offset);
2069 } else {
// Regular payload: an address, rebased onto text_seg_base_addr when known.
2070 e.entry.address = data.GetULEB128(&offset);
2071 if (text_seg_base_addr != LLDB_INVALID_ADDRESS)
2072 e.entry.address += text_seg_base_addr;
2073 if (e.entry.flags & EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER) {
// Stub-and-resolver payload has a second ULEB128 value. It is stored raw in
// `other`, and its rebased/masked form is recorded in resolver_addresses.
2074 e.entry.other = data.GetULEB128(&offset);
2075 uint64_t resolver_addr = e.entry.other;
2076 if (text_seg_base_addr != LLDB_INVALID_ADDRESS)
2077 resolver_addr += text_seg_base_addr;
2078 if (is_arm)
2079 resolver_addr &= THUMB_ADDRESS_BIT_MASK;
2080 resolver_addresses.insert(resolver_addr);
2081 } else
2082 e.entry.other = 0;
2083 }
// Only two kinds of terminal entries are kept: re-exports with a non-empty
// import name, and entries whose flags are exactly 0. Any other flag
// combination (including stub-and-resolver) is dropped here.
2084 bool add_this_entry = false;
2085 if (Flags(e.entry.flags).Test(EXPORT_SYMBOL_FLAGS_REEXPORT) &&
2086 import_name && import_name[0]) {
2087 // add symbols that are reexport symbols with a valid import name.
2088 add_this_entry = true;
2089 } else if (e.entry.flags == 0 &&
2090 (import_name == nullptr || import_name[0] == '\0')) {
2091 // add externally visible symbols, in case the nlist record has
2092 // been stripped/omitted.
2093 add_this_entry = true;
2094 }
2095 if (add_this_entry) {
// The symbol name is the concatenation of all edge labels on the path from
// the root to this node.
2096 std::string name;
2097 if (!nameSlices.empty()) {
2098 for (auto name_slice : nameSlices)
2099 name.append(name_slice.data(), name_slice.size());
2100 }
// Names of length 0 or 1 leave e.entry.name unset.
2101 if (name.size() > 1) {
2102 // Skip the leading '_'
2103 e.entry.name.SetCStringWithLength(name.c_str() + 1, name.size() - 1);
2104 }
2105 if (import_name) {
2106 // Skip the leading '_'
2107 e.entry.import_name.SetCString(import_name + 1);
2108 }
2109 if (Flags(e.entry.flags).Test(EXPORT_SYMBOL_FLAGS_REEXPORT)) {
2110 reexports.push_back(e);
2111 } else {
// NOTE(review): the body of this branch is empty in this listing (numbering
// jumps from 2112 to 2115); whatever handles an ARM address with bit 0 set
// is missing here -- confirm against the original source.
2112 if (is_arm && (e.entry.address & 1)) {
2115 }
2116 ext_symbols.push_back(e);
2117 }
2118 }
2119 }
2120
// Child table: a one-byte count, then for each child a NUL-terminated edge
// label followed by the ULEB128 offset of the child node.
2121 const uint8_t childrenCount = data.GetU8(&children_offset);
2122 for (uint8_t i = 0; i < childrenCount; ++i) {
2123 const char *cstr = data.GetCStr(&children_offset);
2124 if (cstr)
2125 nameSlices.push_back(llvm::StringRef(cstr));
2126 else
2127 return false; // Corrupt data
2128 lldb::offset_t childNodeOffset = data.GetULEB128(&children_offset);
// A child offset of 0 is not followed.
2129 if (childNodeOffset) {
2130 if (!ParseTrieEntries(data, childNodeOffset, is_arm, text_seg_base_addr,
2131 nameSlices, resolver_addresses, reexports,
2132 ext_symbols)) {
2133 return false;
2134 }
2135 }
// Drop this child's label before moving on to its sibling.
2136 nameSlices.pop_back();
2137 }
2138 return true;
2139}
2140
// Classify a symbol from the section that contains it.
//
// symbol_name              - in/out. When the symbol lives in an "__objc*"
//                            data section and its name starts with
//                            "OBJC_CLASS_$_", "OBJC_METACLASS_$_" or
//                            "OBJC_IVAR_$_", the pointer is advanced past
//                            that prefix.
// demangled_is_synthesized - set to true whenever one of those prefixes is
//                            stripped; otherwise left untouched.
// text_section_sp, data_section_sp, data_dirty_section_sp,
// data_const_section_sp    - the segments `symbol_section` is tested against
//                            with IsDescendant().
// symbol_section           - the symbol's section. It is dereferenced without
//                            a null check, so callers must pass a valid
//                            section.
//
// Returns the local `type`.
// NOTE(review): the declaration and initial value of `type` are not present
// in this listing (numbering skips 2148), so the value returned when no
// branch matches cannot be stated from here -- confirm against the original
// source.
2141static SymbolType GetSymbolType(const char *&symbol_name,
2142 bool &demangled_is_synthesized,
2143 const SectionSP &text_section_sp,
2144 const SectionSP &data_section_sp,
2145 const SectionSP &data_dirty_section_sp,
2146 const SectionSP &data_const_section_sp,
2147 const SectionSP &symbol_section) {
2149
2150 const char *symbol_sect_name = symbol_section->GetName().AsCString();
2151 if (symbol_section->IsDescendant(text_section_sp.get())) {
// Inside the text segment: a section for which IsClear() reports none of the
// instruction attributes holds data, anything else is code.
2152 if (symbol_section->IsClear(S_ATTR_PURE_INSTRUCTIONS |
2153 S_ATTR_SELF_MODIFYING_CODE |
2154 S_ATTR_SOME_INSTRUCTIONS))
2155 type = eSymbolTypeData;
2156 else
2157 type = eSymbolTypeCode;
2158 } else if (symbol_section->IsDescendant(data_section_sp.get()) ||
2159 symbol_section->IsDescendant(data_dirty_section_sp.get()) ||
2160 symbol_section->IsDescendant(data_const_section_sp.get())) {
// `strstr(s, p) == s` is a prefix test: it holds only when `p` is found at
// the very start of `s`.
2161 if (symbol_sect_name &&
2162 ::strstr(symbol_sect_name, "__objc") == symbol_sect_name) {
// Objective-C metadata section: default to a runtime symbol, then refine the
// type from the symbol name's prefix.
2163 type = eSymbolTypeRuntime;
2164
2165 if (symbol_name) {
2166 llvm::StringRef symbol_name_ref(symbol_name);
2167 if (symbol_name_ref.startswith("OBJC_")) {
2168 static const llvm::StringRef g_objc_v2_prefix_class("OBJC_CLASS_$_");
2169 static const llvm::StringRef g_objc_v2_prefix_metaclass(
2170 "OBJC_METACLASS_$_");
2171 static const llvm::StringRef g_objc_v2_prefix_ivar("OBJC_IVAR_$_");
2172 if (symbol_name_ref.startswith(g_objc_v2_prefix_class)) {
2173 symbol_name = symbol_name + g_objc_v2_prefix_class.size();
2174 type = eSymbolTypeObjCClass;
2175 demangled_is_synthesized = true;
2176 } else if (symbol_name_ref.startswith(g_objc_v2_prefix_metaclass)) {
// NOTE(review): unlike the class and ivar branches, no `type` assignment is
// visible in this branch (numbering skips 2178) -- confirm against the
// original source.
2177 symbol_name = symbol_name + g_objc_v2_prefix_metaclass.size();
2179 demangled_is_synthesized = true;
2180 } else if (symbol_name_ref.startswith(g_objc_v2_prefix_ivar)) {
2181 symbol_name = symbol_name + g_objc_v2_prefix_ivar.size();
2182 type = eSymbolTypeObjCIVar;
2183 demangled_is_synthesized = true;
2184 }
2185 }
2186 }
2187 } else if (symbol_sect_name &&
2188 ::strstr(symbol_sect_name, "__gcc_except_tab") ==
2189 symbol_sect_name) {
2190 type = eSymbolTypeException;
2191 } else {
2192 type = eSymbolTypeData;
2193 }
2194 } else if (symbol_sect_name &&
2195 ::strstr(symbol_sect_name, "__IMPORT") == symbol_sect_name) {
// Outside the text/data segments, a section whose name starts with
// "__IMPORT" marks the symbol as a trampoline.
2196 type = eSymbolTypeTrampoline;
2197 }
2198 return type;
2199}
2200
2201static std::optional<struct nlist_64>
2202ParseNList(DataExtractor &nlist_data, lldb::offset_t &nlist_data_offset,
2203 size_t nlist_byte_size) {
2204 struct nlist_64 nlist;
2205 if (!nlist_data.ValidOffsetForDataOfSize(nlist_data_offset, nlist_byte_size))
2206 return {};
2207 nlist.n_strx = nlist_data.GetU32_unchecked(&nlist_data_offset);
2208 nlist.n_type = nlist_data.GetU8_unchecked(&nlist_data_offset);
2209 nlist.n_sect = nlist_data.GetU8_unchecked(&nlist_data_offset);
2210 nlist.n_desc = nlist_data.GetU16_unchecked(&nlist_data_offset);
2211 nlist.n_value = nlist_data.GetAddress_unchecked(&nlist_data_offset);
2212 return nlist;
2213}
2214
// Named stand-ins for a true/false "debug symbols" choice, so a call site can
// spell the intent instead of passing a bare boolean.
// NOTE(review): no use of these enumerators is visible in this part of the
// file -- confirm where they are consumed.
2215enum { DebugSymbols = true, NonDebugSymbols = false };
2216
2218 ModuleSP module_sp(GetModule());
2219 if (!module_sp)
2220 return;
2221
2222 Log *log = GetLog(LLDBLog::Symbols);
2223
2224 const FileSpec &file = m_file ? m_file : module_sp->GetFileSpec();
2225 const char *file_name = file.GetFilename().AsCString("<Unknown>");
2226 LLDB_SCOPED_TIMERF("ObjectFileMachO::ParseSymtab () module = %s", file_name);
2227 LLDB_LOG(log, "Parsing symbol table for {0}", file_name);
2228 Progress progress(llvm::formatv("Parsing symbol table for {0}", file_name));
2229
2230 llvm::MachO::symtab_command symtab_load_command = {0, 0, 0, 0, 0, 0};
2231 llvm::MachO::linkedit_data_command function_starts_load_command = {0, 0, 0, 0};
2232 llvm::MachO::linkedit_data_command exports_trie_load_command = {0, 0, 0, 0};
2233 llvm::MachO::dyld_info_command dyld_info = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2234 llvm::MachO::dysymtab_command dysymtab = m_dysymtab;
2235 // The data element of type bool indicates that this entry is thumb
2236 // code.
2237 typedef AddressDataArray<lldb::addr_t, bool, 100> FunctionStarts;
2238
2239 // Record the address of every function/data that we add to the symtab.
2240 // We add symbols to the table in the order of most information (nlist
2241 // records) to least (function starts), and avoid duplicating symbols
2242 // via this set.
2243 llvm::DenseSet<addr_t> symbols_added;
2244
2245 // We are using a llvm::DenseSet for "symbols_added" so we must be sure we
2246 // do not add the tombstone or empty keys to the set.
2247 auto add_symbol_addr = [&symbols_added](lldb::addr_t file_addr) {
2248 // Don't add the tombstone or empty keys.
2249 if (file_addr == UINT64_MAX || file_addr == UINT64_MAX - 1)
2250 return;
2251 symbols_added.insert(file_addr);
2252 };
2253 FunctionStarts function_starts;
2255 uint32_t i;
2256 FileSpecList dylib_files;
2257 llvm::StringRef g_objc_v2_prefix_class("_OBJC_CLASS_$_");
2258 llvm::StringRef g_objc_v2_prefix_metaclass("_OBJC_METACLASS_$_");
2259 llvm::StringRef g_objc_v2_prefix_ivar("_OBJC_IVAR_$_");
2260 UUID image_uuid;
2261
2262 for (i = 0; i < m_header.ncmds; ++i) {
2263 const lldb::offset_t cmd_offset = offset;
2264 // Read in the load command and load command size
2265 llvm::MachO::load_command lc;
2266 if (m_data.GetU32(&offset, &lc, 2) == nullptr)
2267 break;
2268 // Watch for the symbol table load command
2269 switch (lc.cmd) {
2270 case LC_SYMTAB:
2271 symtab_load_command.cmd = lc.cmd;
2272 symtab_load_command.cmdsize = lc.cmdsize;
2273 // Read in the rest of the symtab load command
2274 if (m_data.GetU32(&offset, &symtab_load_command.symoff, 4) ==
2275 nullptr) // fill in symoff, nsyms, stroff, strsize fields
2276 return;
2277 break;
2278
2279 case LC_DYLD_INFO:
2280 case LC_DYLD_INFO_ONLY:
2281 if (m_data.GetU32(&offset, &dyld_info.rebase_off, 10)) {
2282 dyld_info.cmd = lc.cmd;
2283 dyld_info.cmdsize = lc.cmdsize;
2284 } else {
2285 memset(&dyld_info, 0, sizeof(dyld_info));
2286 }
2287 break;
2288
2289 case LC_LOAD_DYLIB:
2290 case LC_LOAD_WEAK_DYLIB:
2291 case LC_REEXPORT_DYLIB:
2292 case LC_LOADFVMLIB:
2293 case LC_LOAD_UPWARD_DYLIB: {
2294 uint32_t name_offset = cmd_offset + m_data.GetU32(&offset);
2295 const char *path = m_data.PeekCStr(name_offset);
2296 if (path) {
2297 FileSpec file_spec(path);
2298 // Strip the path if there is @rpath, @executable, etc so we just use
2299 // the basename
2300 if (path[0] == '@')
2301 file_spec.ClearDirectory();
2302
2303 if (lc.cmd == LC_REEXPORT_DYLIB) {
2305 }
2306
2307 dylib_files.Append(file_spec);
2308 }
2309 } break;
2310
2311 case LC_DYLD_EXPORTS_TRIE:
2312 exports_trie_load_command.cmd = lc.cmd;
2313 exports_trie_load_command.cmdsize = lc.cmdsize;
2314 if (m_data.GetU32(&offset, &exports_trie_load_command.dataoff, 2) ==
2315 nullptr) // fill in offset and size fields
2316 memset(&exports_trie_load_command, 0,
2317 sizeof(exports_trie_load_command));
2318 break;
2319 case LC_FUNCTION_STARTS:
2320 function_starts_load_command.cmd = lc.cmd;
2321 function_starts_load_command.cmdsize = lc.cmdsize;
2322 if (m_data.GetU32(&offset, &function_starts_load_command.dataoff, 2) ==
2323 nullptr) // fill in data offset and size fields
2324 memset(&function_starts_load_command, 0,
2325 sizeof(function_starts_load_command));
2326 break;
2327
2328 case LC_UUID: {
2329 const uint8_t *uuid_bytes = m_data.PeekData(offset, 16);
2330
2331 if (uuid_bytes)
2332 image_uuid = UUID(uuid_bytes, 16);
2333 break;
2334 }
2335
2336 default:
2337 break;
2338 }
2339 offset = cmd_offset + lc.cmdsize;
2340 }
2341
2342 if (!symtab_load_command.cmd)
2343 return;
2344
2345 SectionList *section_list = GetSectionList();
2346 if (section_list == nullptr)
2347 return;
2348
2349 const uint32_t addr_byte_size = m_data.GetAddressByteSize();
2350 const ByteOrder byte_order = m_data.GetByteOrder();
2351 bool bit_width_32 = addr_byte_size == 4;
2352 const size_t nlist_byte_size =
2353 bit_width_32 ? sizeof(struct nlist) : sizeof(struct nlist_64);
2354
2355 DataExtractor nlist_data(nullptr, 0, byte_order, addr_byte_size);
2356 DataExtractor strtab_data(nullptr, 0, byte_order, addr_byte_size);
2357 DataExtractor function_starts_data(nullptr, 0, byte_order, addr_byte_size);
2358 DataExtractor indirect_symbol_index_data(nullptr, 0, byte_order,
2359 addr_byte_size);
2360 DataExtractor dyld_trie_data(nullptr, 0, byte_order, addr_byte_size);
2361
2362 const addr_t nlist_data_byte_size =
2363 symtab_load_command.nsyms * nlist_byte_size;
2364 const addr_t strtab_data_byte_size = symtab_load_command.strsize;
2365 addr_t strtab_addr = LLDB_INVALID_ADDRESS;
2366
2367 ProcessSP process_sp(m_process_wp.lock());
2368 Process *process = process_sp.get();
2369
2370 uint32_t memory_module_load_level = eMemoryModuleLoadLevelComplete;
2371 bool is_shared_cache_image = IsSharedCacheBinary();
2372 bool is_local_shared_cache_image = is_shared_cache_image && !IsInMemory();
2373 SectionSP linkedit_section_sp(
2374 section_list->FindSectionByName(GetSegmentNameLINKEDIT()));
2375
2376 if (process && m_header.filetype != llvm::MachO::MH_OBJECT &&
2377 !is_local_shared_cache_image) {
2378 Target &target = process->GetTarget();
2379
2380 memory_module_load_level = target.GetMemoryModuleLoadLevel();
2381
2382 // Reading mach file from memory in a process or core file...
2383
2384 if (linkedit_section_sp) {
2385 addr_t linkedit_load_addr =
2386 linkedit_section_sp->GetLoadBaseAddress(&target);
2387 if (linkedit_load_addr == LLDB_INVALID_ADDRESS) {
2388 // We might be trying to access the symbol table before the
2389 // __LINKEDIT's load address has been set in the target. We can't
2390 // fail to read the symbol table, so calculate the right address
2391 // manually
2392 linkedit_load_addr = CalculateSectionLoadAddressForMemoryImage(
2393 m_memory_addr, GetMachHeaderSection(), linkedit_section_sp.get());
2394 }
2395
2396 const addr_t linkedit_file_offset = linkedit_section_sp->GetFileOffset();
2397 const addr_t symoff_addr = linkedit_load_addr +
2398 symtab_load_command.symoff -
2399 linkedit_file_offset;
2400 strtab_addr = linkedit_load_addr + symtab_load_command.stroff -
2401 linkedit_file_offset;
2402
2403 // Always load dyld - the dynamic linker - from memory if we didn't
2404 // find a binary anywhere else. lldb will not register
2405 // dylib/framework/bundle loads/unloads if we don't have the dyld
2406 // symbols, we force dyld to load from memory despite the user's
2407 // target.memory-module-load-level setting.
2408 if (memory_module_load_level == eMemoryModuleLoadLevelComplete ||
2409 m_header.filetype == llvm::MachO::MH_DYLINKER) {
2410 DataBufferSP nlist_data_sp(
2411 ReadMemory(process_sp, symoff_addr, nlist_data_byte_size));
2412 if (nlist_data_sp)
2413 nlist_data.SetData(nlist_data_sp, 0, nlist_data_sp->GetByteSize());
2414 if (dysymtab.nindirectsyms != 0) {
2415 const addr_t indirect_syms_addr = linkedit_load_addr +
2416 dysymtab.indirectsymoff -
2417 linkedit_file_offset;
2418 DataBufferSP indirect_syms_data_sp(ReadMemory(
2419 process_sp, indirect_syms_addr, dysymtab.nindirectsyms * 4));
2420 if (indirect_syms_data_sp)
2421 indirect_symbol_index_data.SetData(
2422 indirect_syms_data_sp, 0,
2423 indirect_syms_data_sp->GetByteSize());
2424 // If this binary is outside the shared cache,
2425 // cache the string table.
2426 // Binaries in the shared cache all share a giant string table,
2427 // and we can't share the string tables across multiple
2428 // ObjectFileMachO's, so we'd end up re-reading this mega-strtab
2429 // for every binary in the shared cache - it would be a big perf
2430 // problem. For binaries outside the shared cache, it's faster to
2431 // read the entire strtab at once instead of piece-by-piece as we
2432 // process the nlist records.
2433 if (!is_shared_cache_image) {
2434 DataBufferSP strtab_data_sp(
2435 ReadMemory(process_sp, strtab_addr, strtab_data_byte_size));
2436 if (strtab_data_sp) {
2437 strtab_data.SetData(strtab_data_sp, 0,
2438 strtab_data_sp->GetByteSize());
2439 }
2440 }
2441 }
2442 if (memory_module_load_level >= eMemoryModuleLoadLevelPartial) {
2443 if (function_starts_load_command.cmd) {
2444 const addr_t func_start_addr =
2445 linkedit_load_addr + function_starts_load_command.dataoff -
2446 linkedit_file_offset;
2447 DataBufferSP func_start_data_sp(
2448 ReadMemory(process_sp, func_start_addr,
2449 function_starts_load_command.datasize));
2450 if (func_start_data_sp)
2451 function_starts_data.SetData(func_start_data_sp, 0,
2452 func_start_data_sp->GetByteSize());
2453 }
2454 }
2455 }
2456 }
2457 } else {
2458 if (is_local_shared_cache_image) {
2459 // The load commands in shared cache images are relative to the
2460 // beginning of the shared cache, not the library image. The
2461 // data we get handed when creating the ObjectFileMachO starts
2462 // at the beginning of a specific library and spans to the end
2463 // of the cache to be able to reach the shared LINKEDIT
2464 // segments. We need to convert the load command offsets to be
2465 // relative to the beginning of our specific image.
2466 lldb::addr_t linkedit_offset = linkedit_section_sp->GetFileOffset();
2467 lldb::offset_t linkedit_slide =
2468 linkedit_offset - m_linkedit_original_offset;
2469 symtab_load_command.symoff += linkedit_slide;
2470 symtab_load_command.stroff += linkedit_slide;
2471 dyld_info.export_off += linkedit_slide;
2472 dysymtab.indirectsymoff += linkedit_slide;
2473 function_starts_load_command.dataoff += linkedit_slide;
2474 exports_trie_load_command.dataoff += linkedit_slide;
2475 }
2476
2477 nlist_data.SetData(m_data, symtab_load_command.symoff,
2478 nlist_data_byte_size);
2479 strtab_data.SetData(m_data, symtab_load_command.stroff,
2480 strtab_data_byte_size);
2481
2482 // We shouldn't have exports data from both the LC_DYLD_INFO command
2483 // AND the LC_DYLD_EXPORTS_TRIE command in the same binary:
2484 lldbassert(!((dyld_info.export_size > 0)
2485 && (exports_trie_load_command.datasize > 0)));
2486 if (dyld_info.export_size > 0) {
2487 dyld_trie_data.SetData(m_data, dyld_info.export_off,
2488 dyld_info.export_size);
2489 } else if (exports_trie_load_command.datasize > 0) {
2490 dyld_trie_data.SetData(m_data, exports_trie_load_command.dataoff,
2491 exports_trie_load_command.datasize);
2492 }
2493
2494 if (dysymtab.nindirectsyms != 0) {
2495 indirect_symbol_index_data.SetData(m_data, dysymtab.indirectsymoff,
2496 dysymtab.nindirectsyms * 4);
2497 }
2498 if (function_starts_load_command.cmd) {
2499 function_starts_data.SetData(m_data, function_starts_load_command.dataoff,
2500 function_starts_load_command.datasize);
2501 }
2502 }
2503
2504 const bool have_strtab_data = strtab_data.GetByteSize() > 0;
2505
2506 ConstString g_segment_name_TEXT = GetSegmentNameTEXT();
2507 ConstString g_segment_name_DATA = GetSegmentNameDATA();
2508 ConstString g_segment_name_DATA_DIRTY = GetSegmentNameDATA_DIRTY();
2509 ConstString g_segment_name_DATA_CONST = GetSegmentNameDATA_CONST();
2510 ConstString g_segment_name_OBJC = GetSegmentNameOBJC();
2511 ConstString g_section_name_eh_frame = GetSectionNameEHFrame();
2512 SectionSP text_section_sp(
2513 section_list->FindSectionByName(g_segment_name_TEXT));
2514 SectionSP data_section_sp(
2515 section_list->FindSectionByName(g_segment_name_DATA));
2516 SectionSP data_dirty_section_sp(
2517 section_list->FindSectionByName(g_segment_name_DATA_DIRTY));
2518 SectionSP data_const_section_sp(
2519 section_list->FindSectionByName(g_segment_name_DATA_CONST));
2520 SectionSP objc_section_sp(
2521 section_list->FindSectionByName(g_segment_name_OBJC));
2522 SectionSP eh_frame_section_sp;
2523 if (text_section_sp.get())
2524 eh_frame_section_sp = text_section_sp->GetChildren().FindSectionByName(
2525 g_section_name_eh_frame);
2526 else
2527 eh_frame_section_sp =
2528 section_list->FindSectionByName(g_section_name_eh_frame);
2529
2530 const bool is_arm = (m_header.cputype == llvm::MachO::CPU_TYPE_ARM);
2531 const bool always_thumb = GetArchitecture().IsAlwaysThumbInstructions();
2532
2533 // lldb works best if it knows the start address of all functions in a
2534 // module. Linker symbols or debug info are normally the best source of
2535 // information for start addr / size but they may be stripped in a released
2536 // binary. Two additional sources of information exist in Mach-O binaries:
2537 // LC_FUNCTION_STARTS - a list of ULEB128 encoded offsets of each
2538 // function's start address in the
2539 // binary, relative to the text section.
2540 // eh_frame - the eh_frame FDEs have the start addr & size of
2541 // each function
2542 // LC_FUNCTION_STARTS is the fastest source to read in, and is present on
2543 // all modern binaries.
2544 // Binaries built to run on older releases may need to use eh_frame
2545 // information.
2546
2547 if (text_section_sp && function_starts_data.GetByteSize()) {
2548 FunctionStarts::Entry function_start_entry;
2549 function_start_entry.data = false;
2550 lldb::offset_t function_start_offset = 0;
2551 function_start_entry.addr = text_section_sp->GetFileAddress();
2552 uint64_t delta;
2553 while ((delta = function_starts_data.GetULEB128(&function_start_offset)) >
2554 0) {
2555 // Now append the current entry
2556 function_start_entry.addr += delta;
2557 if (is_arm) {
2558 if (function_start_entry.addr & 1) {
2559 function_start_entry.addr &= THUMB_ADDRESS_BIT_MASK;
2560 function_start_entry.data = true;
2561 } else if (always_thumb) {
2562 function_start_entry.data = true;
2563 }
2564 }
2565 function_starts.Append(function_start_entry);
2566 }
2567 } else {
2568 // If m_type is eTypeDebugInfo, then this is a dSYM - it will have the
2569 // load command claiming an eh_frame but it doesn't actually have the
2570 // eh_frame content. And if we have a dSYM, we don't need to do any of
2571 // this fill-in-the-missing-symbols works anyway - the debug info should
2572 // give us all the functions in the module.
2573 if (text_section_sp.get() && eh_frame_section_sp.get() &&
2575 DWARFCallFrameInfo eh_frame(*this, eh_frame_section_sp,
2578 eh_frame.GetFunctionAddressAndSizeVector(functions);
2579 addr_t text_base_addr = text_section_sp->GetFileAddress();
2580 size_t count = functions.GetSize();
2581 for (size_t i = 0; i < count; ++i) {
2583 functions.GetEntryAtIndex(i);
2584 if (func) {
2585 FunctionStarts::Entry function_start_entry;
2586 function_start_entry.addr = func->base - text_base_addr;
2587 if (is_arm) {
2588 if (function_start_entry.addr & 1) {
2589 function_start_entry.addr &= THUMB_ADDRESS_BIT_MASK;
2590 function_start_entry.data = true;
2591 } else if (always_thumb) {
2592 function_start_entry.data = true;
2593 }
2594 }
2595 function_starts.Append(function_start_entry);
2596 }
2597 }
2598 }
2599 }
2600
2601 const size_t function_starts_count = function_starts.GetSize();
2602
2603 // For user process binaries (executables, dylibs, frameworks, bundles), if
2604 // we don't have LC_FUNCTION_STARTS/eh_frame section in this binary, we're
2605 // going to assume the binary has been stripped. Don't allow assembly
2606 // language instruction emulation because we don't know proper function
2607 // start boundaries.
2608 //
2609 // For all other types of binaries (kernels, stand-alone bare board
2610 // binaries, kexts), they may not have LC_FUNCTION_STARTS / eh_frame
2611 // sections - we should not make any assumptions about them based on that.
2612 if (function_starts_count == 0 && CalculateStrata() == eStrataUser) {
2614 Log *unwind_or_symbol_log(GetLog(LLDBLog::Symbols | LLDBLog::Unwind));
2615
2616 if (unwind_or_symbol_log)
2617 module_sp->LogMessage(
2618 unwind_or_symbol_log,
2619 "no LC_FUNCTION_STARTS, will not allow assembly profiled unwinds");
2620 }
2621
2622 const user_id_t TEXT_eh_frame_sectID = eh_frame_section_sp.get()
2623 ? eh_frame_section_sp->GetID()
2624 : static_cast<user_id_t>(NO_SECT);
2625
2626 uint32_t N_SO_index = UINT32_MAX;
2627
2628 MachSymtabSectionInfo section_info(section_list);
2629 std::vector<uint32_t> N_FUN_indexes;
2630 std::vector<uint32_t> N_NSYM_indexes;
2631 std::vector<uint32_t> N_INCL_indexes;
2632 std::vector<uint32_t> N_BRAC_indexes;
2633 std::vector<uint32_t> N_COMM_indexes;
2634 typedef std::multimap<uint64_t, uint32_t> ValueToSymbolIndexMap;
2635 typedef llvm::DenseMap<uint32_t, uint32_t> NListIndexToSymbolIndexMap;
2636 typedef llvm::DenseMap<const char *, uint32_t> ConstNameToSymbolIndexMap;
2637 ValueToSymbolIndexMap N_FUN_addr_to_sym_idx;
2638 ValueToSymbolIndexMap N_STSYM_addr_to_sym_idx;
2639 ConstNameToSymbolIndexMap N_GSYM_name_to_sym_idx;
2640 // Any symbols that get merged into another will get an entry in this map
2641 // so we know
2642 NListIndexToSymbolIndexMap m_nlist_idx_to_sym_idx;
2643 uint32_t nlist_idx = 0;
2644 Symbol *symbol_ptr = nullptr;
2645
2646 uint32_t sym_idx = 0;
2647 Symbol *sym = nullptr;
2648 size_t num_syms = 0;
2649 std::string memory_symbol_name;
2650 uint32_t unmapped_local_symbols_found = 0;
2651
2652 std::vector<TrieEntryWithOffset> reexport_trie_entries;
2653 std::vector<TrieEntryWithOffset> external_sym_trie_entries;
2654 std::set<lldb::addr_t> resolver_addresses;
2655
2656 const size_t dyld_trie_data_size = dyld_trie_data.GetByteSize();
2657 if (dyld_trie_data_size > 0) {
2658 LLDB_LOG(log, "Parsing {0} bytes of dyld trie data", dyld_trie_data_size);
2659 SectionSP text_segment_sp =
2661 lldb::addr_t text_segment_file_addr = LLDB_INVALID_ADDRESS;
2662 if (text_segment_sp)
2663 text_segment_file_addr = text_segment_sp->GetFileAddress();
2664 std::vector<llvm::StringRef> nameSlices;
2665 ParseTrieEntries(dyld_trie_data, 0, is_arm, text_segment_file_addr,
2666 nameSlices, resolver_addresses, reexport_trie_entries,
2667 external_sym_trie_entries);
2668 }
2669
2670 typedef std::set<ConstString> IndirectSymbols;
2671 IndirectSymbols indirect_symbol_names;
2672
2673#if TARGET_OS_IPHONE
2674
2675 // Some recent builds of the dyld_shared_cache (hereafter: DSC) have been
2676 // optimized by moving LOCAL symbols out of the memory mapped portion of
2677 // the DSC. The symbol information has all been retained, but it isn't
2678 // available in the normal nlist data. However, there *are* duplicate
2679 // entries of *some*
2680 // LOCAL symbols in the normal nlist data. To handle this situation
2681 // correctly, we must first attempt
2682 // to parse any DSC unmapped symbol information. If we find any, we set a
2683 // flag that tells the normal nlist parser to ignore all LOCAL symbols.
2684
2685 if (IsSharedCacheBinary()) {
2686 // Before we can start mapping the DSC, we need to make certain the
2687 // target process is actually using the cache we can find.
2688
2689 // Next we need to determine the correct path for the dyld shared cache.
2690
2691 ArchSpec header_arch = GetArchitecture();
2692
2693 UUID dsc_uuid;
2694 UUID process_shared_cache_uuid;
2695 addr_t process_shared_cache_base_addr;
2696
2697 if (process) {
2698 GetProcessSharedCacheUUID(process, process_shared_cache_base_addr,
2699 process_shared_cache_uuid);
2700 }
2701
2702 __block bool found_image = false;
2703 __block void *nlist_buffer = nullptr;
2704 __block unsigned nlist_count = 0;
2705 __block char *string_table = nullptr;
2706 __block vm_offset_t vm_nlist_memory = 0;
2707 __block mach_msg_type_number_t vm_nlist_bytes_read = 0;
2708 __block vm_offset_t vm_string_memory = 0;
2709 __block mach_msg_type_number_t vm_string_bytes_read = 0;
2710
2711 auto _ = llvm::make_scope_exit(^{
2712 if (vm_nlist_memory)
2713 vm_deallocate(mach_task_self(), vm_nlist_memory, vm_nlist_bytes_read);
2714 if (vm_string_memory)
2715 vm_deallocate(mach_task_self(), vm_string_memory, vm_string_bytes_read);
2716 });
2717
2718 typedef llvm::DenseMap<ConstString, uint16_t> UndefinedNameToDescMap;
2719 typedef llvm::DenseMap<uint32_t, ConstString> SymbolIndexToName;
2720 UndefinedNameToDescMap undefined_name_to_desc;
2721 SymbolIndexToName reexport_shlib_needs_fixup;
2722
2723 dyld_for_each_installed_shared_cache(^(dyld_shared_cache_t shared_cache) {
2724 uuid_t cache_uuid;
2725 dyld_shared_cache_copy_uuid(shared_cache, &cache_uuid);
2726 if (found_image)
2727 return;
2728
2729 if (process_shared_cache_uuid.IsValid() &&
2730 process_shared_cache_uuid != UUID::fromData(&cache_uuid, 16))
2731 return;
2732
2733 dyld_shared_cache_for_each_image(shared_cache, ^(dyld_image_t image) {
2734 uuid_t dsc_image_uuid;
2735 if (found_image)
2736 return;
2737
2738 dyld_image_copy_uuid(image, &dsc_image_uuid);
2739 if (image_uuid != UUID::fromData(dsc_image_uuid, 16))
2740 return;
2741
2742 found_image = true;
2743
2744 // Compute the size of the string table. We need to ask dyld for a
2745 // new SPI to avoid this step.
2746 dyld_image_local_nlist_content_4Symbolication(
2747 image, ^(const void *nlistStart, uint64_t nlistCount,
2748 const char *stringTable) {
2749 if (!nlistStart || !nlistCount)
2750 return;
2751
2752 // The buffers passed here are valid only inside the block.
2753 // Use vm_read to make a cheap copy of them available for our
2754 // processing later.
2755 kern_return_t ret =
2756 vm_read(mach_task_self(), (vm_address_t)nlistStart,
2757 nlist_byte_size * nlistCount, &vm_nlist_memory,
2758 &vm_nlist_bytes_read);
2759 if (ret != KERN_SUCCESS)
2760 return;
2761 assert(vm_nlist_bytes_read == nlist_byte_size * nlistCount);
2762
2763 // We don't know the size of the string table. It's cheaper
2764 // to map the whole VM region than to determine the size by
2765 // parsing all the nlist entries.
2766 vm_address_t string_address = (vm_address_t)stringTable;
2767 vm_size_t region_size;
2768 mach_msg_type_number_t info_count = VM_REGION_BASIC_INFO_COUNT_64;
2769 vm_region_basic_info_data_t info;
2770 memory_object_name_t object;
2771 ret = vm_region_64(mach_task_self(), &string_address,
2772 &region_size, VM_REGION_BASIC_INFO_64,
2773 (vm_region_info_t)&info, &info_count, &object);
2774 if (ret != KERN_SUCCESS)
2775 return;
2776
2777 ret = vm_read(mach_task_self(), (vm_address_t)stringTable,
2778 region_size -
2779 ((vm_address_t)stringTable - string_address),
2780 &vm_string_memory, &vm_string_bytes_read);
2781 if (ret != KERN_SUCCESS)
2782 return;
2783
2784 nlist_buffer = (void *)vm_nlist_memory;
2785 string_table = (char *)vm_string_memory;
2786 nlist_count = nlistCount;
2787 });
2788 });
2789 });
2790 if (nlist_buffer) {
2791 DataExtractor dsc_local_symbols_data(nlist_buffer,
2792 nlist_count * nlist_byte_size,
2793 byte_order, addr_byte_size);
2794 unmapped_local_symbols_found = nlist_count;
2795
2796 // The normal nlist code cannot correctly size the Symbols
2797 // array, we need to allocate it here.
2798 sym = symtab.Resize(
2799 symtab_load_command.nsyms + m_dysymtab.nindirectsyms +
2800 unmapped_local_symbols_found - m_dysymtab.nlocalsym);
2801 num_syms = symtab.GetNumSymbols();
2802
2803 lldb::offset_t nlist_data_offset = 0;
2804
2805 for (uint32_t nlist_index = 0;
2806 nlist_index < nlist_count;
2807 nlist_index++) {
2808 /////////////////////////////
2809 {
2810 std::optional<struct nlist_64> nlist_maybe =
2811 ParseNList(dsc_local_symbols_data, nlist_data_offset,
2812 nlist_byte_size);
2813 if (!nlist_maybe)
2814 break;
2815 struct nlist_64 nlist = *nlist_maybe;
2816
2818 const char *symbol_name = string_table + nlist.n_strx;
2819
2820 if (symbol_name == NULL) {
2821 // No symbol should be NULL, even the symbols with no
2822 // string values should have an offset zero which
2823 // points to an empty C-string
2824 Debugger::ReportError(llvm::formatv(
2825 "DSC unmapped local symbol[{0}] has invalid "
2826 "string table offset {1:x} in {2}, ignoring symbol",
2827 nlist_index, nlist.n_strx,
2828 module_sp->GetFileSpec().GetPath());
2829 continue;
2830 }
2831 if (symbol_name[0] == '\0')
2832 symbol_name = NULL;
2833
2834 const char *symbol_name_non_abi_mangled = NULL;
2835
2836 SectionSP symbol_section;
2837 uint32_t symbol_byte_size = 0;
2838 bool add_nlist = true;
2839 bool is_debug = ((nlist.n_type & N_STAB) != 0);
2840 bool demangled_is_synthesized = false;
2841 bool is_gsym = false;
2842 bool set_value = true;
2843
2844 assert(sym_idx < num_syms);
2845
2846 sym[sym_idx].SetDebug(is_debug);
2847
2848 if (is_debug) {
2849 switch (nlist.n_type) {
2850 case N_GSYM:
2851 // global symbol: name,,NO_SECT,type,0
2852 // Sometimes the N_GSYM value contains the address.
2853
2854 // FIXME: In the .o files, we have a GSYM and a debug
2855 // symbol for all the ObjC data. They
2856 // have the same address, but we want to ensure that
2857 // we always find only the real symbol, 'cause we
2858 // don't currently correctly attribute the
2859 // GSYM one to the ObjCClass/Ivar/MetaClass
2860 // symbol type. This is a temporary hack to make
2861 // sure the ObjectiveC symbols get treated correctly.
2862 // To do this right, we should coalesce all the GSYM
2863 // & global symbols that have the same address.
2864
2865 is_gsym = true;
2866 sym[sym_idx].SetExternal(true);
2867
2868 if (symbol_name && symbol_name[0] == '_' &&
2869 symbol_name[1] == 'O') {
2870 llvm::StringRef symbol_name_ref(symbol_name);
2871 if (symbol_name_ref.startswith(
2872 g_objc_v2_prefix_class)) {
2873 symbol_name_non_abi_mangled = symbol_name + 1;
2874 symbol_name =
2875 symbol_name + g_objc_v2_prefix_class.size();
2876 type = eSymbolTypeObjCClass;
2877 demangled_is_synthesized = true;
2878
2879 } else if (symbol_name_ref.startswith(
2880 g_objc_v2_prefix_metaclass)) {
2881 symbol_name_non_abi_mangled = symbol_name + 1;
2882 symbol_name =
2883 symbol_name + g_objc_v2_prefix_metaclass.size();
2885 demangled_is_synthesized = true;
2886 } else if (symbol_name_ref.startswith(
2887 g_objc_v2_prefix_ivar)) {
2888 symbol_name_non_abi_mangled = symbol_name + 1;
2889 symbol_name =
2890 symbol_name + g_objc_v2_prefix_ivar.size();
2891 type = eSymbolTypeObjCIVar;
2892 demangled_is_synthesized = true;
2893 }
2894 } else {
2895 if (nlist.n_value != 0)
2896 symbol_section = section_info.GetSection(
2897 nlist.n_sect, nlist.n_value);
2898 type = eSymbolTypeData;
2899 }
2900 break;
2901
2902 case N_FNAME:
2903 // procedure name (f77 kludge): name,,NO_SECT,0,0
2904 type = eSymbolTypeCompiler;
2905 break;
2906
2907 case N_FUN:
2908 // procedure: name,,n_sect,linenumber,address
2909 if (symbol_name) {
2910 type = eSymbolTypeCode;
2911 symbol_section = section_info.GetSection(
2912 nlist.n_sect, nlist.n_value);
2913
2914 N_FUN_addr_to_sym_idx.insert(
2915 std::make_pair(nlist.n_value, sym_idx));
2916 // We use the current number of symbols in the
2917 // symbol table in lieu of using nlist_idx in case
2918 // we ever start trimming entries out
2919 N_FUN_indexes.push_back(sym_idx);
2920 } else {
2921 type = eSymbolTypeCompiler;
2922
2923 if (!N_FUN_indexes.empty()) {
2924 // Copy the size of the function into the
2925 // original
2926 // STAB entry so we don't have
2927 // to hunt for it later
2928 symtab.SymbolAtIndex(N_FUN_indexes.back())
2929 ->SetByteSize(nlist.n_value);
2930 N_FUN_indexes.pop_back();
2931 // We don't really need the end function STAB as
2932 // it contains the size which we already placed
2933 // with the original symbol, so don't add it if
2934 // we want a minimal symbol table
2935 add_nlist = false;
2936 }
2937 }
2938 break;
2939
2940 case N_STSYM:
2941 // static symbol: name,,n_sect,type,address
2942 N_STSYM_addr_to_sym_idx.insert(
2943 std::make_pair(nlist.n_value, sym_idx));
2944 symbol_section = section_info.GetSection(nlist.n_sect,
2945 nlist.n_value);
2946 if (symbol_name && symbol_name[0]) {
2948 symbol_name + 1, eSymbolTypeData);
2949 }
2950 break;
2951
2952 case N_LCSYM:
2953 // .lcomm symbol: name,,n_sect,type,address
2954 symbol_section = section_info.GetSection(nlist.n_sect,
2955 nlist.n_value);
2957 break;
2958
2959 case N_BNSYM:
2960 // We use the current number of symbols in the symbol
2961 // table in lieu of using nlist_idx in case we ever
2962 // start trimming entries out. Skip these if we want
2963 // minimal symbol tables
2964 add_nlist = false;
2965 break;
2966
2967 case N_ENSYM:
2968 // Set the size of the N_BNSYM to the terminating
2969 // index of this N_ENSYM so that we can always skip
2970 // the entire symbol if we need to navigate more
2971 // quickly at the source level when parsing STABS
2972 // Skip these if we want minimal symbol tables
2973 add_nlist = false;
2974 break;
2975
2976 case N_OPT:
2977 // emitted with gcc2_compiled and in gcc source
2978 type = eSymbolTypeCompiler;
2979 break;
2980
2981 case N_RSYM:
2982 // register sym: name,,NO_SECT,type,register
2983 type = eSymbolTypeVariable;
2984 break;
2985
2986 case N_SLINE:
2987 // src line: 0,,n_sect,linenumber,address
2988 symbol_section = section_info.GetSection(nlist.n_sect,
2989 nlist.n_value);
2990 type = eSymbolTypeLineEntry;
2991 break;
2992
2993 case N_SSYM:
2994 // structure elt: name,,NO_SECT,type,struct_offset
2996 break;
2997
2998 case N_SO:
2999 // source file name
3000 type = eSymbolTypeSourceFile;
3001 if (symbol_name == NULL) {
3002 add_nlist = false;
3003 if (N_SO_index != UINT32_MAX) {
3004 // Set the size of the N_SO to the terminating
3005 // index of this N_SO so that we can always skip
3006 // the entire N_SO if we need to navigate more
3007 // quickly at the source level when parsing STABS
3008 symbol_ptr = symtab.SymbolAtIndex(N_SO_index);
3009 symbol_ptr->SetByteSize(sym_idx);
3010 symbol_ptr->SetSizeIsSibling(true);
3011 }
3012 N_NSYM_indexes.clear();
3013 N_INCL_indexes.clear();
3014 N_BRAC_indexes.clear();
3015 N_COMM_indexes.clear();
3016 N_FUN_indexes.clear();
3017 N_SO_index = UINT32_MAX;
3018 } else {
3019 // We use the current number of symbols in the
3020 // symbol table in lieu of using nlist_idx in case
3021 // we ever start trimming entries out
3022 const bool N_SO_has_full_path = symbol_name[0] == '/';
3023 if (N_SO_has_full_path) {
3024 if ((N_SO_index == sym_idx - 1) &&
3025 ((sym_idx - 1) < num_syms)) {
3026 // We have two consecutive N_SO entries where
3027 // the first contains a directory and the
3028 // second contains a full path.
3029 sym[sym_idx - 1].GetMangled().SetValue(
3030 ConstString(symbol_name));
3031 m_nlist_idx_to_sym_idx[nlist_idx] = sym_idx - 1;
3032 add_nlist = false;
3033 } else {
3034 // This is the first entry in a N_SO that
3035 // contains a directory or
3036 // a full path to the source file
3037 N_SO_index = sym_idx;
3038 }
3039 } else if ((N_SO_index == sym_idx - 1) &&
3040 ((sym_idx - 1) < num_syms)) {
3041 // This is usually the second N_SO entry that
3042 // contains just the filename, so here we combine
3043 // it with the first one if we are minimizing the
3044 // symbol table
3045 const char *so_path = sym[sym_idx - 1]
3046 .GetMangled()
3048 .AsCString();
3049 if (so_path && so_path[0]) {
3050 std::string full_so_path(so_path);
3051 const size_t double_slash_pos =
3052 full_so_path.find("//");
3053 if (double_slash_pos != std::string::npos) {
3054 // The linker has been generating bad N_SO
3055 // entries with doubled up paths
3056 // in the format "%s%s" where the first
3057 // string is the DW_AT_comp_dir, and the
3058 // second is the directory for the source
3059 // file so you end up with a path that looks
3060 // like "/tmp/src//tmp/src/"
3061 FileSpec so_dir(so_path);
3062 if (!FileSystem::Instance().Exists(so_dir)) {
3063 so_dir.SetFile(
3064 &full_so_path[double_slash_pos + 1],
3065 FileSpec::Style::native);
3066 if (FileSystem::Instance().Exists(so_dir)) {
3067 // Trim off the incorrect path
3068 full_so_path.erase(0, double_slash_pos + 1);
3069 }
3070 }
3071 }
3072 if (*full_so_path.rbegin() != '/')
3073 full_so_path += '/';
3074 full_so_path += symbol_name;
3075 sym[sym_idx - 1].GetMangled().SetValue(
3076 ConstString(full_so_path.c_str()));
3077 add_nlist = false;
3078 m_nlist_idx_to_sym_idx[nlist_idx] = sym_idx - 1;
3079 }
3080 } else {
3081 // This could be a relative path to a N_SO
3082 N_SO_index = sym_idx;
3083 }
3084 }
3085 break;
3086
3087 case N_OSO:
3088 // object file name: name,,0,0,st_mtime
3089 type = eSymbolTypeObjectFile;
3090 break;
3091
3092 case N_LSYM:
3093 // local sym: name,,NO_SECT,type,offset
3094 type = eSymbolTypeLocal;
3095 break;
3096
3097 // INCL scopes
3098 case N_BINCL:
3099 // include file beginning: name,,NO_SECT,0,sum. We use
3100 // the current number of symbols in the symbol table
3101 // in lieu of using nlist_idx in case we ever start
3102 // trimming entries out
3103 N_INCL_indexes.push_back(sym_idx);
3104 type = eSymbolTypeScopeBegin;
3105 break;
3106
3107 case N_EINCL:
3108 // include file end: name,,NO_SECT,0,0
3109 // Set the size of the N_BINCL to the terminating
3110 // index of this N_EINCL so that we can always skip
3111 // the entire symbol if we need to navigate more
3112 // quickly at the source level when parsing STABS
3113 if (!N_INCL_indexes.empty()) {
3114 symbol_ptr =
3115 symtab.SymbolAtIndex(N_INCL_indexes.back());
3116 symbol_ptr->SetByteSize(sym_idx + 1);
3117 symbol_ptr->SetSizeIsSibling(true);
3118 N_INCL_indexes.pop_back();
3119 }
3120 type = eSymbolTypeScopeEnd;
3121 break;
3122
3123 case N_SOL:
3124 // #included file name: name,,n_sect,0,address
3125 type = eSymbolTypeHeaderFile;
3126
3127 // We currently don't use the header files on darwin
3128 add_nlist = false;
3129 break;
3130
3131 case N_PARAMS:
3132 // compiler parameters: name,,NO_SECT,0,0
3133 type = eSymbolTypeCompiler;
3134 break;
3135
3136 case N_VERSION:
3137 // compiler version: name,,NO_SECT,0,0
3138 type = eSymbolTypeCompiler;
3139 break;
3140
3141 case N_OLEVEL:
3142 // compiler -O level: name,,NO_SECT,0,0
3143 type = eSymbolTypeCompiler;
3144 break;
3145
3146 case N_PSYM:
3147 // parameter: name,,NO_SECT,type,offset
3148 type = eSymbolTypeVariable;
3149 break;
3150
3151 case N_ENTRY:
3152 // alternate entry: name,,n_sect,linenumber,address
3153 symbol_section = section_info.GetSection(nlist.n_sect,
3154 nlist.n_value);
3155 type = eSymbolTypeLineEntry;
3156 break;
3157
3158 // Left and Right Braces
3159 case N_LBRAC:
3160 // left bracket: 0,,NO_SECT,nesting level,address. We
3161 // use the current number of symbols in the symbol
3162 // table in lieu of using nlist_idx in case we ever
3163 // start trimming entries out
3164 symbol_section = section_info.GetSection(nlist.n_sect,
3165 nlist.n_value);
3166 N_BRAC_indexes.push_back(sym_idx);
3167 type = eSymbolTypeScopeBegin;
3168 break;
3169
3170 case N_RBRAC:
3171 // right bracket: 0,,NO_SECT,nesting level,address
3172 // Set the size of the N_LBRAC to the terminating
3173 // index of this N_RBRAC so that we can always skip
3174 // the entire symbol if we need to navigate more
3175 // quickly at the source level when parsing STABS
3176 symbol_section = section_info.GetSection(nlist.n_sect,
3177 nlist.n_value);
3178 if (!N_BRAC_indexes.empty()) {
3179 symbol_ptr =
3180 symtab.SymbolAtIndex(N_BRAC_indexes.back());
3181 symbol_ptr->SetByteSize(sym_idx + 1);
3182 symbol_ptr->SetSizeIsSibling(true);
3183 N_BRAC_indexes.pop_back();
3184 }
3185 type = eSymbolTypeScopeEnd;
3186 break;
3187
3188 case N_EXCL:
3189 // deleted include file: name,,NO_SECT,0,sum
3190 type = eSymbolTypeHeaderFile;
3191 break;
3192
3193 // COMM scopes
3194 case N_BCOMM:
3195 // begin common: name,,NO_SECT,0,0
3196 // We use the current number of symbols in the symbol
3197 // table in lieu of using nlist_idx in case we ever
3198 // start trimming entries out
3199 type = eSymbolTypeScopeBegin;
3200 N_COMM_indexes.push_back(sym_idx);
3201 break;
3202
3203 case N_ECOML:
3204 // end common (local name): 0,,n_sect,0,address
3205 symbol_section = section_info.GetSection(nlist.n_sect,
3206 nlist.n_value);
3207 // Fall through
3208
3209 case N_ECOMM:
3210 // end common: name,,n_sect,0,0
3211 // Set the size of the N_BCOMM to the terminating
3212 // index of this N_ECOMM/N_ECOML so that we can
3213 // always skip the entire symbol if we need to
3214 // navigate more quickly at the source level when
3215 // parsing STABS
3216 if (!N_COMM_indexes.empty()) {
3217 symbol_ptr =
3218 symtab.SymbolAtIndex(N_COMM_indexes.back());
3219 symbol_ptr->SetByteSize(sym_idx + 1);
3220 symbol_ptr->SetSizeIsSibling(true);
3221 N_COMM_indexes.pop_back();
3222 }
3223 type = eSymbolTypeScopeEnd;
3224 break;
3225
3226 case N_LENG:
3227 // second stab entry with length information
3228 type = eSymbolTypeAdditional;
3229 break;
3230
3231 default:
3232 break;
3233 }
3234 } else {
3235 // uint8_t n_pext = N_PEXT & nlist.n_type;
3236 uint8_t n_type = N_TYPE & nlist.n_type;
3237 sym[sym_idx].SetExternal((N_EXT & nlist.n_type) != 0);
3238
3239 switch (n_type) {
3240 case N_INDR: {
3241 const char *reexport_name_cstr =
3242 strtab_data.PeekCStr(nlist.n_value);
3243 if (reexport_name_cstr && reexport_name_cstr[0]) {
3244 type = eSymbolTypeReExported;
3245 ConstString reexport_name(
3246 reexport_name_cstr +
3247 ((reexport_name_cstr[0] == '_') ? 1 : 0));
3248 sym[sym_idx].SetReExportedSymbolName(reexport_name);
3249 set_value = false;
3250 reexport_shlib_needs_fixup[sym_idx] = reexport_name;
3251 indirect_symbol_names.insert(ConstString(
3252 symbol_name + ((symbol_name[0] == '_') ? 1 : 0)));
3253 } else
3254 type = eSymbolTypeUndefined;
3255 } break;
3256
3257 case N_UNDF:
3258 if (symbol_name && symbol_name[0]) {
3259 ConstString undefined_name(
3260 symbol_name + ((symbol_name[0] == '_') ? 1 : 0));
3261 undefined_name_to_desc[undefined_name] = nlist.n_desc;
3262 }
3263 // Fall through
3264 case N_PBUD:
3265 type = eSymbolTypeUndefined;
3266 break;
3267
3268 case N_ABS:
3269 type = eSymbolTypeAbsolute;
3270 break;
3271
3272 case N_SECT: {
3273 symbol_section = section_info.GetSection(nlist.n_sect,
3274 nlist.n_value);
3275
3276 if (symbol_section == NULL) {
3277 // TODO: warn about this?
3278 add_nlist = false;
3279 break;
3280 }
3281
3282 if (TEXT_eh_frame_sectID == nlist.n_sect) {
3283 type = eSymbolTypeException;
3284 } else {
3285 uint32_t section_type =
3286 symbol_section->Get() & SECTION_TYPE;
3287
3288 switch (section_type) {
3289 case S_CSTRING_LITERALS:
3290 type = eSymbolTypeData;
3291 break; // section with only literal C strings
3292 case S_4BYTE_LITERALS:
3293 type = eSymbolTypeData;
3294 break; // section with only 4 byte literals
3295 case S_8BYTE_LITERALS:
3296 type = eSymbolTypeData;
3297 break; // section with only 8 byte literals
3298 case S_LITERAL_POINTERS:
3299 type = eSymbolTypeTrampoline;
3300 break; // section with only pointers to literals
3301 case S_NON_LAZY_SYMBOL_POINTERS:
3302 type = eSymbolTypeTrampoline;
3303 break; // section with only non-lazy symbol
3304 // pointers
3305 case S_LAZY_SYMBOL_POINTERS:
3306 type = eSymbolTypeTrampoline;
3307 break; // section with only lazy symbol pointers
3308 case S_SYMBOL_STUBS:
3309 type = eSymbolTypeTrampoline;
3310 break; // section with only symbol stubs, byte
3311 // size of stub in the reserved2 field
3312 case S_MOD_INIT_FUNC_POINTERS:
3313 type = eSymbolTypeCode;
3314 break; // section with only function pointers for
3315 // initialization
3316 case S_MOD_TERM_FUNC_POINTERS:
3317 type = eSymbolTypeCode;
3318 break; // section with only function pointers for
3319 // termination
3320 case S_INTERPOSING:
3321 type = eSymbolTypeTrampoline;
3322 break; // section with only pairs of function
3323 // pointers for interposing
3324 case S_16BYTE_LITERALS:
3325 type = eSymbolTypeData;
3326 break; // section with only 16 byte literals
3327 case S_DTRACE_DOF:
3329 break;
3330 case S_LAZY_DYLIB_SYMBOL_POINTERS:
3331 type = eSymbolTypeTrampoline;
3332 break;
3333 default:
3334 switch (symbol_section->GetType()) {
3336 type = eSymbolTypeCode;
3337 break;
3338 case eSectionTypeData:
3339 case eSectionTypeDataCString: // Inlined C string
3340 // data
3341 case eSectionTypeDataCStringPointers: // Pointers
3342 // to C
3343 // string
3344 // data
3345 case eSectionTypeDataSymbolAddress: // Address of
3346 // a symbol in
3347 // the symbol
3348 // table
3349 case eSectionTypeData4:
3350 case eSectionTypeData8:
3351 case eSectionTypeData16:
3352 type = eSymbolTypeData;
3353 break;
3354 default:
3355 break;
3356 }
3357 break;
3358 }
3359
3360 if (type == eSymbolTypeInvalid) {
3361 const char *symbol_sect_name =
3362 symbol_section->GetName().AsCString();
3363 if (symbol_section->IsDescendant(
3364 text_section_sp.get())) {
3365 if (symbol_section->IsClear(
3366 S_ATTR_PURE_INSTRUCTIONS |
3367 S_ATTR_SELF_MODIFYING_CODE |
3368 S_ATTR_SOME_INSTRUCTIONS))
3369 type = eSymbolTypeData;
3370 else
3371 type = eSymbolTypeCode;
3372 } else if (symbol_section->IsDescendant(
3373 data_section_sp.get()) ||
3374 symbol_section->IsDescendant(
3375 data_dirty_section_sp.get()) ||
3376 symbol_section->IsDescendant(
3377 data_const_section_sp.get())) {
3378 if (symbol_sect_name &&
3379 ::strstr(symbol_sect_name, "__objc") ==
3380 symbol_sect_name) {
3381 type = eSymbolTypeRuntime;
3382
3383 if (symbol_name) {
3384 llvm::StringRef symbol_name_ref(symbol_name);
3385 if (symbol_name_ref.startswith("_OBJC_")) {
3386 llvm::StringRef
3387 g_objc_v2_prefix_class(
3388 "_OBJC_CLASS_$_");
3389 llvm::StringRef
3390 g_objc_v2_prefix_metaclass(
3391 "_OBJC_METACLASS_$_");
3392 llvm::StringRef
3393 g_objc_v2_prefix_ivar("_OBJC_IVAR_$_");
3394 if (symbol_name_ref.startswith(
3395 g_objc_v2_prefix_class)) {
3396 symbol_name_non_abi_mangled =
3397 symbol_name + 1;
3398 symbol_name =
3399 symbol_name +
3400 g_objc_v2_prefix_class.size();
3401 type = eSymbolTypeObjCClass;
3402 demangled_is_synthesized = true;
3403 } else if (
3404 symbol_name_ref.startswith(
3405 g_objc_v2_prefix_metaclass)) {
3406 symbol_name_non_abi_mangled =
3407 symbol_name + 1;
3408 symbol_name =
3409 symbol_name +
3410 g_objc_v2_prefix_metaclass.size();
3412 demangled_is_synthesized = true;
3413 } else if (symbol_name_ref.startswith(
3414 g_objc_v2_prefix_ivar)) {
3415 symbol_name_non_abi_mangled =
3416 symbol_name + 1;
3417 symbol_name =
3418 symbol_name +
3419 g_objc_v2_prefix_ivar.size();
3420 type = eSymbolTypeObjCIVar;
3421 demangled_is_synthesized = true;
3422 }
3423 }
3424 }
3425 } else if (symbol_sect_name &&
3426 ::strstr(symbol_sect_name,
3427 "__gcc_except_tab") ==
3428 symbol_sect_name) {
3429 type = eSymbolTypeException;
3430 } else {
3431 type = eSymbolTypeData;
3432 }
3433 } else if (symbol_sect_name &&
3434 ::strstr(symbol_sect_name, "__IMPORT") ==
3435 symbol_sect_name) {
3436 type = eSymbolTypeTrampoline;
3437 } else if (symbol_section->IsDescendant(
3438 objc_section_sp.get())) {
3439 type = eSymbolTypeRuntime;
3440 if (symbol_name && symbol_name[0] == '.') {
3441 llvm::StringRef symbol_name_ref(symbol_name);
3442 llvm::StringRef
3443 g_objc_v1_prefix_class(".objc_class_name_");
3444 if (symbol_name_ref.startswith(
3445 g_objc_v1_prefix_class)) {
3446 symbol_name_non_abi_mangled = symbol_name;
3447 symbol_name = symbol_name +
3448 g_objc_v1_prefix_class.size();
3449 type = eSymbolTypeObjCClass;
3450 demangled_is_synthesized = true;
3451 }
3452 }
3453 }
3454 }
3455 }
3456 } break;
3457 }
3458 }
3459
3460 if (add_nlist) {
3461 uint64_t symbol_value = nlist.n_value;
3462 if (symbol_name_non_abi_mangled) {
3463 sym[sym_idx].GetMangled().SetMangledName(
3464 ConstString(symbol_name_non_abi_mangled));
3465 sym[sym_idx].GetMangled().SetDemangledName(
3466 ConstString(symbol_name));
3467 } else {
3468 if (symbol_name && symbol_name[0] == '_') {
3469 symbol_name++; // Skip the leading underscore
3470 }
3471
3472 if (symbol_name) {
3473 ConstString const_symbol_name(symbol_name);
3474 sym[sym_idx].GetMangled().SetValue(const_symbol_name);
3475 if (is_gsym && is_debug) {
3476 const char *gsym_name =
3477 sym[sym_idx]
3478 .GetMangled()
3480 .GetCString();
3481 if (gsym_name)
3482 N_GSYM_name_to_sym_idx[gsym_name] = sym_idx;
3483 }
3484 }
3485 }
3486 if (symbol_section) {
3487 const addr_t section_file_addr =
3488 symbol_section->GetFileAddress();
3489 if (symbol_byte_size == 0 &&
3490 function_starts_count > 0) {
3491 addr_t symbol_lookup_file_addr = nlist.n_value;
3492 // Do an exact address match for non-ARM addresses,
3493 // else get the closest since the symbol might be a
3494 // thumb symbol which has an address with bit zero
3495 // set
3496 FunctionStarts::Entry *func_start_entry =
3497 function_starts.FindEntry(symbol_lookup_file_addr,
3498 !is_arm);
3499 if (is_arm && func_start_entry) {
3500 // Verify that the function start address is the
3501 // symbol address (ARM) or the symbol address + 1
3502 // (thumb)
3503 if (func_start_entry->addr !=
3504 symbol_lookup_file_addr &&
3505 func_start_entry->addr !=
3506 (symbol_lookup_file_addr + 1)) {
3507 // Not the right entry, NULL it out...
3508 func_start_entry = NULL;
3509 }
3510 }
3511 if (func_start_entry) {
3512 func_start_entry->data = true;
3513
3514 addr_t symbol_file_addr = func_start_entry->addr;
3515 uint32_t symbol_flags = 0;
3516 if (is_arm) {
3517 if (symbol_file_addr & 1)
3518 symbol_flags = MACHO_NLIST_ARM_SYMBOL_IS_THUMB;
3519 symbol_file_addr &= THUMB_ADDRESS_BIT_MASK;
3520 }
3521
3522 const FunctionStarts::Entry *next_func_start_entry =
3523 function_starts.FindNextEntry(func_start_entry);
3524 const addr_t section_end_file_addr =
3525 section_file_addr +
3526 symbol_section->GetByteSize();
3527 if (next_func_start_entry) {
3528 addr_t next_symbol_file_addr =
3529 next_func_start_entry->addr;
3530 // Be sure to clear the Thumb address bit when
3531 // we calculate the size from the current and
3532 // next address
3533 if (is_arm)
3534 next_symbol_file_addr &= THUMB_ADDRESS_BIT_MASK;
3535 symbol_byte_size = std::min<lldb::addr_t>(
3536 next_symbol_file_addr - symbol_file_addr,
3537 section_end_file_addr - symbol_file_addr);
3538 } else {
3539 symbol_byte_size =
3540 section_end_file_addr - symbol_file_addr;
3541 }
3542 }
3543 }
3544 symbol_value -= section_file_addr;
3545 }
3546
3547 if (is_debug == false) {
3548 if (type == eSymbolTypeCode) {
3549 // See if we can find a N_FUN entry for any code
3550 // symbols. If we do find a match, and the name
3551 // matches, then we can merge the two into just the
3552 // function symbol to avoid duplicate entries in
3553 // the symbol table
3554 auto range =
3555 N_FUN_addr_to_sym_idx.equal_range(nlist.n_value);
3556 if (range.first != range.second) {
3557 bool found_it = false;
3558 for (auto pos = range.first; pos != range.second;
3559 ++pos) {
3560 if (sym[sym_idx].GetMangled().GetName(
3562 sym[pos->second].GetMangled().GetName(
3564 m_nlist_idx_to_sym_idx[nlist_idx] = pos->second;
3565 // We just need the flags from the linker
3566 // symbol, so put these flags
3567 // into the N_FUN flags to avoid duplicate
3568 // symbols in the symbol table
3569 sym[pos->second].SetExternal(
3570 sym[sym_idx].IsExternal());
3571 sym[pos->second].SetFlags(nlist.n_type << 16 |
3572 nlist.n_desc);
3573 if (resolver_addresses.find(nlist.n_value) !=
3574 resolver_addresses.end())
3575 sym[pos->second].SetType(eSymbolTypeResolver);
3576 sym[sym_idx].Clear();
3577 found_it = true;
3578 break;
3579 }
3580 }
3581 if (found_it)
3582 continue;
3583 } else {
3584 if (resolver_addresses.find(nlist.n_value) !=
3585 resolver_addresses.end())
3586 type = eSymbolTypeResolver;
3587 }
3588 } else if (type == eSymbolTypeData ||
3589 type == eSymbolTypeObjCClass ||
3590 type == eSymbolTypeObjCMetaClass ||
3591 type == eSymbolTypeObjCIVar) {
3592 // See if we can find a N_STSYM entry for any data
3593 // symbols. If we do find a match, and the name
3594 // matches, then we can merge the two into just the
3595 // Static symbol to avoid duplicate entries in the
3596 // symbol table
3597 auto range = N_STSYM_addr_to_sym_idx.equal_range(
3598 nlist.n_value);
3599 if (range.first != range.second) {
3600 bool found_it = false;
3601 for (auto pos = range.first; pos != range.second;
3602 ++pos) {
3603 if (sym[sym_idx].GetMangled().GetName(
3605 sym[pos->second].GetMangled().GetName(
3607 m_nlist_idx_to_sym_idx[nlist_idx] = pos->second;
3608 // We just need the flags from the linker
3609 // symbol, so put these flags
3610 // into the N_STSYM flags to avoid duplicate
3611 // symbols in the symbol table
3612 sym[pos->second].SetExternal(
3613 sym[sym_idx].IsExternal());
3614 sym[pos->second].SetFlags(nlist.n_type << 16 |
3615 nlist.n_desc);
3616 sym[sym_idx].Clear();
3617 found_it = true;
3618 break;
3619 }
3620 }
3621 if (found_it)
3622 continue;
3623 } else {
3624 const char *gsym_name =
3625 sym[sym_idx]
3626 .GetMangled()
3628 .GetCString();
3629 if (gsym_name) {
3630 // Combine N_GSYM stab entries with the non
3631 // stab symbol
3632 ConstNameToSymbolIndexMap::const_iterator pos =
3633 N_GSYM_name_to_sym_idx.find(gsym_name);
3634 if (pos != N_GSYM_name_to_sym_idx.end()) {
3635 const uint32_t GSYM_sym_idx = pos->second;
3636 m_nlist_idx_to_sym_idx[nlist_idx] =
3637 GSYM_sym_idx;
3638 // Copy the address, because often the N_GSYM
3639 // address has an invalid address of zero
3640 // when the global is a common symbol
3641 sym[GSYM_sym_idx].GetAddressRef().SetSection(
3642 symbol_section);
3643 sym[GSYM_sym_idx].GetAddressRef().SetOffset(
3644 symbol_value);
3645 add_symbol_addr(sym[GSYM_sym_idx]
3646 .GetAddress()
3647 .GetFileAddress());
3648 // We just need the flags from the linker
3649 // symbol, so put these flags
3650 // into the N_GSYM flags to avoid duplicate
3651 // symbols in the symbol table
3652 sym[GSYM_sym_idx].SetFlags(nlist.n_type << 16 |
3653 nlist.n_desc);
3654 sym[sym_idx].Clear();
3655 continue;
3656 }
3657 }
3658 }
3659 }
3660 }
3661
3662 sym[sym_idx].SetID(nlist_idx);
3663 sym[sym_idx].SetType(type);
3664 if (set_value) {
3665 sym[sym_idx].GetAddressRef().SetSection(symbol_section);
3666 sym[sym_idx].GetAddressRef().SetOffset(symbol_value);
3667 add_symbol_addr(
3668 sym[sym_idx].GetAddress().GetFileAddress());
3669 }
3670 sym[sym_idx].SetFlags(nlist.n_type << 16 | nlist.n_desc);
3671
3672 if (symbol_byte_size > 0)
3673 sym[sym_idx].SetByteSize(symbol_byte_size);
3674
3675 if (demangled_is_synthesized)
3676 sym[sym_idx].SetDemangledNameIsSynthesized(true);
3677 ++sym_idx;
3678 } else {
3679 sym[sym_idx].Clear();
3680 }
3681 }
3682 /////////////////////////////
3683 }
3684 }
3685
3686 for (const auto &pos : reexport_shlib_needs_fixup) {
3687 const auto undef_pos = undefined_name_to_desc.find(pos.second);
3688 if (undef_pos != undefined_name_to_desc.end()) {
3689 const uint8_t dylib_ordinal =
3690 llvm::MachO::GET_LIBRARY_ORDINAL(undef_pos->second);
3691 if (dylib_ordinal > 0 && dylib_ordinal < dylib_files.GetSize())
3692 sym[pos.first].SetReExportedSymbolSharedLibrary(
3693 dylib_files.GetFileSpecAtIndex(dylib_ordinal - 1));
3694 }
3695 }
3696 }
3697
3698#endif
3699 lldb::offset_t nlist_data_offset = 0;
3700
3701 if (nlist_data.GetByteSize() > 0) {
3702
3703 // If the sym array was not created while parsing the DSC unmapped
3704 // symbols, create it now.
3705 if (sym == nullptr) {
3706 sym =
3707 symtab.Resize(symtab_load_command.nsyms + m_dysymtab.nindirectsyms);
3708 num_syms = symtab.GetNumSymbols();
3709 }
3710
3711 if (unmapped_local_symbols_found) {
3712 assert(m_dysymtab.ilocalsym == 0);
3713 nlist_data_offset += (m_dysymtab.nlocalsym * nlist_byte_size);
3714 nlist_idx = m_dysymtab.nlocalsym;
3715 } else {
3716 nlist_idx = 0;
3717 }
3718
3719 typedef llvm::DenseMap<ConstString, uint16_t> UndefinedNameToDescMap;
3720 typedef llvm::DenseMap<uint32_t, ConstString> SymbolIndexToName;
3721 UndefinedNameToDescMap undefined_name_to_desc;
3722 SymbolIndexToName reexport_shlib_needs_fixup;
3723
3724 // Symtab parsing is a huge mess. Everything is entangled and the code
3725 // requires access to a ridiculous amount of variables. LLDB depends
3726 // heavily on the proper merging of symbols and to get that right we need
3727 // to make sure we have parsed all the debug symbols first. Therefore we
3728 // invoke the lambda twice, once to parse only the debug symbols and then
3729 // once more to parse the remaining symbols.
3730 auto ParseSymbolLambda = [&](struct nlist_64 &nlist, uint32_t nlist_idx,
3731 bool debug_only) {
3732 const bool is_debug = ((nlist.n_type & N_STAB) != 0);
3733 if (is_debug != debug_only)
3734 return true;
3735
3736 const char *symbol_name_non_abi_mangled = nullptr;
3737 const char *symbol_name = nullptr;
3738
3739 if (have_strtab_data) {
3740 symbol_name = strtab_data.PeekCStr(nlist.n_strx);
3741
3742 if (symbol_name == nullptr) {
3743 // No symbol should be NULL, even the symbols with no string values
3744 // should have an offset zero which points to an empty C-string
3745 Debugger::ReportError(llvm::formatv(
3746 "symbol[{0}] has invalid string table offset {1:x} in {2}, "
3747 "ignoring symbol",
3748 nlist_idx, nlist.n_strx, module_sp->GetFileSpec().GetPath()));
3749 return true;
3750 }
3751 if (symbol_name[0] == '\0')
3752 symbol_name = nullptr;
3753 } else {
3754 const addr_t str_addr = strtab_addr + nlist.n_strx;
3755 Status str_error;
3756 if (process->ReadCStringFromMemory(str_addr, memory_symbol_name,
3757 str_error))
3758 symbol_name = memory_symbol_name.c_str();
3759 }
3760
3762 SectionSP symbol_section;
3763 lldb::addr_t symbol_byte_size = 0;
3764 bool add_nlist = true;
3765 bool is_gsym = false;
3766 bool demangled_is_synthesized = false;
3767 bool set_value = true;
3768
3769 assert(sym_idx < num_syms);
3770 sym[sym_idx].SetDebug(is_debug);
3771
3772 if (is_debug) {
3773 switch (nlist.n_type) {
3774 case N_GSYM:
3775 // global symbol: name,,NO_SECT,type,0
3776 // Sometimes the N_GSYM value contains the address.
3777
3778 // FIXME: In the .o files, we have a GSYM and a debug symbol for all
3779 // the ObjC data. They
3780 // have the same address, but we want to ensure that we always find
3781 // only the real symbol, 'cause we don't currently correctly
3782 // attribute the GSYM one to the ObjCClass/Ivar/MetaClass symbol
3783 // type. This is a temporary hack to make sure the ObjectiveC
3784 // symbols get treated correctly. To do this right, we should
3785 // coalesce all the GSYM & global symbols that have the same
3786 // address.
3787 is_gsym = true;
3788 sym[sym_idx].SetExternal(true);
3789
3790 if (symbol_name && symbol_name[0] == '_' && symbol_name[1] == 'O') {
3791 llvm::StringRef symbol_name_ref(symbol_name);
3792 if (symbol_name_ref.startswith(g_objc_v2_prefix_class)) {
3793 symbol_name_non_abi_mangled = symbol_name + 1;
3794 symbol_name = symbol_name + g_objc_v2_prefix_class.size();
3795 type = eSymbolTypeObjCClass;
3796 demangled_is_synthesized = true;
3797
3798 } else if (symbol_name_ref.startswith(g_objc_v2_prefix_metaclass)) {
3799 symbol_name_non_abi_mangled = symbol_name + 1;
3800 symbol_name = symbol_name + g_objc_v2_prefix_metaclass.size();
3802 demangled_is_synthesized = true;
3803 } else if (symbol_name_ref.startswith(g_objc_v2_prefix_ivar)) {
3804 symbol_name_non_abi_mangled = symbol_name + 1;
3805 symbol_name = symbol_name + g_objc_v2_prefix_ivar.size();
3806 type = eSymbolTypeObjCIVar;
3807 demangled_is_synthesized = true;
3808 }
3809 } else {
3810 if (nlist.n_value != 0)
3811 symbol_section =
3812 section_info.GetSection(nlist.n_sect, nlist.n_value);
3813 type = eSymbolTypeData;
3814 }
3815 break;
3816
3817 case N_FNAME:
3818 // procedure name (f77 kludge): name,,NO_SECT,0,0
3819 type = eSymbolTypeCompiler;
3820 break;
3821
3822 case N_FUN:
3823 // procedure: name,,n_sect,linenumber,address
3824 if (symbol_name) {
3825 type = eSymbolTypeCode;
3826 symbol_section =
3827 section_info.GetSection(nlist.n_sect, nlist.n_value);
3828
3829 N_FUN_addr_to_sym_idx.insert(
3830 std::make_pair(nlist.n_value, sym_idx));
3831 // We use the current number of symbols in the symbol table in
3832 // lieu of using nlist_idx in case we ever start trimming entries
3833 // out
3834 N_FUN_indexes.push_back(sym_idx);
3835 } else {
3836 type = eSymbolTypeCompiler;
3837
3838 if (!N_FUN_indexes.empty()) {
3839 // Copy the size of the function into the original STAB entry
3840 // so we don't have to hunt for it later
3841 symtab.SymbolAtIndex(N_FUN_indexes.back())
3842 ->SetByteSize(nlist.n_value);
3843 N_FUN_indexes.pop_back();
3844 // We don't really need the end function STAB as it contains
3845 // the size which we already placed with the original symbol,
3846 // so don't add it if we want a minimal symbol table
3847 add_nlist = false;
3848 }
3849 }
3850 break;
3851
3852 case N_STSYM:
3853 // static symbol: name,,n_sect,type,address
3854 N_STSYM_addr_to_sym_idx.insert(
3855 std::make_pair(nlist.n_value, sym_idx));
3856 symbol_section = section_info.GetSection(nlist.n_sect, nlist.n_value);
3857 if (symbol_name && symbol_name[0]) {
3858 type = ObjectFile::GetSymbolTypeFromName(symbol_name + 1,
3860 }
3861 break;
3862
3863 case N_LCSYM:
3864 // .lcomm symbol: name,,n_sect,type,address
3865 symbol_section = section_info.GetSection(nlist.n_sect, nlist.n_value);
3867 break;
3868
3869 case N_BNSYM:
3870 // We use the current number of symbols in the symbol table in lieu
3871 // of using nlist_idx in case we ever start trimming entries out
3872 // Skip these if we want minimal symbol tables
3873 add_nlist = false;
3874 break;
3875
3876 case N_ENSYM:
3877 // Set the size of the N_BNSYM to the terminating index of this
3878 // N_ENSYM so that we can always skip the entire symbol if we need
3879 // to navigate more quickly at the source level when parsing STABS
3880 // Skip these if we want minimal symbol tables
3881 add_nlist = false;
3882 break;
3883
3884 case N_OPT:
3885 // emitted with gcc2_compiled and in gcc source
3886 type = eSymbolTypeCompiler;
3887 break;
3888
3889 case N_RSYM:
3890 // register sym: name,,NO_SECT,type,register
3891 type = eSymbolTypeVariable;
3892 break;
3893
3894 case N_SLINE:
3895 // src line: 0,,n_sect,linenumber,address
3896 symbol_section = section_info.GetSection(nlist.n_sect, nlist.n_value);
3897 type = eSymbolTypeLineEntry;
3898 break;
3899
3900 case N_SSYM:
3901 // structure elt: name,,NO_SECT,type,struct_offset
3903 break;
3904
3905 case N_SO:
3906 // source file name
3907 type = eSymbolTypeSourceFile;
3908 if (symbol_name == nullptr) {
3909 add_nlist = false;
3910 if (N_SO_index != UINT32_MAX) {
3911 // Set the size of the N_SO to the terminating index of this
3912 // N_SO so that we can always skip the entire N_SO if we need
3913 // to navigate more quickly at the source level when parsing
3914 // STABS
3915 symbol_ptr = symtab.SymbolAtIndex(N_SO_index);
3916 symbol_ptr->SetByteSize(sym_idx);
3917 symbol_ptr->SetSizeIsSibling(true);
3918 }
3919 N_NSYM_indexes.clear();
3920 N_INCL_indexes.clear();
3921 N_BRAC_indexes.clear();
3922 N_COMM_indexes.clear();
3923 N_FUN_indexes.clear();
3924 N_SO_index = UINT32_MAX;
3925 } else {
3926 // We use the current number of symbols in the symbol table in
3927 // lieu of using nlist_idx in case we ever start trimming entries
3928 // out
3929 const bool N_SO_has_full_path = symbol_name[0] == '/';
3930 if (N_SO_has_full_path) {
3931 if ((N_SO_index == sym_idx - 1) && ((sym_idx - 1) < num_syms)) {
3932 // We have two consecutive N_SO entries where the first
3933 // contains a directory and the second contains a full path.
3934 sym[sym_idx - 1].GetMangled().SetValue(
3935 ConstString(symbol_name));
3936 m_nlist_idx_to_sym_idx[nlist_idx] = sym_idx - 1;
3937 add_nlist = false;
3938 } else {
3939 // This is the first entry in a N_SO that contains a
3940 // directory or a full path to the source file
3941 N_SO_index = sym_idx;
3942 }
3943 } else if ((N_SO_index == sym_idx - 1) &&
3944 ((sym_idx - 1) < num_syms)) {
3945 // This is usually the second N_SO entry that contains just the
3946 // filename, so here we combine it with the first one if we are
3947 // minimizing the symbol table
3948 const char *so_path =
3949 sym[sym_idx - 1].GetMangled().GetDemangledName().AsCString();
3950 if (so_path && so_path[0]) {
3951 std::string full_so_path(so_path);
3952 const size_t double_slash_pos = full_so_path.find("//");
3953 if (double_slash_pos != std::string::npos) {
3954 // The linker has been generating bad N_SO entries with
3955 // doubled up paths in the format "%s%s" where the first
3956 // string is the DW_AT_comp_dir, and the second is the
3957 // directory for the source file so you end up with a path
3958 // that looks like "/tmp/src//tmp/src/"
3959 FileSpec so_dir(so_path);
3960 if (!FileSystem::Instance().Exists(so_dir)) {
3961 so_dir.SetFile(&full_so_path[double_slash_pos + 1],
3962 FileSpec::Style::native);
3963 if (FileSystem::Instance().Exists(so_dir)) {
3964 // Trim off the incorrect path
3965 full_so_path.erase(0, double_slash_pos + 1);
3966 }
3967 }
3968 }
3969 if (*full_so_path.rbegin() != '/')
3970 full_so_path += '/';
3971 full_so_path += symbol_name;
3972 sym[sym_idx - 1].GetMangled().SetValue(
3973 ConstString(full_so_path.c_str()));
3974 add_nlist = false;
3975 m_nlist_idx_to_sym_idx[nlist_idx] = sym_idx - 1;
3976 }
3977 } else {
3978 // This could be a relative path to a N_SO
3979 N_SO_index = sym_idx;
3980 }
3981 }
3982 break;
3983
3984 case N_OSO:
3985 // object file name: name,,0,0,st_mtime
3986 type = eSymbolTypeObjectFile;
3987 break;
3988
3989 case N_LSYM:
3990 // local sym: name,,NO_SECT,type,offset
3991 type = eSymbolTypeLocal;
3992 break;
3993
3994 // INCL scopes
3995 case N_BINCL:
3996 // include file beginning: name,,NO_SECT,0,sum We use the current
3997 // number of symbols in the symbol table in lieu of using nlist_idx
3998 // in case we ever start trimming entries out
3999 N_INCL_indexes.push_back(sym_idx);
4000 type = eSymbolTypeScopeBegin;
4001 break;
4002
4003 case N_EINCL:
4004 // include file end: name,,NO_SECT,0,0
4005 // Set the size of the N_BINCL to the terminating index of this
4006 // N_EINCL so that we can always skip the entire symbol if we need
4007 // to navigate more quickly at the source level when parsing STABS
4008 if (!N_INCL_indexes.empty()) {
4009 symbol_ptr = symtab.SymbolAtIndex(N_INCL_indexes.back());
4010 symbol_ptr->SetByteSize(sym_idx + 1);
4011 symbol_ptr->SetSizeIsSibling(true);
4012 N_INCL_indexes.pop_back();
4013 }
4014 type = eSymbolTypeScopeEnd;
4015 break;
4016
4017 case N_SOL:
4018 // #included file name: name,,n_sect,0,address
4019 type = eSymbolTypeHeaderFile;
4020
4021 // We currently don't use the header files on darwin
4022 add_nlist = false;
4023 break;
4024
4025 case N_PARAMS:
4026 // compiler parameters: name,,NO_SECT,0,0
4027 type = eSymbolTypeCompiler;
4028 break;
4029
4030 case N_VERSION:
4031 // compiler version: name,,NO_SECT,0,0
4032 type = eSymbolTypeCompiler;
4033 break;
4034
4035 case N_OLEVEL:
4036 // compiler -O level: name,,NO_SECT,0,0
4037 type = eSymbolTypeCompiler;
4038 break;
4039
4040 case N_PSYM:
4041 // parameter: name,,NO_SECT,type,offset
4042 type = eSymbolTypeVariable;
4043 break;
4044
4045 case N_ENTRY:
4046 // alternate entry: name,,n_sect,linenumber,address
4047 symbol_section = section_info.GetSection(nlist.n_sect, nlist.n_value);
4048 type = eSymbolTypeLineEntry;
4049 break;
4050
4051 // Left and Right Braces
4052 case N_LBRAC:
4053 // left bracket: 0,,NO_SECT,nesting level,address We use the
4054 // current number of symbols in the symbol table in lieu of using
4055 // nlist_idx in case we ever start trimming entries out
4056 symbol_section = section_info.GetSection(nlist.n_sect, nlist.n_value);
4057 N_BRAC_indexes.push_back(sym_idx);
4058 type = eSymbolTypeScopeBegin;
4059 break;
4060
4061 case N_RBRAC:
4062 // right bracket: 0,,NO_SECT,nesting level,address Set the size of
4063 // the N_LBRAC to the terminating index of this N_RBRAC so that we
4064 // can always skip the entire symbol if we need to navigate more
4065 // quickly at the source level when parsing STABS
4066 symbol_section = section_info.GetSection(nlist.n_sect, nlist.n_value);
4067 if (!N_BRAC_indexes.empty()) {
4068 symbol_ptr = symtab.SymbolAtIndex(N_BRAC_indexes.back());
4069 symbol_ptr->SetByteSize(sym_idx + 1);
4070 symbol_ptr->SetSizeIsSibling(true);
4071 N_BRAC_indexes.pop_back();
4072 }
4073 type = eSymbolTypeScopeEnd;
4074 break;
4075
4076 case N_EXCL:
4077 // deleted include file: name,,NO_SECT,0,sum
4078 type = eSymbolTypeHeaderFile;
4079 break;
4080
4081 // COMM scopes
4082 case N_BCOMM:
4083 // begin common: name,,NO_SECT,0,0
4084 // We use the current number of symbols in the symbol table in lieu
4085 // of using nlist_idx in case we ever start trimming entries out
4086 type = eSymbolTypeScopeBegin;
4087 N_COMM_indexes.push_back(sym_idx);
4088 break;
4089
4090 case N_ECOML:
4091 // end common (local name): 0,,n_sect,0,address
4092 symbol_section = section_info.GetSection(nlist.n_sect, nlist.n_value);
4093 [[fallthrough]];
4094
4095 case N_ECOMM:
4096 // end common: name,,n_sect,0,0
4097 // Set the size of the N_BCOMM to the terminating index of this
4098 // N_ECOMM/N_ECOML so that we can always skip the entire symbol if
4099 // we need to navigate more quickly at the source level when
4100 // parsing STABS
4101 if (!N_COMM_indexes.empty()) {
4102 symbol_ptr = symtab.SymbolAtIndex(N_COMM_indexes.back());
4103 symbol_ptr->SetByteSize(sym_idx + 1);
4104 symbol_ptr->SetSizeIsSibling(true);
4105 N_COMM_indexes.pop_back();
4106 }
4107 type = eSymbolTypeScopeEnd;
4108 break;
4109
4110 case N_LENG:
4111 // second stab entry with length information
4112 type = eSymbolTypeAdditional;
4113 break;
4114
4115 default:
4116 break;
4117 }
4118 } else {
4119 uint8_t n_type = N_TYPE & nlist.n_type;
4120 sym[sym_idx].SetExternal((N_EXT & nlist.n_type) != 0);
4121
4122 switch (n_type) {
4123 case N_INDR: {
4124 const char *reexport_name_cstr = strtab_data.PeekCStr(nlist.n_value);
4125 if (reexport_name_cstr && reexport_name_cstr[0] && symbol_name) {
4126 type = eSymbolTypeReExported;
4127 ConstString reexport_name(reexport_name_cstr +
4128 ((reexport_name_cstr[0] == '_') ? 1 : 0));
4129 sym[sym_idx].SetReExportedSymbolName(reexport_name);
4130 set_value = false;
4131 reexport_shlib_needs_fixup[sym_idx] = reexport_name;
4132 indirect_symbol_names.insert(
4133 ConstString(symbol_name + ((symbol_name[0] == '_') ? 1 : 0)));
4134 } else
4135 type = eSymbolTypeUndefined;
4136 } break;
4137
4138 case N_UNDF:
4139 if (symbol_name && symbol_name[0]) {
4140 ConstString undefined_name(symbol_name +
4141 ((symbol_name[0] == '_') ? 1 : 0));
4142 undefined_name_to_desc[undefined_name] = nlist.n_desc;
4143 }
4144 [[fallthrough]];
4145
4146 case N_PBUD:
4147 type = eSymbolTypeUndefined;
4148 break;
4149
4150 case N_ABS:
4151 type = eSymbolTypeAbsolute;
4152 break;
4153
4154 case N_SECT: {
4155 symbol_section = section_info.GetSection(nlist.n_sect, nlist.n_value);
4156
4157 if (!symbol_section) {
4158 // TODO: warn about this?
4159 add_nlist = false;
4160 break;
4161 }
4162
4163 if (TEXT_eh_frame_sectID == nlist.n_sect) {
4164 type = eSymbolTypeException;
4165 } else {
4166 uint32_t section_type = symbol_section->Get() & SECTION_TYPE;
4167
4168 switch (section_type) {
4169 case S_CSTRING_LITERALS:
4170 type = eSymbolTypeData;
4171 break; // section with only literal C strings
4172 case S_4BYTE_LITERALS:
4173 type = eSymbolTypeData;
4174 break; // section with only 4 byte literals
4175 case S_8BYTE_LITERALS:
4176 type = eSymbolTypeData;
4177 break; // section with only 8 byte literals
4178 case S_LITERAL_POINTERS:
4179 type = eSymbolTypeTrampoline;
4180 break; // section with only pointers to literals
4181 case S_NON_LAZY_SYMBOL_POINTERS:
4182 type = eSymbolTypeTrampoline;
4183 break; // section with only non-lazy symbol pointers
4184 case S_LAZY_SYMBOL_POINTERS:
4185 type = eSymbolTypeTrampoline;
4186 break; // section with only lazy symbol pointers
4187 case S_SYMBOL_STUBS:
4188 type = eSymbolTypeTrampoline;
4189 break; // section with only symbol stubs, byte size of stub in
4190 // the reserved2 field
4191 case S_MOD_INIT_FUNC_POINTERS:
4192 type = eSymbolTypeCode;
4193 break; // section with only function pointers for initialization
4194 case S_MOD_TERM_FUNC_POINTERS:
4195 type = eSymbolTypeCode;
4196 break; // section with only function pointers for termination
4197 case S_INTERPOSING:
4198 type = eSymbolTypeTrampoline;
4199 break; // section with only pairs of function pointers for
4200 // interposing
4201 case S_16BYTE_LITERALS:
4202 type = eSymbolTypeData;
4203 break; // section with only 16 byte literals
4204 case S_DTRACE_DOF:
4206 break;
4207 case S_LAZY_DYLIB_SYMBOL_POINTERS:
4208 type = eSymbolTypeTrampoline;
4209 break;
4210 default:
4211 switch (symbol_section->GetType()) {
4213 type = eSymbolTypeCode;
4214 break;
4215 case eSectionTypeData:
4216 case eSectionTypeDataCString: // Inlined C string data
4217 case eSectionTypeDataCStringPointers: // Pointers to C string
4218 // data
4219 case eSectionTypeDataSymbolAddress: // Address of a symbol in
4220 // the symbol table
4221 case eSectionTypeData4:
4222 case eSectionTypeData8:
4223 case eSectionTypeData16:
4224 type = eSymbolTypeData;
4225 break;
4226 default:
4227 break;
4228 }
4229 break;
4230 }
4231
4232 if (type == eSymbolTypeInvalid) {
4233 const char *symbol_sect_name =
4234 symbol_section->GetName().AsCString();
4235 if (symbol_section->IsDescendant(text_section_sp.get())) {
4236 if (symbol_section->IsClear(S_ATTR_PURE_INSTRUCTIONS |
4237 S_ATTR_SELF_MODIFYING_CODE |
4238 S_ATTR_SOME_INSTRUCTIONS))
4239 type = eSymbolTypeData;
4240 else
4241 type = eSymbolTypeCode;
4242 } else if (symbol_section->IsDescendant(data_section_sp.get()) ||
4243 symbol_section->IsDescendant(
4244 data_dirty_section_sp.get()) ||
4245 symbol_section->IsDescendant(
4246 data_const_section_sp.get())) {
4247 if (symbol_sect_name &&
4248 ::strstr(symbol_sect_name, "__objc") == symbol_sect_name) {
4249 type = eSymbolTypeRuntime;
4250
4251 if (symbol_name) {
4252 llvm::StringRef symbol_name_ref(symbol_name);
4253 if (symbol_name_ref.startswith("_OBJC_")) {
4254 llvm::StringRef g_objc_v2_prefix_class(
4255 "_OBJC_CLASS_$_");
4256 llvm::StringRef g_objc_v2_prefix_metaclass(
4257 "_OBJC_METACLASS_$_");
4258 llvm::StringRef g_objc_v2_prefix_ivar(
4259 "_OBJC_IVAR_$_");
4260 if (symbol_name_ref.startswith(g_objc_v2_prefix_class)) {
4261 symbol_name_non_abi_mangled = symbol_name + 1;
4262 symbol_name =
4263 symbol_name + g_objc_v2_prefix_class.size();
4264 type = eSymbolTypeObjCClass;
4265 demangled_is_synthesized = true;
4266 } else if (symbol_name_ref.startswith(
4267 g_objc_v2_prefix_metaclass)) {
4268 symbol_name_non_abi_mangled = symbol_name + 1;
4269 symbol_name =
4270 symbol_name + g_objc_v2_prefix_metaclass.size();
4272 demangled_is_synthesized = true;
4273 } else if (symbol_name_ref.startswith(
4274 g_objc_v2_prefix_ivar)) {
4275 symbol_name_non_abi_mangled = symbol_name + 1;
4276 symbol_name =
4277 symbol_name + g_objc_v2_prefix_ivar.size();
4278 type = eSymbolTypeObjCIVar;
4279 demangled_is_synthesized = true;
4280 }
4281 }
4282 }
4283 } else if (symbol_sect_name &&
4284 ::strstr(symbol_sect_name, "__gcc_except_tab") ==
4285 symbol_sect_name) {
4286 type = eSymbolTypeException;
4287 } else {
4288 type = eSymbolTypeData;
4289 }
4290 } else if (symbol_sect_name &&
4291 ::strstr(symbol_sect_name, "__IMPORT") ==
4292 symbol_sect_name) {
4293 type = eSymbolTypeTrampoline;
4294 } else if (symbol_section->IsDescendant(objc_section_sp.get())) {
4295 type = eSymbolTypeRuntime;
4296 if (symbol_name && symbol_name[0] == '.') {
4297 llvm::StringRef symbol_name_ref(symbol_name);
4298 llvm::StringRef g_objc_v1_prefix_class(
4299 ".objc_class_name_");
4300 if (symbol_name_ref.startswith(g_objc_v1_prefix_class)) {
4301 symbol_name_non_abi_mangled = symbol_name;
4302 symbol_name = symbol_name + g_objc_v1_prefix_class.size();
4303 type = eSymbolTypeObjCClass;
4304 demangled_is_synthesized = true;
4305 }
4306 }
4307 }
4308 }
4309 }
4310 } break;
4311 }
4312 }
4313
4314 if (!add_nlist) {
4315 sym[sym_idx].Clear();
4316 return true;
4317 }
4318
4319 uint64_t symbol_value = nlist.n_value;
4320
4321 if (symbol_name_non_abi_mangled) {
4322 sym[sym_idx].GetMangled().SetMangledName(
4323 ConstString(symbol_name_non_abi_mangled));
4324 sym[sym_idx].GetMangled().SetDemangledName(ConstString(symbol_name));
4325 } else {
4326
4327 if (symbol_name && symbol_name[0] == '_') {
4328 symbol_name++; // Skip the leading underscore
4329 }
4330
4331 if (symbol_name) {
4332 ConstString const_symbol_name(symbol_name);
4333 sym[sym_idx].GetMangled().SetValue(const_symbol_name);
4334 }
4335 }
4336
4337 if (is_gsym) {
4338 const char *gsym_name = sym[sym_idx]
4339 .GetMangled()
4341 .GetCString();
4342 if (gsym_name)
4343 N_GSYM_name_to_sym_idx[gsym_name] = sym_idx;
4344 }
4345
4346 if (symbol_section) {
4347 const addr_t section_file_addr = symbol_section->GetFileAddress();
4348 if (symbol_byte_size == 0 && function_starts_count > 0) {
4349 addr_t symbol_lookup_file_addr = nlist.n_value;
4350 // Do an exact address match for non-ARM addresses, else get the
4351 // closest since the symbol might be a thumb symbol which has an
4352 // address with bit zero set.
4353 FunctionStarts::Entry *func_start_entry =
4354 function_starts.FindEntry(symbol_lookup_file_addr, !is_arm);
4355 if (is_arm && func_start_entry) {
4356 // Verify that the function start address is the symbol address
4357 // (ARM) or the symbol address + 1 (thumb).
4358 if (func_start_entry->addr != symbol_lookup_file_addr &&
4359 func_start_entry->addr != (symbol_lookup_file_addr + 1)) {
4360 // Not the right entry, NULL it out...
4361 func_start_entry = nullptr;
4362 }
4363 }
4364 if (func_start_entry) {
4365 func_start_entry->data = true;
4366
4367 addr_t symbol_file_addr = func_start_entry->addr;
4368 if (is_arm)
4369 symbol_file_addr &= THUMB_ADDRESS_BIT_MASK;
4370
4371 const FunctionStarts::Entry *next_func_start_entry =
4372 function_starts.FindNextEntry(func_start_entry);
4373 const addr_t section_end_file_addr =
4374 section_file_addr + symbol_section->GetByteSize();
4375 if (next_func_start_entry) {
4376 addr_t next_symbol_file_addr = next_func_start_entry->addr;
4377 // Be sure to clear the Thumb address bit when we calculate the
4378 // size from the current and next address
4379 if (is_arm)
4380 next_symbol_file_addr &= THUMB_ADDRESS_BIT_MASK;
4381 symbol_byte_size = std::min<lldb::addr_t>(
4382 next_symbol_file_addr - symbol_file_addr,
4383 section_end_file_addr - symbol_file_addr);
4384 } else {
4385 symbol_byte_size = section_end_file_addr - symbol_file_addr;
4386 }
4387 }
4388 }
4389 symbol_value -= section_file_addr;
4390 }
4391
4392 if (!is_debug) {
4393 if (type == eSymbolTypeCode) {
4394 // See if we can find a N_FUN entry for any code symbols. If we do
4395 // find a match, and the name matches, then we can merge the two into
4396 // just the function symbol to avoid duplicate entries in the symbol
4397 // table.
4398 std::pair<ValueToSymbolIndexMap::const_iterator,
4399 ValueToSymbolIndexMap::const_iterator>
4400 range;
4401 range = N_FUN_addr_to_sym_idx.equal_range(nlist.n_value);
4402 if (range.first != range.second) {
4403 for (ValueToSymbolIndexMap::const_iterator pos = range.first;
4404 pos != range.second; ++pos) {
4405 if (sym[sym_idx].GetMangled().GetName(Mangled::ePreferMangled) ==
4406 sym[pos->second].GetMangled().GetName(
4408 m_nlist_idx_to_sym_idx[nlist_idx] = pos->second;
4409 // We just need the flags from the linker symbol, so put these
4410 // flags into the N_FUN flags to avoid duplicate symbols in the
4411 // symbol table.
4412 sym[pos->second].SetExternal(sym[sym_idx].IsExternal());
4413 sym[pos->second].SetFlags(nlist.n_type << 16 | nlist.n_desc);
4414 if (resolver_addresses.find(nlist.n_value) !=
4415 resolver_addresses.end())
4416 sym[pos->second].SetType(eSymbolTypeResolver);
4417 sym[sym_idx].Clear();
4418 return true;
4419 }
4420 }
4421 } else {
4422 if (resolver_addresses.find(nlist.n_value) !=
4423 resolver_addresses.end())
4424 type = eSymbolTypeResolver;
4425 }
4426 } else if (type == eSymbolTypeData || type == eSymbolTypeObjCClass ||
4427 type == eSymbolTypeObjCMetaClass ||
4428 type == eSymbolTypeObjCIVar) {
4429 // See if we can find a N_STSYM entry for any data symbols. If we do
4430 // find a match, and the name matches, then we can merge the two into
4431 // just the Static symbol to avoid duplicate entries in the symbol
4432 // table.
4433 std::pair<ValueToSymbolIndexMap::const_iterator,
4434 ValueToSymbolIndexMap::const_iterator>
4435 range;
4436 range = N_STSYM_addr_to_sym_idx.equal_range(nlist.n_value);
4437 if (range.first != range.second) {
4438 for (ValueToSymbolIndexMap::const_iterator pos = range.first;
4439 pos != range.second; ++pos) {
4440 if (sym[sym_idx].GetMangled().GetName(Mangled::ePreferMangled) ==
4441 sym[pos->second].GetMangled().GetName(
4443 m_nlist_idx_to_sym_idx[nlist_idx] = pos->second;
4444 // We just need the flags from the linker symbol, so put these
4445 // flags into the N_STSYM flags to avoid duplicate symbols in
4446 // the symbol table.
4447 sym[pos->second].SetExternal(sym[sym_idx].IsExternal());
4448 sym[pos->second].SetFlags(nlist.n_type << 16 | nlist.n_desc);
4449 sym[sym_idx].Clear();
4450 return true;
4451 }
4452 }
4453 } else {
4454 // Combine N_GSYM stab entries with the non stab symbol.
4455 const char *gsym_name = sym[sym_idx]
4456 .GetMangled()
4458 .GetCString();
4459 if (gsym_name) {
4460 ConstNameToSymbolIndexMap::const_iterator pos =
4461 N_GSYM_name_to_sym_idx.find(gsym_name);
4462 if (pos != N_GSYM_name_to_sym_idx.end()) {
4463 const uint32_t GSYM_sym_idx = pos->second;
4464 m_nlist_idx_to_sym_idx[nlist_idx] = GSYM_sym_idx;
4465 // Copy the address, because often the N_GSYM address has an
4466 // invalid address of zero when the global is a common symbol.
4467 sym[GSYM_sym_idx].GetAddressRef().SetSection(symbol_section);
4468 sym[GSYM_sym_idx].GetAddressRef().SetOffset(symbol_value);
4469 add_symbol_addr(
4470 sym[GSYM_sym_idx].GetAddress().GetFileAddress());
4471 // We just need the flags from the linker symbol, so put these
4472 // flags into the N_GSYM flags to avoid duplicate symbols in
4473 // the symbol table.
4474 sym[GSYM_sym_idx].SetFlags(nlist.n_type << 16 | nlist.n_desc);
4475 sym[sym_idx].Clear();
4476 return true;
4477 }
4478 }
4479 }
4480 }
4481 }
4482
4483 sym[sym_idx].SetID(nlist_idx);
4484 sym[sym_idx].SetType(type);
4485 if (set_value) {
4486 sym[sym_idx].GetAddressRef().SetSection(symbol_section);
4487 sym[sym_idx].GetAddressRef().SetOffset(symbol_value);
4488 if (symbol_section)
4489 add_symbol_addr(sym[sym_idx].GetAddress().GetFileAddress());
4490 }
4491 sym[sym_idx].SetFlags(nlist.n_type << 16 | nlist.n_desc);
4492 if (nlist.n_desc & N_WEAK_REF)
4493 sym[sym_idx].SetIsWeak(true);
4494
4495 if (symbol_byte_size > 0)
4496 sym[sym_idx].SetByteSize(symbol_byte_size);
4497
4498 if (demangled_is_synthesized)
4499 sym[sym_idx].SetDemangledNameIsSynthesized(true);
4500
4501 ++sym_idx;
4502 return true;
4503 };
4504
4505 // First parse all the nlists but don't process them yet. See the next
4506 // comment for an explanation why.
4507 std::vector<struct nlist_64> nlists;
4508 nlists.reserve(symtab_load_command.nsyms);
4509 for (; nlist_idx < symtab_load_command.nsyms; ++nlist_idx) {
4510 if (auto nlist =
4511 ParseNList(nlist_data, nlist_data_offset, nlist_byte_size))
4512 nlists.push_back(*nlist);
4513 else
4514 break;
4515 }
4516
4517 // Now parse all the debug symbols. This is needed to merge non-debug
4518 // symbols in the next step. Non-debug symbols are always coalesced into
4519 // the debug symbol. Doing this in one step would mean that some symbols
4520 // won't be merged.
4521 nlist_idx = 0;
4522 for (auto &nlist : nlists) {
4523 if (!ParseSymbolLambda(nlist, nlist_idx++, DebugSymbols))
4524 break;
4525 }
4526
4527 // Finally parse all the non debug symbols.
4528 nlist_idx = 0;
4529 for (auto &nlist : nlists) {
4530 if (!ParseSymbolLambda(nlist, nlist_idx++, NonDebugSymbols))
4531 break;
4532 }
4533
4534 for (const auto &pos : reexport_shlib_needs_fixup) {
4535 const auto undef_pos = undefined_name_to_desc.find(pos.second);
4536 if (undef_pos != undefined_name_to_desc.end()) {
4537 const uint8_t dylib_ordinal =
4538 llvm::MachO::GET_LIBRARY_ORDINAL(undef_pos->second);
4539 if (dylib_ordinal > 0 && dylib_ordinal < dylib_files.GetSize())
4540 sym[pos.first].SetReExportedSymbolSharedLibrary(
4541 dylib_files.GetFileSpecAtIndex(dylib_ordinal - 1));
4542 }
4543 }
4544 }
4545
4546 // Count how many trie symbols we'll add to the symbol table
4547 int trie_symbol_table_augment_count = 0;
4548 for (auto &e : external_sym_trie_entries) {
4549 if (!symbols_added.contains(e.entry.address))
4550 trie_symbol_table_augment_count++;
4551 }
4552
4553 if (num_syms < sym_idx + trie_symbol_table_augment_count) {
4554 num_syms = sym_idx + trie_symbol_table_augment_count;
4555 sym = symtab.Resize(num_syms);
4556 }
4557 uint32_t synthetic_sym_id = symtab_load_command.nsyms;
4558
4559 // Add symbols from the trie to the symbol table.
4560 for (auto &e : external_sym_trie_entries) {
4561 if (symbols_added.contains(e.entry.address))
4562 continue;
4563
4564 // Find the section that this trie address is in, use that to annotate
4565 // symbol type as we add the trie address and name to the symbol table.
4566 Address symbol_addr;
4567 if (module_sp->ResolveFileAddress(e.entry.address, symbol_addr)) {
4568 SectionSP symbol_section(symbol_addr.GetSection());
4569 const char *symbol_name = e.entry.name.GetCString();
4570 bool demangled_is_synthesized = false;
4571 SymbolType type =
4572 GetSymbolType(symbol_name, demangled_is_synthesized, text_section_sp,
4573 data_section_sp, data_dirty_section_sp,
4574 data_const_section_sp, symbol_section);
4575
4576 sym[sym_idx].SetType(type);
4577 if (symbol_section) {
4578 sym[sym_idx].SetID(synthetic_sym_id++);
4579 sym[sym_idx].GetMangled().SetMangledName(ConstString(symbol_name));
4580 if (demangled_is_synthesized)
4581 sym[sym_idx].SetDemangledNameIsSynthesized(true);
4582 sym[sym_idx].SetIsSynthetic(true);
4583 sym[sym_idx].SetExternal(true);
4584 sym[sym_idx].GetAddressRef() = symbol_addr;
4585 add_symbol_addr(symbol_addr.GetFileAddress());
4586 if (e.entry.flags & TRIE_SYMBOL_IS_THUMB)
4588 ++sym_idx;
4589 }
4590 }
4591 }
4592
4593 if (function_starts_count > 0) {
4594 uint32_t num_synthetic_function_symbols = 0;
4595 for (i = 0; i < function_starts_count; ++i) {
4596 if (!symbols_added.contains(function_starts.GetEntryRef(i).addr))
4597 ++num_synthetic_function_symbols;
4598 }
4599
4600 if (num_synthetic_function_symbols > 0) {
4601 if (num_syms < sym_idx + num_synthetic_function_symbols) {
4602 num_syms = sym_idx + num_synthetic_function_symbols;
4603 sym = symtab.Resize(num_syms);
4604 }
4605 for (i = 0; i < function_starts_count; ++i) {
4606 const FunctionStarts::Entry *func_start_entry =
4607 function_starts.GetEntryAtIndex(i);
4608 if (!symbols_added.contains(func_start_entry->addr)) {
4609 addr_t symbol_file_addr = func_start_entry->addr;
4610 uint32_t symbol_flags = 0;
4611 if (func_start_entry->data)
4612 symbol_flags = MACHO_NLIST_ARM_SYMBOL_IS_THUMB;
4613 Address symbol_addr;
4614 if (module_sp->ResolveFileAddress(symbol_file_addr, symbol_addr)) {
4615 SectionSP symbol_section(symbol_addr.GetSection());
4616 uint32_t symbol_byte_size = 0;
4617 if (symbol_section) {
4618 const addr_t section_file_addr = symbol_section->GetFileAddress();
4619 const FunctionStarts::Entry *next_func_start_entry =
4620 function_starts.FindNextEntry(func_start_entry);
4621 const addr_t section_end_file_addr =
4622 section_file_addr + symbol_section->GetByteSize();
4623 if (next_func_start_entry) {
4624 addr_t next_symbol_file_addr = next_func_start_entry->addr;
4625 if (is_arm)
4626 next_symbol_file_addr &= THUMB_ADDRESS_BIT_MASK;
4627 symbol_byte_size = std::min<lldb::addr_t>(
4628 next_symbol_file_addr - symbol_file_addr,
4629 section_end_file_addr - symbol_file_addr);
4630 } else {
4631 symbol_byte_size = section_end_file_addr - symbol_file_addr;
4632 }
4633 sym[sym_idx].SetID(synthetic_sym_id++);
4634 // Don't set the name for any synthetic symbols, the Symbol
4635 // object will generate one if needed when the name is accessed
4636 // via accessors.
4637 sym[sym_idx].GetMangled().SetDemangledName(ConstString());
4638 sym[sym_idx].SetType(eSymbolTypeCode);
4639 sym[sym_idx].SetIsSynthetic(true);
4640 sym[sym_idx].GetAddressRef() = symbol_addr;
4641 add_symbol_addr(symbol_addr.GetFileAddress());
4642 if (symbol_flags)
4643 sym[sym_idx].SetFlags(symbol_flags);
4644 if (symbol_byte_size)
4645 sym[sym_idx].SetByteSize(symbol_byte_size);
4646 ++sym_idx;
4647 }
4648 }
4649 }
4650 }
4651 }
4652 }
4653
4654 // Trim our symbols down to just what we ended up with after removing any
4655 // symbols.
4656 if (sym_idx < num_syms) {
4657 num_syms = sym_idx;
4658 sym = symtab.Resize(num_syms);
4659 }
4660
4661 // Now synthesize indirect symbols
4662 if (m_dysymtab.nindirectsyms != 0) {
4663 if (indirect_symbol_index_data.GetByteSize()) {
4664 NListIndexToSymbolIndexMap::const_iterator end_index_pos =
4665 m_nlist_idx_to_sym_idx.end();
4666
4667 for (uint32_t sect_idx = 1; sect_idx < m_mach_sections.size();
4668 ++sect_idx) {
4669 if ((m_mach_sections[sect_idx].flags & SECTION_TYPE) ==
4670 S_SYMBOL_STUBS) {
4671 uint32_t symbol_stub_byte_size = m_mach_sections[sect_idx].reserved2;
4672 if (symbol_stub_byte_size == 0)
4673 continue;
4674
4675 const uint32_t num_symbol_stubs =
4676 m_mach_sections[sect_idx].size / symbol_stub_byte_size;
4677
4678 if (num_symbol_stubs == 0)
4679 continue;
4680
4681 const uint32_t symbol_stub_index_offset =
4682 m_mach_sections[sect_idx].reserved1;
4683 for (uint32_t stub_idx = 0; stub_idx < num_symbol_stubs; ++stub_idx) {
4684 const uint32_t symbol_stub_index =
4685 symbol_stub_index_offset + stub_idx;
4686 const lldb::addr_t symbol_stub_addr =
4687 m_mach_sections[sect_idx].addr +
4688 (stub_idx * symbol_stub_byte_size);
4689 lldb::offset_t symbol_stub_offset = symbol_stub_index * 4;
4690 if (indirect_symbol_index_data.ValidOffsetForDataOfSize(
4691 symbol_stub_offset, 4)) {
4692 const uint32_t stub_sym_id =
4693 indirect_symbol_index_data.GetU32(&symbol_stub_offset);
4694 if (stub_sym_id & (INDIRECT_SYMBOL_ABS | INDIRECT_SYMBOL_LOCAL))
4695 continue;
4696
4697 NListIndexToSymbolIndexMap::const_iterator index_pos =
4698 m_nlist_idx_to_sym_idx.find(stub_sym_id);
4699 Symbol *stub_symbol = nullptr;
4700 if (index_pos != end_index_pos) {
4701 // We have a remapping from the original nlist index to a
4702 // current symbol index, so just look this up by index
4703 stub_symbol = symtab.SymbolAtIndex(index_pos->second);
4704 } else {
4705 // We need to lookup a symbol using the original nlist symbol
4706 // index since this index is coming from the S_SYMBOL_STUBS
4707 stub_symbol = symtab.FindSymbolByID(stub_sym_id);
4708 }
4709
4710 if (stub_symbol) {
4711 Address so_addr(symbol_stub_addr, section_list);
4712
4713 if (stub_symbol->GetType() == eSymbolTypeUndefined) {
4714 // Change the external symbol into a trampoline that makes
4715 // sense. These symbols were N_UNDF N_EXT, and are useless
4716 // to us, so we can re-use them so we don't have to make up
4717 // a synthetic symbol for no good reason.
4718 if (resolver_addresses.find(symbol_stub_addr) ==
4719 resolver_addresses.end())
4720 stub_symbol->SetType(eSymbolTypeTrampoline);
4721 else
4722 stub_symbol->SetType(eSymbolTypeResolver);
4723 stub_symbol->SetExternal(false);
4724 stub_symbol->GetAddressRef() = so_addr;
4725 stub_symbol->SetByteSize(symbol_stub_byte_size);
4726 } else {
4727 // Make a synthetic symbol to describe the trampoline stub
4728 Mangled stub_symbol_mangled_name(stub_symbol->GetMangled());
4729 if (sym_idx >= num_syms) {
4730 sym = symtab.Resize(++num_syms);
4731 stub_symbol = nullptr; // this pointer no longer valid
4732 }
4733 sym[sym_idx].SetID(synthetic_sym_id++);
4734 sym[sym_idx].GetMangled() = stub_symbol_mangled_name;
4735 if (resolver_addresses.find(symbol_stub_addr) ==
4736 resolver_addresses.end())
4737 sym[sym_idx].SetType(eSymbolTypeTrampoline);
4738 else
4739 sym[sym_idx].SetType(eSymbolTypeResolver);
4740 sym[sym_idx].SetIsSynthetic(true);
4741 sym[sym_idx].GetAddressRef() = so_addr;
4742 add_symbol_addr(so_addr.GetFileAddress());
4743 sym[sym_idx].SetByteSize(symbol_stub_byte_size);
4744 ++sym_idx;
4745 }
4746 } else {
4747 if (log)
4748 log->Warning("symbol stub referencing symbol table symbol "
4749 "%u that isn't in our minimal symbol table, "
4750 "fix this!!!",
4751 stub_sym_id);
4752 }
4753 }
4754 }
4755 }
4756 }
4757 }
4758 }
4759
4760 if (!reexport_trie_entries.empty()) {
4761 for (const auto &e : reexport_trie_entries) {
4762 if (e.entry.import_name) {
4763 // Only add indirect symbols from the Trie entries if we didn't have
4764 // a N_INDR nlist entry for this already
4765 if (indirect_symbol_names.find(e.entry.name) ==
4766 indirect_symbol_names.end()) {
4767 // Make a synthetic symbol to describe re-exported symbol.
4768 if (sym_idx >= num_syms)
4769 sym = symtab.Resize(++num_syms);
4770 sym[sym_idx].SetID(synthetic_sym_id++);
4771 sym[sym_idx].GetMangled() = Mangled(e.entry.name);
4772 sym[sym_idx].SetType(eSymbolTypeReExported);
4773 sym[sym_idx].SetIsSynthetic(true);
4774 sym[sym_idx].SetReExportedSymbolName(e.entry.import_name);
4775 if (e.entry.other > 0 && e.entry.other <= dylib_files.GetSize()) {
4777 dylib_files.GetFileSpecAtIndex(e.entry.other - 1));
4778 }
4779 ++sym_idx;
4780 }
4781 }
4782 }
4783 }
4784}
4785
4787 ModuleSP module_sp(GetModule());
4788 if (module_sp) {
4789 std::lock_guard<std::recursive_mutex> guard(module_sp->GetMutex());
4790 s->Printf("%p: ", static_cast<void *>(this));
4791 s->Indent();
4792 if (m_header.magic == MH_MAGIC_64 || m_header.magic == MH_CIGAM_64)
4793 s->PutCString("ObjectFileMachO64");
4794 else
4795 s->PutCString("ObjectFileMachO32");
4796
4797 *s << ", file = '" << m_file;
4798 ModuleSpecList all_specs;
4799 ModuleSpec base_spec;
4801 base_spec, all_specs);
4802 for (unsigned i = 0, e = all_specs.GetSize(); i != e; ++i) {
4803 *s << "', triple";
4804 if (e)
4805 s->Printf("[%d]", i);
4806 *s << " = ";
4807 *s << all_specs.GetModuleSpecRefAtIndex(i)
4809 .GetTriple()
4810 .getTriple();
4811 }
4812 *s << "\n";
4813 SectionList *sections = GetSectionList();
4814 if (sections)
4815 sections->Dump(s->AsRawOstream(), s->GetIndentLevel(), nullptr, true,
4816 UINT32_MAX);
4817
4818 if (m_symtab_up)
4819 m_symtab_up->Dump(s, nullptr, eSortOrderNone);
4820 }
4821}
4822
4823UUID ObjectFileMachO::GetUUID(const llvm::MachO::mach_header &header,
4824 const lldb_private::DataExtractor &data,
4825 lldb::offset_t lc_offset) {
4826 uint32_t i;
4827 llvm::MachO::uuid_command load_cmd;
4828
4829 lldb::offset_t offset = lc_offset;
4830 for (i = 0; i < header.ncmds; ++i) {
4831 const lldb::offset_t cmd_offset = offset;
4832 if (data.GetU32(&offset, &load_cmd, 2) == nullptr)
4833 break;
4834
4835 if (load_cmd.cmd == LC_UUID) {
4836 const uint8_t *uuid_bytes = data.PeekData(offset, 16);
4837
4838 if (uuid_bytes) {
4839 // OpenCL on Mac OS X uses the same UUID for each of its object files.
4840 // We pretend these object files have no UUID to prevent crashing.
4841
4842 const uint8_t opencl_uuid[] = {0x8c, 0x8e, 0xb3, 0x9b, 0x3b, 0xa8,
4843 0x4b, 0x16, 0xb6, 0xa4, 0x27, 0x63,
4844 0xbb, 0x14, 0xf0, 0x0d};
4845
4846 if (!memcmp(uuid_bytes, opencl_uuid, 16))
4847 return UUID();
4848
4849 return UUID(uuid_bytes, 16);
4850 }
4851 return UUID();
4852 }
4853 offset = cmd_offset + load_cmd.cmdsize;
4854 }
4855 return UUID();
4856}
4857
4858static llvm::StringRef GetOSName(uint32_t cmd) {
4859 switch (cmd) {
4860 case llvm::MachO::LC_VERSION_MIN_IPHONEOS:
4861 return llvm::Triple::getOSTypeName(llvm::Triple::IOS);
4862 case llvm::MachO::LC_VERSION_MIN_MACOSX:
4863 return llvm::Triple::getOSTypeName(llvm::Triple::MacOSX);
4864 case llvm::MachO::LC_VERSION_MIN_TVOS:
4865 return llvm::Triple::getOSTypeName(llvm::Triple::TvOS);
4866 case llvm::MachO::LC_VERSION_MIN_WATCHOS:
4867 return llvm::Triple::getOSTypeName(llvm::Triple::WatchOS);
4868 default:
4869 llvm_unreachable("unexpected LC_VERSION load command");
4870 }
4871}
4872
4873namespace {
4874struct OSEnv {
4875 llvm::StringRef os_type;
4876 llvm::StringRef environment;
4877 OSEnv(uint32_t cmd) {
4878 switch (cmd) {
4879 case llvm::MachO::PLATFORM_MACOS:
4880 os_type = llvm::Triple::getOSTypeName(llvm::Triple::MacOSX);
4881 return;
4882 case llvm::MachO::PLATFORM_IOS:
4883 os_type = llvm::Triple::getOSTypeName(llvm::Triple::IOS);
4884 return;
4885 case llvm::MachO::PLATFORM_TVOS:
4886 os_type = llvm::Triple::getOSTypeName(llvm::Triple::TvOS);
4887 return;
4888 case llvm::MachO::PLATFORM_WATCHOS:
4889 os_type = llvm::Triple::getOSTypeName(llvm::Triple::WatchOS);
4890 return;
4891 // TODO: add BridgeOS & DriverKit once in llvm/lib/Support/Triple.cpp
4892 // NEED_BRIDGEOS_TRIPLE
4893 // case llvm::MachO::PLATFORM_BRIDGEOS:
4894 // os_type = llvm::Triple::getOSTypeName(llvm::Triple::BridgeOS);
4895 // return;
4896 // case llvm::MachO::PLATFORM_DRIVERKIT:
4897 // os_type = llvm::Triple::getOSTypeName(llvm::Triple::DriverKit);
4898 // return;
4899 case llvm::MachO::PLATFORM_MACCATALYST:
4900 os_type = llvm::Triple::getOSTypeName(llvm::Triple::IOS);
4901 environment = llvm::Triple::getEnvironmentTypeName(llvm::Triple::MacABI);
4902 return;
4903 case llvm::MachO::PLATFORM_IOSSIMULATOR:
4904 os_type = llvm::Triple::getOSTypeName(llvm::Triple::IOS);
4905 environment =
4906 llvm::Triple::getEnvironmentTypeName(llvm::Triple::Simulator);
4907 return;
4908 case llvm::MachO::PLATFORM_TVOSSIMULATOR:
4909 os_type = llvm::Triple::getOSTypeName(llvm::Triple::TvOS);
4910 environment =
4911 llvm::Triple::getEnvironmentTypeName(llvm::Triple::Simulator);
4912 return;
4913 case llvm::MachO::PLATFORM_WATCHOSSIMULATOR:
4914 os_type = llvm::Triple::getOSTypeName(llvm::Triple::WatchOS);
4915 environment =
4916 llvm::Triple::getEnvironmentTypeName(llvm::Triple::Simulator);
4917 return;
4918 default: {
4919 Log *log(GetLog(LLDBLog::Symbols | LLDBLog::Process));
4920 LLDB_LOGF(log, "unsupported platform in LC_BUILD_VERSION");
4921 }
4922 }
4923 }
4924};
4925
/// A packed 32-bit version split into its components: the upper 16 bits are
/// the major version, the next 8 bits the minor version and the low 8 bits
/// the patch version.
struct MinOS {
  uint32_t major_version;
  uint32_t minor_version;
  uint32_t patch_version;

  MinOS(uint32_t version) {
    const uint32_t byte_mask = 0xffu;
    major_version = version >> 16;
    minor_version = (version >> 8) & byte_mask;
    patch_version = version & byte_mask;
  }
};
4932} // namespace
4933
4934void ObjectFileMachO::GetAllArchSpecs(const llvm::MachO::mach_header &header,
4935 const lldb_private::DataExtractor &data,
4936 lldb::offset_t lc_offset,
4937 ModuleSpec &base_spec,
4938 lldb_private::ModuleSpecList &all_specs) {
4939 auto &base_arch = base_spec.GetArchitecture();
4940 base_arch.SetArchitecture(eArchTypeMachO, header.cputype, header.cpusubtype);
4941 if (!base_arch.IsValid())
4942 return;
4943
4944 bool found_any = false;
4945 auto add_triple = [&](const llvm::Triple &triple) {
4946 auto spec = base_spec;
4947 spec.GetArchitecture().GetTriple() = triple;
4948 if (spec.GetArchitecture().IsValid()) {
4949 spec.GetUUID() = ObjectFileMachO::GetUUID(header, data, lc_offset);
4950 all_specs.Append(spec);
4951 found_any = true;
4952 }
4953 };
4954
4955 // Set OS to an unspecified unknown or a "*" so it can match any OS
4956 llvm::Triple base_triple = base_arch.GetTriple();
4957 base_triple.setOS(llvm::Triple::UnknownOS);
4958 base_triple.setOSName(llvm::StringRef());
4959
4960 if (header.filetype == MH_PRELOAD) {
4961 if (header.cputype == CPU_TYPE_ARM) {
4962 // If this is a 32-bit arm binary, and it's a standalone binary, force
4963 // the Vendor to Apple so we don't accidentally pick up the generic
4964 // armv7 ABI at runtime. Apple's armv7 ABI always uses r7 for the
4965 // frame pointer register; most other armv7 ABIs use a combination of
4966 // r7 and r11.
4967 base_triple.setVendor(llvm::Triple::Apple);
4968 } else {
4969 // Set vendor to an unspecified unknown or a "*" so it can match any
4970 // vendor This is required for correct behavior of EFI debugging on
4971 // x86_64
4972 base_triple.setVendor(llvm::Triple::UnknownVendor);
4973 base_triple.setVendorName(llvm::StringRef());
4974 }
4975 return add_triple(base_triple);
4976 }
4977
4978 llvm::MachO::load_command load_cmd;
4979
4980 // See if there is an LC_VERSION_MIN_* load command that can give
4981 // us the OS type.
4982 lldb::offset_t offset = lc_offset;
4983 for (uint32_t i = 0; i < header.ncmds; ++i) {
4984 const lldb::offset_t cmd_offset = offset;
4985 if (data.GetU32(&offset, &load_cmd, 2) == nullptr)
4986 break;
4987
4988 llvm::MachO::version_min_command version_min;
4989 switch (load_cmd.cmd) {
4990 case llvm::MachO::LC_VERSION_MIN_MACOSX:
4991 case llvm::MachO::LC_VERSION_MIN_IPHONEOS:
4992 case llvm::MachO::LC_VERSION_MIN_TVOS:
4993 case llvm::MachO::LC_VERSION_MIN_WATCHOS: {
4994 if (load_cmd.cmdsize != sizeof(version_min))
4995 break;
4996 if (data.ExtractBytes(cmd_offset, sizeof(version_min),
4997 data.GetByteOrder(), &version_min) == 0)
4998 break;
4999 MinOS min_os(version_min.version);
5000 llvm::SmallString<32> os_name;
5001 llvm::raw_svector_ostream os(os_name);
5002 os << GetOSName(load_cmd.cmd) << min_os.major_version << '.'
5003 << min_os.minor_version << '.' << min_os.patch_version;
5004
5005 auto triple = base_triple;
5006 triple.setOSName(os.str());
5007
5008 // Disambiguate legacy simulator platforms.
5009 if (load_cmd.cmd != llvm::MachO::LC_VERSION_MIN_MACOSX &&
5010 (base_triple.getArch() == llvm::Triple::x86_64 ||
5011 base_triple.getArch() == llvm::Triple::x86)) {
5012 // The combination of legacy LC_VERSION_MIN load command and
5013 // x86 architecture always indicates a simulator environment.
5014 // The combination of LC_VERSION_MIN and arm architecture only
5015 // appears for native binaries. Back-deploying simulator
5016 // binaries on Apple Silicon Macs use the modern unambigous
5017 // LC_BUILD_VERSION load commands; no special handling required.
5018 triple.setEnvironment(llvm::Triple::Simulator);
5019 }
5020 add_triple(triple);
5021 break;
5022 }
5023 default:
5024 break;
5025 }
5026
5027 offset = cmd_offset + load_cmd.cmdsize;
5028 }
5029
5030 // See if there are LC_BUILD_VERSION load commands that can give
5031 // us the OS type.
5032 offset = lc_offset;
5033 for (uint32_t i = 0; i < header.ncmds; ++i) {
5034 const lldb::offset_t cmd_offset = offset;
5035 if (data.GetU32(&offset, &load_cmd, 2) == nullptr)
5036 break;
5037
5038 do {
5039 if (load_cmd.cmd == llvm::MachO::LC_BUILD_VERSION) {
5040 llvm::MachO::build_version_command build_version;
5041 if (load_cmd.cmdsize < sizeof(build_version)) {
5042 // Malformed load command.
5043 break;
5044 }
5045 if (data.ExtractBytes(cmd_offset, sizeof(build_version),
5046 data.GetByteOrder(), &build_version) == 0)
5047 break;
5048 MinOS min_os(build_version.minos);
5049 OSEnv os_env(build_version.platform);
5050 llvm::SmallString<16> os_name;
5051 llvm::raw_svector_ostream os(os_name);
5052 os << os_env.os_type << min_os.major_version << '.'
5053 << min_os.minor_version << '.' << min_os.patch_version;
5054 auto triple = base_triple;
5055 triple.setOSName(os.str());
5056 os_name.clear();
5057 if (!os_env.environment.empty())
5058 triple.setEnvironmentName(os_env.environment);
5059 add_triple(triple);
5060 }
5061 } while (false);
5062 offset = cmd_offset + load_cmd.cmdsize;
5063 }
5064
5065 if (!found_any) {
5066 add_triple(base_triple);
5067 }
5068}
5069
5071 ModuleSP module_sp, const llvm::MachO::mach_header &header,
5072 const lldb_private::DataExtractor &data, lldb::offset_t lc_offset) {
5073 ModuleSpecList all_specs;
5074 ModuleSpec base_spec;
5075 GetAllArchSpecs(header, data, MachHeaderSizeFromMagic(header.magic),
5076 base_spec, all_specs);
5077
5078 // If the object file offers multiple alternative load commands,
5079 // pick the one that matches the module.
5080 if (module_sp) {
5081 const ArchSpec &module_arch = module_sp->GetArchitecture();
5082 for (unsigned i = 0, e = all_specs.GetSize(); i != e; ++i) {
5083 ArchSpec mach_arch =
5085 if (module_arch.IsCompatibleMatch(mach_arch))
5086 return mach_arch;
5087 }
5088 }
5089
5090 // Return the first arch we found.
5091 if (all_specs.GetSize() == 0)
5092 return {};
5093 return all_specs.GetModuleSpecRefAtIndex(0).GetArchitecture();
5094}
5095
5097 ModuleSP module_sp(GetModule());
5098 if (module_sp) {
5099 std::lock_guard<std::recursive_mutex> guard(module_sp->GetMutex());
5101 return GetUUID(m_header, m_data, offset);
5102 }
5103 return UUID();
5104}
5105
5107 uint32_t count = 0;
5108 ModuleSP module_sp(GetModule());
5109 if (module_sp) {
5110 std::lock_guard<std::recursive_mutex> guard(module_sp->GetMutex());
5111 llvm::MachO::load_command load_cmd;
5113 std::vector<std::string> rpath_paths;
5114 std::vector<std::string> rpath_relative_paths;
5115 std::vector<std::string> at_exec_relative_paths;
5116 uint32_t i;
5117 for (i = 0; i < m_header.ncmds; ++i) {
5118 const uint32_t cmd_offset = offset;
5119 if (m_data.GetU32(&offset, &load_cmd, 2) == nullptr)
5120 break;
5121
5122 switch (load_cmd.cmd) {
5123 case LC_RPATH:
5124 case LC_LOAD_DYLIB:
5125 case LC_LOAD_WEAK_DYLIB:
5126 case LC_REEXPORT_DYLIB:
5127 case LC_LOAD_DYLINKER:
5128 case LC_LOADFVMLIB:
5129 case LC_LOAD_UPWARD_DYLIB: {
5130 uint32_t name_offset = cmd_offset + m_data.GetU32(&offset);
5131 const char *path = m_data.PeekCStr(name_offset);
5132 if (path) {
5133 if (load_cmd.cmd == LC_RPATH)
5134 rpath_paths.push_back(path);
5135 else {
5136 if (path[0] == '@') {
5137 if (strncmp(path, "@rpath", strlen("@rpath")) == 0)
5138 rpath_relative_paths.push_back(path + strlen("@rpath"));
5139 else if (strncmp(path, "@executable_path",
5140 strlen("@executable_path")) == 0)
5141 at_exec_relative_paths.push_back(path +
5142 strlen("@executable_path"));
5143 } else {
5144 FileSpec file_spec(path);
5145 if (files.AppendIfUnique(file_spec))
5146 count++;
5147 }
5148 }
5149 }
5150 } break;
5151
5152 default:
5153 break;
5154 }
5155 offset = cmd_offset + load_cmd.cmdsize;
5156 }
5157
5158 FileSpec this_file_spec(m_file);
5159 FileSystem::Instance().Resolve(this_file_spec);
5160
5161 if (!rpath_paths.empty()) {
5162 // Fixup all LC_RPATH values to be absolute paths
5163 std::string loader_path("@loader_path");
5164 std::string executable_path("@executable_path");
5165 for (auto &rpath : rpath_paths) {
5166 if (llvm::StringRef(rpath).startswith(loader_path)) {
5167 rpath.erase(0, loader_path.size());
5168 rpath.insert(0, this_file_spec.GetDirectory().GetCString());
5169 } else if (llvm::StringRef(rpath).startswith(executable_path)) {
5170 rpath.erase(0, executable_path.size());
5171 rpath.insert(0, this_file_spec.GetDirectory().GetCString());
5172 }
5173 }
5174
5175 for (const auto &rpath_relative_path : rpath_relative_paths) {
5176 for (const auto &rpath : rpath_paths) {
5177 std::string path = rpath;
5178 path += rpath_relative_path;
5179 // It is OK to resolve this path because we must find a file on disk
5180 // for us to accept it anyway if it is rpath relative.
5181 FileSpec file_spec(path);
5182 FileSystem::Instance().Resolve(file_spec);
5183 if (FileSystem::Instance().Exists(file_spec) &&
5184 files.AppendIfUnique(file_spec)) {
5185 count++;
5186 break;
5187 }
5188 }
5189 }
5190 }
5191
5192 // We may have @executable_paths but no RPATHS. Figure those out here.
5193 // Only do this if this object file is the executable. We have no way to
5194 // get back to the actual executable otherwise, so we won't get the right
5195 // path.
5196 if (!at_exec_relative_paths.empty() && CalculateType() == eTypeExecutable) {
5197 FileSpec exec_dir = this_file_spec.CopyByRemovingLastPathComponent();
5198 for (const auto &at_exec_relative_path : at_exec_relative_paths) {
5199 FileSpec file_spec =
5200 exec_dir.CopyByAppendingPathComponent(at_exec_relative_path);
5201 if (FileSystem::Instance().Exists(file_spec) &&
5202 files.AppendIfUnique(file_spec))
5203 count++;
5204 }
5205 }
5206 }
5207 return count;
5208}
5209
5211 // If the object file is not an executable it can't hold the entry point.
5212 // m_entry_point_address is initialized to an invalid address, so we can just
5213 // return that. If m_entry_point_address is valid it means we've found it
5214 // already, so return the cached value.
5215
5216 if ((!IsExecutable() && !IsDynamicLoader()) ||
5218 return m_entry_point_address;
5219 }
5220
5221 // Otherwise, look for the UnixThread or Thread command. The data for the
5222 // Thread command is given in /usr/include/mach-o.h, but it is basically:
5223 //
5224 // uint32_t flavor - this is the flavor argument you would pass to
5225 // thread_get_state
5226 // uint32_t count - this is the count of longs in the thread state data
5227 // struct XXX_thread_state state - this is the structure from
5228 // <machine/thread_status.h> corresponding to the flavor.
5229 // <repeat this trio>
5230 //
5231 // So we just keep reading the various register flavors till we find the GPR
5232 // one, then read the PC out of there.
5233 // FIXME: We will need to have a "RegisterContext data provider" class at some
5234 // point that can get all the registers
5235 // out of data in this form & attach them to a given thread. That should
5236 // underlie the MacOS X User process plugin, and we'll also need it for the
5237 // MacOS X Core File process plugin. When we have that we can also use it
5238 // here.
5239 //
5240 // For now we hard-code the offsets and flavors we need:
5241 //
5242 //
5243