LLDB mainline
ObjectFileMachO.cpp
Go to the documentation of this file.
1//===-- ObjectFileMachO.cpp -----------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "llvm/ADT/ScopeExit.h"
10#include "llvm/ADT/StringRef.h"
11
16#include "lldb/Core/Debugger.h"
17#include "lldb/Core/Module.h"
20#include "lldb/Core/Progress.h"
21#include "lldb/Core/Section.h"
23#include "lldb/Host/Host.h"
30#include "lldb/Target/Process.h"
32#include "lldb/Target/Target.h"
33#include "lldb/Target/Thread.h"
40#include "lldb/Utility/Log.h"
43#include "lldb/Utility/Status.h"
45#include "lldb/Utility/Timer.h"
46#include "lldb/Utility/UUID.h"
47
48#include "lldb/Host/SafeMachO.h"
49
50#include "llvm/ADT/DenseSet.h"
51#include "llvm/Support/FormatVariadic.h"
52#include "llvm/Support/MemoryBuffer.h"
53
54#include "ObjectFileMachO.h"
55
56#if defined(__APPLE__)
57#include <TargetConditionals.h>
58// GetLLDBSharedCacheUUID() needs to call dlsym()
59#include <dlfcn.h>
60#include <mach/mach_init.h>
61#include <mach/vm_map.h>
62#include <lldb/Host/SafeMachO.h>
63#endif
64
65#ifndef __APPLE__
67#else
68#include <uuid/uuid.h>
69#endif
70
71#include <bitset>
72#include <memory>
73#include <optional>
74
75// Unfortunately the signpost header pulls in the system MachO header, too.
76#ifdef CPU_TYPE_ARM
77#undef CPU_TYPE_ARM
78#endif
79#ifdef CPU_TYPE_ARM64
80#undef CPU_TYPE_ARM64
81#endif
82#ifdef CPU_TYPE_ARM64_32
83#undef CPU_TYPE_ARM64_32
84#endif
85#ifdef CPU_TYPE_I386
86#undef CPU_TYPE_I386
87#endif
88#ifdef CPU_TYPE_X86_64
89#undef CPU_TYPE_X86_64
90#endif
91#ifdef MH_DYLINKER
92#undef MH_DYLINKER
93#endif
94#ifdef MH_OBJECT
95#undef MH_OBJECT
96#endif
97#ifdef LC_VERSION_MIN_MACOSX
98#undef LC_VERSION_MIN_MACOSX
99#endif
100#ifdef LC_VERSION_MIN_IPHONEOS
101#undef LC_VERSION_MIN_IPHONEOS
102#endif
103#ifdef LC_VERSION_MIN_TVOS
104#undef LC_VERSION_MIN_TVOS
105#endif
106#ifdef LC_VERSION_MIN_WATCHOS
107#undef LC_VERSION_MIN_WATCHOS
108#endif
109#ifdef LC_BUILD_VERSION
110#undef LC_BUILD_VERSION
111#endif
112#ifdef PLATFORM_MACOS
113#undef PLATFORM_MACOS
114#endif
115#ifdef PLATFORM_MACCATALYST
116#undef PLATFORM_MACCATALYST
117#endif
118#ifdef PLATFORM_IOS
119#undef PLATFORM_IOS
120#endif
121#ifdef PLATFORM_IOSSIMULATOR
122#undef PLATFORM_IOSSIMULATOR
123#endif
124#ifdef PLATFORM_TVOS
125#undef PLATFORM_TVOS
126#endif
127#ifdef PLATFORM_TVOSSIMULATOR
128#undef PLATFORM_TVOSSIMULATOR
129#endif
130#ifdef PLATFORM_WATCHOS
131#undef PLATFORM_WATCHOS
132#endif
133#ifdef PLATFORM_WATCHOSSIMULATOR
134#undef PLATFORM_WATCHOSSIMULATOR
135#endif
136
137#define THUMB_ADDRESS_BIT_MASK 0xfffffffffffffffeull
138using namespace lldb;
139using namespace lldb_private;
140using namespace llvm::MachO;
141
143
144static void PrintRegisterValue(RegisterContext *reg_ctx, const char *name,
145 const char *alt_name, size_t reg_byte_size,
146 Stream &data) {
147 const RegisterInfo *reg_info = reg_ctx->GetRegisterInfoByName(name);
148 if (reg_info == nullptr)
149 reg_info = reg_ctx->GetRegisterInfoByName(alt_name);
150 if (reg_info) {
152 if (reg_ctx->ReadRegister(reg_info, reg_value)) {
153 if (reg_info->byte_size >= reg_byte_size)
154 data.Write(reg_value.GetBytes(), reg_byte_size);
155 else {
156 data.Write(reg_value.GetBytes(), reg_info->byte_size);
157 for (size_t i = 0, n = reg_byte_size - reg_info->byte_size; i < n; ++i)
158 data.PutChar(0);
159 }
160 return;
161 }
162 }
163 // Just write zeros if all else fails
164 for (size_t i = 0; i < reg_byte_size; ++i)
165 data.PutChar(0);
166}
167
169public:
171 const DataExtractor &data)
172 : RegisterContextDarwin_x86_64(thread, 0) {
174 }
175
176 void InvalidateAllRegisters() override {
177 // Do nothing... registers are always valid...
178 }
179
181 lldb::offset_t offset = 0;
182 SetError(GPRRegSet, Read, -1);
183 SetError(FPURegSet, Read, -1);
184 SetError(EXCRegSet, Read, -1);
185 bool done = false;
186
187 while (!done) {
188 int flavor = data.GetU32(&offset);
189 if (flavor == 0)
190 done = true;
191 else {
192 uint32_t i;
193 uint32_t count = data.GetU32(&offset);
194 switch (flavor) {
195 case GPRRegSet:
196 for (i = 0; i < count; ++i)
197 (&gpr.rax)[i] = data.GetU64(&offset);
199 done = true;
200
201 break;
202 case FPURegSet:
203 // TODO: fill in FPU regs....
204 // SetError (FPURegSet, Read, -1);
205 done = true;
206
207 break;
208 case EXCRegSet:
209 exc.trapno = data.GetU32(&offset);
210 exc.err = data.GetU32(&offset);
211 exc.faultvaddr = data.GetU64(&offset);
213 done = true;
214 break;
215 case 7:
216 case 8:
217 case 9:
218 // fancy flavors that encapsulate the above flavors...
219 break;
220
221 default:
222 done = true;
223 break;
224 }
225 }
226 }
227 }
228
229 static bool Create_LC_THREAD(Thread *thread, Stream &data) {
230 RegisterContextSP reg_ctx_sp(thread->GetRegisterContext());
231 if (reg_ctx_sp) {
232 RegisterContext *reg_ctx = reg_ctx_sp.get();
233
234 data.PutHex32(GPRRegSet); // Flavor
236 PrintRegisterValue(reg_ctx, "rax", nullptr, 8, data);
237 PrintRegisterValue(reg_ctx, "rbx", nullptr, 8, data);
238 PrintRegisterValue(reg_ctx, "rcx", nullptr, 8, data);
239 PrintRegisterValue(reg_ctx, "rdx", nullptr, 8, data);
240 PrintRegisterValue(reg_ctx, "rdi", nullptr, 8, data);
241 PrintRegisterValue(reg_ctx, "rsi", nullptr, 8, data);
242 PrintRegisterValue(reg_ctx, "rbp", nullptr, 8, data);
243 PrintRegisterValue(reg_ctx, "rsp", nullptr, 8, data);
244 PrintRegisterValue(reg_ctx, "r8", nullptr, 8, data);
245 PrintRegisterValue(reg_ctx, "r9", nullptr, 8, data);
246 PrintRegisterValue(reg_ctx, "r10", nullptr, 8, data);
247 PrintRegisterValue(reg_ctx, "r11", nullptr, 8, data);
248 PrintRegisterValue(reg_ctx, "r12", nullptr, 8, data);
249 PrintRegisterValue(reg_ctx, "r13", nullptr, 8, data);
250 PrintRegisterValue(reg_ctx, "r14", nullptr, 8, data);
251 PrintRegisterValue(reg_ctx, "r15", nullptr, 8, data);
252 PrintRegisterValue(reg_ctx, "rip", nullptr, 8, data);
253 PrintRegisterValue(reg_ctx, "rflags", nullptr, 8, data);
254 PrintRegisterValue(reg_ctx, "cs", nullptr, 8, data);
255 PrintRegisterValue(reg_ctx, "fs", nullptr, 8, data);
256 PrintRegisterValue(reg_ctx, "gs", nullptr, 8, data);
257
258 // // Write out the FPU registers
259 // const size_t fpu_byte_size = sizeof(FPU);
260 // size_t bytes_written = 0;
261 // data.PutHex32 (FPURegSet);
262 // data.PutHex32 (fpu_byte_size/sizeof(uint64_t));
263 // bytes_written += data.PutHex32(0); // uint32_t pad[0]
264 // bytes_written += data.PutHex32(0); // uint32_t pad[1]
265 // bytes_written += WriteRegister (reg_ctx, "fcw", "fctrl", 2,
266 // data); // uint16_t fcw; // "fctrl"
267 // bytes_written += WriteRegister (reg_ctx, "fsw" , "fstat", 2,
268 // data); // uint16_t fsw; // "fstat"
269 // bytes_written += WriteRegister (reg_ctx, "ftw" , "ftag", 1,
270 // data); // uint8_t ftw; // "ftag"
271 // bytes_written += data.PutHex8 (0); // uint8_t pad1;
272 // bytes_written += WriteRegister (reg_ctx, "fop" , NULL, 2,
273 // data); // uint16_t fop; // "fop"
274 // bytes_written += WriteRegister (reg_ctx, "fioff", "ip", 4,
275 // data); // uint32_t ip; // "fioff"
276 // bytes_written += WriteRegister (reg_ctx, "fiseg", NULL, 2,
277 // data); // uint16_t cs; // "fiseg"
278 // bytes_written += data.PutHex16 (0); // uint16_t pad2;
279 // bytes_written += WriteRegister (reg_ctx, "dp", "fooff" , 4,
280 // data); // uint32_t dp; // "fooff"
281 // bytes_written += WriteRegister (reg_ctx, "foseg", NULL, 2,
282 // data); // uint16_t ds; // "foseg"
283 // bytes_written += data.PutHex16 (0); // uint16_t pad3;
284 // bytes_written += WriteRegister (reg_ctx, "mxcsr", NULL, 4,
285 // data); // uint32_t mxcsr;
286 // bytes_written += WriteRegister (reg_ctx, "mxcsrmask", NULL,
287 // 4, data);// uint32_t mxcsrmask;
288 // bytes_written += WriteRegister (reg_ctx, "stmm0", NULL,
289 // sizeof(MMSReg), data);
290 // bytes_written += WriteRegister (reg_ctx, "stmm1", NULL,
291 // sizeof(MMSReg), data);
292 // bytes_written += WriteRegister (reg_ctx, "stmm2", NULL,
293 // sizeof(MMSReg), data);
294 // bytes_written += WriteRegister (reg_ctx, "stmm3", NULL,
295 // sizeof(MMSReg), data);
296 // bytes_written += WriteRegister (reg_ctx, "stmm4", NULL,
297 // sizeof(MMSReg), data);
298 // bytes_written += WriteRegister (reg_ctx, "stmm5", NULL,
299 // sizeof(MMSReg), data);
300 // bytes_written += WriteRegister (reg_ctx, "stmm6", NULL,
301 // sizeof(MMSReg), data);
302 // bytes_written += WriteRegister (reg_ctx, "stmm7", NULL,
303 // sizeof(MMSReg), data);
304 // bytes_written += WriteRegister (reg_ctx, "xmm0" , NULL,
305 // sizeof(XMMReg), data);
306 // bytes_written += WriteRegister (reg_ctx, "xmm1" , NULL,
307 // sizeof(XMMReg), data);
308 // bytes_written += WriteRegister (reg_ctx, "xmm2" , NULL,
309 // sizeof(XMMReg), data);
310 // bytes_written += WriteRegister (reg_ctx, "xmm3" , NULL,
311 // sizeof(XMMReg), data);
312 // bytes_written += WriteRegister (reg_ctx, "xmm4" , NULL,
313 // sizeof(XMMReg), data);
314 // bytes_written += WriteRegister (reg_ctx, "xmm5" , NULL,
315 // sizeof(XMMReg), data);
316 // bytes_written += WriteRegister (reg_ctx, "xmm6" , NULL,
317 // sizeof(XMMReg), data);
318 // bytes_written += WriteRegister (reg_ctx, "xmm7" , NULL,
319 // sizeof(XMMReg), data);
320 // bytes_written += WriteRegister (reg_ctx, "xmm8" , NULL,
321 // sizeof(XMMReg), data);
322 // bytes_written += WriteRegister (reg_ctx, "xmm9" , NULL,
323 // sizeof(XMMReg), data);
324 // bytes_written += WriteRegister (reg_ctx, "xmm10", NULL,
325 // sizeof(XMMReg), data);
326 // bytes_written += WriteRegister (reg_ctx, "xmm11", NULL,
327 // sizeof(XMMReg), data);
328 // bytes_written += WriteRegister (reg_ctx, "xmm12", NULL,
329 // sizeof(XMMReg), data);
330 // bytes_written += WriteRegister (reg_ctx, "xmm13", NULL,
331 // sizeof(XMMReg), data);
332 // bytes_written += WriteRegister (reg_ctx, "xmm14", NULL,
333 // sizeof(XMMReg), data);
334 // bytes_written += WriteRegister (reg_ctx, "xmm15", NULL,
335 // sizeof(XMMReg), data);
336 //
337 // // Fill rest with zeros
338 // for (size_t i=0, n = fpu_byte_size - bytes_written; i<n; ++
339 // i)
340 // data.PutChar(0);
341
342 // Write out the EXC registers
343 data.PutHex32(EXCRegSet);
345 PrintRegisterValue(reg_ctx, "trapno", nullptr, 4, data);
346 PrintRegisterValue(reg_ctx, "err", nullptr, 4, data);
347 PrintRegisterValue(reg_ctx, "faultvaddr", nullptr, 8, data);
348 return true;
349 }
350 return false;
351 }
352
353protected:
354 int DoReadGPR(lldb::tid_t tid, int flavor, GPR &gpr) override { return 0; }
355
356 int DoReadFPU(lldb::tid_t tid, int flavor, FPU &fpu) override { return 0; }
357
358 int DoReadEXC(lldb::tid_t tid, int flavor, EXC &exc) override { return 0; }
359
360 int DoWriteGPR(lldb::tid_t tid, int flavor, const GPR &gpr) override {
361 return 0;
362 }
363
364 int DoWriteFPU(lldb::tid_t tid, int flavor, const FPU &fpu) override {
365 return 0;
366 }
367
368 int DoWriteEXC(lldb::tid_t tid, int flavor, const EXC &exc) override {
369 return 0;
370 }
371};
372
374public:
376 const DataExtractor &data)
377 : RegisterContextDarwin_i386(thread, 0) {
379 }
380
381 void InvalidateAllRegisters() override {
382 // Do nothing... registers are always valid...
383 }
384
386 lldb::offset_t offset = 0;
387 SetError(GPRRegSet, Read, -1);
388 SetError(FPURegSet, Read, -1);
389 SetError(EXCRegSet, Read, -1);
390 bool done = false;
391
392 while (!done) {
393 int flavor = data.GetU32(&offset);
394 if (flavor == 0)
395 done = true;
396 else {
397 uint32_t i;
398 uint32_t count = data.GetU32(&offset);
399 switch (flavor) {
400 case GPRRegSet:
401 for (i = 0; i < count; ++i)
402 (&gpr.eax)[i] = data.GetU32(&offset);
404 done = true;
405
406 break;
407 case FPURegSet:
408 // TODO: fill in FPU regs....
409 // SetError (FPURegSet, Read, -1);
410 done = true;
411
412 break;
413 case EXCRegSet:
414 exc.trapno = data.GetU32(&offset);
415 exc.err = data.GetU32(&offset);
416 exc.faultvaddr = data.GetU32(&offset);
418 done = true;
419 break;
420 case 7:
421 case 8:
422 case 9:
423 // fancy flavors that encapsulate the above flavors...
424 break;
425
426 default:
427 done = true;
428 break;
429 }
430 }
431 }
432 }
433
434 static bool Create_LC_THREAD(Thread *thread, Stream &data) {
435 RegisterContextSP reg_ctx_sp(thread->GetRegisterContext());
436 if (reg_ctx_sp) {
437 RegisterContext *reg_ctx = reg_ctx_sp.get();
438
439 data.PutHex32(GPRRegSet); // Flavor
441 PrintRegisterValue(reg_ctx, "eax", nullptr, 4, data);
442 PrintRegisterValue(reg_ctx, "ebx", nullptr, 4, data);
443 PrintRegisterValue(reg_ctx, "ecx", nullptr, 4, data);
444 PrintRegisterValue(reg_ctx, "edx", nullptr, 4, data);
445 PrintRegisterValue(reg_ctx, "edi", nullptr, 4, data);
446 PrintRegisterValue(reg_ctx, "esi", nullptr, 4, data);
447 PrintRegisterValue(reg_ctx, "ebp", nullptr, 4, data);
448 PrintRegisterValue(reg_ctx, "esp", nullptr, 4, data);
449 PrintRegisterValue(reg_ctx, "ss", nullptr, 4, data);
450 PrintRegisterValue(reg_ctx, "eflags", nullptr, 4, data);
451 PrintRegisterValue(reg_ctx, "eip", nullptr, 4, data);
452 PrintRegisterValue(reg_ctx, "cs", nullptr, 4, data);
453 PrintRegisterValue(reg_ctx, "ds", nullptr, 4, data);
454 PrintRegisterValue(reg_ctx, "es", nullptr, 4, data);
455 PrintRegisterValue(reg_ctx, "fs", nullptr, 4, data);
456 PrintRegisterValue(reg_ctx, "gs", nullptr, 4, data);
457
458 // Write out the EXC registers
459 data.PutHex32(EXCRegSet);
461 PrintRegisterValue(reg_ctx, "trapno", nullptr, 4, data);
462 PrintRegisterValue(reg_ctx, "err", nullptr, 4, data);
463 PrintRegisterValue(reg_ctx, "faultvaddr", nullptr, 4, data);
464 return true;
465 }
466 return false;
467 }
468
469protected:
470 int DoReadGPR(lldb::tid_t tid, int flavor, GPR &gpr) override { return 0; }
471
472 int DoReadFPU(lldb::tid_t tid, int flavor, FPU &fpu) override { return 0; }
473
474 int DoReadEXC(lldb::tid_t tid, int flavor, EXC &exc) override { return 0; }
475
476 int DoWriteGPR(lldb::tid_t tid, int flavor, const GPR &gpr) override {
477 return 0;
478 }
479
480 int DoWriteFPU(lldb::tid_t tid, int flavor, const FPU &fpu) override {
481 return 0;
482 }
483
484 int DoWriteEXC(lldb::tid_t tid, int flavor, const EXC &exc) override {
485 return 0;
486 }
487};
488
490public:
492 const DataExtractor &data)
493 : RegisterContextDarwin_arm(thread, 0) {
495 }
496
497 void InvalidateAllRegisters() override {
498 // Do nothing... registers are always valid...
499 }
500
502 lldb::offset_t offset = 0;
503 SetError(GPRRegSet, Read, -1);
504 SetError(FPURegSet, Read, -1);
505 SetError(EXCRegSet, Read, -1);
506 bool done = false;
507
508 while (!done) {
509 int flavor = data.GetU32(&offset);
510 uint32_t count = data.GetU32(&offset);
511 lldb::offset_t next_thread_state = offset + (count * 4);
512 switch (flavor) {
513 case GPRAltRegSet:
514 case GPRRegSet: {
515 // r0-r15, plus CPSR
516 uint32_t gpr_buf_count = (sizeof(gpr.r) / sizeof(gpr.r[0])) + 1;
517 if (count == gpr_buf_count) {
518 for (uint32_t i = 0; i < (count - 1); ++i) {
519 gpr.r[i] = data.GetU32(&offset);
520 }
521 gpr.cpsr = data.GetU32(&offset);
522
524 }
525 }
526 offset = next_thread_state;
527 break;
528
529 case FPURegSet: {
530 uint8_t *fpu_reg_buf = (uint8_t *)&fpu.floats;
531 const int fpu_reg_buf_size = sizeof(fpu.floats);
532 if (data.ExtractBytes(offset, fpu_reg_buf_size, eByteOrderLittle,
533 fpu_reg_buf) == fpu_reg_buf_size) {
534 offset += fpu_reg_buf_size;
535 fpu.fpscr = data.GetU32(&offset);
537 } else {
538 done = true;
539 }
540 }
541 offset = next_thread_state;
542 break;
543
544 case EXCRegSet:
545 if (count == 3) {
546 exc.exception = data.GetU32(&offset);
547 exc.fsr = data.GetU32(&offset);
548 exc.far = data.GetU32(&offset);
550 }
551 done = true;
552 offset = next_thread_state;
553 break;
554
555 // Unknown register set flavor, stop trying to parse.
556 default:
557 done = true;
558 }
559 }
560 }
561
562 static bool Create_LC_THREAD(Thread *thread, Stream &data) {
563 RegisterContextSP reg_ctx_sp(thread->GetRegisterContext());
564 if (reg_ctx_sp) {
565 RegisterContext *reg_ctx = reg_ctx_sp.get();
566
567 data.PutHex32(GPRRegSet); // Flavor
569 PrintRegisterValue(reg_ctx, "r0", nullptr, 4, data);
570 PrintRegisterValue(reg_ctx, "r1", nullptr, 4, data);
571 PrintRegisterValue(reg_ctx, "r2", nullptr, 4, data);
572 PrintRegisterValue(reg_ctx, "r3", nullptr, 4, data);
573 PrintRegisterValue(reg_ctx, "r4", nullptr, 4, data);
574 PrintRegisterValue(reg_ctx, "r5", nullptr, 4, data);
575 PrintRegisterValue(reg_ctx, "r6", nullptr, 4, data);
576 PrintRegisterValue(reg_ctx, "r7", nullptr, 4, data);
577 PrintRegisterValue(reg_ctx, "r8", nullptr, 4, data);
578 PrintRegisterValue(reg_ctx, "r9", nullptr, 4, data);
579 PrintRegisterValue(reg_ctx, "r10", nullptr, 4, data);
580 PrintRegisterValue(reg_ctx, "r11", nullptr, 4, data);
581 PrintRegisterValue(reg_ctx, "r12", nullptr, 4, data);
582 PrintRegisterValue(reg_ctx, "sp", nullptr, 4, data);
583 PrintRegisterValue(reg_ctx, "lr", nullptr, 4, data);
584 PrintRegisterValue(reg_ctx, "pc", nullptr, 4, data);
585 PrintRegisterValue(reg_ctx, "cpsr", nullptr, 4, data);
586
587 // Write out the EXC registers
588 // data.PutHex32 (EXCRegSet);
589 // data.PutHex32 (EXCWordCount);
590 // WriteRegister (reg_ctx, "exception", NULL, 4, data);
591 // WriteRegister (reg_ctx, "fsr", NULL, 4, data);
592 // WriteRegister (reg_ctx, "far", NULL, 4, data);
593 return true;
594 }
595 return false;
596 }
597
598protected:
599 int DoReadGPR(lldb::tid_t tid, int flavor, GPR &gpr) override { return -1; }
600
601 int DoReadFPU(lldb::tid_t tid, int flavor, FPU &fpu) override { return -1; }
602
603 int DoReadEXC(lldb::tid_t tid, int flavor, EXC &exc) override { return -1; }
604
605 int DoReadDBG(lldb::tid_t tid, int flavor, DBG &dbg) override { return -1; }
606
607 int DoWriteGPR(lldb::tid_t tid, int flavor, const GPR &gpr) override {
608 return 0;
609 }
610
611 int DoWriteFPU(lldb::tid_t tid, int flavor, const FPU &fpu) override {
612 return 0;
613 }
614
615 int DoWriteEXC(lldb::tid_t tid, int flavor, const EXC &exc) override {
616 return 0;
617 }
618
619 int DoWriteDBG(lldb::tid_t tid, int flavor, const DBG &dbg) override {
620 return -1;
621 }
622};
623
625public:
627 const DataExtractor &data)
628 : RegisterContextDarwin_arm64(thread, 0) {
630 }
631
632 void InvalidateAllRegisters() override {
633 // Do nothing... registers are always valid...
634 }
635
637 lldb::offset_t offset = 0;
638 SetError(GPRRegSet, Read, -1);
639 SetError(FPURegSet, Read, -1);
640 SetError(EXCRegSet, Read, -1);
641 bool done = false;
642 while (!done) {
643 int flavor = data.GetU32(&offset);
644 uint32_t count = data.GetU32(&offset);
645 lldb::offset_t next_thread_state = offset + (count * 4);
646 switch (flavor) {
647 case GPRRegSet:
648 // x0-x28 + fp + lr + sp + pc (== 33 64-bit registers) plus cpsr (1
649 // 32-bit register)
650 if (count >= (33 * 2) + 1) {
651 for (uint32_t i = 0; i < 29; ++i)
652 gpr.x[i] = data.GetU64(&offset);
653 gpr.fp = data.GetU64(&offset);
654 gpr.lr = data.GetU64(&offset);
655 gpr.sp = data.GetU64(&offset);
656 gpr.pc = data.GetU64(&offset);
657 gpr.cpsr = data.GetU32(&offset);
659 }
660 offset = next_thread_state;
661 break;
662 case FPURegSet: {
663 uint8_t *fpu_reg_buf = (uint8_t *)&fpu.v[0];
664 const int fpu_reg_buf_size = sizeof(fpu);
665 if (fpu_reg_buf_size == count * sizeof(uint32_t) &&
666 data.ExtractBytes(offset, fpu_reg_buf_size, eByteOrderLittle,
667 fpu_reg_buf) == fpu_reg_buf_size) {
669 } else {
670 done = true;
671 }
672 }
673 offset = next_thread_state;
674 break;
675 case EXCRegSet:
676 if (count == 4) {
677 exc.far = data.GetU64(&offset);
678 exc.esr = data.GetU32(&offset);
679 exc.exception = data.GetU32(&offset);
681 }
682 offset = next_thread_state;
683 break;
684 default:
685 done = true;
686 break;
687 }
688 }
689 }
690
691 static bool Create_LC_THREAD(Thread *thread, Stream &data) {
692 RegisterContextSP reg_ctx_sp(thread->GetRegisterContext());
693 if (reg_ctx_sp) {
694 RegisterContext *reg_ctx = reg_ctx_sp.get();
695
696 data.PutHex32(GPRRegSet); // Flavor
698 PrintRegisterValue(reg_ctx, "x0", nullptr, 8, data);
699 PrintRegisterValue(reg_ctx, "x1", nullptr, 8, data);
700 PrintRegisterValue(reg_ctx, "x2", nullptr, 8, data);
701 PrintRegisterValue(reg_ctx, "x3", nullptr, 8, data);
702 PrintRegisterValue(reg_ctx, "x4", nullptr, 8, data);
703 PrintRegisterValue(reg_ctx, "x5", nullptr, 8, data);
704 PrintRegisterValue(reg_ctx, "x6", nullptr, 8, data);
705 PrintRegisterValue(reg_ctx, "x7", nullptr, 8, data);
706 PrintRegisterValue(reg_ctx, "x8", nullptr, 8, data);
707 PrintRegisterValue(reg_ctx, "x9", nullptr, 8, data);
708 PrintRegisterValue(reg_ctx, "x10", nullptr, 8, data);
709 PrintRegisterValue(reg_ctx, "x11", nullptr, 8, data);
710 PrintRegisterValue(reg_ctx, "x12", nullptr, 8, data);
711 PrintRegisterValue(reg_ctx, "x13", nullptr, 8, data);
712 PrintRegisterValue(reg_ctx, "x14", nullptr, 8, data);
713 PrintRegisterValue(reg_ctx, "x15", nullptr, 8, data);
714 PrintRegisterValue(reg_ctx, "x16", nullptr, 8, data);
715 PrintRegisterValue(reg_ctx, "x17", nullptr, 8, data);
716 PrintRegisterValue(reg_ctx, "x18", nullptr, 8, data);
717 PrintRegisterValue(reg_ctx, "x19", nullptr, 8, data);
718 PrintRegisterValue(reg_ctx, "x20", nullptr, 8, data);
719 PrintRegisterValue(reg_ctx, "x21", nullptr, 8, data);
720 PrintRegisterValue(reg_ctx, "x22", nullptr, 8, data);
721 PrintRegisterValue(reg_ctx, "x23", nullptr, 8, data);
722 PrintRegisterValue(reg_ctx, "x24", nullptr, 8, data);
723 PrintRegisterValue(reg_ctx, "x25", nullptr, 8, data);
724 PrintRegisterValue(reg_ctx, "x26", nullptr, 8, data);
725 PrintRegisterValue(reg_ctx, "x27", nullptr, 8, data);
726 PrintRegisterValue(reg_ctx, "x28", nullptr, 8, data);
727 PrintRegisterValue(reg_ctx, "fp", nullptr, 8, data);
728 PrintRegisterValue(reg_ctx, "lr", nullptr, 8, data);
729 PrintRegisterValue(reg_ctx, "sp", nullptr, 8, data);
730 PrintRegisterValue(reg_ctx, "pc", nullptr, 8, data);
731 PrintRegisterValue(reg_ctx, "cpsr", nullptr, 4, data);
732 data.PutHex32(0); // uint32_t pad at the end
733
734 // Write out the EXC registers
735 data.PutHex32(EXCRegSet);
737 PrintRegisterValue(reg_ctx, "far", nullptr, 8, data);
738 PrintRegisterValue(reg_ctx, "esr", nullptr, 4, data);
739 PrintRegisterValue(reg_ctx, "exception", nullptr, 4, data);
740 return true;
741 }
742 return false;
743 }
744
745protected:
746 int DoReadGPR(lldb::tid_t tid, int flavor, GPR &gpr) override { return -1; }
747
748 int DoReadFPU(lldb::tid_t tid, int flavor, FPU &fpu) override { return -1; }
749
750 int DoReadEXC(lldb::tid_t tid, int flavor, EXC &exc) override { return -1; }
751
752 int DoReadDBG(lldb::tid_t tid, int flavor, DBG &dbg) override { return -1; }
753
754 int DoWriteGPR(lldb::tid_t tid, int flavor, const GPR &gpr) override {
755 return 0;
756 }
757
758 int DoWriteFPU(lldb::tid_t tid, int flavor, const FPU &fpu) override {
759 return 0;
760 }
761
762 int DoWriteEXC(lldb::tid_t tid, int flavor, const EXC &exc) override {
763 return 0;
764 }
765
766 int DoWriteDBG(lldb::tid_t tid, int flavor, const DBG &dbg) override {
767 return -1;
768 }
769};
770
772 switch (magic) {
773 case MH_MAGIC:
774 case MH_CIGAM:
775 return sizeof(struct llvm::MachO::mach_header);
776
777 case MH_MAGIC_64:
778 case MH_CIGAM_64:
779 return sizeof(struct llvm::MachO::mach_header_64);
780 break;
781
782 default:
783 break;
784 }
785 return 0;
786}
787
788#define MACHO_NLIST_ARM_SYMBOL_IS_THUMB 0x0008
789
791
796}
797
800}
801
802ObjectFile *ObjectFileMachO::CreateInstance(const lldb::ModuleSP &module_sp,
803 DataBufferSP data_sp,
804 lldb::offset_t data_offset,
805 const FileSpec *file,
806 lldb::offset_t file_offset,
807 lldb::offset_t length) {
808 if (!data_sp) {
809 data_sp = MapFileData(*file, length, file_offset);
810 if (!data_sp)
811 return nullptr;
812 data_offset = 0;
813 }
814
815 if (!ObjectFileMachO::MagicBytesMatch(data_sp, data_offset, length))
816 return nullptr;
817
818 // Update the data to contain the entire file if it doesn't already
819 if (data_sp->GetByteSize() < length) {
820 data_sp = MapFileData(*file, length, file_offset);
821 if (!data_sp)
822 return nullptr;
823 data_offset = 0;
824 }
825 auto objfile_up = std::make_unique<ObjectFileMachO>(
826 module_sp, data_sp, data_offset, file, file_offset, length);
827 if (!objfile_up || !objfile_up->ParseHeader())
828 return nullptr;
829
830 return objfile_up.release();
831}
832
834 const lldb::ModuleSP &module_sp, WritableDataBufferSP data_sp,
835 const ProcessSP &process_sp, lldb::addr_t header_addr) {
836 if (ObjectFileMachO::MagicBytesMatch(data_sp, 0, data_sp->GetByteSize())) {
837 std::unique_ptr<ObjectFile> objfile_up(
838 new ObjectFileMachO(module_sp, data_sp, process_sp, header_addr));
839 if (objfile_up.get() && objfile_up->ParseHeader())
840 return objfile_up.release();
841 }
842 return nullptr;
843}
844
846 const lldb_private::FileSpec &file, lldb::DataBufferSP &data_sp,
847 lldb::offset_t data_offset, lldb::offset_t file_offset,
849 const size_t initial_count = specs.GetSize();
850
851 if (ObjectFileMachO::MagicBytesMatch(data_sp, 0, data_sp->GetByteSize())) {
852 DataExtractor data;
853 data.SetData(data_sp);
854 llvm::MachO::mach_header header;
855 if (ParseHeader(data, &data_offset, header)) {
856 size_t header_and_load_cmds =
857 header.sizeofcmds + MachHeaderSizeFromMagic(header.magic);
858 if (header_and_load_cmds >= data_sp->GetByteSize()) {
859 data_sp = MapFileData(file, header_and_load_cmds, file_offset);
860 data.SetData(data_sp);
861 data_offset = MachHeaderSizeFromMagic(header.magic);
862 }
863 if (data_sp) {
864 ModuleSpec base_spec;
865 base_spec.GetFileSpec() = file;
866 base_spec.SetObjectOffset(file_offset);
867 base_spec.SetObjectSize(length);
868 GetAllArchSpecs(header, data, data_offset, base_spec, specs);
869 }
870 }
871 }
872 return specs.GetSize() - initial_count;
873}
874
876 static ConstString g_segment_name_TEXT("__TEXT");
877 return g_segment_name_TEXT;
878}
879
881 static ConstString g_segment_name_DATA("__DATA");
882 return g_segment_name_DATA;
883}
884
886 static ConstString g_segment_name("__DATA_DIRTY");
887 return g_segment_name;
888}
889
891 static ConstString g_segment_name("__DATA_CONST");
892 return g_segment_name;
893}
894
896 static ConstString g_segment_name_OBJC("__OBJC");
897 return g_segment_name_OBJC;
898}
899
901 static ConstString g_section_name_LINKEDIT("__LINKEDIT");
902 return g_section_name_LINKEDIT;
903}
904
906 static ConstString g_section_name("__DWARF");
907 return g_section_name;
908}
909
911 static ConstString g_section_name_eh_frame("__eh_frame");
912 return g_section_name_eh_frame;
913}
914
915bool ObjectFileMachO::MagicBytesMatch(DataBufferSP data_sp,
916 lldb::addr_t data_offset,
917 lldb::addr_t data_length) {
918 DataExtractor data;
919 data.SetData(data_sp, data_offset, data_length);
920 lldb::offset_t offset = 0;
921 uint32_t magic = data.GetU32(&offset);
922
923 offset += 4; // cputype
924 offset += 4; // cpusubtype
925 uint32_t filetype = data.GetU32(&offset);
926
927 // A fileset has a Mach-O header but is not an
928 // individual file and must be handled via an
929 // ObjectContainer plugin.
930 if (filetype == llvm::MachO::MH_FILESET)
931 return false;
932
933 return MachHeaderSizeFromMagic(magic) != 0;
934}
935
936ObjectFileMachO::ObjectFileMachO(const lldb::ModuleSP &module_sp,
937 DataBufferSP data_sp,
938 lldb::offset_t data_offset,
939 const FileSpec *file,
940 lldb::offset_t file_offset,
941 lldb::offset_t length)
942 : ObjectFile(module_sp, file, file_offset, length, data_sp, data_offset),
943 m_mach_sections(), m_entry_point_address(), m_thread_context_offsets(),
944 m_thread_context_offsets_valid(false), m_reexported_dylibs(),
945 m_allow_assembly_emulation_unwind_plans(true) {
946 ::memset(&m_header, 0, sizeof(m_header));
947 ::memset(&m_dysymtab, 0, sizeof(m_dysymtab));
948}
949
950ObjectFileMachO::ObjectFileMachO(const lldb::ModuleSP &module_sp,
951 lldb::WritableDataBufferSP header_data_sp,
952 const lldb::ProcessSP &process_sp,
953 lldb::addr_t header_addr)
954 : ObjectFile(module_sp, process_sp, header_addr, header_data_sp),
955 m_mach_sections(), m_entry_point_address(), m_thread_context_offsets(),
956 m_thread_context_offsets_valid(false), m_reexported_dylibs(),
957 m_allow_assembly_emulation_unwind_plans(true) {
958 ::memset(&m_header, 0, sizeof(m_header));
959 ::memset(&m_dysymtab, 0, sizeof(m_dysymtab));
960}
961
963 lldb::offset_t *data_offset_ptr,
964 llvm::MachO::mach_header &header) {
966 // Leave magic in the original byte order
967 header.magic = data.GetU32(data_offset_ptr);
968 bool can_parse = false;
969 bool is_64_bit = false;
970 switch (header.magic) {
971 case MH_MAGIC:
973 data.SetAddressByteSize(4);
974 can_parse = true;
975 break;
976
977 case MH_MAGIC_64:
979 data.SetAddressByteSize(8);
980 can_parse = true;
981 is_64_bit = true;
982 break;
983
984 case MH_CIGAM:
987 : eByteOrderBig);
988 data.SetAddressByteSize(4);
989 can_parse = true;
990 break;
991
992 case MH_CIGAM_64:
995 : eByteOrderBig);
996 data.SetAddressByteSize(8);
997 is_64_bit = true;
998 can_parse = true;
999 break;
1000
1001 default:
1002 break;
1003 }
1004
1005 if (can_parse) {
1006 data.GetU32(data_offset_ptr, &header.cputype, 6);
1007 if (is_64_bit)
1008 *data_offset_ptr += 4;
1009 return true;
1010 } else {
1011 memset(&header, 0, sizeof(header));
1012 }
1013 return false;
1014}
1015
1017 ModuleSP module_sp(GetModule());
1018 if (!module_sp)
1019 return false;
1020
1021 std::lock_guard<std::recursive_mutex> guard(module_sp->GetMutex());
1022 bool can_parse = false;
1023 lldb::offset_t offset = 0;
1025 // Leave magic in the original byte order
1026 m_header.magic = m_data.GetU32(&offset);
1027 switch (m_header.magic) {
1028 case MH_MAGIC:
1031 can_parse = true;
1032 break;
1033
1034 case MH_MAGIC_64:
1037 can_parse = true;
1038 break;
1039
1040 case MH_CIGAM:
1043 : eByteOrderBig);
1045 can_parse = true;
1046 break;
1047
1048 case MH_CIGAM_64:
1051 : eByteOrderBig);
1053 can_parse = true;
1054 break;
1055
1056 default:
1057 break;
1058 }
1059
1060 if (can_parse) {
1061 m_data.GetU32(&offset, &m_header.cputype, 6);
1062
1063 ModuleSpecList all_specs;
1064 ModuleSpec base_spec;
1066 base_spec, all_specs);
1067
1068 for (unsigned i = 0, e = all_specs.GetSize(); i != e; ++i) {
1069 ArchSpec mach_arch =
1071
1072 // Check if the module has a required architecture
1073 const ArchSpec &module_arch = module_sp->GetArchitecture();
1074 if (module_arch.IsValid() && !module_arch.IsCompatibleMatch(mach_arch))
1075 continue;
1076
1077 if (SetModulesArchitecture(mach_arch)) {
1078 const size_t header_and_lc_size =
1079 m_header.sizeofcmds + MachHeaderSizeFromMagic(m_header.magic);
1080 if (m_data.GetByteSize() < header_and_lc_size) {
1081 DataBufferSP data_sp;
1082 ProcessSP process_sp(m_process_wp.lock());
1083 if (process_sp) {
1084 data_sp = ReadMemory(process_sp, m_memory_addr, header_and_lc_size);
1085 } else {
1086 // Read in only the header and load command data from the file on disk
1087 data_sp = MapFileData(m_file, header_and_lc_size, m_file_offset);
1088 if (data_sp->GetByteSize() != header_and_lc_size)
1089 continue;
1090 }
1091 if (data_sp)
1092 m_data.SetData(data_sp);
1093 }
1094 }
1095 return true;
1096 }
1097 // None found.
1098 return false;
1099 } else {
1100 memset(&m_header, 0, sizeof(struct llvm::MachO::mach_header));
1101 }
1102 return false;
1103}
1104
1106 return m_data.GetByteOrder();
1107}
1108
1110 return m_header.filetype == MH_EXECUTE;
1111}
1112
1114 return m_header.filetype == MH_DYLINKER;
1115}
1116
1118 return m_header.flags & MH_DYLIB_IN_CACHE;
1119}
1120
1122 return m_header.filetype == MH_KEXT_BUNDLE;
1123}
1124
1126 return m_data.GetAddressByteSize();
1127}
1128
// NOTE: the signature line and a number of `case` labels of this function are
// absent from this listing; only the surviving lines are annotated.
//
// Classifies the address `file_addr`. The symbol containing the address is
// looked up in the symbol table; if its value is an address with a section,
// the section type decides the class, otherwise the symbol type does.
// AddressClass::eUnknown is returned when there is no symbol table, no
// containing symbol, or no more specific mapping.
1130 Symtab *symtab = GetSymtab();
1131 if (!symtab)
1132 return AddressClass::eUnknown;
1133
1134 Symbol *symbol = symtab->FindSymbolContainingFileAddress(file_addr);
1135 if (symbol) {
1136 if (symbol->ValueIsAddress()) {
1137 SectionSP section_sp(symbol->GetAddressRef().GetSection());
1138 if (section_sp) {
// First choice: classify by the type of the section the symbol lives in.
1139 const lldb::SectionType section_type = section_sp->GetType();
1140 switch (section_type) {
1142 return AddressClass::eUnknown;
1143
1144 case eSectionTypeCode:
1145 if (m_header.cputype == llvm::MachO::CPU_TYPE_ARM) {
1146 // For ARM we have a bit in the n_desc field of the symbol that
1147 // tells us ARM/Thumb which is bit 0x0008.
1149 return AddressClass::eCodeAlternateISA;
1150 }
1151 return AddressClass::eCode;
1152
1154 return AddressClass::eUnknown;
1155
1156 case eSectionTypeData:
1160 case eSectionTypeData4:
1161 case eSectionTypeData8:
1162 case eSectionTypeData16:
1168 return AddressClass::eData;
1169
1170 case eSectionTypeDebug:
1205 return AddressClass::eDebug;
1206
1211 return AddressClass::eRuntime;
1212
1218 case eSectionTypeOther:
1219 return AddressClass::eUnknown;
1220 }
1221 }
1222 }
1223
// Fallback: classify by the symbol's own type.
1224 const SymbolType symbol_type = symbol->GetType();
1225 switch (symbol_type) {
1226 case eSymbolTypeAny:
1227 return AddressClass::eUnknown;
1229 return AddressClass::eUnknown;
1230
1231 case eSymbolTypeCode:
1234 if (m_header.cputype == llvm::MachO::CPU_TYPE_ARM) {
1235 // For ARM we have a bit in the n_desc field of the symbol that tells
1236 // us ARM/Thumb which is bit 0x0008.
1238 return AddressClass::eCodeAlternateISA;
1239 }
1240 return AddressClass::eCode;
1241
1242 case eSymbolTypeData:
1243 return AddressClass::eData;
1244 case eSymbolTypeRuntime:
1245 return AddressClass::eRuntime;
1247 return AddressClass::eRuntime;
1249 return AddressClass::eDebug;
1251 return AddressClass::eDebug;
1253 return AddressClass::eDebug;
1255 return AddressClass::eDebug;
1256 case eSymbolTypeBlock:
1257 return AddressClass::eDebug;
1258 case eSymbolTypeLocal:
1259 return AddressClass::eData;
1260 case eSymbolTypeParam:
1261 return AddressClass::eData;
1263 return AddressClass::eData;
1265 return AddressClass::eDebug;
1267 return AddressClass::eDebug;
1269 return AddressClass::eDebug;
1271 return AddressClass::eDebug;
1273 return AddressClass::eDebug;
1275 return AddressClass::eUnknown;
1277 return AddressClass::eDebug;
1279 return AddressClass::eDebug;
1281 return AddressClass::eUnknown;
1283 return AddressClass::eRuntime;
1285 return AddressClass::eRuntime;
1287 return AddressClass::eRuntime;
1289 return AddressClass::eRuntime;
1290 }
1291 }
1292 return AddressClass::eUnknown;
1293}
1294
// NOTE: the signature line and the declaration of `offset` are absent from
// this listing.
//
// If m_dysymtab has not been filled in yet (cmd == 0), walks the load commands
// looking for LC_DYSYMTAB and reads it into m_dysymtab. The result is true
// only when an LC_DYSYMTAB command is known and it reports at most one local
// symbol; false otherwise.
1296 if (m_dysymtab.cmd == 0) {
1297 ModuleSP module_sp(GetModule());
1298 if (module_sp) {
1300 for (uint32_t i = 0; i < m_header.ncmds; ++i) {
1301 const lldb::offset_t load_cmd_offset = offset;
1302
1303 llvm::MachO::load_command lc = {};
// Read the two leading 32-bit words (cmd, cmdsize) of the load command.
1304 if (m_data.GetU32(&offset, &lc.cmd, 2) == nullptr)
1305 break;
1306 if (lc.cmd == LC_DYSYMTAB) {
1307 m_dysymtab.cmd = lc.cmd;
1308 m_dysymtab.cmdsize = lc.cmdsize;
// Read every remaining 32-bit field of the struct; the "- 2" accounts for
// cmd and cmdsize, which were already consumed above.
1309 if (m_data.GetU32(&offset, &m_dysymtab.ilocalsym,
1310 (sizeof(m_dysymtab) / sizeof(uint32_t)) - 2) ==
1311 nullptr) {
1312 // Clear m_dysymtab if we were unable to read all items from the
1313 // load command
1314 ::memset(&m_dysymtab, 0, sizeof(m_dysymtab));
1315 }
1316 }
// Advance to the next load command using the size recorded in this one.
1317 offset = load_cmd_offset + lc.cmdsize;
1318 }
1319 }
1320 }
1321 if (m_dysymtab.cmd)
1322 return m_dysymtab.nlocalsym <= 1;
1323 return false;
1324}
1325
// NOTE: the signature line and the declarations of `offset` and `entry` are
// absent from this listing.
//
// Walks the load commands and collects one range (base = cryptoff,
// size = cryptsize) for every LC_ENCRYPTION_INFO / LC_ENCRYPTION_INFO_64
// command whose cryptid is non-zero. Returns the collected ranges; the result
// is empty when no such command is found.
1327 EncryptedFileRanges result;
1329
1330 llvm::MachO::encryption_info_command encryption_cmd;
1331 for (uint32_t i = 0; i < m_header.ncmds; ++i) {
1332 const lldb::offset_t load_cmd_offset = offset;
// Read the two leading 32-bit words (cmd, cmdsize) of the load command.
1333 if (m_data.GetU32(&offset, &encryption_cmd, 2) == nullptr)
1334 break;
1335
1336 // LC_ENCRYPTION_INFO and LC_ENCRYPTION_INFO_64 have the same sizes for the
1337 // 3 fields we care about, so treat them the same.
1338 if (encryption_cmd.cmd == LC_ENCRYPTION_INFO ||
1339 encryption_cmd.cmd == LC_ENCRYPTION_INFO_64) {
// Read three 32-bit words starting at cryptoff.
1340 if (m_data.GetU32(&offset, &encryption_cmd.cryptoff, 3)) {
// A cryptid of zero is skipped: no range is recorded for it.
1341 if (encryption_cmd.cryptid != 0) {
1343 entry.SetRangeBase(encryption_cmd.cryptoff);
1344 entry.SetByteSize(encryption_cmd.cryptsize);
1345 result.Append(entry);
1346 }
1347 }
1348 }
// Advance to the next load command using the size recorded in this one.
1349 offset = load_cmd_offset + encryption_cmd.cmdsize;
1350 }
1351
1352 return result;
1353}
1354
1356 llvm::MachO::segment_command_64 &seg_cmd, uint32_t cmd_idx) {
// NOTE: the first line of this function's signature is absent from this
// listing.
//
// Adjusts seg_cmd.fileoff / seg_cmd.filesize in place so that the segment
// never describes bytes beyond the m_length bytes available for this object:
//   - nothing is done when m_length or the segment's filesize is zero;
//   - for a shared cache binary that is not in memory, fileoff is rewritten
//     relative to the __TEXT segment's vmaddr;
//   - a fileoff past m_length zeroes fileoff and filesize (with a warning);
//   - a fileoff + filesize past m_length truncates filesize (with a warning).
//
// seg_cmd  Segment load command to validate; modified in place.
// cmd_idx  Index of the load command, used only in the warning text.
1357 if (m_length == 0 || seg_cmd.filesize == 0)
1358 return;
1359
1360 if (IsSharedCacheBinary() && !IsInMemory()) {
1361 // In shared cache images, the load commands are relative to the
1362 // shared cache file, and not the specific image we are
1363 // examining. Let's fix this up so that it looks like a normal
1364 // image.
1365 if (strncmp(seg_cmd.segname, GetSegmentNameTEXT().GetCString(),
1366 sizeof(seg_cmd.segname)) == 0)
1367 m_text_address = seg_cmd.vmaddr;
1368 if (strncmp(seg_cmd.segname, GetSegmentNameLINKEDIT().GetCString(),
1369 sizeof(seg_cmd.segname)) == 0)
1370 m_linkedit_original_offset = seg_cmd.fileoff;
1371
1372 seg_cmd.fileoff = seg_cmd.vmaddr - m_text_address;
1373 }
1374
1375 if (seg_cmd.fileoff > m_length) {
1376 // We have a load command that says it extends past the end of the file.
1377 // This is likely a corrupt file. We don't have any way to return an error
1378 // condition here (this method was likely invoked from something like
1379 // ObjectFile::GetSectionList()), so we just null out the section contents,
1380 // and dump a message to stdout. The most common case here is core file
1381 // debugging with a truncated file.
1382 const char *lc_segment_name =
1383 seg_cmd.cmd == LC_SEGMENT_64 ? "LC_SEGMENT_64" : "LC_SEGMENT";
1384 GetModule()->ReportWarning(
1385 "load command {0} {1} has a fileoff ({2:x16}) that extends beyond "
1386 "the end of the file ({3:x16}), ignoring this section",
1387 cmd_idx, lc_segment_name, seg_cmd.fileoff, m_length);
1388
1389 seg_cmd.fileoff = 0;
1390 seg_cmd.filesize = 0;
1391 }
1392
// fileoff <= m_length is guaranteed at this point, so the subtraction cannot
// wrap. Comparing this way (instead of fileoff + filesize > m_length) keeps a
// corrupt, huge filesize from overflowing the sum and escaping truncation.
1393 if (seg_cmd.filesize > m_length - seg_cmd.fileoff) {
1394 // We have a load command that says it extends past the end of the file.
1395 // This is likely a corrupt file. We don't have any way to return an error
1396 // condition here (this method was likely invoked from something like
1397 // ObjectFile::GetSectionList()), so we just null out the section contents,
1398 // and dump a message to stdout. The most common case here is core file
1399 // debugging with a truncated file.
1400 const char *lc_segment_name =
1401 seg_cmd.cmd == LC_SEGMENT_64 ? "LC_SEGMENT_64" : "LC_SEGMENT";
// Four arguments are supplied, so the file length is replacement index 3.
1402 GetModule()->ReportWarning(
1403 "load command {0} {1} has a fileoff + filesize ({2:x16}) that "
1404 "extends beyond the end of the file ({3:x16}), the segment will be "
1405 "truncated to match",
1406 cmd_idx, lc_segment_name, seg_cmd.fileoff + seg_cmd.filesize, m_length);
1407
1408 // Truncate the length
1409 seg_cmd.filesize = m_length - seg_cmd.fileoff;
1410 }
1411}
1412
1413static uint32_t
1414GetSegmentPermissions(const llvm::MachO::segment_command_64 &seg_cmd) {
1415 uint32_t result = 0;
1416 if (seg_cmd.initprot & VM_PROT_READ)
1417 result |= ePermissionsReadable;
1418 if (seg_cmd.initprot & VM_PROT_WRITE)
1419 result |= ePermissionsWritable;
1420 if (seg_cmd.initprot & VM_PROT_EXECUTE)
1421 result |= ePermissionsExecutable;
1422 return result;
1423}
1424
1426 ConstString section_name) {
// NOTE: the first line of this function's signature and most of the `return`
// statements that follow the name comparisons are absent from this listing.
//
// Maps a Mach-O section (its `flags` word and `section_name`) to a section
// type. The checks run in this order:
//   1. instruction attribute bits in `flags` -> eSectionTypeCode;
//   2. well-known section names (DWARF, Apple accelerator tables, ObjC,
//      unwind info, ...);
//   3. the Mach-O section type stored in the SECTION_TYPE bits of `flags`.
// eSectionTypeOther is the fallback.
1427
1428 if (flags & (S_ATTR_PURE_INSTRUCTIONS | S_ATTR_SOME_INSTRUCTIONS))
1429 return eSectionTypeCode;
1430
1431 uint32_t mach_sect_type = flags & SECTION_TYPE;
1432 static ConstString g_sect_name_objc_data("__objc_data");
1433 static ConstString g_sect_name_objc_msgrefs("__objc_msgrefs");
1434 static ConstString g_sect_name_objc_selrefs("__objc_selrefs");
1435 static ConstString g_sect_name_objc_classrefs("__objc_classrefs");
1436 static ConstString g_sect_name_objc_superrefs("__objc_superrefs");
1437 static ConstString g_sect_name_objc_const("__objc_const");
1438 static ConstString g_sect_name_objc_classlist("__objc_classlist");
1439 static ConstString g_sect_name_cfstring("__cfstring");
1440
1441 static ConstString g_sect_name_dwarf_debug_abbrev("__debug_abbrev");
1442 static ConstString g_sect_name_dwarf_debug_aranges("__debug_aranges");
1443 static ConstString g_sect_name_dwarf_debug_frame("__debug_frame");
1444 static ConstString g_sect_name_dwarf_debug_info("__debug_info");
1445 static ConstString g_sect_name_dwarf_debug_line("__debug_line");
1446 static ConstString g_sect_name_dwarf_debug_loc("__debug_loc");
1447 static ConstString g_sect_name_dwarf_debug_loclists("__debug_loclists");
1448 static ConstString g_sect_name_dwarf_debug_macinfo("__debug_macinfo");
1449 static ConstString g_sect_name_dwarf_debug_names("__debug_names");
1450 static ConstString g_sect_name_dwarf_debug_pubnames("__debug_pubnames");
1451 static ConstString g_sect_name_dwarf_debug_pubtypes("__debug_pubtypes");
1452 static ConstString g_sect_name_dwarf_debug_ranges("__debug_ranges");
1453 static ConstString g_sect_name_dwarf_debug_str("__debug_str");
1454 static ConstString g_sect_name_dwarf_debug_types("__debug_types");
1455 static ConstString g_sect_name_dwarf_apple_names("__apple_names");
1456 static ConstString g_sect_name_dwarf_apple_types("__apple_types");
// The literal below is 16 characters long, one short of "__apple_namespaces".
// NOTE(review): presumably deliberate because Mach-O section names are stored
// in 16-byte fields (see the sizeof(sect64.sectname) reads elsewhere in this
// file) -- confirm.
1457 static ConstString g_sect_name_dwarf_apple_namespaces("__apple_namespac");
1458 static ConstString g_sect_name_dwarf_apple_objc("__apple_objc");
1459 static ConstString g_sect_name_eh_frame("__eh_frame");
1460 static ConstString g_sect_name_compact_unwind("__unwind_info");
1461 static ConstString g_sect_name_text("__text");
1462 static ConstString g_sect_name_data("__data");
1463 static ConstString g_sect_name_go_symtab("__gosymtab");
1464
1465 if (section_name == g_sect_name_dwarf_debug_abbrev)
1467 if (section_name == g_sect_name_dwarf_debug_aranges)
1469 if (section_name == g_sect_name_dwarf_debug_frame)
1471 if (section_name == g_sect_name_dwarf_debug_info)
1473 if (section_name == g_sect_name_dwarf_debug_line)
1475 if (section_name == g_sect_name_dwarf_debug_loc)
1477 if (section_name == g_sect_name_dwarf_debug_loclists)
1479 if (section_name == g_sect_name_dwarf_debug_macinfo)
1481 if (section_name == g_sect_name_dwarf_debug_names)
1483 if (section_name == g_sect_name_dwarf_debug_pubnames)
1485 if (section_name == g_sect_name_dwarf_debug_pubtypes)
1487 if (section_name == g_sect_name_dwarf_debug_ranges)
1489 if (section_name == g_sect_name_dwarf_debug_str)
1491 if (section_name == g_sect_name_dwarf_debug_types)
1493 if (section_name == g_sect_name_dwarf_apple_names)
1495 if (section_name == g_sect_name_dwarf_apple_types)
1497 if (section_name == g_sect_name_dwarf_apple_namespaces)
1499 if (section_name == g_sect_name_dwarf_apple_objc)
1501 if (section_name == g_sect_name_objc_selrefs)
1503 if (section_name == g_sect_name_objc_msgrefs)
1505 if (section_name == g_sect_name_eh_frame)
1506 return eSectionTypeEHFrame;
1507 if (section_name == g_sect_name_compact_unwind)
1509 if (section_name == g_sect_name_cfstring)
1511 if (section_name == g_sect_name_go_symtab)
1512 return eSectionTypeGoSymtab;
1513 if (section_name == g_sect_name_objc_data ||
1514 section_name == g_sect_name_objc_classrefs ||
1515 section_name == g_sect_name_objc_superrefs ||
1516 section_name == g_sect_name_objc_const ||
1517 section_name == g_sect_name_objc_classlist) {
1519 }
1520
// No name matched: fall back to the Mach-O section type bits.
1521 switch (mach_sect_type) {
1522 // TODO: categorize sections by other flags for regular sections
1523 case S_REGULAR:
1524 if (section_name == g_sect_name_text)
1525 return eSectionTypeCode;
1526 if (section_name == g_sect_name_data)
1527 return eSectionTypeData;
1528 return eSectionTypeOther;
1529 case S_ZEROFILL:
1530 return eSectionTypeZeroFill;
1531 case S_CSTRING_LITERALS: // section with only literal C strings
1533 case S_4BYTE_LITERALS: // section with only 4 byte literals
1534 return eSectionTypeData4;
1535 case S_8BYTE_LITERALS: // section with only 8 byte literals
1536 return eSectionTypeData8;
1537 case S_LITERAL_POINTERS: // section with only pointers to literals
1539 case S_NON_LAZY_SYMBOL_POINTERS: // section with only non-lazy symbol pointers
1541 case S_LAZY_SYMBOL_POINTERS: // section with only lazy symbol pointers
1543 case S_SYMBOL_STUBS: // section with only symbol stubs, byte size of stub in
1544 // the reserved2 field
1545 return eSectionTypeCode;
1546 case S_MOD_INIT_FUNC_POINTERS: // section with only function pointers for
1547 // initialization
1549 case S_MOD_TERM_FUNC_POINTERS: // section with only function pointers for
1550 // termination
1552 case S_COALESCED:
1553 return eSectionTypeOther;
1554 case S_GB_ZEROFILL:
1555 return eSectionTypeZeroFill;
1556 case S_INTERPOSING: // section with only pairs of function pointers for
1557 // interposing
1558 return eSectionTypeCode;
1559 case S_16BYTE_LITERALS: // section with only 16 byte literals
1560 return eSectionTypeData16;
1561 case S_DTRACE_DOF:
1562 return eSectionTypeDebug;
1563 case S_LAZY_DYLIB_SYMBOL_POINTERS:
1565 default:
1566 return eSectionTypeOther;
1567 }
1568}
1569
1576
1580};
1581
1583 const llvm::MachO::load_command &load_cmd_, lldb::offset_t offset,
1584 uint32_t cmd_idx, SegmentParsingContext &context) {
// NOTE: the first line of this function's signature is absent from this
// listing.
//
// Parses one LC_SEGMENT / LC_SEGMENT_64 load command and the section records
// that follow it, creating Section objects for the segment and its sections.
//
// load_cmd_  The cmd/cmdsize words already read by the caller.
// offset     Offset in m_data of the first byte after those two words.
// cmd_idx    Index of this load command (forwarded to
//            SanitizeSegmentCommand for its warnings).
// context    Parsing state shared across segments: running segment/section
//            indices, the unified section list, the encrypted ranges and the
//            FileAddressesChanged flag.
//
// Returns early, without creating anything, if the segment name or the
// maxprot..flags words cannot be read from m_data.
1585 llvm::MachO::segment_command_64 load_cmd;
// Seed the 64-bit struct with the generic cmd/cmdsize prefix only.
1586 memcpy(&load_cmd, &load_cmd_, sizeof(load_cmd_));
1587
1588 if (!m_data.GetU8(&offset, (uint8_t *)load_cmd.segname, 16))
1589 return;
1590
1591 ModuleSP module_sp = GetModule();
1592 const bool is_core = GetType() == eTypeCoreFile;
1593 const bool is_dsym = (m_header.filetype == MH_DSYM);
1594 bool add_section = true;
1595 bool add_to_unified = true;
1596 ConstString const_segname(
1597 load_cmd.segname, strnlen(load_cmd.segname, sizeof(load_cmd.segname)));
1598
1599 SectionSP unified_section_sp(
1600 context.UnifiedList.FindSectionByName(const_segname));
1601 if (is_dsym && unified_section_sp) {
1602 if (const_segname == GetSegmentNameLINKEDIT()) {
1603 // We need to keep the __LINKEDIT segment private to this object file
1604 // only
1605 add_to_unified = false;
1606 } else {
1607 // This is the dSYM file and this section has already been created by the
1608 // object file, no need to create it.
1609 add_section = false;
1610 }
1611 }
// GetAddress() reads address-sized values, so the same code handles both the
// 32-bit and the 64-bit segment command layouts.
1612 load_cmd.vmaddr = m_data.GetAddress(&offset);
1613 load_cmd.vmsize = m_data.GetAddress(&offset);
1614 load_cmd.fileoff = m_data.GetAddress(&offset);
1615 load_cmd.filesize = m_data.GetAddress(&offset);
// Four 32-bit words starting at maxprot.
1616 if (!m_data.GetU32(&offset, &load_cmd.maxprot, 4))
1617 return;
1618
1619 SanitizeSegmentCommand(load_cmd, cmd_idx);
1620
1621 const uint32_t segment_permissions = GetSegmentPermissions(load_cmd);
1622 const bool segment_is_encrypted =
1623 (load_cmd.flags & SG_PROTECTED_VERSION_1) != 0;
1624
1625 // Use a segment ID of the segment index shifted left by 8 so they never
1626 // conflict with any of the sections.
1627 SectionSP segment_sp;
1628 if (add_section && (const_segname || is_core)) {
1629 segment_sp = std::make_shared<Section>(
1630 module_sp, // Module to which this section belongs
1631 this, // Object file to which this sections belongs
1632 ++context.NextSegmentIdx
1633 << 8, // Section ID is the 1 based segment index
1634 // shifted left by 8 bits so as not to collide with any of the 256
1635 // section IDs that are possible
1636 const_segname, // Name of this section
1637 eSectionTypeContainer, // This section is a container of other
1638 // sections.
1639 load_cmd.vmaddr, // File VM address == addresses as they are
1640 // found in the object file
1641 load_cmd.vmsize, // VM size in bytes of this section
1642 load_cmd.fileoff, // Offset to the data for this section in
1643 // the file
1644 load_cmd.filesize, // Size in bytes of this section as found
1645 // in the file
1646 0, // Segments have no alignment information
1647 load_cmd.flags); // Flags for this section
1648
1649 segment_sp->SetIsEncrypted(segment_is_encrypted);
1650 m_sections_up->AddSection(segment_sp);
1651 segment_sp->SetPermissions(segment_permissions);
1652 if (add_to_unified)
1653 context.UnifiedList.AddSection(segment_sp);
1654 } else if (unified_section_sp) {
1655 // If this is a dSYM and the file addresses in the dSYM differ from the
1656 // file addresses in the ObjectFile, we must use the file base address for
1657 // the Section from the dSYM for the DWARF to resolve correctly.
1658 // This only happens with binaries in the shared cache in practice;
1659 // normally a mismatch like this would give a binary & dSYM that do not
1660 // match UUIDs. When a binary is included in the shared cache, its
1661 // segments are rearranged to optimize the shared cache, so its file
1662 // addresses will differ from what the ObjectFile had originally,
1663 // and what the dSYM has.
1664 if (is_dsym && unified_section_sp->GetFileAddress() != load_cmd.vmaddr) {
1665 Log *log = GetLog(LLDBLog::Symbols);
1666 if (log) {
1667 log->Printf(
1668 "Installing dSYM's %s segment file address over ObjectFile's "
1669 "so symbol table/debug info resolves correctly for %s",
1670 const_segname.AsCString(),
1671 module_sp->GetFileSpec().GetFilename().AsCString());
1672 }
1673
1674 // Make sure we've parsed the symbol table from the ObjectFile before
1675 // we go around changing its Sections.
1676 module_sp->GetObjectFile()->GetSymtab();
1677 // eh_frame would present the same problems but we parse that on a per-
1678 // function basis as-needed so it's more difficult to remove its use of
1679 // the Sections. Realistically, the environments where this code path
1680 // will be taken will not have eh_frame sections.
1681
1682 unified_section_sp->SetFileAddress(load_cmd.vmaddr);
1683
1684 // Notify the module that the section addresses have been changed once
1685 // we're done so any file-address caches can be updated.
1686 context.FileAddressesChanged = true;
1687 }
1688 m_sections_up->AddSection(unified_section_sp);
1689 }
1690
1691 llvm::MachO::section_64 sect64;
1692 ::memset(&sect64, 0, sizeof(sect64));
1693 // Push a section into our mach sections for the section at index zero
1694 // (NO_SECT) if we don't have any mach sections yet...
1695 if (m_mach_sections.empty())
1696 m_mach_sections.push_back(sect64);
1697 uint32_t segment_sect_idx;
// ID that the first section created for this segment will receive; used after
// the loop to revisit exactly the sections created here.
1698 const lldb::user_id_t first_segment_sectID = context.NextSectionIdx + 1;
1699
// Number of 32-bit words read after addr/size for each section record: 7 for
// LC_SEGMENT, 8 otherwise.
1700 const uint32_t num_u32s = load_cmd.cmd == LC_SEGMENT ? 7 : 8;
1701 for (segment_sect_idx = 0; segment_sect_idx < load_cmd.nsects;
1702 ++segment_sect_idx) {
1703 if (m_data.GetU8(&offset, (uint8_t *)sect64.sectname,
1704 sizeof(sect64.sectname)) == nullptr)
1705 break;
1706 if (m_data.GetU8(&offset, (uint8_t *)sect64.segname,
1707 sizeof(sect64.segname)) == nullptr)
1708 break;
1709 sect64.addr = m_data.GetAddress(&offset);
1710 sect64.size = m_data.GetAddress(&offset);
1711
1712 if (m_data.GetU32(&offset, &sect64.offset, num_u32s) == nullptr)
1713 break;
1714
// Same fix-up SanitizeSegmentCommand applies to segments: make the file offset
// relative to the __TEXT address.
1715 if (IsSharedCacheBinary() && !IsInMemory()) {
1716 sect64.offset = sect64.addr - m_text_address;
1717 }
1718
1719 // Keep a list of mach sections around in case we need to get at data that
1720 // isn't stored in the abstracted Sections.
1721 m_mach_sections.push_back(sect64);
1722
1723 if (add_section) {
1724 ConstString section_name(
1725 sect64.sectname, strnlen(sect64.sectname, sizeof(sect64.sectname)));
1726 if (!const_segname) {
1727 // We have a segment with no name so we need to conjure up segments
1728 // that correspond to the section's segname if there isn't already such
1729 // a section. If there is such a section, we resize the section so that
1730 // it spans all sections. We also mark these sections as fake so
1731 // address matches don't hit if they land in the gaps between the child
1732 // sections.
1733 const_segname.SetTrimmedCStringWithLength(sect64.segname,
1734 sizeof(sect64.segname));
1735 segment_sp = context.UnifiedList.FindSectionByName(const_segname);
1736 if (segment_sp.get()) {
1737 Section *segment = segment_sp.get();
1738 // Grow the section size as needed.
1739 const lldb::addr_t sect64_min_addr = sect64.addr;
1740 const lldb::addr_t sect64_max_addr = sect64_min_addr + sect64.size;
1741 const lldb::addr_t curr_seg_byte_size = segment->GetByteSize();
1742 const lldb::addr_t curr_seg_min_addr = segment->GetFileAddress();
1743 const lldb::addr_t curr_seg_max_addr =
1744 curr_seg_min_addr + curr_seg_byte_size;
1745 if (sect64_min_addr >= curr_seg_min_addr) {
1746 const lldb::addr_t new_seg_byte_size =
1747 sect64_max_addr - curr_seg_min_addr;
1748 // Only grow the section size if needed
1749 if (new_seg_byte_size > curr_seg_byte_size)
1750 segment->SetByteSize(new_seg_byte_size);
1751 } else {
1752 // We need to change the base address of the segment and adjust the
1753 // child section offsets for all existing children.
1754 const lldb::addr_t slide_amount =
1755 sect64_min_addr - curr_seg_min_addr;
1756 segment->Slide(slide_amount, false);
1757 segment->GetChildren().Slide(-slide_amount, false);
1758 segment->SetByteSize(curr_seg_max_addr - sect64_min_addr);
1759 }
1760
1761 // Grow the section size as needed.
1762 if (sect64.offset) {
1763 const lldb::addr_t segment_min_file_offset =
1764 segment->GetFileOffset();
1765 const lldb::addr_t segment_max_file_offset =
1766 segment_min_file_offset + segment->GetFileSize();
1767
1768 const lldb::addr_t section_min_file_offset = sect64.offset;
1769 const lldb::addr_t section_max_file_offset =
1770 section_min_file_offset + sect64.size;
1771 const lldb::addr_t new_file_offset =
1772 std::min(section_min_file_offset, segment_min_file_offset);
1773 const lldb::addr_t new_file_size =
1774 std::max(section_max_file_offset, segment_max_file_offset) -
1775 new_file_offset;
1776 segment->SetFileOffset(new_file_offset);
1777 segment->SetFileSize(new_file_size);
1778 }
1779 } else {
1780 // Create a fake section for the section's named segment
1781 segment_sp = std::make_shared<Section>(
1782 segment_sp, // Parent section
1783 module_sp, // Module to which this section belongs
1784 this, // Object file to which this section belongs
1785 ++context.NextSegmentIdx
1786 << 8, // Section ID is the 1 based segment index
1787 // shifted left by 8 bits so as not to
1788 // collide with any of the 256 section IDs
1789 // that are possible
1790 const_segname, // Name of this section
1791 eSectionTypeContainer, // This section is a container of
1792 // other sections.
1793 sect64.addr, // File VM address == addresses as they are
1794 // found in the object file
1795 sect64.size, // VM size in bytes of this section
1796 sect64.offset, // Offset to the data for this section in
1797 // the file
1798 sect64.offset ? sect64.size : 0, // Size in bytes of
1799 // this section as
1800 // found in the file
1801 sect64.align,
1802 load_cmd.flags); // Flags for this section
1803 segment_sp->SetIsFake(true);
1804 segment_sp->SetPermissions(segment_permissions);
1805 m_sections_up->AddSection(segment_sp);
1806 if (add_to_unified)
1807 context.UnifiedList.AddSection(segment_sp);
1808 segment_sp->SetIsEncrypted(segment_is_encrypted);
1809 }
1810 }
1811 assert(segment_sp.get());
1812
1813 lldb::SectionType sect_type = GetSectionType(sect64.flags, section_name);
1814
// The child section's address is passed relative to its parent segment's file
// address; a zero file offset yields a zero file size.
1815 SectionSP section_sp(new Section(
1816 segment_sp, module_sp, this, ++context.NextSectionIdx, section_name,
1817 sect_type, sect64.addr - segment_sp->GetFileAddress(), sect64.size,
1818 sect64.offset, sect64.offset == 0 ? 0 : sect64.size, sect64.align,
1819 sect64.flags));
1820 // Set the section to be encrypted to match the segment
1821
1822 bool section_is_encrypted = false;
1823 if (!segment_is_encrypted && load_cmd.filesize != 0)
1824 section_is_encrypted = context.EncryptedRanges.FindEntryThatContains(
1825 sect64.offset) != nullptr;
1826
1827 section_sp->SetIsEncrypted(segment_is_encrypted || section_is_encrypted);
1828 section_sp->SetPermissions(segment_permissions);
1829 segment_sp->GetChildren().AddSection(section_sp);
1830
// A fake segment was conjured for this one section only; reset the state so
// the next section record looks up (or creates) its own segment.
1831 if (segment_sp->IsFake()) {
1832 segment_sp.reset();
1833 const_segname.Clear();
1834 }
1835 }
1836 }
// dSYM only: give each zero-sized section created above a size, either the
// distance to the next section's file address or, for the last one, the
// segment's vmsize.
1837 if (segment_sp && is_dsym) {
1838 if (first_segment_sectID <= context.NextSectionIdx) {
1839 lldb::user_id_t sect_uid;
1840 for (sect_uid = first_segment_sectID; sect_uid <= context.NextSectionIdx;
1841 ++sect_uid) {
1842 SectionSP curr_section_sp(
1843 segment_sp->GetChildren().FindSectionByID(sect_uid));
1844 SectionSP next_section_sp;
1845 if (sect_uid + 1 <= context.NextSectionIdx)
1846 next_section_sp =
1847 segment_sp->GetChildren().FindSectionByID(sect_uid + 1);
1848
1849 if (curr_section_sp.get()) {
1850 if (curr_section_sp->GetByteSize() == 0) {
1851 if (next_section_sp.get() != nullptr)
1852 curr_section_sp->SetByteSize(next_section_sp->GetFileAddress() -
1853 curr_section_sp->GetFileAddress());
1854 else
1855 curr_section_sp->SetByteSize(load_cmd.vmsize);
1856 }
1857 }
1858 }
1859 }
1860 }
1861}
1862
1864 const llvm::MachO::load_command &load_cmd, lldb::offset_t offset) {
// NOTE: the first line of this function's signature is absent from this
// listing.
//
// Fills m_dysymtab from a load command: cmd/cmdsize are copied from
// `load_cmd`, and the remaining 32-bit fields (the struct's word count minus
// those two) are read from m_data starting at `offset`. The result of the read
// is not checked here.
1865 m_dysymtab.cmd = load_cmd.cmd;
1866 m_dysymtab.cmdsize = load_cmd.cmdsize;
1867 m_data.GetU32(&offset, &m_dysymtab.ilocalsym,
1868 (sizeof(m_dysymtab) / sizeof(uint32_t)) - 2);
1869}
1870
// NOTE: the signature line and the declaration of `offset` are absent from
// this listing.
//
// Builds this object file's section list once: returns immediately if
// m_sections_up already exists, otherwise walks every load command,
// dispatching segment commands to ProcessSegmentCommand and LC_DYSYMTAB to
// ProcessDysymtabCommand. If any section file address was changed along the
// way, the module is notified afterwards.
1872 if (m_sections_up)
1873 return;
1874
1875 m_sections_up = std::make_unique<SectionList>();
1876
1878 // bool dump_sections = false;
1879 ModuleSP module_sp(GetModule());
1880
// Load commands start right after the Mach-O header.
1881 offset = MachHeaderSizeFromMagic(m_header.magic);
1882
1883 SegmentParsingContext context(GetEncryptedFileRanges(), unified_section_list);
1884 llvm::MachO::load_command load_cmd;
1885 for (uint32_t i = 0; i < m_header.ncmds; ++i) {
1886 const lldb::offset_t load_cmd_offset = offset;
// Read the two leading 32-bit words (cmd, cmdsize) of the load command.
1887 if (m_data.GetU32(&offset, &load_cmd, 2) == nullptr)
1888 break;
1889
1890 if (load_cmd.cmd == LC_SEGMENT || load_cmd.cmd == LC_SEGMENT_64)
1891 ProcessSegmentCommand(load_cmd, offset, i, context);
1892 else if (load_cmd.cmd == LC_DYSYMTAB)
1893 ProcessDysymtabCommand(load_cmd, offset);
1894
// Advance to the next load command using the size recorded in this one.
1895 offset = load_cmd_offset + load_cmd.cmdsize;
1896 }
1897
1898 if (context.FileAddressesChanged && module_sp)
1899 module_sp->SectionFileAddressesChanged();
1900}
1901
// NOTE: the class header, the constructor's first line, the nested
// SectionInfo header and a few other lines are absent from this listing.
//
// Caches, per section number, the section found in m_section_list together
// with its file-address range, so repeated symbol lookups do not search the
// section list again.
1903public:
1905 : m_section_list(section_list), m_section_infos() {
1906 // Get the number of sections down to a depth of 1 to include all segments
1907 // and their sections, but no other sections that may be added for the
1908 // debug map.
1909 m_section_infos.resize(section_list->GetNumSections(1));
1910 }
1911
// Returns the section numbered `n_sect` if `file_addr` lies inside it (or
// equals the base address of a zero-sized section). Section number 0 yields an
// empty SectionSP.
1912 SectionSP GetSection(uint8_t n_sect, addr_t file_addr) {
1913 if (n_sect == 0)
1914 return SectionSP();
1915 if (n_sect < m_section_infos.size()) {
// First request for this section number: look it up and cache its range.
1916 if (!m_section_infos[n_sect].section_sp) {
1917 SectionSP section_sp(m_section_list->FindSectionByID(n_sect));
1918 m_section_infos[n_sect].section_sp = section_sp;
1919 if (section_sp) {
1920 m_section_infos[n_sect].vm_range.SetBaseAddress(
1921 section_sp->GetFileAddress());
1922 m_section_infos[n_sect].vm_range.SetByteSize(
1923 section_sp->GetByteSize());
1924 } else {
// Lookup failed: build a diagnostic that names the object file, taken from
// the first section in the list when there is one.
1925 std::string filename = "<unknown>";
1926 SectionSP first_section_sp(m_section_list->GetSectionAtIndex(0));
1927 if (first_section_sp)
1928 filename = first_section_sp->GetObjectFile()->GetFileSpec().GetPath();
1929
1931 llvm::formatv("unable to find section {0} for a symbol in "
1932 "{1}, corrupt file?",
1933 n_sect, filename));
1934 }
1935 }
1936 if (m_section_infos[n_sect].vm_range.Contains(file_addr)) {
1937 // Symbol is in section.
1938 return m_section_infos[n_sect].section_sp;
1939 } else if (m_section_infos[n_sect].vm_range.GetByteSize() == 0 &&
1940 m_section_infos[n_sect].vm_range.GetBaseAddress() ==
1941 file_addr) {
1942 // Symbol is in section with zero size, but has the same start address
1943 // as the section. This can happen with linker symbols (symbols that
1944 // start with the letter 'l' or 'L').
1945 return m_section_infos[n_sect].section_sp;
1946 }
1947 }
1949 }
1950
1951protected:
1954
1956 SectionSP section_sp;
1957 };
// Indexed by section number; sized once in the constructor.
1959 std::vector<SectionInfo> m_section_infos;
1960};
1961
1962#define TRIE_SYMBOL_IS_THUMB (1ULL << 63)
// NOTE: the struct header and the declarations of `name`, `address` and
// `import_name` are absent from this listing.
//
// Prints this entry to stdout on one line: address, flags and other as
// zero-padded 16-digit hex values, the name in quotes, and, when import_name
// is set, an arrow followed by the quoted import name.
1964 void Dump() const {
1965 printf("0x%16.16llx 0x%16.16llx 0x%16.16llx \"%s\"",
1966 static_cast<unsigned long long>(address),
1967 static_cast<unsigned long long>(flags),
1968 static_cast<unsigned long long>(other), name.GetCString());
1969 if (import_name)
1970 printf(" -> \"%s\"\n", import_name.GetCString());
1971 else
1972 printf("\n");
1973 }
1976 uint64_t flags =
1977 0; // EXPORT_SYMBOL_FLAGS_REEXPORT, EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER,
1978 // TRIE_SYMBOL_IS_THUMB
1979 uint64_t other = 0;
1981};
1982
// NOTE: the struct header, its member declarations and its constructor are
// absent from this listing; only Dump() and operator< survive.
1986
1988
// Prints the index and node offset, then delegates to the entry's own Dump().
1989 void Dump(uint32_t idx) const {
1990 printf("[%3u] 0x%16.16llx: ", idx,
1991 static_cast<unsigned long long>(nodeOffset));
1992 entry.Dump();
1993 }
1994
// Orders entries by nodeOffset only.
1995 bool operator<(const TrieEntryWithOffset &other) const {
1996 return (nodeOffset < other.nodeOffset);
1997 }
1998};
1999
2001 const bool is_arm, addr_t text_seg_base_addr,
2002 std::vector<llvm::StringRef> &nameSlices,
2003 std::set<lldb::addr_t> &resolver_addresses,
2004 std::vector<TrieEntryWithOffset> &reexports,
2005 std::vector<TrieEntryWithOffset> &ext_symbols) {
// NOTE: the first line of this function's signature and the two lines inside
// the `is_arm && (address & 1)` branch are absent from this listing.
//
// Recursively walks the trie node at `offset` in `data`.
//
// is_arm              When set, resolver addresses are masked with
//                     THUMB_ADDRESS_BIT_MASK.
// text_seg_base_addr  Added to addresses read from the trie unless it is
//                     LLDB_INVALID_ADDRESS.
// nameSlices          Edge labels on the path from the root to this node;
//                     concatenated to form a symbol name. Pushed/popped around
//                     each child visit.
// resolver_addresses  Receives the resolver address of every
//                     EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER entry.
// reexports           Receives re-export entries with a non-empty import name.
// ext_symbols         Receives entries whose flags are zero.
//
// Returns false when a child edge label cannot be read or a recursive call
// fails; true otherwise, including when `offset` is not a valid offset.
2006 if (!data.ValidOffset(offset))
2007 return true;
2008
2009 // Terminal node -- end of a branch, possibly add this to
2010 // the symbol table or resolver table.
2011 const uint64_t terminalSize = data.GetULEB128(&offset);
// The child list starts right after the terminal payload.
2012 lldb::offset_t children_offset = offset + terminalSize;
2013 if (terminalSize != 0) {
2014 TrieEntryWithOffset e(offset);
2015 e.entry.flags = data.GetULEB128(&offset);
2016 const char *import_name = nullptr;
2017 if (e.entry.flags & EXPORT_SYMBOL_FLAGS_REEXPORT) {
2018 e.entry.address = 0;
2019 e.entry.other = data.GetULEB128(&offset); // dylib ordinal
2020 import_name = data.GetCStr(&offset);
2021 } else {
2022 e.entry.address = data.GetULEB128(&offset);
2023 if (text_seg_base_addr != LLDB_INVALID_ADDRESS)
2024 e.entry.address += text_seg_base_addr;
2025 if (e.entry.flags & EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER) {
2026 e.entry.other = data.GetULEB128(&offset);
2027 uint64_t resolver_addr = e.entry.other;
2028 if (text_seg_base_addr != LLDB_INVALID_ADDRESS)
2029 resolver_addr += text_seg_base_addr;
2030 if (is_arm)
2031 resolver_addr &= THUMB_ADDRESS_BIT_MASK;
2032 resolver_addresses.insert(resolver_addr);
2033 } else
2034 e.entry.other = 0;
2035 }
2036 bool add_this_entry = false;
2037 if (Flags(e.entry.flags).Test(EXPORT_SYMBOL_FLAGS_REEXPORT) &&
2038 import_name && import_name[0]) {
2039 // add symbols that are reexport symbols with a valid import name.
2040 add_this_entry = true;
2041 } else if (e.entry.flags == 0 &&
2042 (import_name == nullptr || import_name[0] == '\0')) {
2043 // add externally visible symbols, in case the nlist record has
2044 // been stripped/omitted.
2045 add_this_entry = true;
2046 }
2047 if (add_this_entry) {
// The full symbol name is the concatenation of every edge label on the path.
2048 std::string name;
2049 if (!nameSlices.empty()) {
2050 for (auto name_slice : nameSlices)
2051 name.append(name_slice.data(), name_slice.size());
2052 }
2053 if (name.size() > 1) {
2054 // Skip the leading '_'
2055 e.entry.name.SetCStringWithLength(name.c_str() + 1, name.size() - 1);
2056 }
2057 if (import_name) {
2058 // Skip the leading '_'
2059 e.entry.import_name.SetCString(import_name + 1);
2060 }
2061 if (Flags(e.entry.flags).Test(EXPORT_SYMBOL_FLAGS_REEXPORT)) {
2062 reexports.push_back(e);
2063 } else {
2064 if (is_arm && (e.entry.address & 1)) {
2067 }
2068 ext_symbols.push_back(e);
2069 }
2070 }
2071 }
2072
// Each child is an edge label (C string) followed by a ULEB128 node offset; a
// zero offset is not followed.
2073 const uint8_t childrenCount = data.GetU8(&children_offset);
2074 for (uint8_t i = 0; i < childrenCount; ++i) {
2075 const char *cstr = data.GetCStr(&children_offset);
2076 if (cstr)
2077 nameSlices.push_back(llvm::StringRef(cstr));
2078 else
2079 return false; // Corrupt data
2080 lldb::offset_t childNodeOffset = data.GetULEB128(&children_offset);
2081 if (childNodeOffset) {
2082 if (!ParseTrieEntries(data, childNodeOffset, is_arm, text_seg_base_addr,
2083 nameSlices, resolver_addresses, reexports,
2084 ext_symbols)) {
2085 return false;
2086 }
2087 }
2088 nameSlices.pop_back();
2089 }
2090 return true;
2091}
2092
2093static SymbolType GetSymbolType(const char *&symbol_name,
2094 bool &demangled_is_synthesized,
2095 const SectionSP &text_section_sp,
2096 const SectionSP &data_section_sp,
2097 const SectionSP &data_dirty_section_sp,
2098 const SectionSP &data_const_section_sp,
2099 const SectionSP &symbol_section) {
2101
2102 const char *symbol_sect_name = symbol_section->GetName().AsCString();
2103 if (symbol_section->IsDescendant(text_section_sp.get())) {
2104 if (symbol_section->IsClear(S_ATTR_PURE_INSTRUCTIONS |
2105 S_ATTR_SELF_MODIFYING_CODE |
2106 S_ATTR_SOME_INSTRUCTIONS))
2107 type = eSymbolTypeData;
2108 else
2109 type = eSymbolTypeCode;
2110 } else if (symbol_section->IsDescendant(data_section_sp.get()) ||
2111 symbol_section->IsDescendant(data_dirty_section_sp.get()) ||
2112 symbol_section->IsDescendant(data_const_section_sp.get())) {
2113 if (symbol_sect_name &&
2114 ::strstr(symbol_sect_name, "__objc") == symbol_sect_name) {
2115 type = eSymbolTypeRuntime;
2116
2117 if (symbol_name) {
2118 llvm::StringRef symbol_name_ref(symbol_name);
2119 if (symbol_name_ref.startswith("OBJC_")) {
2120 static const llvm::StringRef g_objc_v2_prefix_class("OBJC_CLASS_$_");
2121 static const llvm::StringRef g_objc_v2_prefix_metaclass(
2122 "OBJC_METACLASS_$_");
2123 static const llvm::StringRef g_objc_v2_prefix_ivar("OBJC_IVAR_$_");
2124 if (symbol_name_ref.startswith(g_objc_v2_prefix_class)) {
2125 symbol_name = symbol_name + g_objc_v2_prefix_class.size();
2126 type = eSymbolTypeObjCClass;
2127 demangled_is_synthesized = true;
2128 } else if (symbol_name_ref.startswith(g_objc_v2_prefix_metaclass)) {
2129 symbol_name = symbol_name + g_objc_v2_prefix_metaclass.size();
2131 demangled_is_synthesized = true;
2132 } else if (symbol_name_ref.startswith(g_objc_v2_prefix_ivar)) {
2133 symbol_name = symbol_name + g_objc_v2_prefix_ivar.size();
2134 type = eSymbolTypeObjCIVar;
2135 demangled_is_synthesized = true;
2136 }
2137 }
2138 }
2139 } else if (symbol_sect_name &&
2140 ::strstr(symbol_sect_name, "__gcc_except_tab") ==
2141 symbol_sect_name) {
2142 type = eSymbolTypeException;
2143 } else {
2144 type = eSymbolTypeData;
2145 }
2146 } else if (symbol_sect_name &&
2147 ::strstr(symbol_sect_name, "__IMPORT") == symbol_sect_name) {
2148 type = eSymbolTypeTrampoline;
2149 }
2150 return type;
2151}
2152
2153static std::optional<struct nlist_64>
2154ParseNList(DataExtractor &nlist_data, lldb::offset_t &nlist_data_offset,
2155 size_t nlist_byte_size) {
2156 struct nlist_64 nlist;
2157 if (!nlist_data.ValidOffsetForDataOfSize(nlist_data_offset, nlist_byte_size))
2158 return {};
2159 nlist.n_strx = nlist_data.GetU32_unchecked(&nlist_data_offset);
2160 nlist.n_type = nlist_data.GetU8_unchecked(&nlist_data_offset);
2161 nlist.n_sect = nlist_data.GetU8_unchecked(&nlist_data_offset);
2162 nlist.n_desc = nlist_data.GetU16_unchecked(&nlist_data_offset);
2163 nlist.n_value = nlist_data.GetAddress_unchecked(&nlist_data_offset);
2164 return nlist;
2165}
2166
// Named stand-ins for a bare true/false "debug symbols?" flag.
enum {
  NonDebugSymbols = false,
  DebugSymbols = true
};
2168
2170 ModuleSP module_sp(GetModule());
2171 if (!module_sp)
2172 return;
2173
2174 const FileSpec &file = m_file ? m_file : module_sp->GetFileSpec();
2175 const char *file_name = file.GetFilename().AsCString("<Unknown>");
2176 LLDB_SCOPED_TIMERF("ObjectFileMachO::ParseSymtab () module = %s", file_name);
2177 Progress progress(llvm::formatv("Parsing symbol table for {0}", file_name));
2178
2179 llvm::MachO::symtab_command symtab_load_command = {0, 0, 0, 0, 0, 0};
2180 llvm::MachO::linkedit_data_command function_starts_load_command = {0, 0, 0, 0};
2181 llvm::MachO::linkedit_data_command exports_trie_load_command = {0, 0, 0, 0};
2182 llvm::MachO::dyld_info_command dyld_info = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
2183 llvm::MachO::dysymtab_command dysymtab = m_dysymtab;
2184 // The data element of type bool indicates that this entry is thumb
2185 // code.
2186 typedef AddressDataArray<lldb::addr_t, bool, 100> FunctionStarts;
2187
2188 // Record the address of every function/data that we add to the symtab.
2189 // We add symbols to the table in the order of most information (nlist
2190 // records) to least (function starts), and avoid duplicating symbols
2191 // via this set.
2192 llvm::DenseSet<addr_t> symbols_added;
2193
2194 // We are using a llvm::DenseSet for "symbols_added" so we must be sure we
2195 // do not add the tombstone or empty keys to the set.
2196 auto add_symbol_addr = [&symbols_added](lldb::addr_t file_addr) {
2197 // Don't add the tombstone or empty keys.
2198 if (file_addr == UINT64_MAX || file_addr == UINT64_MAX - 1)
2199 return;
2200 symbols_added.insert(file_addr);
2201 };
2202 FunctionStarts function_starts;
2204 uint32_t i;
2205 FileSpecList dylib_files;
2206 Log *log = GetLog(LLDBLog::Symbols);
2207 llvm::StringRef g_objc_v2_prefix_class("_OBJC_CLASS_$_");
2208 llvm::StringRef g_objc_v2_prefix_metaclass("_OBJC_METACLASS_$_");
2209 llvm::StringRef g_objc_v2_prefix_ivar("_OBJC_IVAR_$_");
2210 UUID image_uuid;
2211
2212 for (i = 0; i < m_header.ncmds; ++i) {
2213 const lldb::offset_t cmd_offset = offset;
2214 // Read in the load command and load command size
2215 llvm::MachO::load_command lc;
2216 if (m_data.GetU32(&offset, &lc, 2) == nullptr)
2217 break;
2218 // Watch for the symbol table load command
2219 switch (lc.cmd) {
2220 case LC_SYMTAB:
2221 symtab_load_command.cmd = lc.cmd;
2222 symtab_load_command.cmdsize = lc.cmdsize;
2223 // Read in the rest of the symtab load command
2224 if (m_data.GetU32(&offset, &symtab_load_command.symoff, 4) ==
2225 nullptr) // fill in symoff, nsyms, stroff, strsize fields
2226 return;
2227 break;
2228
2229 case LC_DYLD_INFO:
2230 case LC_DYLD_INFO_ONLY:
2231 if (m_data.GetU32(&offset, &dyld_info.rebase_off, 10)) {
2232 dyld_info.cmd = lc.cmd;
2233 dyld_info.cmdsize = lc.cmdsize;
2234 } else {
2235 memset(&dyld_info, 0, sizeof(dyld_info));
2236 }
2237 break;
2238
2239 case LC_LOAD_DYLIB:
2240 case LC_LOAD_WEAK_DYLIB:
2241 case LC_REEXPORT_DYLIB:
2242 case LC_LOADFVMLIB:
2243 case LC_LOAD_UPWARD_DYLIB: {
2244 uint32_t name_offset = cmd_offset + m_data.GetU32(&offset);
2245 const char *path = m_data.PeekCStr(name_offset);
2246 if (path) {
2247 FileSpec file_spec(path);
2248 // Strip the path if there is @rpath, @executable, etc so we just use
2249 // the basename
2250 if (path[0] == '@')
2251 file_spec.ClearDirectory();
2252
2253 if (lc.cmd == LC_REEXPORT_DYLIB) {
2255 }
2256
2257 dylib_files.Append(file_spec);
2258 }
2259 } break;
2260
2261 case LC_DYLD_EXPORTS_TRIE:
2262 exports_trie_load_command.cmd = lc.cmd;
2263 exports_trie_load_command.cmdsize = lc.cmdsize;
2264 if (m_data.GetU32(&offset, &exports_trie_load_command.dataoff, 2) ==
2265 nullptr) // fill in offset and size fields
2266 memset(&exports_trie_load_command, 0,
2267 sizeof(exports_trie_load_command));
2268 break;
2269 case LC_FUNCTION_STARTS:
2270 function_starts_load_command.cmd = lc.cmd;
2271 function_starts_load_command.cmdsize = lc.cmdsize;
2272 if (m_data.GetU32(&offset, &function_starts_load_command.dataoff, 2) ==
2273 nullptr) // fill in data offset and size fields
2274 memset(&function_starts_load_command, 0,
2275 sizeof(function_starts_load_command));
2276 break;
2277
2278 case LC_UUID: {
2279 const uint8_t *uuid_bytes = m_data.PeekData(offset, 16);
2280
2281 if (uuid_bytes)
2282 image_uuid = UUID(uuid_bytes, 16);
2283 break;
2284 }
2285
2286 default:
2287 break;
2288 }
2289 offset = cmd_offset + lc.cmdsize;
2290 }
2291
2292 if (!symtab_load_command.cmd)
2293 return;
2294
2295 SectionList *section_list = GetSectionList();
2296 if (section_list == nullptr)
2297 return;
2298
2299 const uint32_t addr_byte_size = m_data.GetAddressByteSize();
2300 const ByteOrder byte_order = m_data.GetByteOrder();
2301 bool bit_width_32 = addr_byte_size == 4;
2302 const size_t nlist_byte_size =
2303 bit_width_32 ? sizeof(struct nlist) : sizeof(struct nlist_64);
2304
2305 DataExtractor nlist_data(nullptr, 0, byte_order, addr_byte_size);
2306 DataExtractor strtab_data(nullptr, 0, byte_order, addr_byte_size);
2307 DataExtractor function_starts_data(nullptr, 0, byte_order, addr_byte_size);
2308 DataExtractor indirect_symbol_index_data(nullptr, 0, byte_order,
2309 addr_byte_size);
2310 DataExtractor dyld_trie_data(nullptr, 0, byte_order, addr_byte_size);
2311
2312 const addr_t nlist_data_byte_size =
2313 symtab_load_command.nsyms * nlist_byte_size;
2314 const addr_t strtab_data_byte_size = symtab_load_command.strsize;
2315 addr_t strtab_addr = LLDB_INVALID_ADDRESS;
2316
2317 ProcessSP process_sp(m_process_wp.lock());
2318 Process *process = process_sp.get();
2319
2320 uint32_t memory_module_load_level = eMemoryModuleLoadLevelComplete;
2321 bool is_shared_cache_image = IsSharedCacheBinary();
2322 bool is_local_shared_cache_image = is_shared_cache_image && !IsInMemory();
2323 SectionSP linkedit_section_sp(
2324 section_list->FindSectionByName(GetSegmentNameLINKEDIT()));
2325
2326 if (process && m_header.filetype != llvm::MachO::MH_OBJECT &&
2327 !is_local_shared_cache_image) {
2328 Target &target = process->GetTarget();
2329
2330 memory_module_load_level = target.GetMemoryModuleLoadLevel();
2331
2332 // Reading mach file from memory in a process or core file...
2333
2334 if (linkedit_section_sp) {
2335 addr_t linkedit_load_addr =
2336 linkedit_section_sp->GetLoadBaseAddress(&target);
2337 if (linkedit_load_addr == LLDB_INVALID_ADDRESS) {
2338 // We might be trying to access the symbol table before the
2339 // __LINKEDIT's load address has been set in the target. We can't
2340 // fail to read the symbol table, so calculate the right address
2341 // manually
2342 linkedit_load_addr = CalculateSectionLoadAddressForMemoryImage(
2343 m_memory_addr, GetMachHeaderSection(), linkedit_section_sp.get());
2344 }
2345
2346 const addr_t linkedit_file_offset = linkedit_section_sp->GetFileOffset();
2347 const addr_t symoff_addr = linkedit_load_addr +
2348 symtab_load_command.symoff -
2349 linkedit_file_offset;
2350 strtab_addr = linkedit_load_addr + symtab_load_command.stroff -
2351 linkedit_file_offset;
2352
2353 // Always load dyld - the dynamic linker - from memory if we didn't
2354 // find a binary anywhere else. lldb will not register
2355 // dylib/framework/bundle loads/unloads if we don't have the dyld
2356 // symbols, we force dyld to load from memory despite the user's
2357 // target.memory-module-load-level setting.
2358 if (memory_module_load_level == eMemoryModuleLoadLevelComplete ||
2359 m_header.filetype == llvm::MachO::MH_DYLINKER) {
2360 DataBufferSP nlist_data_sp(
2361 ReadMemory(process_sp, symoff_addr, nlist_data_byte_size));
2362 if (nlist_data_sp)
2363 nlist_data.SetData(nlist_data_sp, 0, nlist_data_sp->GetByteSize());
2364 if (dysymtab.nindirectsyms != 0) {
2365 const addr_t indirect_syms_addr = linkedit_load_addr +
2366 dysymtab.indirectsymoff -
2367 linkedit_file_offset;
2368 DataBufferSP indirect_syms_data_sp(ReadMemory(
2369 process_sp, indirect_syms_addr, dysymtab.nindirectsyms * 4));
2370 if (indirect_syms_data_sp)
2371 indirect_symbol_index_data.SetData(
2372 indirect_syms_data_sp, 0,
2373 indirect_syms_data_sp->GetByteSize());
2374 // If this binary is outside the shared cache,
2375 // cache the string table.
2376 // Binaries in the shared cache all share a giant string table,
2377 // and we can't share the string tables across multiple
2378 // ObjectFileMachO's, so we'd end up re-reading this mega-strtab
2379 // for every binary in the shared cache - it would be a big perf
2380 // problem. For binaries outside the shared cache, it's faster to
2381 // read the entire strtab at once instead of piece-by-piece as we
2382 // process the nlist records.
2383 if (!is_shared_cache_image) {
2384 DataBufferSP strtab_data_sp(
2385 ReadMemory(process_sp, strtab_addr, strtab_data_byte_size));
2386 if (strtab_data_sp) {
2387 strtab_data.SetData(strtab_data_sp, 0,
2388 strtab_data_sp->GetByteSize());
2389 }
2390 }
2391 }
2392 if (memory_module_load_level >= eMemoryModuleLoadLevelPartial) {
2393 if (function_starts_load_command.cmd) {
2394 const addr_t func_start_addr =
2395 linkedit_load_addr + function_starts_load_command.dataoff -
2396 linkedit_file_offset;
2397 DataBufferSP func_start_data_sp(
2398 ReadMemory(process_sp, func_start_addr,
2399 function_starts_load_command.datasize));
2400 if (func_start_data_sp)
2401 function_starts_data.SetData(func_start_data_sp, 0,
2402 func_start_data_sp->GetByteSize());
2403 }
2404 }
2405 }
2406 }
2407 } else {
2408 if (is_local_shared_cache_image) {
2409 // The load commands in shared cache images are relative to the
2410 // beginning of the shared cache, not the library image. The
2411 // data we get handed when creating the ObjectFileMachO starts
2412 // at the beginning of a specific library and spans to the end
2413 // of the cache to be able to reach the shared LINKEDIT
2414 // segments. We need to convert the load command offsets to be
2415 // relative to the beginning of our specific image.
2416 lldb::addr_t linkedit_offset = linkedit_section_sp->GetFileOffset();
2417 lldb::offset_t linkedit_slide =
2418 linkedit_offset - m_linkedit_original_offset;
2419 symtab_load_command.symoff += linkedit_slide;
2420 symtab_load_command.stroff += linkedit_slide;
2421 dyld_info.export_off += linkedit_slide;
2422 dysymtab.indirectsymoff += linkedit_slide;
2423 function_starts_load_command.dataoff += linkedit_slide;
2424 exports_trie_load_command.dataoff += linkedit_slide;
2425 }
2426
2427 nlist_data.SetData(m_data, symtab_load_command.symoff,
2428 nlist_data_byte_size);
2429 strtab_data.SetData(m_data, symtab_load_command.stroff,
2430 strtab_data_byte_size);
2431
2432 // We shouldn't have exports data from both the LC_DYLD_INFO command
2433 // AND the LC_DYLD_EXPORTS_TRIE command in the same binary:
2434 lldbassert(!((dyld_info.export_size > 0)
2435 && (exports_trie_load_command.datasize > 0)));
2436 if (dyld_info.export_size > 0) {
2437 dyld_trie_data.SetData(m_data, dyld_info.export_off,
2438 dyld_info.export_size);
2439 } else if (exports_trie_load_command.datasize > 0) {
2440 dyld_trie_data.SetData(m_data, exports_trie_load_command.dataoff,
2441 exports_trie_load_command.datasize);
2442 }
2443
2444 if (dysymtab.nindirectsyms != 0) {
2445 indirect_symbol_index_data.SetData(m_data, dysymtab.indirectsymoff,
2446 dysymtab.nindirectsyms * 4);
2447 }
2448 if (function_starts_load_command.cmd) {
2449 function_starts_data.SetData(m_data, function_starts_load_command.dataoff,
2450 function_starts_load_command.datasize);
2451 }
2452 }
2453
2454 const bool have_strtab_data = strtab_data.GetByteSize() > 0;
2455
2456 ConstString g_segment_name_TEXT = GetSegmentNameTEXT();
2457 ConstString g_segment_name_DATA = GetSegmentNameDATA();
2458 ConstString g_segment_name_DATA_DIRTY = GetSegmentNameDATA_DIRTY();
2459 ConstString g_segment_name_DATA_CONST = GetSegmentNameDATA_CONST();
2460 ConstString g_segment_name_OBJC = GetSegmentNameOBJC();
2461 ConstString g_section_name_eh_frame = GetSectionNameEHFrame();
2462 SectionSP text_section_sp(
2463 section_list->FindSectionByName(g_segment_name_TEXT));
2464 SectionSP data_section_sp(
2465 section_list->FindSectionByName(g_segment_name_DATA));
2466 SectionSP data_dirty_section_sp(
2467 section_list->FindSectionByName(g_segment_name_DATA_DIRTY));
2468 SectionSP data_const_section_sp(
2469 section_list->FindSectionByName(g_segment_name_DATA_CONST));
2470 SectionSP objc_section_sp(
2471 section_list->FindSectionByName(g_segment_name_OBJC));
2472 SectionSP eh_frame_section_sp;
2473 if (text_section_sp.get())
2474 eh_frame_section_sp = text_section_sp->GetChildren().FindSectionByName(
2475 g_section_name_eh_frame);
2476 else
2477 eh_frame_section_sp =
2478 section_list->FindSectionByName(g_section_name_eh_frame);
2479
2480 const bool is_arm = (m_header.cputype == llvm::MachO::CPU_TYPE_ARM);
2481 const bool always_thumb = GetArchitecture().IsAlwaysThumbInstructions();
2482
2483 // lldb works best if it knows the start address of all functions in a
2484 // module. Linker symbols or debug info are normally the best source of
2485 // information for start addr / size but they may be stripped in a released
2486 // binary. Two additional sources of information exist in Mach-O binaries:
2487 // LC_FUNCTION_STARTS - a list of ULEB128 encoded offsets of each
2488 // function's start address in the
2489 // binary, relative to the text section.
2490 // eh_frame - the eh_frame FDEs have the start addr & size of
2491 // each function
2492 // LC_FUNCTION_STARTS is the fastest source to read in, and is present on
2493 // all modern binaries.
2494 // Binaries built to run on older releases may need to use eh_frame
2495 // information.
2496
2497 if (text_section_sp && function_starts_data.GetByteSize()) {
2498 FunctionStarts::Entry function_start_entry;
2499 function_start_entry.data = false;
2500 lldb::offset_t function_start_offset = 0;
2501 function_start_entry.addr = text_section_sp->GetFileAddress();
2502 uint64_t delta;
2503 while ((delta = function_starts_data.GetULEB128(&function_start_offset)) >
2504 0) {
2505 // Now append the current entry
2506 function_start_entry.addr += delta;
2507 if (is_arm) {
2508 if (function_start_entry.addr & 1) {
2509 function_start_entry.addr &= THUMB_ADDRESS_BIT_MASK;
2510 function_start_entry.data = true;
2511 } else if (always_thumb) {
2512 function_start_entry.data = true;
2513 }
2514 }
2515 function_starts.Append(function_start_entry);
2516 }
2517 } else {
2518 // If m_type is eTypeDebugInfo, then this is a dSYM - it will have the
2519 // load command claiming an eh_frame but it doesn't actually have the
2520 // eh_frame content. And if we have a dSYM, we don't need to do any of
2521 // this fill-in-the-missing-symbols work anyway - the debug info should
2522 // give us all the functions in the module.
2523 if (text_section_sp.get() && eh_frame_section_sp.get() &&
2525 DWARFCallFrameInfo eh_frame(*this, eh_frame_section_sp,
2528 eh_frame.GetFunctionAddressAndSizeVector(functions);
2529 addr_t text_base_addr = text_section_sp->GetFileAddress();
2530 size_t count = functions.GetSize();
2531 for (size_t i = 0; i < count; ++i) {
2533 functions.GetEntryAtIndex(i);
2534 if (func) {
2535 FunctionStarts::Entry function_start_entry;
2536 function_start_entry.addr = func->base - text_base_addr;
2537 if (is_arm) {
2538 if (function_start_entry.addr & 1) {
2539 function_start_entry.addr &= THUMB_ADDRESS_BIT_MASK;
2540 function_start_entry.data = true;
2541 } else if (always_thumb) {
2542 function_start_entry.data = true;
2543 }
2544 }
2545 function_starts.Append(function_start_entry);
2546 }
2547 }
2548 }
2549 }
2550
2551 const size_t function_starts_count = function_starts.GetSize();
2552
2553 // For user process binaries (executables, dylibs, frameworks, bundles), if
2554 // we don't have LC_FUNCTION_STARTS/eh_frame section in this binary, we're
2555 // going to assume the binary has been stripped. Don't allow assembly
2556 // language instruction emulation because we don't know proper function
2557 // start boundaries.
2558 //
2559 // For all other types of binaries (kernels, stand-alone bare board
2560 // binaries, kexts), they may not have LC_FUNCTION_STARTS / eh_frame
2561 // sections - we should not make any assumptions about them based on that.
2562 if (function_starts_count == 0 && CalculateStrata() == eStrataUser) {
2564 Log *unwind_or_symbol_log(GetLog(LLDBLog::Symbols | LLDBLog::Unwind));
2565
2566 if (unwind_or_symbol_log)
2567 module_sp->LogMessage(
2568 unwind_or_symbol_log,
2569 "no LC_FUNCTION_STARTS, will not allow assembly profiled unwinds");
2570 }
2571
2572 const user_id_t TEXT_eh_frame_sectID = eh_frame_section_sp.get()
2573 ? eh_frame_section_sp->GetID()
2574 : static_cast<user_id_t>(NO_SECT);
2575
2576 uint32_t N_SO_index = UINT32_MAX;
2577
2578 MachSymtabSectionInfo section_info(section_list);
2579 std::vector<uint32_t> N_FUN_indexes;
2580 std::vector<uint32_t> N_NSYM_indexes;
2581 std::vector<uint32_t> N_INCL_indexes;
2582 std::vector<uint32_t> N_BRAC_indexes;
2583 std::vector<uint32_t> N_COMM_indexes;
2584 typedef std::multimap<uint64_t, uint32_t> ValueToSymbolIndexMap;
2585 typedef llvm::DenseMap<uint32_t, uint32_t> NListIndexToSymbolIndexMap;
2586 typedef llvm::DenseMap<const char *, uint32_t> ConstNameToSymbolIndexMap;
2587 ValueToSymbolIndexMap N_FUN_addr_to_sym_idx;
2588 ValueToSymbolIndexMap N_STSYM_addr_to_sym_idx;
2589 ConstNameToSymbolIndexMap N_GSYM_name_to_sym_idx;
2590 // Any symbols that get merged into another will get an entry in this map
2591 // so we know
2592 NListIndexToSymbolIndexMap m_nlist_idx_to_sym_idx;
2593 uint32_t nlist_idx = 0;
2594 Symbol *symbol_ptr = nullptr;
2595
2596 uint32_t sym_idx = 0;
2597 Symbol *sym = nullptr;
2598 size_t num_syms = 0;
2599 std::string memory_symbol_name;
2600 uint32_t unmapped_local_symbols_found = 0;
2601
2602 std::vector<TrieEntryWithOffset> reexport_trie_entries;
2603 std::vector<TrieEntryWithOffset> external_sym_trie_entries;
2604 std::set<lldb::addr_t> resolver_addresses;
2605
2606 if (dyld_trie_data.GetByteSize() > 0) {
2607 SectionSP text_segment_sp =
2609 lldb::addr_t text_segment_file_addr = LLDB_INVALID_ADDRESS;
2610 if (text_segment_sp)
2611 text_segment_file_addr = text_segment_sp->GetFileAddress();
2612 std::vector<llvm::StringRef> nameSlices;
2613 ParseTrieEntries(dyld_trie_data, 0, is_arm, text_segment_file_addr,
2614 nameSlices, resolver_addresses, reexport_trie_entries,
2615 external_sym_trie_entries);
2616 }
2617
2618 typedef std::set<ConstString> IndirectSymbols;
2619 IndirectSymbols indirect_symbol_names;
2620
2621#if TARGET_OS_IPHONE
2622
2623 // Some recent builds of the dyld_shared_cache (hereafter: DSC) have been
2624 // optimized by moving LOCAL symbols out of the memory mapped portion of
2625 // the DSC. The symbol information has all been retained, but it isn't
2626 // available in the normal nlist data. However, there *are* duplicate
2627 // entries of *some*
2628 // LOCAL symbols in the normal nlist data. To handle this situation
2629 // correctly, we must first attempt
2630 // to parse any DSC unmapped symbol information. If we find any, we set a
2631 // flag that tells the normal nlist parser to ignore all LOCAL symbols.
2632
2633 if (IsSharedCacheBinary()) {
2634 // Before we can start mapping the DSC, we need to make certain the
2635 // target process is actually using the cache we can find.
2636
2637 // Next we need to determine the correct path for the dyld shared cache.
2638
2639 ArchSpec header_arch = GetArchitecture();
2640
2641 UUID dsc_uuid;
2642 UUID process_shared_cache_uuid;
2643 addr_t process_shared_cache_base_addr;
2644
2645 if (process) {
2646 GetProcessSharedCacheUUID(process, process_shared_cache_base_addr,
2647 process_shared_cache_uuid);
2648 }
2649
2650 __block bool found_image = false;
2651 __block void *nlist_buffer = nullptr;
2652 __block unsigned nlist_count = 0;
2653 __block char *string_table = nullptr;
2654 __block vm_offset_t vm_nlist_memory = 0;
2655 __block mach_msg_type_number_t vm_nlist_bytes_read = 0;
2656 __block vm_offset_t vm_string_memory = 0;
2657 __block mach_msg_type_number_t vm_string_bytes_read = 0;
2658
2659 auto _ = llvm::make_scope_exit(^{
2660 if (vm_nlist_memory)
2661 vm_deallocate(mach_task_self(), vm_nlist_memory, vm_nlist_bytes_read);
2662 if (vm_string_memory)
2663 vm_deallocate(mach_task_self(), vm_string_memory, vm_string_bytes_read);
2664 });
2665
2666 typedef llvm::DenseMap<ConstString, uint16_t> UndefinedNameToDescMap;
2667 typedef llvm::DenseMap<uint32_t, ConstString> SymbolIndexToName;
2668 UndefinedNameToDescMap undefined_name_to_desc;
2669 SymbolIndexToName reexport_shlib_needs_fixup;
2670
2671 dyld_for_each_installed_shared_cache(^(dyld_shared_cache_t shared_cache) {
2672 uuid_t cache_uuid;
2673 dyld_shared_cache_copy_uuid(shared_cache, &cache_uuid);
2674 if (found_image)
2675 return;
2676
2677 if (process_shared_cache_uuid.IsValid() &&
2678 process_shared_cache_uuid != UUID::fromData(&cache_uuid, 16))
2679 return;
2680
2681 dyld_shared_cache_for_each_image(shared_cache, ^(dyld_image_t image) {
2682 uuid_t dsc_image_uuid;
2683 if (found_image)
2684 return;
2685
2686 dyld_image_copy_uuid(image, &dsc_image_uuid);
2687 if (image_uuid != UUID::fromData(dsc_image_uuid, 16))
2688 return;
2689
2690 found_image = true;
2691
2692 // Compute the size of the string table. We need to ask dyld for a
2693 // new SPI to avoid this step.
2694 dyld_image_local_nlist_content_4Symbolication(
2695 image, ^(const void *nlistStart, uint64_t nlistCount,
2696 const char *stringTable) {
2697 if (!nlistStart || !nlistCount)
2698 return;
2699
2700 // The buffers passed here are valid only inside the block.
2701 // Use vm_read to make a cheap copy of them available for our
2702 // processing later.
2703 kern_return_t ret =
2704 vm_read(mach_task_self(), (vm_address_t)nlistStart,
2705 nlist_byte_size * nlistCount, &vm_nlist_memory,
2706 &vm_nlist_bytes_read);
2707 if (ret != KERN_SUCCESS)
2708 return;
2709 assert(vm_nlist_bytes_read == nlist_byte_size * nlistCount);
2710
2711 // We don't know the size of the string table. It's cheaper
2712 // to map the whole VM region than to determine the size by
2713 // parsing all the nlist entries.
2714 vm_address_t string_address = (vm_address_t)stringTable;
2715 vm_size_t region_size;
2716 mach_msg_type_number_t info_count = VM_REGION_BASIC_INFO_COUNT_64;
2717 vm_region_basic_info_data_t info;
2718 memory_object_name_t object;
2719 ret = vm_region_64(mach_task_self(), &string_address,
2720 &region_size, VM_REGION_BASIC_INFO_64,
2721 (vm_region_info_t)&info, &info_count, &object);
2722 if (ret != KERN_SUCCESS)
2723 return;
2724
2725 ret = vm_read(mach_task_self(), (vm_address_t)stringTable,
2726 region_size -
2727 ((vm_address_t)stringTable - string_address),
2728 &vm_string_memory, &vm_string_bytes_read);
2729 if (ret != KERN_SUCCESS)
2730 return;
2731
2732 nlist_buffer = (void *)vm_nlist_memory;
2733 string_table = (char *)vm_string_memory;
2734 nlist_count = nlistCount;
2735 });
2736 });
2737 });
2738 if (nlist_buffer) {
2739 DataExtractor dsc_local_symbols_data(nlist_buffer,
2740 nlist_count * nlist_byte_size,
2741 byte_order, addr_byte_size);
2742 unmapped_local_symbols_found = nlist_count;
2743
2744 // The normal nlist code cannot correctly size the Symbols
2745 // array, we need to allocate it here.
2746 sym = symtab.Resize(
2747 symtab_load_command.nsyms + m_dysymtab.nindirectsyms +
2748 unmapped_local_symbols_found - m_dysymtab.nlocalsym);
2749 num_syms = symtab.GetNumSymbols();
2750
2751 lldb::offset_t nlist_data_offset = 0;
2752
2753 for (uint32_t nlist_index = 0;
2754 nlist_index < nlist_count;
2755 nlist_index++) {
2756 /////////////////////////////
2757 {
2758 std::optional<struct nlist_64> nlist_maybe =
2759 ParseNList(dsc_local_symbols_data, nlist_data_offset,
2760 nlist_byte_size);
2761 if (!nlist_maybe)
2762 break;
2763 struct nlist_64 nlist = *nlist_maybe;
2764
2766 const char *symbol_name = string_table + nlist.n_strx;
2767
2768 if (symbol_name == NULL) {
2769 // No symbol should be NULL, even the symbols with no
2770 // string values should have an offset zero which
2771 // points to an empty C-string
2772 Debugger::ReportError(llvm::formatv(
2773 "DSC unmapped local symbol[{0}] has invalid "
2774 "string table offset {1:x} in {2}, ignoring symbol",
2775 nlist_index, nlist.n_strx,
2776 module_sp->GetFileSpec().GetPath());
2777 continue;
2778 }
2779 if (symbol_name[0] == '\0')
2780 symbol_name = NULL;
2781
2782 const char *symbol_name_non_abi_mangled = NULL;
2783
2784 SectionSP symbol_section;
2785 uint32_t symbol_byte_size = 0;
2786 bool add_nlist = true;
2787 bool is_debug = ((nlist.n_type & N_STAB) != 0);
2788 bool demangled_is_synthesized = false;
2789 bool is_gsym = false;
2790 bool set_value = true;
2791
2792 assert(sym_idx < num_syms);
2793
2794 sym[sym_idx].SetDebug(is_debug);
2795
2796 if (is_debug) {
2797 switch (nlist.n_type) {
2798 case N_GSYM:
2799 // global symbol: name,,NO_SECT,type,0
2800 // Sometimes the N_GSYM value contains the address.
2801
2802 // FIXME: In the .o files, we have a GSYM and a debug
2803 // symbol for all the ObjC data. They
2804 // have the same address, but we want to ensure that
2805 // we always find only the real symbol, 'cause we
2806 // don't currently correctly attribute the
2807 // GSYM one to the ObjCClass/Ivar/MetaClass
2808 // symbol type. This is a temporary hack to make
2809 // sure the ObjectiveC symbols get treated correctly.
2810 // To do this right, we should coalesce all the GSYM
2811 // & global symbols that have the same address.
2812
2813 is_gsym = true;
2814 sym[sym_idx].SetExternal(true);
2815
2816 if (symbol_name && symbol_name[0] == '_' &&
2817 symbol_name[1] == 'O') {
2818 llvm::StringRef symbol_name_ref(symbol_name);
2819 if (symbol_name_ref.startswith(
2820 g_objc_v2_prefix_class)) {
2821 symbol_name_non_abi_mangled = symbol_name + 1;
2822 symbol_name =
2823 symbol_name + g_objc_v2_prefix_class.size();
2824 type = eSymbolTypeObjCClass;
2825 demangled_is_synthesized = true;
2826
2827 } else if (symbol_name_ref.startswith(
2828 g_objc_v2_prefix_metaclass)) {
2829 symbol_name_non_abi_mangled = symbol_name + 1;
2830 symbol_name =
2831 symbol_name + g_objc_v2_prefix_metaclass.size();
2833 demangled_is_synthesized = true;
2834 } else if (symbol_name_ref.startswith(
2835 g_objc_v2_prefix_ivar)) {
2836 symbol_name_non_abi_mangled = symbol_name + 1;
2837 symbol_name =
2838 symbol_name + g_objc_v2_prefix_ivar.size();
2839 type = eSymbolTypeObjCIVar;
2840 demangled_is_synthesized = true;
2841 }
2842 } else {
2843 if (nlist.n_value != 0)
2844 symbol_section = section_info.GetSection(
2845 nlist.n_sect, nlist.n_value);
2846 type = eSymbolTypeData;
2847 }
2848 break;
2849
2850 case N_FNAME:
2851 // procedure name (f77 kludge): name,,NO_SECT,0,0
2852 type = eSymbolTypeCompiler;
2853 break;
2854
2855 case N_FUN:
2856 // procedure: name,,n_sect,linenumber,address
2857 if (symbol_name) {
2858 type = eSymbolTypeCode;
2859 symbol_section = section_info.GetSection(
2860 nlist.n_sect, nlist.n_value);
2861
2862 N_FUN_addr_to_sym_idx.insert(
2863 std::make_pair(nlist.n_value, sym_idx));
2864 // We use the current number of symbols in the
2865 // symbol table in lieu of using nlist_idx in case
2866 // we ever start trimming entries out
2867 N_FUN_indexes.push_back(sym_idx);
2868 } else {
2869 type = eSymbolTypeCompiler;
2870
2871 if (!N_FUN_indexes.empty()) {
2872 // Copy the size of the function into the
2873 // original
2874 // STAB entry so we don't have
2875 // to hunt for it later
2876 symtab.SymbolAtIndex(N_FUN_indexes.back())
2877 ->SetByteSize(nlist.n_value);
2878 N_FUN_indexes.pop_back();
2879 // We don't really need the end function STAB as
2880 // it contains the size which we already placed
2881 // with the original symbol, so don't add it if
2882 // we want a minimal symbol table
2883 add_nlist = false;
2884 }
2885 }
2886 break;
2887
2888 case N_STSYM:
2889 // static symbol: name,,n_sect,type,address
2890 N_STSYM_addr_to_sym_idx.insert(
2891 std::make_pair(nlist.n_value, sym_idx));
2892 symbol_section = section_info.GetSection(nlist.n_sect,
2893 nlist.n_value);
2894 if (symbol_name && symbol_name[0]) {
2896 symbol_name + 1, eSymbolTypeData);
2897 }
2898 break;
2899
2900 case N_LCSYM:
2901 // .lcomm symbol: name,,n_sect,type,address
2902 symbol_section = section_info.GetSection(nlist.n_sect,
2903 nlist.n_value);
2905 break;
2906
2907 case N_BNSYM:
2908 // We use the current number of symbols in the symbol
2909 // table in lieu of using nlist_idx in case we ever
2910 // start trimming entries out Skip these if we want
2911 // minimal symbol tables
2912 add_nlist = false;
2913 break;
2914
2915 case N_ENSYM:
2916 // Set the size of the N_BNSYM to the terminating
2917 // index of this N_ENSYM so that we can always skip
2918 // the entire symbol if we need to navigate more
2919 // quickly at the source level when parsing STABS
2920 // Skip these if we want minimal symbol tables
2921 add_nlist = false;
2922 break;
2923
2924 case N_OPT:
2925 // emitted with gcc2_compiled and in gcc source
2926 type = eSymbolTypeCompiler;
2927 break;
2928
2929 case N_RSYM:
2930 // register sym: name,,NO_SECT,type,register
2931 type = eSymbolTypeVariable;
2932 break;
2933
2934 case N_SLINE:
2935 // src line: 0,,n_sect,linenumber,address
2936 symbol_section = section_info.GetSection(nlist.n_sect,
2937 nlist.n_value);
2938 type = eSymbolTypeLineEntry;
2939 break;
2940
2941 case N_SSYM:
2942 // structure elt: name,,NO_SECT,type,struct_offset
2944 break;
2945
2946 case N_SO:
2947 // source file name
2948 type = eSymbolTypeSourceFile;
2949 if (symbol_name == NULL) {
2950 add_nlist = false;
2951 if (N_SO_index != UINT32_MAX) {
2952 // Set the size of the N_SO to the terminating
2953 // index of this N_SO so that we can always skip
2954 // the entire N_SO if we need to navigate more
2955 // quickly at the source level when parsing STABS
2956 symbol_ptr = symtab.SymbolAtIndex(N_SO_index);
2957 symbol_ptr->SetByteSize(sym_idx);
2958 symbol_ptr->SetSizeIsSibling(true);
2959 }
2960 N_NSYM_indexes.clear();
2961 N_INCL_indexes.clear();
2962 N_BRAC_indexes.clear();
2963 N_COMM_indexes.clear();
2964 N_FUN_indexes.clear();
2965 N_SO_index = UINT32_MAX;
2966 } else {
2967 // We use the current number of symbols in the
2968 // symbol table in lieu of using nlist_idx in case
2969 // we ever start trimming entries out
2970 const bool N_SO_has_full_path = symbol_name[0] == '/';
2971 if (N_SO_has_full_path) {
2972 if ((N_SO_index == sym_idx - 1) &&
2973 ((sym_idx - 1) < num_syms)) {
2974 // We have two consecutive N_SO entries where
2975 // the first contains a directory and the
2976 // second contains a full path.
2977 sym[sym_idx - 1].GetMangled().SetValue(
2978 ConstString(symbol_name));
2979 m_nlist_idx_to_sym_idx[nlist_idx] = sym_idx - 1;
2980 add_nlist = false;
2981 } else {
2982 // This is the first entry in a N_SO that
2983 // contains a directory or
2984 // a full path to the source file
2985 N_SO_index = sym_idx;
2986 }
2987 } else if ((N_SO_index == sym_idx - 1) &&
2988 ((sym_idx - 1) < num_syms)) {
2989 // This is usually the second N_SO entry that
2990 // contains just the filename, so here we combine
2991 // it with the first one if we are minimizing the
2992 // symbol table
2993 const char *so_path = sym[sym_idx - 1]
2994 .GetMangled()
2995 .GetDemangledName()
2996 .AsCString();
2997 if (so_path && so_path[0]) {
2998 std::string full_so_path(so_path);
2999 const size_t double_slash_pos =
3000 full_so_path.find("//");
3001 if (double_slash_pos != std::string::npos) {
3002 // The linker has been generating bad N_SO
3003 // entries with doubled up paths
3004 // in the format "%s%s" where the first
3005 // string is the DW_AT_comp_dir, and the
3006 // second is the directory for the source
3007 // file so you end up with a path that looks
3008 // like "/tmp/src//tmp/src/"
3009 FileSpec so_dir(so_path);
3010 if (!FileSystem::Instance().Exists(so_dir)) {
3011 so_dir.SetFile(
3012 &full_so_path[double_slash_pos + 1],
3013 FileSpec::Style::native);
3014 if (FileSystem::Instance().Exists(so_dir)) {
3015 // Trim off the incorrect path
3016 full_so_path.erase(0, double_slash_pos + 1);
3017 }
3018 }
3019 }
3020 if (*full_so_path.rbegin() != '/')
3021 full_so_path += '/';
3022 full_so_path += symbol_name;
3023 sym[sym_idx - 1].GetMangled().SetValue(
3024 ConstString(full_so_path.c_str()));
3025 add_nlist = false;
3026 m_nlist_idx_to_sym_idx[nlist_idx] = sym_idx - 1;
3027 }
3028 } else {
3029 // This could be a relative path to a N_SO
3030 N_SO_index = sym_idx;
3031 }
3032 }
3033 break;
3034
3035 case N_OSO:
3036 // object file name: name,,0,0,st_mtime
3037 type = eSymbolTypeObjectFile;
3038 break;
3039
3040 case N_LSYM:
3041 // local sym: name,,NO_SECT,type,offset
3042 type = eSymbolTypeLocal;
3043 break;
3044
3045 // INCL scopes
3046 case N_BINCL:
3047 // include file beginning: name,,NO_SECT,0,sum
3048 // We use the current number of symbols in the symbol
3049 // table in lieu of using nlist_idx in case we ever start
3050 // trimming entries out
3051 N_INCL_indexes.push_back(sym_idx);
3052 type = eSymbolTypeScopeBegin;
3053 break;
3054
3055 case N_EINCL:
3056 // include file end: name,,NO_SECT,0,0
3057 // Set the size of the N_BINCL to the terminating
3058 // index of this N_EINCL so that we can always skip
3059 // the entire symbol if we need to navigate more
3060 // quickly at the source level when parsing STABS
3061 if (!N_INCL_indexes.empty()) {
3062 symbol_ptr =
3063 symtab.SymbolAtIndex(N_INCL_indexes.back());
3064 symbol_ptr->SetByteSize(sym_idx + 1);
3065 symbol_ptr->SetSizeIsSibling(true);
3066 N_INCL_indexes.pop_back();
3067 }
3068 type = eSymbolTypeScopeEnd;
3069 break;
3070
3071 case N_SOL:
3072 // #included file name: name,,n_sect,0,address
3073 type = eSymbolTypeHeaderFile;
3074
3075 // We currently don't use the header files on darwin
3076 add_nlist = false;
3077 break;
3078
3079 case N_PARAMS:
3080 // compiler parameters: name,,NO_SECT,0,0
3081 type = eSymbolTypeCompiler;
3082 break;
3083
3084 case N_VERSION:
3085 // compiler version: name,,NO_SECT,0,0
3086 type = eSymbolTypeCompiler;
3087 break;
3088
3089 case N_OLEVEL:
3090 // compiler -O level: name,,NO_SECT,0,0
3091 type = eSymbolTypeCompiler;
3092 break;
3093
3094 case N_PSYM:
3095 // parameter: name,,NO_SECT,type,offset
3096 type = eSymbolTypeVariable;
3097 break;
3098
3099 case N_ENTRY:
3100 // alternate entry: name,,n_sect,linenumber,address
3101 symbol_section = section_info.GetSection(nlist.n_sect,
3102 nlist.n_value);
3103 type = eSymbolTypeLineEntry;
3104 break;
3105
3106 // Left and Right Braces
3107 case N_LBRAC:
3108 // left bracket: 0,,NO_SECT,nesting level,address
3109 // We use the current number of symbols in the symbol
3110 // table in lieu of using nlist_idx in case we ever
3111 // start trimming entries out
3112 symbol_section = section_info.GetSection(nlist.n_sect,
3113 nlist.n_value);
3114 N_BRAC_indexes.push_back(sym_idx);
3115 type = eSymbolTypeScopeBegin;
3116 break;
3117
3118 case N_RBRAC:
3119 // right bracket: 0,,NO_SECT,nesting level,address
3120 // Set the size of the N_LBRAC to the terminating
3121 // index of this N_RBRAC so that we can always skip
3122 // the entire symbol if we need to navigate more
3123 // quickly at the source level when parsing STABS
3124 symbol_section = section_info.GetSection(nlist.n_sect,
3125 nlist.n_value);
3126 if (!N_BRAC_indexes.empty()) {
3127 symbol_ptr =
3128 symtab.SymbolAtIndex(N_BRAC_indexes.back());
3129 symbol_ptr->SetByteSize(sym_idx + 1);
3130 symbol_ptr->SetSizeIsSibling(true);
3131 N_BRAC_indexes.pop_back();
3132 }
3133 type = eSymbolTypeScopeEnd;
3134 break;
3135
3136 case N_EXCL:
3137 // deleted include file: name,,NO_SECT,0,sum
3138 type = eSymbolTypeHeaderFile;
3139 break;
3140
3141 // COMM scopes
3142 case N_BCOMM:
3143 // begin common: name,,NO_SECT,0,0
3144 // We use the current number of symbols in the symbol
3145 // table in lieu of using nlist_idx in case we ever
3146 // start trimming entries out
3147 type = eSymbolTypeScopeBegin;
3148 N_COMM_indexes.push_back(sym_idx);
3149 break;
3150
3151 case N_ECOML:
3152 // end common (local name): 0,,n_sect,0,address
3153 symbol_section = section_info.GetSection(nlist.n_sect,
3154 nlist.n_value);
3155 // Fall through
3156
3157 case N_ECOMM:
3158 // end common: name,,n_sect,0,0
3159 // Set the size of the N_BCOMM to the terminating
3160 // index of this N_ECOMM/N_ECOML so that we can
3161 // always skip the entire symbol if we need to
3162 // navigate more quickly at the source level when
3163 // parsing STABS
3164 if (!N_COMM_indexes.empty()) {
3165 symbol_ptr =
3166 symtab.SymbolAtIndex(N_COMM_indexes.back());
3167 symbol_ptr->SetByteSize(sym_idx + 1);
3168 symbol_ptr->SetSizeIsSibling(true);
3169 N_COMM_indexes.pop_back();
3170 }
3171 type = eSymbolTypeScopeEnd;
3172 break;
3173
3174 case N_LENG:
3175 // second stab entry with length information
3176 type = eSymbolTypeAdditional;
3177 break;
3178
3179 default:
3180 break;
3181 }
3182 } else {
3183 // uint8_t n_pext = N_PEXT & nlist.n_type;
3184 uint8_t n_type = N_TYPE & nlist.n_type;
3185 sym[sym_idx].SetExternal((N_EXT & nlist.n_type) != 0);
3186
3187 switch (n_type) {
3188 case N_INDR: {
3189 const char *reexport_name_cstr =
3190 strtab_data.PeekCStr(nlist.n_value);
3191 if (reexport_name_cstr && reexport_name_cstr[0]) {
3192 type = eSymbolTypeReExported;
3193 ConstString reexport_name(
3194 reexport_name_cstr +
3195 ((reexport_name_cstr[0] == '_') ? 1 : 0));
3196 sym[sym_idx].SetReExportedSymbolName(reexport_name);
3197 set_value = false;
3198 reexport_shlib_needs_fixup[sym_idx] = reexport_name;
3199 indirect_symbol_names.insert(ConstString(
3200 symbol_name + ((symbol_name[0] == '_') ? 1 : 0)));
3201 } else
3202 type = eSymbolTypeUndefined;
3203 } break;
3204
3205 case N_UNDF:
3206 if (symbol_name && symbol_name[0]) {
3207 ConstString undefined_name(
3208 symbol_name + ((symbol_name[0] == '_') ? 1 : 0));
3209 undefined_name_to_desc[undefined_name] = nlist.n_desc;
3210 }
3211 // Fall through
3212 case N_PBUD:
3213 type = eSymbolTypeUndefined;
3214 break;
3215
3216 case N_ABS:
3217 type = eSymbolTypeAbsolute;
3218 break;
3219
3220 case N_SECT: {
3221 symbol_section = section_info.GetSection(nlist.n_sect,
3222 nlist.n_value);
3223
3224 if (symbol_section == NULL) {
3225 // TODO: warn about this?
3226 add_nlist = false;
3227 break;
3228 }
3229
3230 if (TEXT_eh_frame_sectID == nlist.n_sect) {
3231 type = eSymbolTypeException;
3232 } else {
3233 uint32_t section_type =
3234 symbol_section->Get() & SECTION_TYPE;
3235
3236 switch (section_type) {
3237 case S_CSTRING_LITERALS:
3238 type = eSymbolTypeData;
3239 break; // section with only literal C strings
3240 case S_4BYTE_LITERALS:
3241 type = eSymbolTypeData;
3242 break; // section with only 4 byte literals
3243 case S_8BYTE_LITERALS:
3244 type = eSymbolTypeData;
3245 break; // section with only 8 byte literals
3246 case S_LITERAL_POINTERS:
3247 type = eSymbolTypeTrampoline;
3248 break; // section with only pointers to literals
3249 case S_NON_LAZY_SYMBOL_POINTERS:
3250 type = eSymbolTypeTrampoline;
3251 break; // section with only non-lazy symbol
3252 // pointers
3253 case S_LAZY_SYMBOL_POINTERS:
3254 type = eSymbolTypeTrampoline;
3255 break; // section with only lazy symbol pointers
3256 case S_SYMBOL_STUBS:
3257 type = eSymbolTypeTrampoline;
3258 break; // section with only symbol stubs, byte
3259 // size of stub in the reserved2 field
3260 case S_MOD_INIT_FUNC_POINTERS:
3261 type = eSymbolTypeCode;
3262 break; // section with only function pointers for
3263 // initialization
3264 case S_MOD_TERM_FUNC_POINTERS:
3265 type = eSymbolTypeCode;
3266 break; // section with only function pointers for
3267 // termination
3268 case S_INTERPOSING:
3269 type = eSymbolTypeTrampoline;
3270 break; // section with only pairs of function
3271 // pointers for interposing
3272 case S_16BYTE_LITERALS:
3273 type = eSymbolTypeData;
3274 break; // section with only 16 byte literals
3275 case S_DTRACE_DOF:
3277 break;
3278 case S_LAZY_DYLIB_SYMBOL_POINTERS:
3279 type = eSymbolTypeTrampoline;
3280 break;
3281 default:
3282 switch (symbol_section->GetType()) {
3284 type = eSymbolTypeCode;
3285 break;
3286 case eSectionTypeData:
3287 case eSectionTypeDataCString: // Inlined C string
3288 // data
3289 case eSectionTypeDataCStringPointers: // Pointers
3290 // to C
3291 // string
3292 // data
3293 case eSectionTypeDataSymbolAddress: // Address of
3294 // a symbol in
3295 // the symbol
3296 // table
3297 case eSectionTypeData4:
3298 case eSectionTypeData8:
3299 case eSectionTypeData16:
3300 type = eSymbolTypeData;
3301 break;
3302 default:
3303 break;
3304 }
3305 break;
3306 }
3307
3308 if (type == eSymbolTypeInvalid) {
3309 const char *symbol_sect_name =
3310 symbol_section->GetName().AsCString();
3311 if (symbol_section->IsDescendant(
3312 text_section_sp.get())) {
3313 if (symbol_section->IsClear(
3314 S_ATTR_PURE_INSTRUCTIONS |
3315 S_ATTR_SELF_MODIFYING_CODE |
3316 S_ATTR_SOME_INSTRUCTIONS))
3317 type = eSymbolTypeData;
3318 else
3319 type = eSymbolTypeCode;
3320 } else if (symbol_section->IsDescendant(
3321 data_section_sp.get()) ||
3322 symbol_section->IsDescendant(
3323 data_dirty_section_sp.get()) ||
3324 symbol_section->IsDescendant(
3325 data_const_section_sp.get())) {
3326 if (symbol_sect_name &&
3327 ::strstr(symbol_sect_name, "__objc") ==
3328 symbol_sect_name) {
3329 type = eSymbolTypeRuntime;
3330
3331 if (symbol_name) {
3332 llvm::StringRef symbol_name_ref(symbol_name);
3333 if (symbol_name_ref.startswith("_OBJC_")) {
3334 llvm::StringRef
3335 g_objc_v2_prefix_class(
3336 "_OBJC_CLASS_$_");
3337 llvm::StringRef
3338 g_objc_v2_prefix_metaclass(
3339 "_OBJC_METACLASS_$_");
3340 llvm::StringRef
3341 g_objc_v2_prefix_ivar("_OBJC_IVAR_$_");
3342 if (symbol_name_ref.startswith(
3343 g_objc_v2_prefix_class)) {
3344 symbol_name_non_abi_mangled =
3345 symbol_name + 1;
3346 symbol_name =
3347 symbol_name +
3348 g_objc_v2_prefix_class.size();
3349 type = eSymbolTypeObjCClass;
3350 demangled_is_synthesized = true;
3351 } else if (
3352 symbol_name_ref.startswith(
3353 g_objc_v2_prefix_metaclass)) {
3354 symbol_name_non_abi_mangled =
3355 symbol_name + 1;
3356 symbol_name =
3357 symbol_name +
3358 g_objc_v2_prefix_metaclass.size();
3360 demangled_is_synthesized = true;
3361 } else if (symbol_name_ref.startswith(
3362 g_objc_v2_prefix_ivar)) {
3363 symbol_name_non_abi_mangled =
3364 symbol_name + 1;
3365 symbol_name =
3366 symbol_name +
3367 g_objc_v2_prefix_ivar.size();
3368 type = eSymbolTypeObjCIVar;
3369 demangled_is_synthesized = true;
3370 }
3371 }
3372 }
3373 } else if (symbol_sect_name &&
3374 ::strstr(symbol_sect_name,
3375 "__gcc_except_tab") ==
3376 symbol_sect_name) {
3377 type = eSymbolTypeException;
3378 } else {
3379 type = eSymbolTypeData;
3380 }
3381 } else if (symbol_sect_name &&
3382 ::strstr(symbol_sect_name, "__IMPORT") ==
3383 symbol_sect_name) {
3384 type = eSymbolTypeTrampoline;
3385 } else if (symbol_section->IsDescendant(
3386 objc_section_sp.get())) {
3387 type = eSymbolTypeRuntime;
3388 if (symbol_name && symbol_name[0] == '.') {
3389 llvm::StringRef symbol_name_ref(symbol_name);
3390 llvm::StringRef
3391 g_objc_v1_prefix_class(".objc_class_name_");
3392 if (symbol_name_ref.startswith(
3393 g_objc_v1_prefix_class)) {
3394 symbol_name_non_abi_mangled = symbol_name;
3395 symbol_name = symbol_name +
3396 g_objc_v1_prefix_class.size();
3397 type = eSymbolTypeObjCClass;
3398 demangled_is_synthesized = true;
3399 }
3400 }
3401 }
3402 }
3403 }
3404 } break;
3405 }
3406 }
3407
3408 if (add_nlist) {
3409 uint64_t symbol_value = nlist.n_value;
3410 if (symbol_name_non_abi_mangled) {
3411 sym[sym_idx].GetMangled().SetMangledName(
3412 ConstString(symbol_name_non_abi_mangled));
3413 sym[sym_idx].GetMangled().SetDemangledName(
3414 ConstString(symbol_name));
3415 } else {
3416 if (symbol_name && symbol_name[0] == '_') {
3417 symbol_name++; // Skip the leading underscore
3418 }
3419
3420 if (symbol_name) {
3421 ConstString const_symbol_name(symbol_name);
3422 sym[sym_idx].GetMangled().SetValue(const_symbol_name);
3423 if (is_gsym && is_debug) {
3424 const char *gsym_name =
3425 sym[sym_idx]
3426 .GetMangled()
3427 .GetName(Mangled::ePreferMangled)
3428 .GetCString();
3429 if (gsym_name)
3430 N_GSYM_name_to_sym_idx[gsym_name] = sym_idx;
3431 }
3432 }
3433 }
3434 if (symbol_section) {
3435 const addr_t section_file_addr =
3436 symbol_section->GetFileAddress();
3437 if (symbol_byte_size == 0 &&
3438 function_starts_count > 0) {
3439 addr_t symbol_lookup_file_addr = nlist.n_value;
3440 // Do an exact address match for non-ARM addresses,
3441 // else get the closest since the symbol might be a
3442 // thumb symbol which has an address with bit zero
3443 // set
3444 FunctionStarts::Entry *func_start_entry =
3445 function_starts.FindEntry(symbol_lookup_file_addr,
3446 !is_arm);
3447 if (is_arm && func_start_entry) {
3448 // Verify that the function start address is the
3449 // symbol address (ARM) or the symbol address + 1
3450 // (thumb)
3451 if (func_start_entry->addr !=
3452 symbol_lookup_file_addr &&
3453 func_start_entry->addr !=
3454 (symbol_lookup_file_addr + 1)) {
3455 // Not the right entry, NULL it out...
3456 func_start_entry = NULL;
3457 }
3458 }
3459 if (func_start_entry) {
3460 func_start_entry->data = true;
3461
3462 addr_t symbol_file_addr = func_start_entry->addr;
3463 uint32_t symbol_flags = 0;
3464 if (is_arm) {
3465 if (symbol_file_addr & 1)
3466 symbol_flags = MACHO_NLIST_ARM_SYMBOL_IS_THUMB;
3467 symbol_file_addr &= THUMB_ADDRESS_BIT_MASK;
3468 }
3469
3470 const FunctionStarts::Entry *next_func_start_entry =
3471 function_starts.FindNextEntry(func_start_entry);
3472 const addr_t section_end_file_addr =
3473 section_file_addr +
3474 symbol_section->GetByteSize();
3475 if (next_func_start_entry) {
3476 addr_t next_symbol_file_addr =
3477 next_func_start_entry->addr;
3478 // Be sure to clear the Thumb address bit when
3479 // we calculate the size from the current and
3480 // next address
3481 if (is_arm)
3482 next_symbol_file_addr &= THUMB_ADDRESS_BIT_MASK;
3483 symbol_byte_size = std::min<lldb::addr_t>(
3484 next_symbol_file_addr - symbol_file_addr,
3485 section_end_file_addr - symbol_file_addr);
3486 } else {
3487 symbol_byte_size =
3488 section_end_file_addr - symbol_file_addr;
3489 }
3490 }
3491 }
3492 symbol_value -= section_file_addr;
3493 }
3494
3495 if (is_debug == false) {
3496 if (type == eSymbolTypeCode) {
3497 // See if we can find a N_FUN entry for any code
3498 // symbols. If we do find a match, and the name
3499 // matches, then we can merge the two into just the
3500 // function symbol to avoid duplicate entries in
3501 // the symbol table
3502 auto range =
3503 N_FUN_addr_to_sym_idx.equal_range(nlist.n_value);
3504 if (range.first != range.second) {
3505 bool found_it = false;
3506 for (auto pos = range.first; pos != range.second;
3507 ++pos) {
3508 if (sym[sym_idx].GetMangled().GetName(
3509 Mangled::ePreferMangled) ==
3510 sym[pos->second].GetMangled().GetName(
3511 Mangled::ePreferMangled)) {
3512 m_nlist_idx_to_sym_idx[nlist_idx] = pos->second;
3513 // We just need the flags from the linker
3514 // symbol, so put these flags
3515 // into the N_FUN flags to avoid duplicate
3516 // symbols in the symbol table
3517 sym[pos->second].SetExternal(
3518 sym[sym_idx].IsExternal());
3519 sym[pos->second].SetFlags(nlist.n_type << 16 |
3520 nlist.n_desc);
3521 if (resolver_addresses.find(nlist.n_value) !=
3522 resolver_addresses.end())
3523 sym[pos->second].SetType(eSymbolTypeResolver);
3524 sym[sym_idx].Clear();
3525 found_it = true;
3526 break;
3527 }
3528 }
3529 if (found_it)
3530 continue;
3531 } else {
3532 if (resolver_addresses.find(nlist.n_value) !=
3533 resolver_addresses.end())
3534 type = eSymbolTypeResolver;
3535 }
3536 } else if (type == eSymbolTypeData ||
3537 type == eSymbolTypeObjCClass ||
3538 type == eSymbolTypeObjCMetaClass ||
3539 type == eSymbolTypeObjCIVar) {
3540 // See if we can find a N_STSYM entry for any data
3541 // symbols. If we do find a match, and the name
3542 // matches, then we can merge the two into just the
3543 // Static symbol to avoid duplicate entries in the
3544 // symbol table
3545 auto range = N_STSYM_addr_to_sym_idx.equal_range(
3546 nlist.n_value);
3547 if (range.first != range.second) {
3548 bool found_it = false;
3549 for (auto pos = range.first; pos != range.second;
3550 ++pos) {
3551 if (sym[sym_idx].GetMangled().GetName(
3552 Mangled::ePreferMangled) ==
3553 sym[pos->second].GetMangled().GetName(
3554 Mangled::ePreferMangled)) {
3555 m_nlist_idx_to_sym_idx[nlist_idx] = pos->second;
3556 // We just need the flags from the linker
3557 // symbol, so put these flags
3558 // into the N_STSYM flags to avoid duplicate
3559 // symbols in the symbol table
3560 sym[pos->second].SetExternal(
3561 sym[sym_idx].IsExternal());
3562 sym[pos->second].SetFlags(nlist.n_type << 16 |
3563 nlist.n_desc);
3564 sym[sym_idx].Clear();
3565 found_it = true;
3566 break;
3567 }
3568 }
3569 if (found_it)
3570 continue;
3571 } else {
3572 const char *gsym_name =
3573 sym[sym_idx]
3574 .GetMangled()
3575 .GetName(Mangled::ePreferMangled)
3576 .GetCString();
3577 if (gsym_name) {
3578 // Combine N_GSYM stab entries with the non
3579 // stab symbol
3580 ConstNameToSymbolIndexMap::const_iterator pos =
3581 N_GSYM_name_to_sym_idx.find(gsym_name);
3582 if (pos != N_GSYM_name_to_sym_idx.end()) {
3583 const uint32_t GSYM_sym_idx = pos->second;
3584 m_nlist_idx_to_sym_idx[nlist_idx] =
3585 GSYM_sym_idx;
3586 // Copy the address, because often the N_GSYM
3587 // address has an invalid address of zero
3588 // when the global is a common symbol
3589 sym[GSYM_sym_idx].GetAddressRef().SetSection(
3590 symbol_section);
3591 sym[GSYM_sym_idx].GetAddressRef().SetOffset(
3592 symbol_value);
3593 add_symbol_addr(sym[GSYM_sym_idx]
3594 .GetAddress()
3595 .GetFileAddress());
3596 // We just need the flags from the linker
3597 // symbol, so put these flags
3598 // into the N_GSYM flags to avoid duplicate
3599 // symbols in the symbol table
3600 sym[GSYM_sym_idx].SetFlags(nlist.n_type << 16 |
3601 nlist.n_desc);
3602 sym[sym_idx].Clear();
3603 continue;
3604 }
3605 }
3606 }
3607 }
3608 }
3609
3610 sym[sym_idx].SetID(nlist_idx);
3611 sym[sym_idx].SetType(type);
3612 if (set_value) {
3613 sym[sym_idx].GetAddressRef().SetSection(symbol_section);
3614 sym[sym_idx].GetAddressRef().SetOffset(symbol_value);
3615 add_symbol_addr(
3616 sym[sym_idx].GetAddress().GetFileAddress());
3617 }
3618 sym[sym_idx].SetFlags(nlist.n_type << 16 | nlist.n_desc);
3619
3620 if (symbol_byte_size > 0)
3621 sym[sym_idx].SetByteSize(symbol_byte_size);
3622
3623 if (demangled_is_synthesized)
3624 sym[sym_idx].SetDemangledNameIsSynthesized(true);
3625 ++sym_idx;
3626 } else {
3627 sym[sym_idx].Clear();
3628 }
3629 }
3630 /////////////////////////////
3631 }
3632 }
3633
3634 for (const auto &pos : reexport_shlib_needs_fixup) {
3635 const auto undef_pos = undefined_name_to_desc.find(pos.second);
3636 if (undef_pos != undefined_name_to_desc.end()) {
3637 const uint8_t dylib_ordinal =
3638 llvm::MachO::GET_LIBRARY_ORDINAL(undef_pos->second);
3639 if (dylib_ordinal > 0 && dylib_ordinal < dylib_files.GetSize())
3640 sym[pos.first].SetReExportedSymbolSharedLibrary(
3641 dylib_files.GetFileSpecAtIndex(dylib_ordinal - 1));
3642 }
3643 }
3644 }
3645
3646#endif
3647 lldb::offset_t nlist_data_offset = 0;
3648
3649 if (nlist_data.GetByteSize() > 0) {
3650
3651 // If the sym array was not created while parsing the DSC unmapped
3652 // symbols, create it now.
3653 if (sym == nullptr) {
3654 sym =
3655 symtab.Resize(symtab_load_command.nsyms + m_dysymtab.nindirectsyms);
3656 num_syms = symtab.GetNumSymbols();
3657 }
3658
3659 if (unmapped_local_symbols_found) {
3660 assert(m_dysymtab.ilocalsym == 0);
3661 nlist_data_offset += (m_dysymtab.nlocalsym * nlist_byte_size);
3662 nlist_idx = m_dysymtab.nlocalsym;
3663 } else {
3664 nlist_idx = 0;
3665 }
3666
3667 typedef llvm::DenseMap<ConstString, uint16_t> UndefinedNameToDescMap;
3668 typedef llvm::DenseMap<uint32_t, ConstString> SymbolIndexToName;
3669 UndefinedNameToDescMap undefined_name_to_desc;
3670 SymbolIndexToName reexport_shlib_needs_fixup;
3671
3672 // Symtab parsing is a huge mess. Everything is entangled and the code
3673 // requires access to a ridiculous amount of variables. LLDB depends
3674 // heavily on the proper merging of symbols and to get that right we need
3675 // to make sure we have parsed all the debug symbols first. Therefore we
3676 // invoke the lambda twice, once to parse only the debug symbols and then
3677 // once more to parse the remaining symbols.
3678 auto ParseSymbolLambda = [&](struct nlist_64 &nlist, uint32_t nlist_idx,
3679 bool debug_only) {
3680 const bool is_debug = ((nlist.n_type & N_STAB) != 0);
3681 if (is_debug != debug_only)
3682 return true;
3683
3684 const char *symbol_name_non_abi_mangled = nullptr;
3685 const char *symbol_name = nullptr;
3686
3687 if (have_strtab_data) {
3688 symbol_name = strtab_data.PeekCStr(nlist.n_strx);
3689
3690 if (symbol_name == nullptr) {
3691 // No symbol should be NULL, even the symbols with no string values
3692 // should have an offset zero which points to an empty C-string
3693 Debugger::ReportError(llvm::formatv(
3694 "symbol[{0}] has invalid string table offset {1:x} in {2}, "
3695 "ignoring symbol",
3696 nlist_idx, nlist.n_strx, module_sp->GetFileSpec().GetPath()));
3697 return true;
3698 }
3699 if (symbol_name[0] == '\0')
3700 symbol_name = nullptr;
3701 } else {
3702 const addr_t str_addr = strtab_addr + nlist.n_strx;
3703 Status str_error;
3704 if (process->ReadCStringFromMemory(str_addr, memory_symbol_name,
3705 str_error))
3706 symbol_name = memory_symbol_name.c_str();
3707 }
3708
3710 SectionSP symbol_section;
3711 lldb::addr_t symbol_byte_size = 0;
3712 bool add_nlist = true;
3713 bool is_gsym = false;
3714 bool demangled_is_synthesized = false;
3715 bool set_value = true;
3716
3717 assert(sym_idx < num_syms);
3718 sym[sym_idx].SetDebug(is_debug);
3719
3720 if (is_debug) {
3721 switch (nlist.n_type) {
3722 case N_GSYM:
3723 // global symbol: name,,NO_SECT,type,0
3724 // Sometimes the N_GSYM value contains the address.
3725
3726 // FIXME: In the .o files, we have a GSYM and a debug symbol for all
3727 // the ObjC data. They
3728 // have the same address, but we want to ensure that we always find
3729 // only the real symbol, 'cause we don't currently correctly
3730 // attribute the GSYM one to the ObjCClass/Ivar/MetaClass symbol
3731 // type. This is a temporary hack to make sure the ObjectiveC
3732 // symbols get treated correctly. To do this right, we should
3733 // coalesce all the GSYM & global symbols that have the same
3734 // address.
3735 is_gsym = true;
3736 sym[sym_idx].SetExternal(true);
3737
3738 if (symbol_name && symbol_name[0] == '_' && symbol_name[1] == 'O') {
3739 llvm::StringRef symbol_name_ref(symbol_name);
3740 if (symbol_name_ref.startswith(g_objc_v2_prefix_class)) {
3741 symbol_name_non_abi_mangled = symbol_name + 1;
3742 symbol_name = symbol_name + g_objc_v2_prefix_class.size();
3743 type = eSymbolTypeObjCClass;
3744 demangled_is_synthesized = true;
3745
3746 } else if (symbol_name_ref.startswith(g_objc_v2_prefix_metaclass)) {
3747 symbol_name_non_abi_mangled = symbol_name + 1;
3748 symbol_name = symbol_name + g_objc_v2_prefix_metaclass.size();
3750 demangled_is_synthesized = true;
3751 } else if (symbol_name_ref.startswith(g_objc_v2_prefix_ivar)) {
3752 symbol_name_non_abi_mangled = symbol_name + 1;
3753 symbol_name = symbol_name + g_objc_v2_prefix_ivar.size();
3754 type = eSymbolTypeObjCIVar;
3755 demangled_is_synthesized = true;
3756 }
3757 } else {
3758 if (nlist.n_value != 0)
3759 symbol_section =
3760 section_info.GetSection(nlist.n_sect, nlist.n_value);
3761 type = eSymbolTypeData;
3762 }
3763 break;
3764
3765 case N_FNAME:
3766 // procedure name (f77 kludge): name,,NO_SECT,0,0
3767 type = eSymbolTypeCompiler;
3768 break;
3769
3770 case N_FUN:
3771 // procedure: name,,n_sect,linenumber,address
3772 if (symbol_name) {
3773 type = eSymbolTypeCode;
3774 symbol_section =
3775 section_info.GetSection(nlist.n_sect, nlist.n_value);
3776
3777 N_FUN_addr_to_sym_idx.insert(
3778 std::make_pair(nlist.n_value, sym_idx));
3779 // We use the current number of symbols in the symbol table in
3780 // lieu of using nlist_idx in case we ever start trimming entries
3781 // out
3782 N_FUN_indexes.push_back(sym_idx);
3783 } else {
3784 type = eSymbolTypeCompiler;
3785
3786 if (!N_FUN_indexes.empty()) {
3787 // Copy the size of the function into the original STAB entry
3788 // so we don't have to hunt for it later
3789 symtab.SymbolAtIndex(N_FUN_indexes.back())
3790 ->SetByteSize(nlist.n_value);
3791 N_FUN_indexes.pop_back();
3792 // We don't really need the end function STAB as it contains
3793 // the size which we already placed with the original symbol,
3794 // so don't add it if we want a minimal symbol table
3795 add_nlist = false;
3796 }
3797 }
3798 break;
3799
3800 case N_STSYM:
3801 // static symbol: name,,n_sect,type,address
3802 N_STSYM_addr_to_sym_idx.insert(
3803 std::make_pair(nlist.n_value, sym_idx));
3804 symbol_section = section_info.GetSection(nlist.n_sect, nlist.n_value);
3805 if (symbol_name && symbol_name[0]) {
3806 type = ObjectFile::GetSymbolTypeFromName(symbol_name + 1,
3808 }
3809 break;
3810
3811 case N_LCSYM:
3812 // .lcomm symbol: name,,n_sect,type,address
3813 symbol_section = section_info.GetSection(nlist.n_sect, nlist.n_value);
3815 break;
3816
3817 case N_BNSYM:
3818 // We use the current number of symbols in the symbol table in lieu
3819 // of using nlist_idx in case we ever start trimming entries out
3820 // Skip these if we want minimal symbol tables
3821 add_nlist = false;
3822 break;
3823
3824 case N_ENSYM:
3825 // Set the size of the N_BNSYM to the terminating index of this
3826 // N_ENSYM so that we can always skip the entire symbol if we need
3827 // to navigate more quickly at the source level when parsing STABS
3828 // Skip these if we want minimal symbol tables
3829 add_nlist = false;
3830 break;
3831
3832 case N_OPT:
3833 // emitted with gcc2_compiled and in gcc source
3834 type = eSymbolTypeCompiler;
3835 break;
3836
3837 case N_RSYM:
3838 // register sym: name,,NO_SECT,type,register
3839 type = eSymbolTypeVariable;
3840 break;
3841
3842 case N_SLINE:
3843 // src line: 0,,n_sect,linenumber,address
3844 symbol_section = section_info.GetSection(nlist.n_sect, nlist.n_value);
3845 type = eSymbolTypeLineEntry;
3846 break;
3847
3848 case N_SSYM:
3849 // structure elt: name,,NO_SECT,type,struct_offset
3851 break;
3852
3853 case N_SO:
3854 // source file name
3855 type = eSymbolTypeSourceFile;
3856 if (symbol_name == nullptr) {
3857 add_nlist = false;
3858 if (N_SO_index != UINT32_MAX) {
3859 // Set the size of the N_SO to the terminating index of this
3860 // N_SO so that we can always skip the entire N_SO if we need
3861 // to navigate more quickly at the source level when parsing
3862 // STABS
3863 symbol_ptr = symtab.SymbolAtIndex(N_SO_index);
3864 symbol_ptr->SetByteSize(sym_idx);
3865 symbol_ptr->SetSizeIsSibling(true);
3866 }
3867 N_NSYM_indexes.clear();
3868 N_INCL_indexes.clear();
3869 N_BRAC_indexes.clear();
3870 N_COMM_indexes.clear();
3871 N_FUN_indexes.clear();
3872 N_SO_index = UINT32_MAX;
3873 } else {
3874 // We use the current number of symbols in the symbol table in
3875 // lieu of using nlist_idx in case we ever start trimming entries
3876 // out
3877 const bool N_SO_has_full_path = symbol_name[0] == '/';
3878 if (N_SO_has_full_path) {
3879 if ((N_SO_index == sym_idx - 1) && ((sym_idx - 1) < num_syms)) {
3880 // We have two consecutive N_SO entries where the first
3881 // contains a directory and the second contains a full path.
3882 sym[sym_idx - 1].GetMangled().SetValue(
3883 ConstString(symbol_name));
3884 m_nlist_idx_to_sym_idx[nlist_idx] = sym_idx - 1;
3885 add_nlist = false;
3886 } else {
3887 // This is the first entry in a N_SO that contains a
3888 // directory or a full path to the source file
3889 N_SO_index = sym_idx;
3890 }
3891 } else if ((N_SO_index == sym_idx - 1) &&
3892 ((sym_idx - 1) < num_syms)) {
3893 // This is usually the second N_SO entry that contains just the
3894 // filename, so here we combine it with the first one if we are
3895 // minimizing the symbol table
3896 const char *so_path =
3897 sym[sym_idx - 1].GetMangled().GetDemangledName().AsCString();
3898 if (so_path && so_path[0]) {
3899 std::string full_so_path(so_path);
3900 const size_t double_slash_pos = full_so_path.find("//");
3901 if (double_slash_pos != std::string::npos) {
3902 // The linker has been generating bad N_SO entries with
3903 // doubled up paths in the format "%s%s" where the first
3904 // string is the DW_AT_comp_dir, and the second is the
3905 // directory for the source file so you end up with a path
3906 // that looks like "/tmp/src//tmp/src/"
3907 FileSpec so_dir(so_path);
3908 if (!FileSystem::Instance().Exists(so_dir)) {
3909 so_dir.SetFile(&full_so_path[double_slash_pos + 1],
3910 FileSpec::Style::native);
3911 if (FileSystem::Instance().Exists(so_dir)) {
3912 // Trim off the incorrect path
3913 full_so_path.erase(0, double_slash_pos + 1);
3914 }
3915 }
3916 }
3917 if (*full_so_path.rbegin() != '/')
3918 full_so_path += '/';
3919 full_so_path += symbol_name;
3920 sym[sym_idx - 1].GetMangled().SetValue(
3921 ConstString(full_so_path.c_str()));
3922 add_nlist = false;
3923 m_nlist_idx_to_sym_idx[nlist_idx] = sym_idx - 1;
3924 }
3925 } else {
3926 // This could be a relative path to a N_SO
3927 N_SO_index = sym_idx;
3928 }
3929 }
3930 break;
3931
3932 case N_OSO:
3933 // object file name: name,,0,0,st_mtime
3934 type = eSymbolTypeObjectFile;
3935 break;
3936
3937 case N_LSYM:
3938 // local sym: name,,NO_SECT,type,offset
3939 type = eSymbolTypeLocal;
3940 break;
3941
3942 // INCL scopes
3943 case N_BINCL:
3944 // include file beginning: name,,NO_SECT,0,sum We use the current
3945 // number of symbols in the symbol table in lieu of using nlist_idx
3946 // in case we ever start trimming entries out
3947 N_INCL_indexes.push_back(sym_idx);
3948 type = eSymbolTypeScopeBegin;
3949 break;
3950
3951 case N_EINCL:
3952 // include file end: name,,NO_SECT,0,0
3953 // Set the size of the N_BINCL to the terminating index of this
3954 // N_EINCL so that we can always skip the entire symbol if we need
3955 // to navigate more quickly at the source level when parsing STABS
3956 if (!N_INCL_indexes.empty()) {
3957 symbol_ptr = symtab.SymbolAtIndex(N_INCL_indexes.back());
3958 symbol_ptr->SetByteSize(sym_idx + 1);
3959 symbol_ptr->SetSizeIsSibling(true);
3960 N_INCL_indexes.pop_back();
3961 }
3962 type = eSymbolTypeScopeEnd;
3963 break;
3964
3965 case N_SOL:
3966 // #included file name: name,,n_sect,0,address
3967 type = eSymbolTypeHeaderFile;
3968
3969 // We currently don't use the header files on darwin
3970 add_nlist = false;
3971 break;
3972
3973 case N_PARAMS:
3974 // compiler parameters: name,,NO_SECT,0,0
3975 type = eSymbolTypeCompiler;
3976 break;
3977
3978 case N_VERSION:
3979 // compiler version: name,,NO_SECT,0,0
3980 type = eSymbolTypeCompiler;
3981 break;
3982
3983 case N_OLEVEL:
3984 // compiler -O level: name,,NO_SECT,0,0
3985 type = eSymbolTypeCompiler;
3986 break;
3987
3988 case N_PSYM:
3989 // parameter: name,,NO_SECT,type,offset
3990 type = eSymbolTypeVariable;
3991 break;
3992
3993 case N_ENTRY:
3994 // alternate entry: name,,n_sect,linenumber,address
3995 symbol_section = section_info.GetSection(nlist.n_sect, nlist.n_value);
3996 type = eSymbolTypeLineEntry;
3997 break;
3998
3999 // Left and Right Braces
4000 case N_LBRAC:
4001 // left bracket: 0,,NO_SECT,nesting level,address We use the
4002 // current number of symbols in the symbol table in lieu of using
4003 // nlist_idx in case we ever start trimming entries out
4004 symbol_section = section_info.GetSection(nlist.n_sect, nlist.n_value);
4005 N_BRAC_indexes.push_back(sym_idx);
4006 type = eSymbolTypeScopeBegin;
4007 break;
4008
4009 case N_RBRAC:
4010 // right bracket: 0,,NO_SECT,nesting level,address Set the size of
4011 // the N_LBRAC to the terminating index of this N_RBRAC so that we
4012 // can always skip the entire symbol if we need to navigate more
4013 // quickly at the source level when parsing STABS
4014 symbol_section = section_info.GetSection(nlist.n_sect, nlist.n_value);
4015 if (!N_BRAC_indexes.empty()) {
4016 symbol_ptr = symtab.SymbolAtIndex(N_BRAC_indexes.back());
4017 symbol_ptr->SetByteSize(sym_idx + 1);
4018 symbol_ptr->SetSizeIsSibling(true);
4019 N_BRAC_indexes.pop_back();
4020 }
4021 type = eSymbolTypeScopeEnd;
4022 break;
4023
4024 case N_EXCL:
4025 // deleted include file: name,,NO_SECT,0,sum
4026 type = eSymbolTypeHeaderFile;
4027 break;
4028
4029 // COMM scopes
4030 case N_BCOMM:
4031 // begin common: name,,NO_SECT,0,0
4032 // We use the current number of symbols in the symbol table in lieu
4033 // of using nlist_idx in case we ever start trimming entries out
4034 type = eSymbolTypeScopeBegin;
4035 N_COMM_indexes.push_back(sym_idx);
4036 break;
4037
4038 case N_ECOML:
4039 // end common (local name): 0,,n_sect,0,address
4040 symbol_section = section_info.GetSection(nlist.n_sect, nlist.n_value);
4041 [[fallthrough]];
4042
4043 case N_ECOMM:
4044 // end common: name,,n_sect,0,0
4045 // Set the size of the N_BCOMM to the terminating index of this
4046 // N_ECOMM/N_ECOML so that we can always skip the entire symbol if
4047 // we need to navigate more quickly at the source level when
4048 // parsing STABS
4049 if (!N_COMM_indexes.empty()) {
4050 symbol_ptr = symtab.SymbolAtIndex(N_COMM_indexes.back());
4051 symbol_ptr->SetByteSize(sym_idx + 1);
4052 symbol_ptr->SetSizeIsSibling(true);
4053 N_COMM_indexes.pop_back();
4054 }
4055 type = eSymbolTypeScopeEnd;
4056 break;
4057
4058 case N_LENG:
4059 // second stab entry with length information
4060 type = eSymbolTypeAdditional;
4061 break;
4062
4063 default:
4064 break;
4065 }
4066 } else {
4067 uint8_t n_type = N_TYPE & nlist.n_type;
4068 sym[sym_idx].SetExternal((N_EXT & nlist.n_type) != 0);
4069
4070 switch (n_type) {
4071 case N_INDR: {
4072 const char *reexport_name_cstr = strtab_data.PeekCStr(nlist.n_value);
4073 if (reexport_name_cstr && reexport_name_cstr[0] && symbol_name) {
4074 type = eSymbolTypeReExported;
4075 ConstString reexport_name(reexport_name_cstr +
4076 ((reexport_name_cstr[0] == '_') ? 1 : 0));
4077 sym[sym_idx].SetReExportedSymbolName(reexport_name);
4078 set_value = false;
4079 reexport_shlib_needs_fixup[sym_idx] = reexport_name;
4080 indirect_symbol_names.insert(
4081 ConstString(symbol_name + ((symbol_name[0] == '_') ? 1 : 0)));
4082 } else
4083 type = eSymbolTypeUndefined;
4084 } break;
4085
4086 case N_UNDF:
4087 if (symbol_name && symbol_name[0]) {
4088 ConstString undefined_name(symbol_name +
4089 ((symbol_name[0] == '_') ? 1 : 0));
4090 undefined_name_to_desc[undefined_name] = nlist.n_desc;
4091 }
4092 [[fallthrough]];
4093
4094 case N_PBUD:
4095 type = eSymbolTypeUndefined;
4096 break;
4097
4098 case N_ABS:
4099 type = eSymbolTypeAbsolute;
4100 break;
4101
4102 case N_SECT: {
4103 symbol_section = section_info.GetSection(nlist.n_sect, nlist.n_value);
4104
4105 if (!symbol_section) {
4106 // TODO: warn about this?
4107 add_nlist = false;
4108 break;
4109 }
4110
4111 if (TEXT_eh_frame_sectID == nlist.n_sect) {
4112 type = eSymbolTypeException;
4113 } else {
4114 uint32_t section_type = symbol_section->Get() & SECTION_TYPE;
4115
4116 switch (section_type) {
4117 case S_CSTRING_LITERALS:
4118 type = eSymbolTypeData;
4119 break; // section with only literal C strings
4120 case S_4BYTE_LITERALS:
4121 type = eSymbolTypeData;
4122 break; // section with only 4 byte literals
4123 case S_8BYTE_LITERALS:
4124 type = eSymbolTypeData;
4125 break; // section with only 8 byte literals
4126 case S_LITERAL_POINTERS:
4127 type = eSymbolTypeTrampoline;
4128 break; // section with only pointers to literals
4129 case S_NON_LAZY_SYMBOL_POINTERS:
4130 type = eSymbolTypeTrampoline;
4131 break; // section with only non-lazy symbol pointers
4132 case S_LAZY_SYMBOL_POINTERS:
4133 type = eSymbolTypeTrampoline;
4134 break; // section with only lazy symbol pointers
4135 case S_SYMBOL_STUBS:
4136 type = eSymbolTypeTrampoline;
4137 break; // section with only symbol stubs, byte size of stub in
4138 // the reserved2 field
4139 case S_MOD_INIT_FUNC_POINTERS:
4140 type = eSymbolTypeCode;
4141 break; // section with only function pointers for initialization
4142 case S_MOD_TERM_FUNC_POINTERS:
4143 type = eSymbolTypeCode;
4144 break; // section with only function pointers for termination
4145 case S_INTERPOSING:
4146 type = eSymbolTypeTrampoline;
4147 break; // section with only pairs of function pointers for
4148 // interposing
4149 case S_16BYTE_LITERALS:
4150 type = eSymbolTypeData;
4151 break; // section with only 16 byte literals
4152 case S_DTRACE_DOF:
4154 break;
4155 case S_LAZY_DYLIB_SYMBOL_POINTERS:
4156 type = eSymbolTypeTrampoline;
4157 break;
4158 default:
4159 switch (symbol_section->GetType()) {
4161 type = eSymbolTypeCode;
4162 break;
4163 case eSectionTypeData:
4164 case eSectionTypeDataCString: // Inlined C string data
4165 case eSectionTypeDataCStringPointers: // Pointers to C string
4166 // data
4167 case eSectionTypeDataSymbolAddress: // Address of a symbol in
4168 // the symbol table
4169 case eSectionTypeData4:
4170 case eSectionTypeData8:
4171 case eSectionTypeData16:
4172 type = eSymbolTypeData;
4173 break;
4174 default:
4175 break;
4176 }
4177 break;
4178 }
4179
4180 if (type == eSymbolTypeInvalid) {
4181 const char *symbol_sect_name =
4182 symbol_section->GetName().AsCString();
4183 if (symbol_section->IsDescendant(text_section_sp.get())) {
4184 if (symbol_section->IsClear(S_ATTR_PURE_INSTRUCTIONS |
4185 S_ATTR_SELF_MODIFYING_CODE |
4186 S_ATTR_SOME_INSTRUCTIONS))
4187 type = eSymbolTypeData;
4188 else
4189 type = eSymbolTypeCode;
4190 } else if (symbol_section->IsDescendant(data_section_sp.get()) ||
4191 symbol_section->IsDescendant(
4192 data_dirty_section_sp.get()) ||
4193 symbol_section->IsDescendant(
4194 data_const_section_sp.get())) {
4195 if (symbol_sect_name &&
4196 ::strstr(symbol_sect_name, "__objc") == symbol_sect_name) {
4197 type = eSymbolTypeRuntime;
4198
4199 if (symbol_name) {
4200 llvm::StringRef symbol_name_ref(symbol_name);
4201 if (symbol_name_ref.startswith("_OBJC_")) {
4202 llvm::StringRef g_objc_v2_prefix_class(
4203 "_OBJC_CLASS_$_");
4204 llvm::StringRef g_objc_v2_prefix_metaclass(
4205 "_OBJC_METACLASS_$_");
4206 llvm::StringRef g_objc_v2_prefix_ivar(
4207 "_OBJC_IVAR_$_");
4208 if (symbol_name_ref.startswith(g_objc_v2_prefix_class)) {
4209 symbol_name_non_abi_mangled = symbol_name + 1;
4210 symbol_name =
4211 symbol_name + g_objc_v2_prefix_class.size();
4212 type = eSymbolTypeObjCClass;
4213 demangled_is_synthesized = true;
4214 } else if (symbol_name_ref.startswith(
4215 g_objc_v2_prefix_metaclass)) {
4216 symbol_name_non_abi_mangled = symbol_name + 1;
4217 symbol_name =
4218 symbol_name + g_objc_v2_prefix_metaclass.size();
4220 demangled_is_synthesized = true;
4221 } else if (symbol_name_ref.startswith(
4222 g_objc_v2_prefix_ivar)) {
4223 symbol_name_non_abi_mangled = symbol_name + 1;
4224 symbol_name =
4225 symbol_name + g_objc_v2_prefix_ivar.size();
4226 type = eSymbolTypeObjCIVar;
4227 demangled_is_synthesized = true;
4228 }
4229 }
4230 }
4231 } else if (symbol_sect_name &&
4232 ::strstr(symbol_sect_name, "__gcc_except_tab") ==
4233 symbol_sect_name) {
4234 type = eSymbolTypeException;
4235 } else {
4236 type = eSymbolTypeData;
4237 }
4238 } else if (symbol_sect_name &&
4239 ::strstr(symbol_sect_name, "__IMPORT") ==
4240 symbol_sect_name) {
4241 type = eSymbolTypeTrampoline;
4242 } else if (symbol_section->IsDescendant(objc_section_sp.get())) {
4243 type = eSymbolTypeRuntime;
4244 if (symbol_name && symbol_name[0] == '.') {
4245 llvm::StringRef symbol_name_ref(symbol_name);
4246 llvm::StringRef g_objc_v1_prefix_class(
4247 ".objc_class_name_");
4248 if (symbol_name_ref.startswith(g_objc_v1_prefix_class)) {
4249 symbol_name_non_abi_mangled = symbol_name;
4250 symbol_name = symbol_name + g_objc_v1_prefix_class.size();
4251 type = eSymbolTypeObjCClass;
4252 demangled_is_synthesized = true;
4253 }
4254 }
4255 }
4256 }
4257 }
4258 } break;
4259 }
4260 }
4261
4262 if (!add_nlist) {
4263 sym[sym_idx].Clear();
4264 return true;
4265 }
4266
4267 uint64_t symbol_value = nlist.n_value;
4268
4269 if (symbol_name_non_abi_mangled) {
4270 sym[sym_idx].GetMangled().SetMangledName(
4271 ConstString(symbol_name_non_abi_mangled));
4272 sym[sym_idx].GetMangled().SetDemangledName(ConstString(symbol_name));
4273 } else {
4274
4275 if (symbol_name && symbol_name[0] == '_') {
4276 symbol_name++; // Skip the leading underscore
4277 }
4278
4279 if (symbol_name) {
4280 ConstString const_symbol_name(symbol_name);
4281 sym[sym_idx].GetMangled().SetValue(const_symbol_name);
4282 }
4283 }
4284
4285 if (is_gsym) {
4286 const char *gsym_name = sym[sym_idx]
4287 .GetMangled()
4288 .GetName(Mangled::ePreferMangled)
4289 .GetCString();
4290 if (gsym_name)
4291 N_GSYM_name_to_sym_idx[gsym_name] = sym_idx;
4292 }
4293
4294 if (symbol_section) {
4295 const addr_t section_file_addr = symbol_section->GetFileAddress();
4296 if (symbol_byte_size == 0 && function_starts_count > 0) {
4297 addr_t symbol_lookup_file_addr = nlist.n_value;
4298 // Do an exact address match for non-ARM addresses, else get the
4299 // closest since the symbol might be a thumb symbol which has an
4300 // address with bit zero set.
4301 FunctionStarts::Entry *func_start_entry =
4302 function_starts.FindEntry(symbol_lookup_file_addr, !is_arm);
4303 if (is_arm && func_start_entry) {
4304 // Verify that the function start address is the symbol address
4305 // (ARM) or the symbol address + 1 (thumb).
4306 if (func_start_entry->addr != symbol_lookup_file_addr &&
4307 func_start_entry->addr != (symbol_lookup_file_addr + 1)) {
4308 // Not the right entry, NULL it out...
4309 func_start_entry = nullptr;
4310 }
4311 }
4312 if (func_start_entry) {
4313 func_start_entry->data = true;
4314
4315 addr_t symbol_file_addr = func_start_entry->addr;
4316 if (is_arm)
4317 symbol_file_addr &= THUMB_ADDRESS_BIT_MASK;
4318
4319 const FunctionStarts::Entry *next_func_start_entry =
4320 function_starts.FindNextEntry(func_start_entry);
4321 const addr_t section_end_file_addr =
4322 section_file_addr + symbol_section->GetByteSize();
4323 if (next_func_start_entry) {
4324 addr_t next_symbol_file_addr = next_func_start_entry->addr;
4325 // Be sure to clear the Thumb address bit when we calculate the
4326 // size from the current and next address
4327 if (is_arm)
4328 next_symbol_file_addr &= THUMB_ADDRESS_BIT_MASK;
4329 symbol_byte_size = std::min<lldb::addr_t>(
4330 next_symbol_file_addr - symbol_file_addr,
4331 section_end_file_addr - symbol_file_addr);
4332 } else {
4333 symbol_byte_size = section_end_file_addr - symbol_file_addr;
4334 }
4335 }
4336 }
4337 symbol_value -= section_file_addr;
4338 }
4339
4340 if (!is_debug) {
4341 if (type == eSymbolTypeCode) {
4342 // See if we can find a N_FUN entry for any code symbols. If we do
4343 // find a match, and the name matches, then we can merge the two into
4344 // just the function symbol to avoid duplicate entries in the symbol
4345 // table.
4346 std::pair<ValueToSymbolIndexMap::const_iterator,
4347 ValueToSymbolIndexMap::const_iterator>
4348 range;
4349 range = N_FUN_addr_to_sym_idx.equal_range(nlist.n_value);
4350 if (range.first != range.second) {
4351 for (ValueToSymbolIndexMap::const_iterator pos = range.first;
4352 pos != range.second; ++pos) {
4353 if (sym[sym_idx].GetMangled().GetName(Mangled::ePreferMangled) ==
4354 sym[pos->second].GetMangled().GetName(
4355 Mangled::ePreferMangled)) {
4356 m_nlist_idx_to_sym_idx[nlist_idx] = pos->second;
4357 // We just need the flags from the linker symbol, so put these
4358 // flags into the N_FUN flags to avoid duplicate symbols in the
4359 // symbol table.
4360 sym[pos->second].SetExternal(sym[sym_idx].IsExternal());
4361 sym[pos->second].SetFlags(nlist.n_type << 16 | nlist.n_desc);
4362 if (resolver_addresses.find(nlist.n_value) !=
4363 resolver_addresses.end())
4364 sym[pos->second].SetType(eSymbolTypeResolver);
4365 sym[sym_idx].Clear();
4366 return true;
4367 }
4368 }
4369 } else {
4370 if (resolver_addresses.find(nlist.n_value) !=
4371 resolver_addresses.end())
4372 type = eSymbolTypeResolver;
4373 }
4374 } else if (type == eSymbolTypeData || type == eSymbolTypeObjCClass ||
4375 type == eSymbolTypeObjCMetaClass ||
4376 type == eSymbolTypeObjCIVar) {
4377 // See if we can find a N_STSYM entry for any data symbols. If we do
4378 // find a match, and the name matches, then we can merge the two into
4379 // just the Static symbol to avoid duplicate entries in the symbol
4380 // table.
4381 std::pair<ValueToSymbolIndexMap::const_iterator,
4382 ValueToSymbolIndexMap::const_iterator>
4383 range;
4384 range = N_STSYM_addr_to_sym_idx.equal_range(nlist.n_value);
4385 if (range.first != range.second) {
4386 for (ValueToSymbolIndexMap::const_iterator pos = range.first;
4387 pos != range.second; ++pos) {
4388 if (sym[sym_idx].GetMangled().GetName(Mangled::ePreferMangled) ==
4389 sym[pos->second].GetMangled().GetName(
4390 Mangled::ePreferMangled)) {
4391 m_nlist_idx_to_sym_idx[nlist_idx] = pos->second;
4392 // We just need the flags from the linker symbol, so put these
4393 // flags into the N_STSYM flags to avoid duplicate symbols in
4394 // the symbol table.
4395 sym[pos->second].SetExternal(sym[sym_idx].IsExternal());
4396 sym[pos->second].SetFlags(nlist.n_type << 16 | nlist.n_desc);
4397 sym[sym_idx].Clear();
4398 return true;
4399 }
4400 }
4401 } else {
4402 // Combine N_GSYM stab entries with the non stab symbol.
4403 const char *gsym_name = sym[sym_idx]
4404 .GetMangled()
4405 .GetName(Mangled::ePreferMangled)
4406 .GetCString();
4407 if (gsym_name) {
4408 ConstNameToSymbolIndexMap::const_iterator pos =
4409 N_GSYM_name_to_sym_idx.find(gsym_name);
4410 if (pos != N_GSYM_name_to_sym_idx.end()) {
4411 const uint32_t GSYM_sym_idx = pos->second;
4412 m_nlist_idx_to_sym_idx[nlist_idx] = GSYM_sym_idx;
4413 // Copy the address, because often the N_GSYM address has an
4414 // invalid address of zero when the global is a common symbol.
4415 sym[GSYM_sym_idx].GetAddressRef().SetSection(symbol_section);
4416 sym[GSYM_sym_idx].GetAddressRef().SetOffset(symbol_value);
4417 add_symbol_addr(
4418 sym[GSYM_sym_idx].GetAddress().GetFileAddress());
4419 // We just need the flags from the linker symbol, so put these
4420 // flags into the N_GSYM flags to avoid duplicate symbols in
4421 // the symbol table.
4422 sym[GSYM_sym_idx].SetFlags(nlist.n_type << 16 | nlist.n_desc);
4423 sym[sym_idx].Clear();
4424 return true;
4425 }
4426 }
4427 }
4428 }
4429 }
4430
4431 sym[sym_idx].SetID(nlist_idx);
4432 sym[sym_idx].SetType(type);
4433 if (set_value) {
4434 sym[sym_idx].GetAddressRef().SetSection(symbol_section);
4435 sym[sym_idx].GetAddressRef().SetOffset(symbol_value);
4436 if (symbol_section)
4437 add_symbol_addr(sym[sym_idx].GetAddress().GetFileAddress());
4438 }
4439 sym[sym_idx].SetFlags(nlist.n_type << 16 | nlist.n_desc);
4440 if (nlist.n_desc & N_WEAK_REF)
4441 sym[sym_idx].SetIsWeak(true);
4442
4443 if (symbol_byte_size > 0)
4444 sym[sym_idx].SetByteSize(symbol_byte_size);
4445
4446 if (demangled_is_synthesized)
4447 sym[sym_idx].SetDemangledNameIsSynthesized(true);
4448
4449 ++sym_idx;
4450 return true;
4451 };
4452
4453 // First parse all the nlists but don't process them yet. See the next
4454 // comment for an explanation why.
4455 std::vector<struct nlist_64> nlists;
4456 nlists.reserve(symtab_load_command.nsyms);
4457 for (; nlist_idx < symtab_load_command.nsyms; ++nlist_idx) {
4458 if (auto nlist =
4459 ParseNList(nlist_data, nlist_data_offset, nlist_byte_size))
4460 nlists.push_back(*nlist);
4461 else
4462 break;
4463 }
4464
4465 // Now parse all the debug symbols. This is needed to merge non-debug
4466 // symbols in the next step. Non-debug symbols are always coalesced into
4467 // the debug symbol. Doing this in one step would mean that some symbols
4468 // won't be merged.
4469 nlist_idx = 0;
4470 for (auto &nlist : nlists) {
4471 if (!ParseSymbolLambda(nlist, nlist_idx++, DebugSymbols))
4472 break;
4473 }
4474
4475 // Finally parse all the non debug symbols.
4476 nlist_idx = 0;
4477 for (auto &nlist : nlists) {
4478 if (!ParseSymbolLambda(nlist, nlist_idx++, NonDebugSymbols))
4479 break;
4480 }
4481
4482 for (const auto &pos : reexport_shlib_needs_fixup) {
4483 const auto undef_pos = undefined_name_to_desc.find(pos.second);
4484 if (undef_pos != undefined_name_to_desc.end()) {
4485 const uint8_t dylib_ordinal =
4486 llvm::MachO::GET_LIBRARY_ORDINAL(undef_pos->second);
4487 if (dylib_ordinal > 0 && dylib_ordinal < dylib_files.GetSize())
4488 sym[pos.first].SetReExportedSymbolSharedLibrary(
4489 dylib_files.GetFileSpecAtIndex(dylib_ordinal - 1));
4490 }
4491 }
4492 }
4493
4494 // Count how many trie symbols we'll add to the symbol table
4495 int trie_symbol_table_augment_count = 0;
4496 for (auto &e : external_sym_trie_entries) {
4497 if (!symbols_added.contains(e.entry.address))
4498 trie_symbol_table_augment_count++;
4499 }
4500
4501 if (num_syms < sym_idx + trie_symbol_table_augment_count) {
4502 num_syms = sym_idx + trie_symbol_table_augment_count;
4503 sym = symtab.Resize(num_syms);
4504 }
4505 uint32_t synthetic_sym_id = symtab_load_command.nsyms;
4506
4507 // Add symbols from the trie to the symbol table.
4508 for (auto &e : external_sym_trie_entries) {
4509 if (symbols_added.contains(e.entry.address))
4510 continue;
4511
4512 // Find the section that this trie address is in, use that to annotate
4513 // symbol type as we add the trie address and name to the symbol table.
4514 Address symbol_addr;
4515 if (module_sp->ResolveFileAddress(e.entry.address, symbol_addr)) {
4516 SectionSP symbol_section(symbol_addr.GetSection());
4517 const char *symbol_name = e.entry.name.GetCString();
4518 bool demangled_is_synthesized = false;
4519 SymbolType type =
4520 GetSymbolType(symbol_name, demangled_is_synthesized, text_section_sp,
4521 data_section_sp, data_dirty_section_sp,
4522 data_const_section_sp, symbol_section);
4523
4524 sym[sym_idx].SetType(type);
4525 if (symbol_section) {
4526 sym[sym_idx].SetID(synthetic_sym_id++);
4527 sym[sym_idx].GetMangled().SetMangledName(ConstString(symbol_name));
4528 if (demangled_is_synthesized)
4529 sym[sym_idx].SetDemangledNameIsSynthesized(true);
4530 sym[sym_idx].SetIsSynthetic(true);
4531 sym[sym_idx].SetExternal(true);
4532 sym[sym_idx].GetAddressRef() = symbol_addr;
4533 add_symbol_addr(symbol_addr.GetFileAddress());
4534 if (e.entry.flags & TRIE_SYMBOL_IS_THUMB)
4536 ++sym_idx;
4537 }
4538 }
4539 }
4540
4541 if (function_starts_count > 0) {
4542 uint32_t num_synthetic_function_symbols = 0;
4543 for (i = 0; i < function_starts_count; ++i) {
4544 if (!symbols_added.contains(function_starts.GetEntryRef(i).addr))
4545 ++num_synthetic_function_symbols;
4546 }
4547
4548 if (num_synthetic_function_symbols > 0) {
4549 if (num_syms < sym_idx + num_synthetic_function_symbols) {
4550 num_syms = sym_idx + num_synthetic_function_symbols;
4551 sym = symtab.Resize(num_syms);
4552 }
4553 for (i = 0; i < function_starts_count; ++i) {
4554 const FunctionStarts::Entry *func_start_entry =
4555 function_starts.GetEntryAtIndex(i);
4556 if (!symbols_added.contains(func_start_entry->addr)) {
4557 addr_t symbol_file_addr = func_start_entry->addr;
4558 uint32_t symbol_flags = 0;
4559 if (func_start_entry->data)
4560 symbol_flags = MACHO_NLIST_ARM_SYMBOL_IS_THUMB;
4561 Address symbol_addr;
4562 if (module_sp->ResolveFileAddress(symbol_file_addr, symbol_addr)) {
4563 SectionSP symbol_section(symbol_addr.GetSection());
4564 uint32_t symbol_byte_size = 0;
4565 if (symbol_section) {
4566 const addr_t section_file_addr = symbol_section->GetFileAddress();
4567 const FunctionStarts::Entry *next_func_start_entry =
4568 function_starts.FindNextEntry(func_start_entry);
4569 const addr_t section_end_file_addr =
4570 section_file_addr + symbol_section->GetByteSize();
4571 if (next_func_start_entry) {
4572 addr_t next_symbol_file_addr = next_func_start_entry->addr;
4573 if (is_arm)
4574 next_symbol_file_addr &= THUMB_ADDRESS_BIT_MASK;
4575 symbol_byte_size = std::min<lldb::addr_t>(
4576 next_symbol_file_addr - symbol_file_addr,
4577 section_end_file_addr - symbol_file_addr);
4578 } else {
4579 symbol_byte_size = section_end_file_addr - symbol_file_addr;
4580 }
4581 sym[sym_idx].SetID(synthetic_sym_id++);
4582 // Don't set the name for any synthetic symbols, the Symbol
4583 // object will generate one if needed when the name is accessed
4584 // via accessors.
4585 sym[sym_idx].GetMangled().SetDemangledName(ConstString());
4586 sym[sym_idx].SetType(eSymbolTypeCode);
4587 sym[sym_idx].SetIsSynthetic(true);
4588 sym[sym_idx].GetAddressRef() = symbol_addr;
4589 add_symbol_addr(symbol_addr.GetFileAddress());
4590 if (symbol_flags)
4591 sym[sym_idx].SetFlags(symbol_flags);
4592 if (symbol_byte_size)
4593 sym[sym_idx].SetByteSize(symbol_byte_size);
4594 ++sym_idx;
4595 }
4596 }
4597 }
4598 }
4599 }
4600 }
4601
4602 // Trim our symbols down to just what we ended up with after removing any
4603 // symbols.
4604 if (sym_idx < num_syms) {
4605 num_syms = sym_idx;
4606 sym = symtab.Resize(num_syms);
4607 }
4608
4609 // Now synthesize indirect symbols
4610 if (m_dysymtab.nindirectsyms != 0) {
4611 if (indirect_symbol_index_data.GetByteSize()) {
4612 NListIndexToSymbolIndexMap::const_iterator end_index_pos =
4613 m_nlist_idx_to_sym_idx.end();
4614
4615 for (uint32_t sect_idx = 1; sect_idx < m_mach_sections.size();
4616 ++sect_idx) {
4617 if ((m_mach_sections[sect_idx].flags & SECTION_TYPE) ==
4618 S_SYMBOL_STUBS) {
4619 uint32_t symbol_stub_byte_size = m_mach_sections[sect_idx].reserved2;
4620 if (symbol_stub_byte_size == 0)
4621 continue;
4622
4623 const uint32_t num_symbol_stubs =
4624 m_mach_sections[sect_idx].size / symbol_stub_byte_size;
4625
4626 if (num_symbol_stubs == 0)
4627 continue;
4628
4629 const uint32_t symbol_stub_index_offset =
4630 m_mach_sections[sect_idx].reserved1;
4631 for (uint32_t stub_idx = 0; stub_idx < num_symbol_stubs; ++stub_idx) {
4632 const uint32_t symbol_stub_index =
4633 symbol_stub_index_offset + stub_idx;
4634 const lldb::addr_t symbol_stub_addr =
4635 m_mach_sections[sect_idx].addr +
4636 (stub_idx * symbol_stub_byte_size);
4637 lldb::offset_t symbol_stub_offset = symbol_stub_index * 4;
4638 if (indirect_symbol_index_data.ValidOffsetForDataOfSize(
4639 symbol_stub_offset, 4)) {
4640 const uint32_t stub_sym_id =
4641 indirect_symbol_index_data.GetU32(&symbol_stub_offset);
4642 if (stub_sym_id & (INDIRECT_SYMBOL_ABS | INDIRECT_SYMBOL_LOCAL))
4643 continue;
4644
4645 NListIndexToSymbolIndexMap::const_iterator index_pos =
4646 m_nlist_idx_to_sym_idx.find(stub_sym_id);
4647 Symbol *stub_symbol = nullptr;
4648 if (index_pos != end_index_pos) {
4649 // We have a remapping from the original nlist index to a
4650 // current symbol index, so just look this up by index
4651 stub_symbol = symtab.SymbolAtIndex(index_pos->second);
4652 } else {
4653 // We need to lookup a symbol using the original nlist symbol
4654 // index since this index is coming from the S_SYMBOL_STUBS
4655 stub_symbol = symtab.FindSymbolByID(stub_sym_id);
4656 }
4657
4658 if (stub_symbol) {
4659 Address so_addr(symbol_stub_addr, section_list);
4660
4661 if (stub_symbol->GetType() == eSymbolTypeUndefined) {
4662 // Change the external symbol into a trampoline that makes
4663 // sense. These symbols were N_UNDF N_EXT, and are useless
4664 // to us, so we can re-use them so we don't have to make up
4665 // a synthetic symbol for no good reason.
4666 if (resolver_addresses.find(symbol_stub_addr) ==
4667 resolver_addresses.end())
4668 stub_symbol->SetType(eSymbolTypeTrampoline);
4669 else
4670 stub_symbol->SetType(eSymbolTypeResolver);
4671 stub_symbol->SetExternal(false);
4672 stub_symbol->GetAddressRef() = so_addr;
4673 stub_symbol->SetByteSize(symbol_stub_byte_size);
4674 } else {
4675 // Make a synthetic symbol to describe the trampoline stub
4676 Mangled stub_symbol_mangled_name(stub_symbol->GetMangled());
4677 if (sym_idx >= num_syms) {
4678 sym = symtab.Resize(++num_syms);
4679 stub_symbol = nullptr; // this pointer no longer valid
4680 }
4681 sym[sym_idx].SetID(synthetic_sym_id++);
4682 sym[sym_idx].GetMangled() = stub_symbol_mangled_name;
4683 if (resolver_addresses.find(symbol_stub_addr) ==
4684 resolver_addresses.end())
4685 sym[sym_idx].SetType(eSymbolTypeTrampoline);
4686 else
4687 sym[sym_idx].SetType(eSymbolTypeResolver);
4688 sym[sym_idx].SetIsSynthetic(true);
4689 sym[sym_idx].GetAddressRef() = so_addr;
4690 add_symbol_addr(so_addr.GetFileAddress());
4691 sym[sym_idx].SetByteSize(symbol_stub_byte_size);
4692 ++sym_idx;
4693 }
4694 } else {
4695 if (log)
4696 log->Warning("symbol stub referencing symbol table symbol "
4697 "%u that isn't in our minimal symbol table, "
4698 "fix this!!!",
4699 stub_sym_id);
4700 }
4701 }
4702 }
4703 }
4704 }
4705 }
4706 }
4707
4708 if (!reexport_trie_entries.empty()) {
4709 for (const auto &e : reexport_trie_entries) {
4710 if (e.entry.import_name) {
4711 // Only add indirect symbols from the Trie entries if we didn't have
4712 // a N_INDR nlist entry for this already
4713 if (indirect_symbol_names.find(e.entry.name) ==
4714 indirect_symbol_names.end()) {
4715 // Make a synthetic symbol to describe re-exported symbol.
4716 if (sym_idx >= num_syms)
4717 sym = symtab.Resize(++num_syms);
4718 sym[sym_idx].SetID(synthetic_sym_id++);
4719 sym[sym_idx].GetMangled() = Mangled(e.entry.name);
4720 sym[sym_idx].SetType(eSymbolTypeReExported);
4721 sym[sym_idx].SetIsSynthetic(true);
4722 sym[sym_idx].SetReExportedSymbolName(e.entry.import_name);
4723 if (e.entry.other > 0 && e.entry.other <= dylib_files.GetSize()) {
4725 dylib_files.GetFileSpecAtIndex(e.entry.other - 1));
4726 }
4727 ++sym_idx;
4728 }
4729 }
4730 }
4731 }
4732}
4733
4735 ModuleSP module_sp(GetModule());
4736 if (module_sp) {
4737 std::lock_guard<std::recursive_mutex> guard(module_sp->GetMutex());
4738 s->Printf("%p: ", static_cast<void *>(this));
4739 s->Indent();
4740 if (m_header.magic == MH_MAGIC_64 || m_header.magic == MH_CIGAM_64)
4741 s->PutCString("ObjectFileMachO64");
4742 else
4743 s->PutCString("ObjectFileMachO32");
4744
4745 *s << ", file = '" << m_file;
4746 ModuleSpecList all_specs;
4747 ModuleSpec base_spec;
4749 base_spec, all_specs);
4750 for (unsigned i = 0, e = all_specs.GetSize(); i != e; ++i) {
4751 *s << "', triple";
4752 if (e)
4753 s->Printf("[%d]", i);
4754 *s << " = ";
4755 *s << all_specs.GetModuleSpecRefAtIndex(i)
4757 .GetTriple()
4758 .getTriple();
4759 }
4760 *s << "\n";
4761 SectionList *sections = GetSectionList();
4762 if (sections)
4763 sections->Dump(s->AsRawOstream(), s->GetIndentLevel(), nullptr, true,
4764 UINT32_MAX);
4765
4766 if (m_symtab_up)
4767 m_symtab_up->Dump(s, nullptr, eSortOrderNone);
4768 }
4769}
4770
4771UUID ObjectFileMachO::GetUUID(const llvm::MachO::mach_header &header,
4772 const lldb_private::DataExtractor &data,
4773 lldb::offset_t lc_offset) {
4774 uint32_t i;
4775 llvm::MachO::uuid_command load_cmd;
4776
4777 lldb::offset_t offset = lc_offset;
4778 for (i = 0; i < header.ncmds; ++i) {
4779 const lldb::offset_t cmd_offset = offset;
4780 if (data.GetU32(&offset, &load_cmd, 2) == nullptr)
4781 break;
4782
4783 if (load_cmd.cmd == LC_UUID) {
4784 const uint8_t *uuid_bytes = data.PeekData(offset, 16);
4785
4786 if (uuid_bytes) {
4787 // OpenCL on Mac OS X uses the same UUID for each of its object files.
4788 // We pretend these object files have no UUID to prevent crashing.
4789
4790 const uint8_t opencl_uuid[] = {0x8c, 0x8e, 0xb3, 0x9b, 0x3b, 0xa8,
4791 0x4b, 0x16, 0xb6, 0xa4, 0x27, 0x63,
4792 0xbb, 0x14, 0xf0, 0x0d};
4793
4794 if (!memcmp(uuid_bytes, opencl_uuid, 16))
4795 return UUID();
4796
4797 return UUID(uuid_bytes, 16);
4798 }
4799 return UUID();
4800 }
4801 offset = cmd_offset + load_cmd.cmdsize;
4802 }
4803 return UUID();
4804}
4805
4806static llvm::StringRef GetOSName(uint32_t cmd) {
4807 switch (cmd) {
4808 case llvm::MachO::LC_VERSION_MIN_IPHONEOS:
4809 return llvm::Triple::getOSTypeName(llvm::Triple::IOS);
4810 case llvm::MachO::LC_VERSION_MIN_MACOSX:
4811 return llvm::Triple::getOSTypeName(llvm::Triple::MacOSX);
4812 case llvm::MachO::LC_VERSION_MIN_TVOS:
4813 return llvm::Triple::getOSTypeName(llvm::Triple::TvOS);
4814 case llvm::MachO::LC_VERSION_MIN_WATCHOS:
4815 return llvm::Triple::getOSTypeName(llvm::Triple::WatchOS);
4816 default:
4817 llvm_unreachable("unexpected LC_VERSION load command");
4818 }
4819}
4820
4821namespace {
4822struct OSEnv {
4823 llvm::StringRef os_type;
4824 llvm::StringRef environment;
4825 OSEnv(uint32_t cmd) {
4826 switch (cmd) {
4827 case llvm::MachO::PLATFORM_MACOS:
4828 os_type = llvm::Triple::getOSTypeName(llvm::Triple::MacOSX);
4829 return;
4830 case llvm::MachO::PLATFORM_IOS:
4831 os_type = llvm::Triple::getOSTypeName(llvm::Triple::IOS);
4832 return;
4833 case llvm::MachO::PLATFORM_TVOS:
4834 os_type = llvm::Triple::getOSTypeName(llvm::Triple::TvOS);
4835 return;
4836 case llvm::MachO::PLATFORM_WATCHOS:
4837 os_type = llvm::Triple::getOSTypeName(llvm::Triple::WatchOS);
4838 return;
4839 // TODO: add BridgeOS & DriverKit once in llvm/lib/Support/Triple.cpp
4840 // NEED_BRIDGEOS_TRIPLE
4841 // case llvm::MachO::PLATFORM_BRIDGEOS:
4842 // os_type = llvm::Triple::getOSTypeName(llvm::Triple::BridgeOS);
4843 // return;
4844 // case llvm::MachO::PLATFORM_DRIVERKIT:
4845 // os_type = llvm::Triple::getOSTypeName(llvm::Triple::DriverKit);
4846 // return;
4847 case llvm::MachO::PLATFORM_MACCATALYST:
4848 os_type = llvm::Triple::getOSTypeName(llvm::Triple::IOS);
4849 environment = llvm::Triple::getEnvironmentTypeName(llvm::Triple::MacABI);
4850 return;
4851 case llvm::MachO::PLATFORM_IOSSIMULATOR:
4852 os_type = llvm::Triple::getOSTypeName(llvm::Triple::IOS);
4853 environment =
4854 llvm::Triple::getEnvironmentTypeName(llvm::Triple::Simulator);
4855 return;
4856 case llvm::MachO::PLATFORM_TVOSSIMULATOR:
4857 os_type = llvm::Triple::getOSTypeName(llvm::Triple::TvOS);
4858 environment =
4859 llvm::Triple::getEnvironmentTypeName(llvm::Triple::Simulator);
4860 return;
4861 case llvm::MachO::PLATFORM_WATCHOSSIMULATOR:
4862 os_type = llvm::Triple::getOSTypeName(llvm::Triple::WatchOS);
4863 environment =
4864 llvm::Triple::getEnvironmentTypeName(llvm::Triple::Simulator);
4865 return;
4866 default: {
4867 Log *log(GetLog(LLDBLog::Symbols | LLDBLog::Process));
4868 LLDB_LOGF(log, "unsupported platform in LC_BUILD_VERSION");
4869 }
4870 }
4871 }
4872};
4873
/// Unpacks a packed 32-bit version word: the upper 16 bits are the major
/// version, bits 8-15 the minor version and the low 8 bits the patch version.
struct MinOS {
  uint32_t major_version;
  uint32_t minor_version;
  uint32_t patch_version;

  MinOS(uint32_t version) {
    major_version = version >> 16;
    minor_version = (version >> 8) & 0xffu;
    patch_version = version & 0xffu;
  }
};
4880} // namespace
4881
4882void ObjectFileMachO::GetAllArchSpecs(const llvm::MachO::mach_header &header,
4883 const lldb_private::DataExtractor &data,
4884 lldb::offset_t lc_offset,
4885 ModuleSpec &base_spec,
4886 lldb_private::ModuleSpecList &all_specs) {
4887 auto &base_arch = base_spec.GetArchitecture();
4888 base_arch.SetArchitecture(eArchTypeMachO, header.cputype, header.cpusubtype);
4889 if (!base_arch.IsValid())
4890 return;
4891
4892 bool found_any = false;
4893 auto add_triple = [&](const llvm::Triple &triple) {
4894 auto spec = base_spec;
4895 spec.GetArchitecture().GetTriple() = triple;
4896 if (spec.GetArchitecture().IsValid()) {
4897 spec.GetUUID() = ObjectFileMachO::GetUUID(header, data, lc_offset);
4898 all_specs.Append(spec);
4899 found_any = true;
4900 }
4901 };
4902
4903 // Set OS to an unspecified unknown or a "*" so it can match any OS
4904 llvm::Triple base_triple = base_arch.GetTriple();
4905 base_triple.setOS(llvm::Triple::UnknownOS);
4906 base_triple.setOSName(llvm::StringRef());
4907
4908 if (header.filetype == MH_PRELOAD) {
4909 if (header.cputype == CPU_TYPE_ARM) {
4910 // If this is a 32-bit arm binary, and it's a standalone binary, force
4911 // the Vendor to Apple so we don't accidentally pick up the generic
4912 // armv7 ABI at runtime. Apple's armv7 ABI always uses r7 for the
4913 // frame pointer register; most other armv7 ABIs use a combination of
4914 // r7 and r11.
4915 base_triple.setVendor(llvm::Triple::Apple);
4916 } else {
4917 // Set vendor to an unspecified unknown or a "*" so it can match any
4918 // vendor This is required for correct behavior of EFI debugging on
4919 // x86_64
4920 base_triple.setVendor(llvm::Triple::UnknownVendor);
4921 base_triple.setVendorName(llvm::StringRef());
4922 }
4923 return add_triple(base_triple);
4924 }
4925
4926 llvm::MachO::load_command load_cmd;
4927
4928 // See if there is an LC_VERSION_MIN_* load command that can give
4929 // us the OS type.
4930 lldb::offset_t offset = lc_offset;
4931 for (uint32_t i = 0; i < header.ncmds; ++i) {
4932 const lldb::offset_t cmd_offset = offset;
4933 if (data.GetU32(&offset, &load_cmd, 2) == nullptr)
4934 break;
4935
4936 llvm::MachO::version_min_command version_min;
4937 switch (load_cmd.cmd) {
4938 case llvm::MachO::LC_VERSION_MIN_MACOSX:
4939 case llvm::MachO::LC_VERSION_MIN_IPHONEOS:
4940 case llvm::MachO::LC_VERSION_MIN_TVOS:
4941 case llvm::MachO::LC_VERSION_MIN_WATCHOS: {
4942 if (load_cmd.cmdsize != sizeof(version_min))
4943 break;
4944 if (data.ExtractBytes(cmd_offset, sizeof(version_min),
4945 data.GetByteOrder(), &version_min) == 0)
4946 break;
4947 MinOS min_os(version_min.version);
4948 llvm::SmallString<32> os_name;
4949 llvm::raw_svector_ostream os(os_name);
4950 os << GetOSName(load_cmd.cmd) << min_os.major_version << '.'
4951 << min_os.minor_version << '.' << min_os.patch_version;
4952
4953 auto triple = base_triple;
4954 triple.setOSName(os.str());
4955
4956 // Disambiguate legacy simulator platforms.
4957 if (load_cmd.cmd != llvm::MachO::LC_VERSION_MIN_MACOSX &&
4958 (base_triple.getArch() == llvm::Triple::x86_64 ||
4959 base_triple.getArch() == llvm::Triple::x86)) {
4960 // The combination of legacy LC_VERSION_MIN load command and
4961 // x86 architecture always indicates a simulator environment.
4962 // The combination of LC_VERSION_MIN and arm architecture only
4963 // appears for native binaries. Back-deploying simulator
4964 // binaries on Apple Silicon Macs use the modern unambigous
4965 // LC_BUILD_VERSION load commands; no special handling required.
4966 triple.setEnvironment(llvm::Triple::Simulator);
4967 }
4968 add_triple(triple);
4969 break;
4970 }
4971 default:
4972 break;
4973 }
4974
4975 offset = cmd_offset + load_cmd.cmdsize;
4976 }
4977
4978 // See if there are LC_BUILD_VERSION load commands that can give
4979 // us the OS type.
4980 offset = lc_offset;
4981 for (uint32_t i = 0; i < header.ncmds; ++i) {
4982 const lldb::offset_t cmd_offset = offset;
4983 if (data.GetU32(&offset, &load_cmd, 2) == nullptr)
4984 break;
4985
4986 do {
4987 if (load_cmd.cmd == llvm::MachO::LC_BUILD_VERSION) {
4988 llvm::MachO::build_version_command build_version;
4989 if (load_cmd.cmdsize < sizeof(build_version)) {
4990 // Malformed load command.
4991 break;
4992 }
4993 if (data.ExtractBytes(cmd_offset, sizeof(build_version),
4994 data.GetByteOrder(), &build_version) == 0)
4995 break;
4996 MinOS min_os(build_version.minos);
4997 OSEnv os_env(build_version.platform);
4998 llvm::SmallString<16> os_name;
4999 llvm::raw_svector_ostream os(os_name);
5000 os << os_env.os_type << min_os.major_version << '.'
5001 << min_os.minor_version << '.' << min_os.patch_version;
5002 auto triple = base_triple;
5003 triple.setOSName(os.str());
5004 os_name.clear();
5005 if (!os_env.environment.empty())
5006 triple.setEnvironmentName(os_env.environment);
5007 add_triple(triple);
5008 }
5009 } while (false);
5010 offset = cmd_offset + load_cmd.cmdsize;
5011 }
5012
5013 if (!found_any) {
5014 add_triple(base_triple);
5015 }
5016}
5017
5019 ModuleSP module_sp, const llvm::MachO::mach_header &header,
5020 const lldb_private::DataExtractor &data, lldb::offset_t lc_offset) {
5021 ModuleSpecList all_specs;
5022 ModuleSpec base_spec;
5023 GetAllArchSpecs(header, data, MachHeaderSizeFromMagic(header.magic),
5024 base_spec, all_specs);
5025
5026 // If the object file offers multiple alternative load commands,
5027 // pick the one that matches the module.
5028 if (module_sp) {
5029 const ArchSpec &module_arch = module_sp->GetArchitecture();
5030 for (unsigned i = 0, e = all_specs.GetSize(); i != e; ++i) {
5031 ArchSpec mach_arch =
5033 if (module_arch.IsCompatibleMatch(mach_arch))
5034 return mach_arch;
5035 }
5036 }
5037
5038 // Return the first arch we found.
5039 if (all_specs.GetSize() == 0)
5040 return {};
5041 return all_specs.GetModuleSpecRefAtIndex(0).GetArchitecture();
5042}
5043
5045 ModuleSP module_sp(GetModule());
5046 if (module_sp) {
5047 std::lock_guard<std::recursive_mutex> guard(module_sp->GetMutex());
5049 return GetUUID(m_header, m_data, offset);
5050 }
5051 return UUID();
5052}
5053
5055 uint32_t count = 0;
5056 ModuleSP module_sp(GetModule());
5057 if (module_sp) {
5058 std::lock_guard<std::recursive_mutex> guard(module_sp->GetMutex());
5059 llvm::MachO::load_command load_cmd;
5061 std::vector<std::string> rpath_paths;
5062 std::vector<std::string> rpath_relative_paths;
5063 std::vector<std::string> at_exec_relative_paths;
5064 uint32_t i;
5065 for (i = 0; i < m_header.ncmds; ++i) {
5066 const uint32_t cmd_offset = offset;
5067 if (m_data.GetU32(&offset, &load_cmd, 2) == nullptr)
5068 break;
5069
5070 switch (load_cmd.cmd) {
5071 case LC_RPATH:
5072 case LC_LOAD_DYLIB:
5073 case LC_LOAD_WEAK_DYLIB:
5074 case LC_REEXPORT_DYLIB:
5075 case LC_LOAD_DYLINKER:
5076 case LC_LOADFVMLIB:
5077 case LC_LOAD_UPWARD_DYLIB: {
5078 uint32_t name_offset = cmd_offset + m_data.GetU32(&offset);
5079 const char *path = m_data.PeekCStr(name_offset);
5080 if (path) {
5081 if (load_cmd.cmd == LC_RPATH)
5082 rpath_paths.push_back(path);
5083 else {
5084 if (path[0] == '@') {
5085 if (strncmp(path, "@rpath", strlen("@rpath")) == 0)
5086 rpath_relative_paths.push_back(path + strlen("@rpath"));
5087 else if (strncmp(path, "@executable_path",
5088 strlen("@executable_path")) == 0)
5089 at_exec_relative_paths.push_back(path +
5090 strlen("@executable_path"));
5091 } else {
5092 FileSpec file_spec(path);
5093 if (files.AppendIfUnique(file_spec))
5094 count++;
5095 }
5096 }
5097 }
5098 } break;
5099
5100 default:
5101 break;
5102 }
5103 offset = cmd_offset + load_cmd.cmdsize;
5104 }
5105
5106 FileSpec this_file_spec(m_file);
5107 FileSystem::Instance().Resolve(this_file_spec);
5108
5109 if (!rpath_paths.empty()) {
5110 // Fixup all LC_RPATH values to be absolute paths
5111 std::string loader_path("@loader_path");
5112 std::string executable_path("@executable_path");
5113 for (auto &rpath : rpath_paths) {
5114 if (llvm::StringRef(rpath).startswith(loader_path)) {
5115 rpath.erase(0, loader_path.size());
5116 rpath.insert(0, this_file_spec.GetDirectory().GetCString());
5117 } else if (llvm::StringRef(rpath).startswith(executable_path)) {
5118 rpath.erase(0, executable_path.size());
5119 rpath.insert(0, this_file_spec.GetDirectory().GetCString());
5120 }
5121 }
5122
5123 for (const auto &rpath_relative_path : rpath_relative_paths) {
5124 for (const auto &rpath : rpath_paths) {
5125 std::string path = rpath;
5126 path += rpath_relative_path;
5127 // It is OK to resolve this path because we must find a file on disk
5128 // for us to accept it anyway if it is rpath relative.
5129 FileSpec file_spec(path);
5130 FileSystem::Instance().Resolve(file_spec);
5131 if (FileSystem::Instance().Exists(file_spec) &&
5132 files.AppendIfUnique(file_spec)) {
5133 count++;
5134 break;
5135 }
5136 }
5137 }
5138 }
5139
5140 // We may have @executable_paths but no RPATHS. Figure those out here.
5141 // Only do this if this object file is the executable. We have no way to
5142 // get back to the actual executable otherwise, so we won't get the right
5143 // path.
5144 if (!at_exec_relative_paths.empty() && CalculateType() == eTypeExecutable) {
5145 FileSpec exec_dir = this_file_spec.CopyByRemovingLastPathComponent();
5146 for (const auto &at_exec_relative_path : at_exec_relative_paths) {
5147 FileSpec file_spec =
5148 exec_dir.CopyByAppendingPathComponent(at_exec_relative_path);
5149 if (FileSystem::Instance().Exists(file_spec) &&
5150 files.AppendIfUnique(file_spec))
5151 count++;
5152 }
5153 }
5154 }
5155 return count;
5156}
5157
5159 // If the object file is not an executable it can't hold the entry point.
5160 // m_entry_point_address is initialized to an invalid address, so we can just
5161 // return that. If m_entry_point_address is valid it means we've found it
5162 // already, so return the cached value.
5163
5164 if ((!IsExecutable() && !IsDynamicLoader()) ||
5166 return m_entry_point_address;
5167 }
5168
5169 // Otherwise, look for the UnixThread or Thread command. The data for the
5170 // Thread command is given in /usr/include/mach-o.h, but it is basically:
5171 //
5172 // uint32_t flavor - this is the flavor argument you would pass to
5173 // thread_get_state
5174 // uint32_t count - this is the count of longs in the thread state data
5175 // struct XXX_thread_state state - this is the structure from
5176 // <machine/thread_status.h> corresponding to the flavor.
5177 // <repeat this trio>
5178 //
5179 // So we just keep reading the various register flavors till we find the GPR
5180 // one, then read the PC out of there.
5181 // FIXME: We will need to have a "RegisterContext data provider" class at some
5182 // point that can get all the registers
5183 // out of data in this form & attach them to a given thread. That should
5184 // underlie the MacOS X User process plugin, and we'll also need it for the
5185 // MacOS X Core File process plugin. When we have that we can also use it
5186 // here.
5187 //
5188 // For now we hard-code the offsets and flavors we need:
5189 //
5190 //
5191
5192 ModuleSP module_sp(GetModule());
5193 if (module_sp) {
5194 std::lock_guard<std::recursive_mutex> guard(module_sp->GetMutex());
5195 llvm::MachO::load_command load_cmd;
5197 uint32_t i;
5198 lldb::addr_t start_address = LLDB_INVALID_ADDRESS;
5199 bool done = false;
5200
5201 for (i = 0; i < m_header.ncmds; ++i) {
5202 const lldb::offset_t cmd_offset = offset;
5203 if (m_data.GetU32(&offset, &load_cmd, 2) == nullptr)
5204 break;
5205
5206 switch (load_cmd.cmd) {
5207 case LC_UNIXTHREAD:
5208 case LC_THREAD: {
5209 while (offset < cmd_offset + load_cmd.cmdsize) {
5210 uint32_t flavor = m_data.GetU32(&offset);
5211 uint32_t count = m_data.GetU32(&offset);
5212 if (count == 0) {
5213 // We've gotten off somehow, log and exit;
5214 return m_entry_point_address;
5215 }
5216
5217 switch (m_header.cputype) {
5218 case llvm::MachO::CPU_TYPE_ARM:
5219 if (flavor == 1 ||
5220 flavor == 9) // ARM_THREAD_STATE/ARM_THREAD_STATE32
5221 // from mach/arm/thread_status.h
5222 {
5223 offset += 60; // This is the offset of pc in the GPR thread state
5224 // data structure.
5225 start_address = m_data.GetU32(&offset);
5226 done = true;
5227 }
5228 break;
5229 case llvm::MachO::CPU_TYPE_ARM64:
5230 case llvm::MachO::CPU_TYPE_ARM64_32:
5231 if (flavor == 6) // ARM_THREAD_STATE64 from mach/arm/thread_status.h
5232 {
5233 offset += 256; // This is the offset of pc in the GPR thread state
5234 // data structure.
5235 start_address = m_data.GetU64(&offset);
5236 done = true;
5237 }
5238 break;
5239 case llvm::MachO::CPU_TYPE_I386:
5240 if (flavor ==
5241 1) // x86_THREAD_STATE32 from mach/i386/thread_status.h
5242 {
5243 offset += 40; // This is the offset of eip in the GPR thread state
5244 // data structure.
5245 start_address = m_data.GetU32(&offset);
5246 done = true;
5247 }
5248 break;
5249 case llvm::MachO::CPU_TYPE_X86_64:
5250 if (flavor ==
5251 4) // x86_THREAD_STATE64 from mach/i386/thread_status.h
5252 {
5253 offset += 16 * 8; // This is the offset of rip in the GPR thread
5254 // state data structure.
5255 start_address = m_data.GetU64(&offset);
5256 done = true;
5257 }
5258 break;
5259 default:
5260 return m_entry_point_address;
5261 }
5262 // Haven't found the GPR flavor yet, skip over the data for this
5263 // flavor:
5264 if (done)
5265 break;
5266 offset += count * 4;
5267 }
5268 } break;
5269 case LC_MAIN: {
5270 uint64_t entryoffset = m_data.GetU64(&offset);
5271 SectionSP text_segment_sp =
5273 if (text_segment_sp) {
5274 done = true;
5275 start_address = text_segment_sp->GetFileAddress() + entryoffset;
5276 }
5277 } break;
5278
5279 default:
5280 break;
5281 }
5282 if (done)
5283 break;
5284
5285 // Go to the next load command:
5286 offset = cmd_offset + load_cmd.cmdsize;
5287 }
5288
5289 if (start_address == LLDB_INVALID_ADDRESS && IsDynamicLoader()) {
5290 if (GetSymtab()) {
5291 Symbol *dyld_start_sym = GetSymtab()->FindFirstSymbolWithNameAndType(
5292 ConstString("_dyld_start"), SymbolType::eSymbolTypeCode,
5294 if (dyld_start_sym && dyld_start_sym->GetAddress().IsValid()) {
5295 start_address = dyld_start_sym->GetAddress().GetFileAddress();
5296 }
5297 }
5298 }
5299
5300 if (start_address != LL