LLDB mainline
MachOTrie.cpp
Go to the documentation of this file.
1//===----------------------------------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "MachOTrie.h"
10
12#include "lldb/Utility/Flags.h"
13
14#include "llvm/ADT/StringRef.h"
15#include "llvm/BinaryFormat/MachO.h"
16
17#include <cstdio>
18
19using namespace lldb;
20using namespace lldb_private;
21using namespace llvm::MachO;
22
/// Maximum length accepted for a symbol name built by concatenating
/// export-trie edge labels.
///
/// In a corrupt trie the terminator of an edge label can sit far away in the
/// trie data, making that one label many megabytes long; without a cap,
/// appending it to the running name would request an unbounded allocation.
/// No legitimate symbol name comes close to this size, and 1 MiB is also the
/// symbol length limit in ld.
static constexpr size_t kMaxTrieSymbolNameLength = size_t{1} << 20; // 1 MiB
30
31void TrieEntry::Dump() const {
32 printf("0x%16.16llx 0x%16.16llx 0x%16.16llx \"%s\"",
33 static_cast<unsigned long long>(address),
34 static_cast<unsigned long long>(flags),
35 static_cast<unsigned long long>(other), name.GetCString());
36 if (import_name)
37 printf(" -> \"%s\"\n", import_name.GetCString());
38 else
39 printf("\n");
40}
41
42void TrieEntryWithOffset::Dump(uint32_t idx) const {
43 printf("[%3u] 0x%16.16llx: ", idx,
44 static_cast<unsigned long long>(nodeOffset));
45 entry.Dump();
46}
47
48namespace {
49
50bool ParseTrieEntriesImpl(DataExtractor &data, lldb::offset_t offset,
51 const bool is_arm, addr_t text_seg_base_addr,
52 std::string &prefix,
53 std::set<lldb::addr_t> &resolver_addresses,
54 std::vector<TrieEntryWithOffset> &reexports,
55 std::vector<TrieEntryWithOffset> &ext_symbols,
56 std::set<lldb::offset_t> &visited_nodes) {
57 if (!data.ValidOffset(offset))
58 return true;
59
60 // Every node in a well-formed trie is reached by exactly one path, so a node
61 // offset seen twice means the trie is corrupt.
62 if (!visited_nodes.insert(offset).second)
63 return false;
64
65 // Terminal node -- end of a branch, possibly add this to
66 // the symbol table or resolver table.
67 const uint64_t terminalSize = data.GetULEB128(&offset);
68 lldb::offset_t children_offset = offset + terminalSize;
69 if (terminalSize != 0) {
70 TrieEntryWithOffset e(offset);
71 e.entry.flags = data.GetULEB128(&offset);
72 const char *import_name = nullptr;
73 if (e.entry.flags & EXPORT_SYMBOL_FLAGS_REEXPORT) {
74 e.entry.address = 0;
75 e.entry.other = data.GetULEB128(&offset); // dylib ordinal
76 import_name = data.GetCStr(&offset);
77 } else {
78 e.entry.address = data.GetULEB128(&offset);
79 if (text_seg_base_addr != LLDB_INVALID_ADDRESS)
80 e.entry.address += text_seg_base_addr;
81 if (e.entry.flags & EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER) {
82 e.entry.other = data.GetULEB128(&offset);
83 uint64_t resolver_addr = e.entry.other;
84 if (text_seg_base_addr != LLDB_INVALID_ADDRESS)
85 resolver_addr += text_seg_base_addr;
86 if (is_arm)
87 resolver_addr &= THUMB_ADDRESS_BIT_MASK;
88 resolver_addresses.insert(resolver_addr);
89 } else
90 e.entry.other = 0;
91 }
92 bool add_this_entry = false;
93 if (Flags(e.entry.flags).Test(EXPORT_SYMBOL_FLAGS_REEXPORT) &&
94 import_name && import_name[0]) {
95 // add symbols that are reexport symbols with a valid import name.
96 add_this_entry = true;
97 } else if (e.entry.flags == 0 &&
98 (import_name == nullptr || import_name[0] == '\0')) {
99 // add externally visible symbols, in case the nlist record has
100 // been stripped/omitted.
101 add_this_entry = true;
102 }
103 if (add_this_entry) {
104 if (prefix.size() > 1) {
105 // Skip the leading '_'
106 e.entry.name.SetString(llvm::StringRef(prefix).drop_front());
107 }
108 if (import_name) {
109 // Skip the leading '_'
110 e.entry.import_name.SetCString(import_name + 1);
111 }
112 if (Flags(e.entry.flags).Test(EXPORT_SYMBOL_FLAGS_REEXPORT)) {
113 reexports.push_back(e);
114 } else {
115 if (is_arm && (e.entry.address & 1)) {
116 e.entry.flags |= TRIE_SYMBOL_IS_THUMB;
117 e.entry.address &= THUMB_ADDRESS_BIT_MASK;
118 }
119 ext_symbols.push_back(e);
120 }
121 }
122 }
123
124 const uint8_t childrenCount = data.GetU8(&children_offset);
125 for (uint8_t i = 0; i < childrenCount; ++i) {
126 const char *cstr = data.GetCStr(&children_offset);
127 if (!cstr)
128 return false; // Corrupt data
129 if (prefix.size() + llvm::StringRef(cstr).size() > kMaxTrieSymbolNameLength)
130 return false; // Corrupt data: implausibly long symbol name.
131 const size_t prevSize = prefix.size();
132 prefix.append(cstr);
133 lldb::offset_t childNodeOffset = data.GetULEB128(&children_offset);
134 // A child offset of 0 points back at the root; like any other repeated
135 // offset it is a cycle, which ParseTrieEntriesImpl rejects as corrupt.
136 if (!ParseTrieEntriesImpl(data, childNodeOffset, is_arm, text_seg_base_addr,
137 prefix, resolver_addresses, reexports,
138 ext_symbols, visited_nodes))
139 return false;
140 prefix.resize(prevSize);
141 }
142 return true;
143}
144
145} // namespace
146
148 DataExtractor &data, const bool is_arm, lldb::addr_t text_seg_base_addr,
149 std::set<lldb::addr_t> &resolver_addresses,
150 std::vector<TrieEntryWithOffset> &reexports,
151 std::vector<TrieEntryWithOffset> &ext_symbols) {
152 lldb::offset_t offset = 0;
153 std::set<lldb::offset_t> visited_nodes;
154 std::string prefix;
155 return ParseTrieEntriesImpl(data, offset, is_arm, text_seg_base_addr, prefix,
156 resolver_addresses, reexports, ext_symbols,
157 visited_nodes);
158}
static constexpr size_t kMaxTrieSymbolNameLength
Upper bound on the length of a symbol name assembled from export-trie edge labels.
Definition MachOTrie.cpp:29
A data extractor class.
uint64_t GetULEB128(lldb::offset_t *offset_ptr) const
Extract an unsigned LEB128 value from *offset_ptr.
const char * GetCStr(lldb::offset_t *offset_ptr) const
Extract a C string from *offset_ptr.
bool ValidOffset(lldb::offset_t offset) const
Test the validity of offset.
uint8_t GetU8(lldb::offset_t *offset_ptr) const
Extract a uint8_t value from *offset_ptr.
A class to manage flags.
Definition Flags.h:22
bool Test(ValueType bit) const
Test a single flag bit.
Definition Flags.h:96
#define LLDB_INVALID_ADDRESS
A class that represents a running process on the host machine.
constexpr uint64_t THUMB_ADDRESS_BIT_MASK
Mask that clears the low Thumb bit from an ARM function address.
Definition MachOTrie.h:30
bool ParseTrieEntries(DataExtractor &data, const bool is_arm, lldb::addr_t text_seg_base_addr, std::set< lldb::addr_t > &resolver_addresses, std::vector< TrieEntryWithOffset > &reexports, std::vector< TrieEntryWithOffset > &ext_symbols)
Parse the Mach-O export trie (the dyld symbol trie from LC_DYLD_INFO or LC_DYLD_EXPORTS_TRIE) startin...
constexpr uint64_t TRIE_SYMBOL_IS_THUMB
Set on TrieEntry::flags for an ARM symbol whose address has the low Thumb bit set; the bit is strippe...
Definition MachOTrie.h:27
uint64_t offset_t
Definition lldb-types.h:85
uint64_t addr_t
Definition lldb-types.h:80
A TrieEntry paired with the offset of the trie node it was parsed from.
Definition MachOTrie.h:46
void Dump(uint32_t idx) const
Definition MachOTrie.cpp:42
ConstString import_name
Definition MachOTrie.h:42