LLDB mainline
HashedNameToDIE.cpp
Go to the documentation of this file.
1//===-- HashedNameToDIE.cpp -----------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "HashedNameToDIE.h"
10#include "llvm/ADT/StringRef.h"
11
12using namespace lldb_private::dwarf;
13
// Passes every entry of die_info_array to callback, wrapped in a DIERef, in
// array order. Returns false as soon as one callback invocation returns
// false; returns true when all invocations returned true (which includes the
// empty-array case).
15 const DIEInfoArray &die_info_array,
16 llvm::function_ref<bool(DIERef ref)> callback) {
17 const size_t count = die_info_array.size();
18 for (size_t i = 0; i < count; ++i)
19 if (!callback(DIERef(die_info_array[i])))
20 return false;
21 return true;
22}
23
// Passes the entries of die_info_array whose tag is compatible with `tag` to
// callback, in array order, stopping at the first invocation that returns
// false.
25 const DIEInfoArray &die_info_array, const dw_tag_t tag,
26 llvm::function_ref<bool(DIERef ref)> callback) {
// A requested tag of zero disables filtering: every entry is forwarded.
27 if (tag == 0) {
28 ExtractDIEArray(die_info_array, callback);
29 return;
30 }
31
32 const size_t count = die_info_array.size();
33 for (size_t i = 0; i < count; ++i) {
34 const dw_tag_t die_tag = die_info_array[i].tag;
// An entry whose own tag is zero is accepted for any requested tag.
35 bool tag_matches = die_tag == 0 || tag == die_tag;
36 if (!tag_matches) {
// DW_TAG_class_type and DW_TAG_structure_type are treated as interchangeable:
// an entry with either tag satisfies a request for either tag.
37 if (die_tag == DW_TAG_class_type || die_tag == DW_TAG_structure_type)
38 tag_matches = tag == DW_TAG_structure_type || tag == DW_TAG_class_type;
39 }
40 if (tag_matches) {
41 if (!callback(DIERef(die_info_array[i])))
42 return;
43 }
44 }
45}
46
// Passes the entries of die_info_array that carry the given
// qualified_name_hash and a tag compatible with `tag` to callback, in array
// order, stopping at the first invocation that returns false.
48 const DIEInfoArray &die_info_array, const dw_tag_t tag,
49 const uint32_t qualified_name_hash,
50 llvm::function_ref<bool(DIERef ref)> callback) {
// NOTE(review): with tag == 0 every entry is forwarded, so the
// qualified_name_hash filter is bypassed as well — confirm this is intended.
51 if (tag == 0) {
52 ExtractDIEArray(die_info_array, callback);
53 return;
54 }
55
56 const size_t count = die_info_array.size();
57 for (size_t i = 0; i < count; ++i) {
// The hash comparison runs first; entries with a different hash are skipped
// before their tag is looked at.
58 if (qualified_name_hash != die_info_array[i].qualified_name_hash)
59 continue;
60 const dw_tag_t die_tag = die_info_array[i].tag;
// An entry whose own tag is zero is accepted for any requested tag.
61 bool tag_matches = die_tag == 0 || tag == die_tag;
62 if (!tag_matches) {
// Class and structure tags satisfy each other.
63 if (die_tag == DW_TAG_class_type || die_tag == DW_TAG_structure_type)
64 tag_matches = tag == DW_TAG_structure_type || tag == DW_TAG_class_type;
65 }
66 if (tag_matches) {
67 if (!callback(DIERef(die_info_array[i])))
68 return;
69 }
70 }
71}
72
// Forwards class/struct entries of die_info_array to callback, selecting
// either implementation entries or non-implementation entries depending on
// return_implementation_only_if_available.
//
// When the flag is true, only the first entry flagged
// eTypeFlagClassIsImplementation is reported and the scan ends. When it is
// false, every entry without that flag is reported until callback returns
// false.
74 const DIEInfoArray &die_info_array,
75 bool return_implementation_only_if_available,
76 llvm::function_ref<bool(DIERef ref)> callback) {
77 const size_t count = die_info_array.size();
78 for (size_t i = 0; i < count; ++i) {
79 const dw_tag_t die_tag = die_info_array[i].tag;
// Only entries tagged as class or structure, or carrying no tag (0), are
// considered.
80 if (!(die_tag == 0 || die_tag == DW_TAG_class_type ||
81 die_tag == DW_TAG_structure_type))
82 continue;
83 bool is_implementation =
84 (die_info_array[i].type_flags & eTypeFlagClassIsImplementation) != 0;
// Keep only the kind of entry the caller asked for.
85 if (is_implementation != return_implementation_only_if_available)
86 continue;
87 if (return_implementation_only_if_available) {
88 // We found the one true definition for this class, so only return
89 // that
// The callback's result is deliberately unused: the scan ends either way.
90 callback(DIERef(die_info_array[i]));
91 return;
92 }
93 if (!callback(DIERef(die_info_array[i])))
94 return;
95 }
96}
97
// Passes to callback each entry whose type_flags, after masking with
// type_flag_mask, equal type_flag_value. Entries are visited in array order
// and the scan stops at the first callback invocation that returns false.
99 const DIEInfoArray &die_info_array, uint32_t type_flag_mask,
100 uint32_t type_flag_value, llvm::function_ref<bool(DIERef ref)> callback) {
101 const size_t count = die_info_array.size();
102 for (size_t i = 0; i < count; ++i) {
103 if ((die_info_array[i].type_flags & type_flag_mask) == type_flag_value) {
104 if (!callback(DIERef(die_info_array[i])))
105 return;
106 }
107 }
108}
109
// Maps an atom type value to a short, static display name. Values that match
// no case fall out of the switch and yield "<invalid>".
111 switch (atom) {
112 case eAtomTypeNULL:
113 return "NULL";
115 return "die-offset";
117 return "cu-offset";
118 case eAtomTypeTag:
119 return "die-tag";
121 return "name-flags";
123 return "type-flags";
125 return "qualified-name-hash";
126 }
127 return "<invalid>";
128}
129
// Member-wise constructor: o becomes die_offset, t the tag, f the type_flags
// and h the qualified_name_hash.
131 uint32_t h)
132 : die_offset(o), tag(t), type_flags(f), qualified_name_hash(h) {}
133
// Builds a prologue with the given DIE base offset and a single default atom:
// a DIE offset encoded as DW_FORM_data4.
135 : die_base_offset(_die_base_offset), atoms() {
136 // Define an array of DIE offsets by first defining an array, and then define
137 // the atom type for the array, in this case we have an array of DIE offsets.
138 AppendAtom(eAtomTypeDIEOffset, DW_FORM_data4);
139}
140
// Drops every registered atom and resets the bookkeeping derived from them:
// the fixed-size flag goes back to true, the minimum record size to zero and
// the atom bit mask to empty.
142 hash_data_has_fixed_byte_size = true;
143 min_hash_data_byte_size = 0;
144 atom_mask = 0;
145 atoms.clear();
146}
147
149 return (atom_mask & (1u << atom_type)) != 0;
150}
151
// Resets the prologue to an empty state: zero DIE base offset and no atoms.
153 die_base_offset = 0;
154 ClearAtoms();
155}
156
158 atoms.push_back({type, form});
159 atom_mask |= 1u << type;
160 switch (form) {
161 case DW_FORM_indirect:
162 case DW_FORM_exprloc:
163 case DW_FORM_flag_present:
164 case DW_FORM_ref_sig8:
165 llvm_unreachable("Unhandled atom form");
166
167 case DW_FORM_addrx:
168 case DW_FORM_string:
169 case DW_FORM_block:
170 case DW_FORM_block1:
171 case DW_FORM_sdata:
172 case DW_FORM_udata:
173 case DW_FORM_ref_udata:
174 case DW_FORM_GNU_addr_index:
175 case DW_FORM_GNU_str_index:
176 hash_data_has_fixed_byte_size = false;
177 [[fallthrough]];
178 case DW_FORM_flag:
179 case DW_FORM_data1:
180 case DW_FORM_ref1:
181 case DW_FORM_sec_offset:
182 min_hash_data_byte_size += 1;
183 break;
184
185 case DW_FORM_block2:
186 hash_data_has_fixed_byte_size = false;
187 [[fallthrough]];
188 case DW_FORM_data2:
189 case DW_FORM_ref2:
190 min_hash_data_byte_size += 2;
191 break;
192
193 case DW_FORM_block4:
194 hash_data_has_fixed_byte_size = false;
195 [[fallthrough]];
196 case DW_FORM_data4:
197 case DW_FORM_ref4:
198 case DW_FORM_addr:
199 case DW_FORM_ref_addr:
200 case DW_FORM_strp:
201 min_hash_data_byte_size += 4;
202 break;
203
204 case DW_FORM_data8:
205 case DW_FORM_ref8:
206 min_hash_data_byte_size += 8;
207 break;
208 }
209}
210
// Decodes the prologue from `data` starting at `offset` and returns the
// offset just past what was consumed. Any previously registered atoms are
// discarded first.
//
// Layout read here: a 32-bit die_base_offset, a 32-bit atom count, then one
// (16-bit type, 16-bit form) pair per atom.
213 lldb::offset_t offset) {
214 ClearAtoms();
215
216 die_base_offset = data.GetU32(&offset);
217
218 const uint32_t atom_count = data.GetU32(&offset);
// The value 0x00060003 in the atom-count slot marks the old layout: skip
// 32-bit words up to and including the first zero word.
219 if (atom_count == 0x00060003u) {
220 // Old format, deal with contents of old pre-release format.
221 while (data.GetU32(&offset)) {
222 /* do nothing */;
223 }
224
225 // Hardcode to the only known value for now.
226 AppendAtom(eAtomTypeDIEOffset, DW_FORM_data4);
227 } else {
// NOTE(review): atom_count is taken from the input without a bounds check
// against the remaining data — confirm callers only pass trusted tables.
228 for (uint32_t i = 0; i < atom_count; ++i) {
229 AtomType type = (AtomType)data.GetU16(&offset);
230 dw_form_t form = (dw_form_t)data.GetU16(&offset);
231 AppendAtom(type, form);
232 }
233 }
234 return offset;
235}
236
238 // Size of the prologue: die_base_offset, one 32-bit word, and one Atom per
239 // entry in atoms.
// NOTE(review): no trailing terminator Atom is counted by this expression;
// the sizeof(uint32_t) term presumably stands for the 32-bit atom count that
// Read() consumes after die_base_offset — confirm.
240 return sizeof(die_base_offset) + sizeof(uint32_t) +
241 atoms.size() * sizeof(Atom);
242}
243
// Returns the smallest possible byte size of one hash data record, i.e. the
// sum accumulated by AppendAtom over all registered atoms.
245 return min_hash_data_byte_size;
246}
247
// Returns true while no atom with a variable-length form has been appended,
// in which case every hash data record has exactly the minimum byte size.
249 return hash_data_has_fixed_byte_size;
250}
251
// Forwards to header_data.GetByteSize().
253 return header_data.GetByteSize();
254}
255
// Reads the generic MappedHash header first and, if that did not report
// UINT32_MAX, the prologue (header_data) immediately after it. Returns the
// resulting offset; UINT32_MAX from the base reader is passed through
// unchanged.
257 lldb::offset_t offset) {
258 offset = MappedHash::Header<Prologue>::Read(data, offset);
259 if (offset != UINT32_MAX) {
260 offset = header_data.Read(data, offset);
261 }
262 return offset;
263}
264
// Decodes one hash data record into hash_data by extracting one form value
// per atom in header_data.atoms, in order, advancing *offset_ptr.
//
// Returns false when there are no atoms, when a form value cannot be
// extracted, or when the record ends up with die_offset equal to
// DW_INVALID_OFFSET. Fields for atoms not present in the table are left as
// they were in hash_data.
266 lldb::offset_t *offset_ptr,
267 DIEInfo &hash_data) const {
268 const size_t num_atoms = header_data.atoms.size();
269 if (num_atoms == 0)
270 return false;
271
272 for (size_t i = 0; i < num_atoms; ++i) {
273 DWARFFormValue form_value(nullptr, header_data.atoms[i].form);
274
275 if (!form_value.ExtractValue(data, offset_ptr))
276 return false;
277
278 switch (header_data.atoms[i].type) {
279 case eAtomTypeDIEOffset: // DIE offset, check form for encoding
// Data forms hold the offset directly; other forms are resolved as a
// reference relative to die_base_offset.
280 hash_data.die_offset =
281 DWARFFormValue::IsDataForm(form_value.Form())
282 ? form_value.Unsigned()
283 : form_value.Reference(header_data.die_base_offset);
284 break;
285
286 case eAtomTypeTag: // DW_TAG value for the DIE
287 hash_data.tag = (dw_tag_t)form_value.Unsigned();
288 break;
289
290 case eAtomTypeTypeFlags: // Flags from enum TypeFlags
291 hash_data.type_flags = (uint32_t)form_value.Unsigned();
292 break;
293
294 case eAtomTypeQualNameHash: // 32 bit hash of the fully qualified name
295 hash_data.qualified_name_hash = form_value.Unsigned();
296 break;
297
298 default:
299 // We can always skip atoms we don't know about.
300 break;
301 }
302 }
303 return hash_data.die_offset != DW_INVALID_OFFSET;
304}
305
// Stores the table data, the string table and the table name in m_data,
// m_string_table and m_name respectively.
308 const lldb_private::DWARFDataExtractor &string_table, const char *name)
310 m_data(table_data), m_string_table(string_table), m_name(name) {}
311
// Resolves a table key to its string by treating the key as an offset into
// m_string_table.
312const char *
314 // The key in the DWARF table is the .debug_str offset for the string
315 return m_string_table.PeekCStr(key);
316}
317
// Reads all hash data records stored at hash_data_offset into hash_data.
// The 32-bit string offset at the start is skipped, then a 32-bit record
// count is read followed by that many records. Returns false as soon as one
// record fails to decode; a zero count clears hash_data and succeeds.
319 HashData &hash_data) const {
320 lldb::offset_t offset = hash_data_offset;
321 // Skip string table offset that contains offset of hash name in .debug_str.
322 offset += 4;
323 const uint32_t count = m_data.GetU32(&offset);
324 if (count > 0) {
// NOTE(review): hash_data is resized to count before any record has been
// decoded, and on failure it keeps that size — confirm callers ignore the
// contents when false is returned.
325 hash_data.resize(count);
326 for (uint32_t i = 0; i < count; ++i) {
327 if (!m_header.Read(m_data, &offset, hash_data[i]))
328 return false;
329 }
330 } else
331 hash_data.clear();
332 return true;
333}
334
// Decodes one key/value group at *hash_data_offset_ptr and, if the group's
// string equals `name`, collects its records into pair.value.
//
// Result:
//  - eResultEndOfHashData: the key read was zero (end of the chain).
//  - eResultKeyMatch / eResultKeyMismatch: the group was consumed and
//    *hash_data_offset_ptr points past it.
//  - eResultError: the data was unusable; *hash_data_offset_ptr is set to
//    UINT32_MAX.
337 llvm::StringRef name, lldb::offset_t *hash_data_offset_ptr,
338 Pair &pair) const {
339 pair.key = m_data.GetU32(hash_data_offset_ptr);
340 pair.value.clear();
341
342 // If the key is zero, this terminates our chain of HashData objects for this
343 // hash value.
344 if (pair.key == 0)
345 return eResultEndOfHashData;
346
347 // There definitely should be a string for this string offset, if there
348 // isn't, there is something wrong, return and error.
349 const char *strp_cstr = m_string_table.PeekCStr(pair.key);
350 if (strp_cstr == nullptr) {
351 *hash_data_offset_ptr = UINT32_MAX;
352 return eResultError;
353 }
354
355 const uint32_t count = m_data.GetU32(hash_data_offset_ptr);
// Lower bound on the bytes the group's records occupy; used both to validate
// the group against the available data and to skip it without decoding.
356 const size_t min_total_hash_data_size =
357 count * m_header.header_data.GetMinimumHashDataByteSize();
358 if (count > 0 && m_data.ValidOffsetForDataOfSize(*hash_data_offset_ptr,
359 min_total_hash_data_size)) {
360 // We have at least one HashData entry, and we have enough data to parse at
361 // least "count" HashData entries.
362
363 // First make sure the entire C string matches...
364 const bool match = name == strp_cstr;
365
366 if (!match && m_header.header_data.HashDataHasFixedByteSize()) {
367 // If the string doesn't match and we have fixed size data, we can just
368 // add the total byte size of all HashData objects to the hash data
369 // offset and be done...
370 *hash_data_offset_ptr += min_total_hash_data_size;
371 } else {
372 // If the string does match, or we don't have fixed size data then we
373 // need to read the hash data as a stream. If the string matches we also
374 // append all HashData objects to the value array.
375 for (uint32_t i = 0; i < count; ++i) {
376 DIEInfo die_info;
377 if (m_header.Read(m_data, hash_data_offset_ptr, die_info)) {
378 // Only happened if the HashData of the string matched...
379 if (match)
380 pair.value.push_back(die_info);
381 } else {
382 // Something went wrong while reading the data.
383 *hash_data_offset_ptr = UINT32_MAX;
384 return eResultError;
385 }
386 }
387 }
388 // Return the correct response depending on if the string matched or not...
389 if (match) {
390 // The key (cstring) matches and we have lookup results!
391 return eResultKeyMatch;
392 } else {
393 // The key doesn't match, this function will get called again for the
394 // next key/value or the key terminator which in our case is a zero
395 // .debug_str offset.
396 return eResultKeyMismatch;
397 }
398 } else {
// A group with zero records, or one that cannot fit in the remaining data, is
// treated as an error.
399 *hash_data_offset_ptr = UINT32_MAX;
400 return eResultError;
401 }
402}
403
407 lldb::offset_t *hash_data_offset_ptr, Pair &pair) const {
408 pair.key = m_data.GetU32(hash_data_offset_ptr);
409 // If the key is zero, this terminates our chain of HashData objects for this
410 // hash value.
411 if (pair.key == 0)
412 return eResultEndOfHashData;
413
414 // There definitely should be a string for this string offset, if there
415 // isn't, there is something wrong, return and error.
416 const char *strp_cstr = m_string_table.PeekCStr(pair.key);
417 if (strp_cstr == nullptr)
418 return eResultError;
419
420 const uint32_t count = m_data.GetU32(hash_data_offset_ptr);
421 const size_t min_total_hash_data_size =
422 count * m_header.header_data.GetMinimumHashDataByteSize();
423 if (count > 0 && m_data.ValidOffsetForDataOfSize(*hash_data_offset_ptr,
424 min_total_hash_data_size)) {
425 const bool match = regex.Execute(llvm::StringRef(strp_cstr));
426
427 if (!match && m_header.header_data.HashDataHasFixedByteSize()) {
428 // If the regex doesn't match and we have fixed size data, we can just
429 // add the total byte size of all HashData objects to the hash data
430 // offset and be done...
431 *hash_data_offset_ptr += min_total_hash_data_size;
432 } else {
433 // If the string does match, or we don't have fixed size data then we
434 // need to read the hash data as a stream. If the string matches we also
435 // append all HashData objects to the value array.
436 for (uint32_t i = 0; i < count; ++i) {
437 DIEInfo die_info;
438 if (m_header.Read(m_data, hash_data_offset_ptr, die_info)) {
439 // Only happened if the HashData of the string matched...
440 if (match)
441 pair.value.push_back(die_info);
442 } else {
443 // Something went wrong while reading the data
444 *hash_data_offset_ptr = UINT32_MAX;
445 return eResultError;
446 }
447 }
448 }
449 // Return the correct response depending on if the string matched or not...
450 if (match) {
451 // The key (cstring) matches and we have lookup results!
452 return eResultKeyMatch;
453 } else {
454 // The key doesn't match, this function will get called again for the
455 // next key/value or the key terminator which in our case is a zero
456 // .debug_str offset.
457 return eResultKeyMismatch;
458 }
459 } else {
460 *hash_data_offset_ptr = UINT32_MAX;
461 return eResultError;
462 }
463}
464
// Walks every hash bucket chain of the table and gathers the records of all
// groups whose string matches `regex`. The gathered records are swapped into
// die_info_array at the end, so whatever die_info_array held on entry is
// replaced rather than extended.
467 DIEInfoArray &die_info_array) const {
468 const uint32_t hash_count = m_header.hashes_count;
// One Pair is shared by all iterations; AppendHashDataForRegularExpression
// appends to pair.value, so matches accumulate across groups and buckets.
469 Pair pair;
470 for (uint32_t offset_idx = 0; offset_idx < hash_count; ++offset_idx) {
471 lldb::offset_t hash_data_offset = GetHashDataOffset(offset_idx);
472 while (hash_data_offset != UINT32_MAX) {
473 const lldb::offset_t prev_hash_data_offset = hash_data_offset;
474 Result hash_result =
475 AppendHashDataForRegularExpression(regex, &hash_data_offset, pair);
// No forward progress means nothing could be read; leave this chain to avoid
// looping forever.
476 if (prev_hash_data_offset == hash_data_offset)
477 break;
478
479 // Check the result of getting our hash data.
480 switch (hash_result) {
481 case eResultKeyMatch:
482 case eResultKeyMismatch:
483 // Whether we matches or not, it doesn't matter, we keep looking.
484 break;
485
486 case eResultEndOfHashData:
487 case eResultError:
// Setting the offset to UINT32_MAX ends the while loop for this chain.
488 hash_data_offset = UINT32_MAX;
489 break;
490 }
491 }
492 }
493 die_info_array.swap(pair.value);
494}
495
497 const uint32_t die_offset_start, const uint32_t die_offset_end,
498 DIEInfoArray &die_info_array) const {
499 const uint32_t hash_count = m_header.hashes_count;
500 for (uint32_t offset_idx = 0; offset_idx < hash_count; ++offset_idx) {
501 bool done = false;
502 lldb::offset_t hash_data_offset = GetHashDataOffset(offset_idx);
503 while (!done && hash_data_offset != UINT32_MAX) {
504 KeyType key = m_data.GetU32(&hash_data_offset);
505 // If the key is zero, this terminates our chain of HashData objects for
506 // this hash value.
507 if (key == 0)
508 break;
509
510 const uint32_t count = m_data.GetU32(&hash_data_offset);
511 for (uint32_t i = 0; i < count; ++i) {
512 DIEInfo die_info;
513 if (m_header.Read(m_data, &hash_data_offset, die_info)) {
514 if (die_info.die_offset == 0)
515 done = true;
516 if (die_offset_start <= die_info.die_offset &&
517 die_info.die_offset < die_offset_end)
518 die_info_array.push_back(die_info);
519 }
520 }
521 }
522 }
523}
524
// Looks up `name` in the table and passes each resulting entry to callback.
// Returns true for an empty name without performing a lookup; otherwise
// returns what ExtractDIEArray returns, i.e. false if a callback invocation
// returned false.
526 llvm::StringRef name, llvm::function_ref<bool(DIERef ref)> callback) {
527 if (name.empty())
528 return true;
529
530 DIEInfoArray die_info_array;
531 FindByName(name, die_info_array);
532 return DWARFMappedHash::ExtractDIEArray(die_info_array, callback);
533}
534
// Looks up `name` in the table and passes the entries whose tag is compatible
// with `tag` to callback, using the tag-filtering ExtractDIEArray overload.
536 llvm::StringRef name, const dw_tag_t tag,
537 llvm::function_ref<bool(DIERef ref)> callback) {
538 DIEInfoArray die_info_array;
539 FindByName(name, die_info_array);
540 DWARFMappedHash::ExtractDIEArray(die_info_array, tag, callback);
541}
542
// Looks up `name` in the table and passes to callback the entries accepted by
// the ExtractDIEArray overload that filters on both tag and
// qualified_name_hash.
544 llvm::StringRef name, const dw_tag_t tag,
545 const uint32_t qualified_name_hash,
546 llvm::function_ref<bool(DIERef ref)> callback) {
547 DIEInfoArray die_info_array;
548 FindByName(name, die_info_array);
549 DWARFMappedHash::ExtractDIEArray(die_info_array, tag, qualified_name_hash,
550 callback);
551}
552
// Looks up `name` and reports class definitions to callback.
//
// With must_be_implementation set and type flags present in the table, only
// entries whose type_flags equal eTypeFlagClassIsImplementation are reported.
// Otherwise an implementation entry is reported alone if one exists, and all
// remaining class/struct candidates are reported if none does.
554 llvm::StringRef name, llvm::function_ref<bool(DIERef ref)> callback,
555 bool must_be_implementation) {
556 DIEInfoArray die_info_array;
557 FindByName(name, die_info_array);
// The fast path is taken only when the table actually carries a type-flags
// atom.
558 if (must_be_implementation &&
559 GetHeader().header_data.ContainsAtom(eAtomTypeTypeFlags)) {
560 // If we have two atoms, then we have the DIE offset and the type flags
561 // so we can find the objective C class efficiently.
// A mask of UINT32_MAX makes the comparison cover every flag bit.
// NOTE(review): the callee is presumably ExtractTypesFromDIEArray, judging by
// the (array, mask, value, callback) argument list — confirm.
563 die_info_array, UINT32_MAX, eTypeFlagClassIsImplementation, callback);
564 return;
565 }
566 // We don't only want the one true definition, so try and see what we can
567 // find, and only return class or struct DIEs. If we do have the full
568 // implementation, then return it alone, else return all possible
569 // matches.
570 bool found_implementation = false;
// First pass: look for an implementation entry and remember whether the
// wrapped callback was invoked.
572 die_info_array, true /*return_implementation_only_if_available*/,
573 [&](DIERef ref) {
574 found_implementation = true;
575 // Here the return value does not matter as we are called at most once.
576 return callback(ref);
577 });
578 if (found_implementation)
579 return;
// Second pass: no implementation entry was reported, so report the
// non-implementation candidates instead.
581 die_info_array, false /*return_implementation_only_if_available*/,
582 callback);
583}
584
// Looks up `name` and, when Find succeeds, swaps the matching records into
// die_info_array. An empty name or a failed lookup leaves die_info_array
// untouched.
586 DIEInfoArray &die_info_array) {
587 if (name.empty())
588 return;
589
590 Pair kv_pair;
591 if (Find(name, kv_pair))
592 die_info_array.swap(kv_pair.value);
593}
Identifies a DWARF debug info entry within a given Module.
Definition: DIERef.h:28
DWARFDIE Reference() const
dw_form_t Form() const
uint64_t Unsigned() const
static bool IsDataForm(const dw_form_t form)
bool ExtractValue(const lldb_private::DWARFDataExtractor &data, lldb::offset_t *offset_ptr)
lldb::offset_t Read(lldb_private::DataExtractor &data, lldb::offset_t offset) override
A class for reading and using a saved hash table from a block of data in memory.
void FindCompleteObjCClassByName(llvm::StringRef name, llvm::function_ref< bool(DIERef ref)> callback, bool must_be_implementation)
Result AppendHashDataForRegularExpression(const lldb_private::RegularExpression &regex, lldb::offset_t *hash_data_offset_ptr, Pair &pair) const
Result GetHashDataForName(llvm::StringRef name, lldb::offset_t *hash_data_offset_ptr, Pair &pair) const override
MemoryTable(lldb_private::DWARFDataExtractor &table_data, const lldb_private::DWARFDataExtractor &string_table, const char *name)
void FindByNameAndTagAndQualifiedNameHash(llvm::StringRef name, const dw_tag_t tag, const uint32_t qualified_name_hash, llvm::function_ref< bool(DIERef ref)> callback)
bool ReadHashData(uint32_t hash_data_offset, HashData &hash_data) const override
void AppendAllDIEsInRange(const uint32_t die_offset_start, const uint32_t die_offset_end, DIEInfoArray &die_info_array) const
void FindByNameAndTag(llvm::StringRef name, const dw_tag_t tag, llvm::function_ref< bool(DIERef ref)> callback)
bool FindByName(llvm::StringRef name, llvm::function_ref< bool(DIERef ref)> callback)
const char * GetStringForKeyType(KeyType key) const override
void AppendAllDIEsThatMatchingRegex(const lldb_private::RegularExpression &regex, DIEInfoArray &die_info_array) const
lldb::offset_t Read(const lldb_private::DataExtractor &data, lldb::offset_t offset)
size_t GetMinimumHashDataByteSize() const
Prologue(dw_offset_t _die_base_offset=0)
void AppendAtom(AtomType type, dw_form_t form)
bool ContainsAtom(AtomType atom_type) const
static void ExtractTypesFromDIEArray(const DIEInfoArray &die_info_array, uint32_t type_flag_mask, uint32_t type_flag_value, llvm::function_ref< bool(DIERef ref)> callback)
static const char * GetAtomTypeName(uint16_t atom)
static void ExtractClassOrStructDIEArray(const DIEInfoArray &die_info_array, bool return_implementation_only_if_available, llvm::function_ref< bool(DIERef ref)> callback)
@ eTypeFlagClassIsImplementation
Always set for C++, only set for ObjC if this is the @implementation for class.
@ eAtomTypeQualNameHash
A 32 bit hash of the full qualified name (since all hash entries are basename only) For example a typ...
@ eAtomTypeCUOffset
DIE offset of the compiler unit header that contains the item in question.
@ eAtomTypeDIEOffset
DIE offset, check form for encoding.
@ eAtomTypeTag
DW_TAG_xxx value, should be encoded as DW_FORM_data1 (if no tags exceed 255) or DW_FORM_data2.
std::vector< DIEInfo > DIEInfoArray
static bool ExtractDIEArray(const DIEInfoArray &die_info_array, llvm::function_ref< bool(DIERef ref)> callback)
An data extractor class.
Definition: DataExtractor.h:48
uint32_t GetU32(lldb::offset_t *offset_ptr) const
Extract a uint32_t value from *offset_ptr.
uint16_t GetU16(lldb::offset_t *offset_ptr) const
Extract a uint16_t value from *offset_ptr.
bool Execute(llvm::StringRef string, llvm::SmallVectorImpl< llvm::StringRef > *matches=nullptr) const
Execute a regular expression match using the compiled regular expression that is already in this obje...
uint64_t dw_offset_t
Definition: dwarf.h:33
#define DW_INVALID_OFFSET
Definition: dwarf.h:38
llvm::dwarf::Tag dw_tag_t
Definition: dwarf.h:28
uint16_t dw_form_t
Definition: dwarf.h:27
#define UINT32_MAX
Definition: lldb-defines.h:19
uint64_t offset_t
Definition: lldb-types.h:83
uint32_t type_flags
Any flags for this DIEInfo.
uint32_t qualified_name_hash
A 32 bit hash of the fully qualified name.
virtual lldb::offset_t Read(lldb_private::DataExtractor &data, lldb::offset_t offset)
Definition: MappedHash.h:88