LLDB  mainline
HashedNameToDIE.cpp
Go to the documentation of this file.
1 //===-- HashedNameToDIE.cpp -----------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "HashedNameToDIE.h"
10 #include "llvm/ADT/StringRef.h"
11 
13  const DIEInfoArray &die_info_array,
14  llvm::function_ref<bool(DIERef ref)> callback) {
15  const size_t count = die_info_array.size();
16  for (size_t i = 0; i < count; ++i)
17  if (!callback(DIERef(die_info_array[i])))
18  return false;
19  return true;
20 }
21 
23  const DIEInfoArray &die_info_array, const dw_tag_t tag,
24  llvm::function_ref<bool(DIERef ref)> callback) {
25  if (tag == 0) {
26  ExtractDIEArray(die_info_array, callback);
27  return;
28  }
29 
30  const size_t count = die_info_array.size();
31  for (size_t i = 0; i < count; ++i) {
32  const dw_tag_t die_tag = die_info_array[i].tag;
33  bool tag_matches = die_tag == 0 || tag == die_tag;
34  if (!tag_matches) {
35  if (die_tag == DW_TAG_class_type || die_tag == DW_TAG_structure_type)
36  tag_matches = tag == DW_TAG_structure_type || tag == DW_TAG_class_type;
37  }
38  if (tag_matches) {
39  if (!callback(DIERef(die_info_array[i])))
40  return;
41  }
42  }
43 }
44 
46  const DIEInfoArray &die_info_array, const dw_tag_t tag,
47  const uint32_t qualified_name_hash,
48  llvm::function_ref<bool(DIERef ref)> callback) {
49  if (tag == 0) {
50  ExtractDIEArray(die_info_array, callback);
51  return;
52  }
53 
54  const size_t count = die_info_array.size();
55  for (size_t i = 0; i < count; ++i) {
56  if (qualified_name_hash != die_info_array[i].qualified_name_hash)
57  continue;
58  const dw_tag_t die_tag = die_info_array[i].tag;
59  bool tag_matches = die_tag == 0 || tag == die_tag;
60  if (!tag_matches) {
61  if (die_tag == DW_TAG_class_type || die_tag == DW_TAG_structure_type)
62  tag_matches = tag == DW_TAG_structure_type || tag == DW_TAG_class_type;
63  }
64  if (tag_matches) {
65  if (!callback(DIERef(die_info_array[i])))
66  return;
67  }
68  }
69 }
70 
72  const DIEInfoArray &die_info_array,
73  bool return_implementation_only_if_available,
74  llvm::function_ref<bool(DIERef ref)> callback) {
75  const size_t count = die_info_array.size();
76  for (size_t i = 0; i < count; ++i) {
77  const dw_tag_t die_tag = die_info_array[i].tag;
78  if (!(die_tag == 0 || die_tag == DW_TAG_class_type ||
79  die_tag == DW_TAG_structure_type))
80  continue;
81  bool is_implementation =
82  (die_info_array[i].type_flags & eTypeFlagClassIsImplementation) != 0;
83  if (is_implementation != return_implementation_only_if_available)
84  continue;
85  if (return_implementation_only_if_available) {
86  // We found the one true definition for this class, so only return
87  // that
88  callback(DIERef(die_info_array[i]));
89  return;
90  }
91  if (!callback(DIERef(die_info_array[i])))
92  return;
93  }
94 }
95 
97  const DIEInfoArray &die_info_array, uint32_t type_flag_mask,
98  uint32_t type_flag_value, llvm::function_ref<bool(DIERef ref)> callback) {
99  const size_t count = die_info_array.size();
100  for (size_t i = 0; i < count; ++i) {
101  if ((die_info_array[i].type_flags & type_flag_mask) == type_flag_value) {
102  if (!callback(DIERef(die_info_array[i])))
103  return;
104  }
105  }
106 }
107 
109  switch (atom) {
110  case eAtomTypeNULL:
111  return "NULL";
112  case eAtomTypeDIEOffset:
113  return "die-offset";
114  case eAtomTypeCUOffset:
115  return "cu-offset";
116  case eAtomTypeTag:
117  return "die-tag";
118  case eAtomTypeNameFlags:
119  return "name-flags";
120  case eAtomTypeTypeFlags:
121  return "type-flags";
123  return "qualified-name-hash";
124  }
125  return "<invalid>";
126 }
127 
129  uint32_t h)
130  : die_offset(o), tag(t), type_flags(f), qualified_name_hash(h) {}
131 
133  : die_base_offset(_die_base_offset), atoms(), atom_mask(0),
134  min_hash_data_byte_size(0), hash_data_has_fixed_byte_size(true) {
135  // Define an array of DIE offsets by first defining an array, and then define
136  // the atom type for the array, in this case we have an array of DIE offsets.
137  AppendAtom(eAtomTypeDIEOffset, DW_FORM_data4);
138 }
139 
143  atom_mask = 0;
144  atoms.clear();
145 }
146 
148  return (atom_mask & (1u << atom_type)) != 0;
149 }
150 
152  die_base_offset = 0;
153  ClearAtoms();
154 }
155 
157  atoms.push_back({type, form});
158  atom_mask |= 1u << type;
159  switch (form) {
160  case DW_FORM_indirect:
161  case DW_FORM_exprloc:
162  case DW_FORM_flag_present:
163  case DW_FORM_ref_sig8:
164  llvm_unreachable("Unhandled atom form");
165 
166  case DW_FORM_addrx:
167  case DW_FORM_string:
168  case DW_FORM_block:
169  case DW_FORM_block1:
170  case DW_FORM_sdata:
171  case DW_FORM_udata:
172  case DW_FORM_ref_udata:
173  case DW_FORM_GNU_addr_index:
174  case DW_FORM_GNU_str_index:
176  LLVM_FALLTHROUGH;
177  case DW_FORM_flag:
178  case DW_FORM_data1:
179  case DW_FORM_ref1:
180  case DW_FORM_sec_offset:
182  break;
183 
184  case DW_FORM_block2:
186  LLVM_FALLTHROUGH;
187  case DW_FORM_data2:
188  case DW_FORM_ref2:
190  break;
191 
192  case DW_FORM_block4:
194  LLVM_FALLTHROUGH;
195  case DW_FORM_data4:
196  case DW_FORM_ref4:
197  case DW_FORM_addr:
198  case DW_FORM_ref_addr:
199  case DW_FORM_strp:
201  break;
202 
203  case DW_FORM_data8:
204  case DW_FORM_ref8:
206  break;
207  }
208 }
209 
212  lldb::offset_t offset) {
213  ClearAtoms();
214 
215  die_base_offset = data.GetU32(&offset);
216 
217  const uint32_t atom_count = data.GetU32(&offset);
218  if (atom_count == 0x00060003u) {
219  // Old format, deal with contents of old pre-release format.
220  while (data.GetU32(&offset)) {
221  /* do nothing */;
222  }
223 
224  // Hardcode to the only known value for now.
225  AppendAtom(eAtomTypeDIEOffset, DW_FORM_data4);
226  } else {
227  for (uint32_t i = 0; i < atom_count; ++i) {
228  AtomType type = (AtomType)data.GetU16(&offset);
229  dw_form_t form = (dw_form_t)data.GetU16(&offset);
230  AppendAtom(type, form);
231  }
232  }
233  return offset;
234 }
235 
237  // Add an extra count to the atoms size for the zero termination Atom that
238  // gets written to disk.
239  return sizeof(die_base_offset) + sizeof(uint32_t) +
240  atoms.size() * sizeof(Atom);
241 }
242 
245 }
246 
249 }
250 
251 size_t DWARFMappedHash::Header::GetByteSize(const HeaderData &header_data) {
252  return header_data.GetByteSize();
253 }
254 
256  lldb::offset_t offset) {
257  offset = MappedHash::Header<Prologue>::Read(data, offset);
258  if (offset != UINT32_MAX) {
259  offset = header_data.Read(data, offset);
260  }
261  return offset;
262 }
263 
265  lldb::offset_t *offset_ptr,
266  DIEInfo &hash_data) const {
267  const size_t num_atoms = header_data.atoms.size();
268  if (num_atoms == 0)
269  return false;
270 
271  for (size_t i = 0; i < num_atoms; ++i) {
272  DWARFFormValue form_value(nullptr, header_data.atoms[i].form);
273 
274  if (!form_value.ExtractValue(data, offset_ptr))
275  return false;
276 
277  switch (header_data.atoms[i].type) {
278  case eAtomTypeDIEOffset: // DIE offset, check form for encoding
279  hash_data.die_offset =
280  DWARFFormValue::IsDataForm(form_value.Form())
281  ? form_value.Unsigned()
282  : form_value.Reference(header_data.die_base_offset);
283  break;
284 
285  case eAtomTypeTag: // DW_TAG value for the DIE
286  hash_data.tag = (dw_tag_t)form_value.Unsigned();
287  break;
288 
289  case eAtomTypeTypeFlags: // Flags from enum TypeFlags
290  hash_data.type_flags = (uint32_t)form_value.Unsigned();
291  break;
292 
293  case eAtomTypeQualNameHash: // Flags from enum TypeFlags
294  hash_data.qualified_name_hash = form_value.Unsigned();
295  break;
296 
297  default:
298  // We can always skip atoms we don't know about.
299  break;
300  }
301  }
302  return hash_data.die_offset != DW_INVALID_OFFSET;
303 }
304 
307  const lldb_private::DWARFDataExtractor &string_table, const char *name)
309  m_data(table_data), m_string_table(string_table), m_name(name) {}
310 
311 const char *
313  // The key in the DWARF table is the .debug_str offset for the string
314  return m_string_table.PeekCStr(key);
315 }
316 
318  HashData &hash_data) const {
319  lldb::offset_t offset = hash_data_offset;
320  // Skip string table offset that contains offset of hash name in .debug_str.
321  offset += 4;
322  const uint32_t count = m_data.GetU32(&offset);
323  if (count > 0) {
324  hash_data.resize(count);
325  for (uint32_t i = 0; i < count; ++i) {
326  if (!m_header.Read(m_data, &offset, hash_data[i]))
327  return false;
328  }
329  } else
330  hash_data.clear();
331  return true;
332 }
333 
336  llvm::StringRef name, lldb::offset_t *hash_data_offset_ptr,
337  Pair &pair) const {
338  pair.key = m_data.GetU32(hash_data_offset_ptr);
339  pair.value.clear();
340 
341  // If the key is zero, this terminates our chain of HashData objects for this
342  // hash value.
343  if (pair.key == 0)
344  return eResultEndOfHashData;
345 
346  // There definitely should be a string for this string offset, if there
347  // isn't, there is something wrong, return and error.
348  const char *strp_cstr = m_string_table.PeekCStr(pair.key);
349  if (strp_cstr == nullptr) {
350  *hash_data_offset_ptr = UINT32_MAX;
351  return eResultError;
352  }
353 
354  const uint32_t count = m_data.GetU32(hash_data_offset_ptr);
355  const size_t min_total_hash_data_size =
356  count * m_header.header_data.GetMinimumHashDataByteSize();
357  if (count > 0 && m_data.ValidOffsetForDataOfSize(*hash_data_offset_ptr,
358  min_total_hash_data_size)) {
359  // We have at least one HashData entry, and we have enough data to parse at
360  // least "count" HashData entries.
361 
362  // First make sure the entire C string matches...
363  const bool match = name == strp_cstr;
364 
365  if (!match && m_header.header_data.HashDataHasFixedByteSize()) {
366  // If the string doesn't match and we have fixed size data, we can just
367  // add the total byte size of all HashData objects to the hash data
368  // offset and be done...
369  *hash_data_offset_ptr += min_total_hash_data_size;
370  } else {
371  // If the string does match, or we don't have fixed size data then we
372  // need to read the hash data as a stream. If the string matches we also
373  // append all HashData objects to the value array.
374  for (uint32_t i = 0; i < count; ++i) {
375  DIEInfo die_info;
376  if (m_header.Read(m_data, hash_data_offset_ptr, die_info)) {
377  // Only happened if the HashData of the string matched...
378  if (match)
379  pair.value.push_back(die_info);
380  } else {
381  // Something went wrong while reading the data.
382  *hash_data_offset_ptr = UINT32_MAX;
383  return eResultError;
384  }
385  }
386  }
387  // Return the correct response depending on if the string matched or not...
388  if (match) {
389  // The key (cstring) matches and we have lookup results!
390  return eResultKeyMatch;
391  } else {
392  // The key doesn't match, this function will get called again for the
393  // next key/value or the key terminator which in our case is a zero
394  // .debug_str offset.
395  return eResultKeyMismatch;
396  }
397  } else {
398  *hash_data_offset_ptr = UINT32_MAX;
399  return eResultError;
400  }
401 }
402 
405  const lldb_private::RegularExpression &regex,
406  lldb::offset_t *hash_data_offset_ptr, Pair &pair) const {
407  pair.key = m_data.GetU32(hash_data_offset_ptr);
408  // If the key is zero, this terminates our chain of HashData objects for this
409  // hash value.
410  if (pair.key == 0)
411  return eResultEndOfHashData;
412 
413  // There definitely should be a string for this string offset, if there
414  // isn't, there is something wrong, return and error.
415  const char *strp_cstr = m_string_table.PeekCStr(pair.key);
416  if (strp_cstr == nullptr)
417  return eResultError;
418 
419  const uint32_t count = m_data.GetU32(hash_data_offset_ptr);
420  const size_t min_total_hash_data_size =
421  count * m_header.header_data.GetMinimumHashDataByteSize();
422  if (count > 0 && m_data.ValidOffsetForDataOfSize(*hash_data_offset_ptr,
423  min_total_hash_data_size)) {
424  const bool match = regex.Execute(llvm::StringRef(strp_cstr));
425 
426  if (!match && m_header.header_data.HashDataHasFixedByteSize()) {
427  // If the regex doesn't match and we have fixed size data, we can just
428  // add the total byte size of all HashData objects to the hash data
429  // offset and be done...
430  *hash_data_offset_ptr += min_total_hash_data_size;
431  } else {
432  // If the string does match, or we don't have fixed size data then we
433  // need to read the hash data as a stream. If the string matches we also
434  // append all HashData objects to the value array.
435  for (uint32_t i = 0; i < count; ++i) {
436  DIEInfo die_info;
437  if (m_header.Read(m_data, hash_data_offset_ptr, die_info)) {
438  // Only happened if the HashData of the string matched...
439  if (match)
440  pair.value.push_back(die_info);
441  } else {
442  // Something went wrong while reading the data
443  *hash_data_offset_ptr = UINT32_MAX;
444  return eResultError;
445  }
446  }
447  }
448  // Return the correct response depending on if the string matched or not...
449  if (match) {
450  // The key (cstring) matches and we have lookup results!
451  return eResultKeyMatch;
452  } else {
453  // The key doesn't match, this function will get called again for the
454  // next key/value or the key terminator which in our case is a zero
455  // .debug_str offset.
456  return eResultKeyMismatch;
457  }
458  } else {
459  *hash_data_offset_ptr = UINT32_MAX;
460  return eResultError;
461  }
462 }
463 
465  const lldb_private::RegularExpression &regex,
466  DIEInfoArray &die_info_array) const {
467  const uint32_t hash_count = m_header.hashes_count;
468  Pair pair;
469  for (uint32_t offset_idx = 0; offset_idx < hash_count; ++offset_idx) {
470  lldb::offset_t hash_data_offset = GetHashDataOffset(offset_idx);
471  while (hash_data_offset != UINT32_MAX) {
472  const lldb::offset_t prev_hash_data_offset = hash_data_offset;
473  Result hash_result =
474  AppendHashDataForRegularExpression(regex, &hash_data_offset, pair);
475  if (prev_hash_data_offset == hash_data_offset)
476  break;
477 
478  // Check the result of getting our hash data.
479  switch (hash_result) {
480  case eResultKeyMatch:
481  case eResultKeyMismatch:
482  // Whether we matches or not, it doesn't matter, we keep looking.
483  break;
484 
486  case eResultError:
487  hash_data_offset = UINT32_MAX;
488  break;
489  }
490  }
491  }
492  die_info_array.swap(pair.value);
493 }
494 
496  const uint32_t die_offset_start, const uint32_t die_offset_end,
497  DIEInfoArray &die_info_array) const {
498  const uint32_t hash_count = m_header.hashes_count;
499  for (uint32_t offset_idx = 0; offset_idx < hash_count; ++offset_idx) {
500  bool done = false;
501  lldb::offset_t hash_data_offset = GetHashDataOffset(offset_idx);
502  while (!done && hash_data_offset != UINT32_MAX) {
503  KeyType key = m_data.GetU32(&hash_data_offset);
504  // If the key is zero, this terminates our chain of HashData objects for
505  // this hash value.
506  if (key == 0)
507  break;
508 
509  const uint32_t count = m_data.GetU32(&hash_data_offset);
510  for (uint32_t i = 0; i < count; ++i) {
511  DIEInfo die_info;
512  if (m_header.Read(m_data, &hash_data_offset, die_info)) {
513  if (die_info.die_offset == 0)
514  done = true;
515  if (die_offset_start <= die_info.die_offset &&
516  die_info.die_offset < die_offset_end)
517  die_info_array.push_back(die_info);
518  }
519  }
520  }
521  }
522 }
523 
525  llvm::StringRef name, llvm::function_ref<bool(DIERef ref)> callback) {
526  if (name.empty())
527  return true;
528 
529  DIEInfoArray die_info_array;
530  FindByName(name, die_info_array);
531  return DWARFMappedHash::ExtractDIEArray(die_info_array, callback);
532 }
533 
535  llvm::StringRef name, const dw_tag_t tag,
536  llvm::function_ref<bool(DIERef ref)> callback) {
537  DIEInfoArray die_info_array;
538  FindByName(name, die_info_array);
539  DWARFMappedHash::ExtractDIEArray(die_info_array, tag, callback);
540 }
541 
543  llvm::StringRef name, const dw_tag_t tag,
544  const uint32_t qualified_name_hash,
545  llvm::function_ref<bool(DIERef ref)> callback) {
546  DIEInfoArray die_info_array;
547  FindByName(name, die_info_array);
548  DWARFMappedHash::ExtractDIEArray(die_info_array, tag, qualified_name_hash,
549  callback);
550 }
551 
553  llvm::StringRef name, llvm::function_ref<bool(DIERef ref)> callback,
554  bool must_be_implementation) {
555  DIEInfoArray die_info_array;
556  FindByName(name, die_info_array);
557  if (must_be_implementation &&
558  GetHeader().header_data.ContainsAtom(eAtomTypeTypeFlags)) {
559  // If we have two atoms, then we have the DIE offset and the type flags
560  // so we can find the objective C class efficiently.
562  die_info_array, UINT32_MAX, eTypeFlagClassIsImplementation, callback);
563  return;
564  }
565  // We don't only want the one true definition, so try and see what we can
566  // find, and only return class or struct DIEs. If we do have the full
567  // implementation, then return it alone, else return all possible
568  // matches.
569  bool found_implementation = false;
571  die_info_array, true /*return_implementation_only_if_available*/,
572  [&](DIERef ref) {
573  found_implementation = true;
574  // Here the return value does not matter as we are called at most once.
575  return callback(ref);
576  });
577  if (found_implementation)
578  return;
580  die_info_array, false /*return_implementation_only_if_available*/,
581  callback);
582 }
583 
585  DIEInfoArray &die_info_array) {
586  if (name.empty())
587  return;
588 
589  Pair kv_pair;
590  if (Find(name, kv_pair))
591  die_info_array.swap(kv_pair.value);
592 }
uint32_t hashes_count
Definition: MappedHash.h:53
An data extractor class.
Definition: DataExtractor.h:46
Result AppendHashDataForRegularExpression(const lldb_private::RegularExpression &regex, lldb::offset_t *hash_data_offset_ptr, Pair &pair) const
uint32_t GetU32(lldb::offset_t *offset_ptr) const
Extract a uint32_t value from *offset_ptr.
const char * GetStringForKeyType(KeyType key) const override
Identifies a DWARF debug info entry within a given Module.
Definition: DIERef.h:26
dw_form_t Form() const
void FindByNameAndTag(llvm::StringRef name, const dw_tag_t tag, llvm::function_ref< bool(DIERef ref)> callback)
lldb::offset_t Read(const lldb_private::DataExtractor &data, lldb::offset_t offset)
const char * PeekCStr(lldb::offset_t offset) const
Peek at a C string at offset.
virtual lldb::offset_t Read(lldb_private::DataExtractor &data, lldb::offset_t offset)
Definition: MappedHash.h:90
static bool IsDataForm(const dw_form_t form)
MemoryTable(lldb_private::DWARFDataExtractor &table_data, const lldb_private::DWARFDataExtractor &string_table, const char *name)
DIE offset of the compiler unit header that contains the item in question.
Result GetHashDataForName(llvm::StringRef name, lldb::offset_t *hash_data_offset_ptr, Pair &pair) const override
#define UINT32_MAX
Definition: lldb-defines.h:31
void AppendAllDIEsInRange(const uint32_t die_offset_start, const uint32_t die_offset_end, DIEInfoArray &die_info_array) const
bool ExtractValue(const lldb_private::DWARFDataExtractor &data, lldb::offset_t *offset_ptr)
Always set for C++, only set for ObjC if this is the for class.
uint64_t offset_t
Definition: lldb-types.h:87
void FindCompleteObjCClassByName(llvm::StringRef name, llvm::function_ref< bool(DIERef ref)> callback, bool must_be_implementation)
lldb::offset_t Read(lldb_private::DataExtractor &data, lldb::offset_t offset) override
HeaderData header_data
Definition: MappedHash.h:57
A class for reading and using a saved hash table from a block of data in memory.
bool ContainsAtom(AtomType atom_type) const
uint32_t type_flags
Any flags for this DIEInfo.
Prologue(dw_offset_t _die_base_offset=0)
size_t GetMinimumHashDataByteSize() const
llvm::dwarf::Tag dw_tag_t
Definition: dwarf.h:25
std::vector< DIEInfo > DIEInfoArray
uint64_t Unsigned() const
uint32_t qualified_name_hash
A 32 bit hash of the fully qualified name.
bool ValidOffsetForDataOfSize(lldb::offset_t offset, lldb::offset_t length) const
Test the availability of length bytes of data from offset.
bool ReadHashData(uint32_t hash_data_offset, HashData &hash_data) const override
static void ExtractTypesFromDIEArray(const DIEInfoArray &die_info_array, uint32_t type_flag_mask, uint32_t type_flag_value, llvm::function_ref< bool(DIERef ref)> callback)
void FindByNameAndTagAndQualifiedNameHash(llvm::StringRef name, const dw_tag_t tag, const uint32_t qualified_name_hash, llvm::function_ref< bool(DIERef ref)> callback)
DW_TAG_xxx value, should be encoded as DW_FORM_data1 (if no tags exceed 255) or DW_FORM_data2.
static bool ExtractDIEArray(const DIEInfoArray &die_info_array, llvm::function_ref< bool(DIERef ref)> callback)
void AppendAllDIEsThatMatchingRegex(const lldb_private::RegularExpression &regex, DIEInfoArray &die_info_array) const
uint16_t dw_form_t
Definition: dwarf.h:24
void AppendAtom(AtomType type, dw_form_t form)
bool FindByName(llvm::StringRef name, llvm::function_ref< bool(DIERef ref)> callback)
A 32 bit hash of the full qualified name (since all hash entries are basename only) For example a typ...
static const char * GetAtomTypeName(uint16_t atom)
dw_offset_t die_base_offset
DIE offset base so die offsets in hash_data can be CU relative.
DIE offset, check form for encoding.
DWARFDIE Reference() const
lldb_private::DWARFDataExtractor m_string_table
#define DW_INVALID_OFFSET
Definition: dwarf.h:34
static void ExtractClassOrStructDIEArray(const DIEInfoArray &die_info_array, bool return_implementation_only_if_available, llvm::function_ref< bool(DIERef ref)> callback)
bool Execute(llvm::StringRef string, llvm::SmallVectorImpl< llvm::StringRef > *matches=nullptr) const
Execute a regular expression match using the compiled regular expression that is already in this obje...
lldb_private::DWARFDataExtractor m_data
uint16_t GetU16(lldb::offset_t *offset_ptr) const
Extract a uint16_t value from *offset_ptr.