LLDB  mainline
MappedHash.h
Go to the documentation of this file.
1 //===-- MappedHash.h --------------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef liblldb_MappedHash_h_
10 #define liblldb_MappedHash_h_
11 
12 #include <assert.h>
13 #include <stdint.h>
14 
15 #include <algorithm>
16 #include <functional>
17 #include <map>
18 #include <vector>
19 
21 #include "lldb/Utility/Stream.h"
22 #include "llvm/Support/DJB.h"
23 
24 class MappedHash {
25 public:
27  eHashFunctionDJB = 0u // Daniel J Bernstein hash function that is also used
28  // by the ELF GNU_HASH sections
29  };
30 
31  static uint32_t HashString(uint32_t hash_function, llvm::StringRef s) {
32  switch (hash_function) {
34  return llvm::djbHash(s);
35 
36  default:
37  break;
38  }
39  llvm_unreachable("Invalid hash function index");
40  }
41 
42  static const uint32_t HASH_MAGIC = 0x48415348u;
43  static const uint32_t HASH_CIGAM = 0x48534148u;
44 
45  template <typename T> struct Header {
46  typedef T HeaderData;
47 
48  uint32_t
49  magic; // HASH_MAGIC or HASH_CIGAM magic value to allow endian detection
50  uint16_t version; // Version number
51  uint16_t hash_function; // The hash function enumeration that was used
52  uint32_t bucket_count; // The number of buckets in this hash table
53  uint32_t hashes_count; // The total number of unique hash values and hash
54  // data offsets in this table
55  uint32_t header_data_len; // The size in bytes of the "header_data" template
56  // member below
57  HeaderData header_data; //
58 
60  : magic(HASH_MAGIC), version(1), hash_function(eHashFunctionDJB),
61  bucket_count(0), hashes_count(0), header_data_len(sizeof(T)),
62  header_data() {}
63 
64  virtual ~Header() = default;
65 
66  size_t GetByteSize() const {
67  return sizeof(magic) + sizeof(version) + sizeof(hash_function) +
68  sizeof(bucket_count) + sizeof(hashes_count) +
69  sizeof(header_data_len) + header_data_len;
70  }
71 
72  virtual size_t GetByteSize(const HeaderData &header_data) = 0;
73 
74  void SetHeaderDataByteSize(uint32_t header_data_byte_size) {
75  header_data_len = header_data_byte_size;
76  }
77 
79  s.Printf("header.magic = 0x%8.8x\n", magic);
80  s.Printf("header.version = 0x%4.4x\n", version);
81  s.Printf("header.hash_function = 0x%4.4x\n", hash_function);
82  s.Printf("header.bucket_count = 0x%8.8x %u\n", bucket_count,
83  bucket_count);
84  s.Printf("header.hashes_count = 0x%8.8x %u\n", hashes_count,
85  hashes_count);
86  s.Printf("header.header_data_len = 0x%8.8x %u\n", header_data_len,
87  header_data_len);
88  }
89 
91  lldb::offset_t offset) {
92  if (data.ValidOffsetForDataOfSize(
93  offset, sizeof(magic) + sizeof(version) + sizeof(hash_function) +
94  sizeof(bucket_count) + sizeof(hashes_count) +
95  sizeof(header_data_len))) {
96  magic = data.GetU32(&offset);
97  if (magic != HASH_MAGIC) {
98  if (magic == HASH_CIGAM) {
99  switch (data.GetByteOrder()) {
100  case lldb::eByteOrderBig:
102  break;
105  break;
106  default:
107  return LLDB_INVALID_OFFSET;
108  }
109  } else {
110  // Magic bytes didn't match
111  version = 0;
112  return LLDB_INVALID_OFFSET;
113  }
114  }
115 
116  version = data.GetU16(&offset);
117  if (version != 1) {
118  // Unsupported version
119  return LLDB_INVALID_OFFSET;
120  }
121  hash_function = data.GetU16(&offset);
122  if (hash_function == 4)
123  hash_function = 0; // Deal with pre-release version of this table...
124  bucket_count = data.GetU32(&offset);
125  hashes_count = data.GetU32(&offset);
126  header_data_len = data.GetU32(&offset);
127  return offset;
128  }
129  return LLDB_INVALID_OFFSET;
130  }
131  //
132  // // Returns a buffer that contains a serialized version of this
133  // table
134  // // that must be freed with free().
135  // virtual void *
136  // Write (int fd);
137  };
138 
139  // A class for reading and using a saved hash table from a block of data
140  // in memory
141  template <typename __KeyType, class __HeaderType, class __HashData>
142  class MemoryTable {
143  public:
144  typedef __HeaderType HeaderType;
145  typedef __KeyType KeyType;
146  typedef __HashData HashData;
147 
148  enum Result {
149  eResultKeyMatch = 0u, // The entry was found, key matched and "pair" was
150  // filled in successfully
151  eResultKeyMismatch =
152  1u, // Bucket hash data collision, but key didn't match
153  eResultEndOfHashData = 2u, // The chain of items for this hash data in
154  // this bucket is terminated, search no more
155  eResultError = 3u // Status parsing the hash data, abort
156  };
157 
158  struct Pair {
159  KeyType key;
160  HashData value;
161  };
162 
164  : m_header(), m_hash_indexes(nullptr), m_hash_values(nullptr),
165  m_hash_offsets(nullptr) {
166  lldb::offset_t offset = m_header.Read(data, 0);
167  if (offset != LLDB_INVALID_OFFSET && IsValid()) {
168  m_hash_indexes = (const uint32_t *)data.GetData(
169  &offset, m_header.bucket_count * sizeof(uint32_t));
170  m_hash_values = (const uint32_t *)data.GetData(
171  &offset, m_header.hashes_count * sizeof(uint32_t));
172  m_hash_offsets = (const uint32_t *)data.GetData(
173  &offset, m_header.hashes_count * sizeof(uint32_t));
174  }
175  }
176 
177  virtual ~MemoryTable() = default;
178 
179  bool IsValid() const {
180  return m_header.version == 1 &&
181  m_header.hash_function == eHashFunctionDJB &&
182  m_header.bucket_count > 0;
183  }
184 
185  uint32_t GetHashIndex(uint32_t bucket_idx) const {
186  uint32_t result = UINT32_MAX;
187  if (m_hash_indexes && bucket_idx < m_header.bucket_count)
188  memcpy(&result, m_hash_indexes + bucket_idx, sizeof(uint32_t));
189  return result;
190  }
191 
192  uint32_t GetHashValue(uint32_t hash_idx) const {
193  uint32_t result = UINT32_MAX;
194  if (m_hash_values && hash_idx < m_header.hashes_count)
195  memcpy(&result, m_hash_values + hash_idx, sizeof(uint32_t));
196  return result;
197  }
198 
200  uint32_t result = UINT32_MAX;
201  if (m_hash_offsets && hash_idx < m_header.hashes_count)
202  memcpy(&result, m_hash_offsets + hash_idx, sizeof(uint32_t));
203  return result;
204  }
205 
206  bool Find(llvm::StringRef name, Pair &pair) const {
207  if (name.empty())
208  return false;
209 
210  if (IsValid()) {
211  const uint32_t bucket_count = m_header.bucket_count;
212  const uint32_t hash_count = m_header.hashes_count;
213  const uint32_t hash_value =
214  MappedHash::HashString(m_header.hash_function, name);
215  const uint32_t bucket_idx = hash_value % bucket_count;
216  uint32_t hash_idx = GetHashIndex(bucket_idx);
217  if (hash_idx < hash_count) {
218  for (; hash_idx < hash_count; ++hash_idx) {
219  const uint32_t curr_hash_value = GetHashValue(hash_idx);
220  if (curr_hash_value == hash_value) {
221  lldb::offset_t hash_data_offset = GetHashDataOffset(hash_idx);
222  while (hash_data_offset != UINT32_MAX) {
223  const lldb::offset_t prev_hash_data_offset = hash_data_offset;
224  Result hash_result =
225  GetHashDataForName(name, &hash_data_offset, pair);
226  // Check the result of getting our hash data
227  switch (hash_result) {
228  case eResultKeyMatch:
229  return true;
230 
231  case eResultKeyMismatch:
232  if (prev_hash_data_offset == hash_data_offset)
233  return false;
234  break;
235 
236  case eResultEndOfHashData:
237  // The last HashData for this key has been reached, stop
238  // searching
239  return false;
240  case eResultError:
241  // Status parsing the hash data, abort
242  return false;
243  }
244  }
245  }
246  if ((curr_hash_value % bucket_count) != bucket_idx)
247  break;
248  }
249  }
250  }
251  return false;
252  }
253 
254  // This method must be implemented in any subclasses. The KeyType is user
255  // specified and must somehow result in a string value. For example, the
256  // KeyType might be a string offset in a string table and subclasses can
257  // store their string table as a member of the subclass and return a valie
258  // "const char *" given a "key". The value could also be a C string
259  // pointer, in which case just returning "key" will suffice.
260  virtual const char *GetStringForKeyType(KeyType key) const = 0;
261 
262  virtual bool ReadHashData(uint32_t hash_data_offset,
263  HashData &hash_data) const = 0;
264 
265  // This method must be implemented in any subclasses and it must try to
266  // read one "Pair" at the offset pointed to by the "hash_data_offset_ptr"
267  // parameter. This offset should be updated as bytes are consumed and a
268  // value "Result" enum should be returned. If the "name" matches the full
269  // name for the "pair.key" (which must be filled in by this call), then the
270  // HashData in the pair ("pair.value") should be extracted and filled in
271  // and "eResultKeyMatch" should be returned. If "name" doesn't match this
272  // string for the key, then "eResultKeyMismatch" should be returned and all
273  // data for the current HashData must be consumed or skipped and the
274  // "hash_data_offset_ptr" offset needs to be updated to point to the next
275  // HashData. If the end of the HashData objects for a given hash value have
276  // been reached, then "eResultEndOfHashData" should be returned. If
277  // anything else goes wrong during parsing, return "eResultError" and the
278  // corresponding "Find()" function will be canceled and return false.
279  virtual Result GetHashDataForName(llvm::StringRef name,
280  lldb::offset_t *hash_data_offset_ptr,
281  Pair &pair) const = 0;
282 
283  const HeaderType &GetHeader() { return m_header; }
284 
285  void ForEach(
286  std::function<bool(const HashData &hash_data)> const &callback) const {
287  const size_t num_hash_offsets = m_header.hashes_count;
288  for (size_t i = 0; i < num_hash_offsets; ++i) {
289  uint32_t hash_data_offset = GetHashDataOffset(i);
290  if (hash_data_offset != UINT32_MAX) {
291  HashData hash_data;
292  if (ReadHashData(hash_data_offset, hash_data)) {
293  // If the callback returns false, then we are done and should stop
294  if (callback(hash_data) == false)
295  return;
296  }
297  }
298  }
299  }
300 
301  protected:
302  // Implementation agnostic information
303  HeaderType m_header;
307  };
308 };
309 
310 #endif // liblldb_MappedHash_h_
uint32_t hashes_count
Definition: MappedHash.h:53
An data extractor class.
Definition: DataExtractor.h:47
const void * GetData(lldb::offset_t *offset_ptr, lldb::offset_t length) const
Extract length bytes from *offset_ptr.
A stream class that can stream formatted output to a file.
Definition: Stream.h:28
uint32_t GetU32(lldb::offset_t *offset_ptr) const
Extract a uint32_t value from *offset_ptr.
__HeaderType HeaderType
Definition: MappedHash.h:144
void ForEach(std::function< bool(const HashData &hash_data)> const &callback) const
Definition: MappedHash.h:285
static const uint32_t HASH_MAGIC
Definition: MappedHash.h:42
uint16_t version
Definition: MappedHash.h:50
virtual lldb::offset_t Read(lldb_private::DataExtractor &data, lldb::offset_t offset)
Definition: MappedHash.h:90
#define LLDB_INVALID_OFFSET
Definition: lldb-defines.h:96
#define UINT32_MAX
Definition: lldb-defines.h:31
const HeaderType & GetHeader()
Definition: MappedHash.h:283
uint32_t GetHashDataOffset(uint32_t hash_idx) const
Definition: MappedHash.h:199
const uint32_t * m_hash_indexes
Definition: MappedHash.h:304
uint64_t offset_t
Definition: lldb-types.h:87
HeaderData header_data
Definition: MappedHash.h:57
void SetHeaderDataByteSize(uint32_t header_data_byte_size)
Definition: MappedHash.h:74
lldb::ByteOrder GetByteOrder() const
Get the current byte order value.
uint32_t GetHashValue(uint32_t hash_idx) const
Definition: MappedHash.h:192
bool ValidOffsetForDataOfSize(lldb::offset_t offset, lldb::offset_t length) const
Test the availability of length bytes of data from offset.
size_t Printf(const char *format,...) __attribute__((format(printf
Output printf formatted output to the stream.
Definition: Stream.cpp:106
virtual ~Header()=default
const uint32_t * m_hash_values
Definition: MappedHash.h:305
uint32_t header_data_len
Definition: MappedHash.h:55
uint16_t hash_function
Definition: MappedHash.h:51
static uint32_t HashString(uint32_t hash_function, llvm::StringRef s)
Definition: MappedHash.h:31
bool Find(llvm::StringRef name, Pair &pair) const
Definition: MappedHash.h:206
void Dump(lldb_private::Stream &s)
Definition: MappedHash.h:78
static const uint32_t HASH_CIGAM
Definition: MappedHash.h:43
const uint32_t * m_hash_offsets
Definition: MappedHash.h:306
uint32_t bucket_count
Definition: MappedHash.h:52
uint16_t GetU16(lldb::offset_t *offset_ptr) const
Extract a uint16_t value from *offset_ptr.
size_t GetByteSize() const
Definition: MappedHash.h:66
bool IsValid() const
Definition: MappedHash.h:179
MemoryTable(lldb_private::DataExtractor &data)
Definition: MappedHash.h:163
uint32_t GetHashIndex(uint32_t bucket_idx) const
Definition: MappedHash.h:185
void SetByteOrder(lldb::ByteOrder byte_order)
Set the byte_order value.