LLDB  mainline
BreakpadRecords.cpp
Go to the documentation of this file.
1 //===-- BreakpadRecords.cpp -----------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
10 #include "llvm/ADT/StringExtras.h"
11 #include "llvm/ADT/StringSwitch.h"
12 #include "llvm/Support/Endian.h"
13 #include "llvm/Support/FormatVariadic.h"
14 
15 using namespace lldb_private;
16 using namespace lldb_private::breakpad;
17 
namespace {
/// The keywords this file recognises while tokenising a breakpad record.
/// `Unknown` stands for any text that is not one of the listed keywords.
enum class Token {
  Unknown,
  Module,
  Info,
  CodeID,
  File,
  Func,
  Public,
  Stack,
  CFI,
  Init,
  Win,
};
} // namespace
33 
34 template<typename T>
35 static T stringTo(llvm::StringRef Str);
36 
37 template <> Token stringTo<Token>(llvm::StringRef Str) {
38  return llvm::StringSwitch<Token>(Str)
39  .Case("MODULE", Token::Module)
40  .Case("INFO", Token::Info)
41  .Case("CODE_ID", Token::CodeID)
42  .Case("FILE", Token::File)
43  .Case("FUNC", Token::Func)
44  .Case("PUBLIC", Token::Public)
45  .Case("STACK", Token::Stack)
46  .Case("CFI", Token::CFI)
47  .Case("INIT", Token::Init)
48  .Case("WIN", Token::Win)
49  .Default(Token::Unknown);
50 }
51 
52 template <>
53 llvm::Triple::OSType stringTo<llvm::Triple::OSType>(llvm::StringRef Str) {
54  using llvm::Triple;
55  return llvm::StringSwitch<Triple::OSType>(Str)
56  .Case("Linux", Triple::Linux)
57  .Case("mac", Triple::MacOSX)
58  .Case("windows", Triple::Win32)
59  .Default(Triple::UnknownOS);
60 }
61 
62 template <>
63 llvm::Triple::ArchType stringTo<llvm::Triple::ArchType>(llvm::StringRef Str) {
64  using llvm::Triple;
65  return llvm::StringSwitch<Triple::ArchType>(Str)
66  .Case("arm", Triple::arm)
67  .Cases("arm64", "arm64e", Triple::aarch64)
68  .Case("mips", Triple::mips)
69  .Case("ppc", Triple::ppc)
70  .Case("ppc64", Triple::ppc64)
71  .Case("s390", Triple::systemz)
72  .Case("sparc", Triple::sparc)
73  .Case("sparcv9", Triple::sparcv9)
74  .Case("x86", Triple::x86)
75  .Cases("x86_64", "x86_64h", Triple::x86_64)
76  .Default(Triple::UnknownArch);
77 }
78 
79 template<typename T>
80 static T consume(llvm::StringRef &Str) {
81  llvm::StringRef Token;
82  std::tie(Token, Str) = getToken(Str);
83  return stringTo<T>(Token);
84 }
85 
/// Number of hexadecimal characters required to spell out every byte of a
/// (POD) object of type \p T: two characters per byte.
template <typename T> static constexpr size_t hex_digits() {
  return sizeof(T) * 2;
}
91 
92 static UUID parseModuleId(llvm::Triple::OSType os, llvm::StringRef str) {
93  struct data_t {
94  using uuid_t = uint8_t[16];
95  uuid_t uuid;
96  llvm::support::ubig32_t age;
97  } data;
98  static_assert(sizeof(data) == 20, "");
99  // The textual module id encoding should be between 33 and 40 bytes long,
100  // depending on the size of the age field, which is of variable length.
101  // The first three chunks of the id are encoded in big endian, so we need to
102  // byte-swap those.
103  if (str.size() <= hex_digits<data_t::uuid_t>() ||
104  str.size() > hex_digits<data_t>())
105  return UUID();
106  if (!all_of(str, llvm::isHexDigit))
107  return UUID();
108 
109  llvm::StringRef uuid_str = str.take_front(hex_digits<data_t::uuid_t>());
110  llvm::StringRef age_str = str.drop_front(hex_digits<data_t::uuid_t>());
111 
112  llvm::copy(fromHex(uuid_str), data.uuid);
113  uint32_t age;
114  bool success = to_integer(age_str, age, 16);
115  assert(success);
116  (void)success;
117  data.age = age;
118 
119  // On non-windows, the age field should always be zero, so we don't include to
120  // match the native uuid format of these platforms.
121  return UUID::fromData(&data, os == llvm::Triple::Win32 ? sizeof(data)
122  : sizeof(data.uuid));
123 }
124 
125 llvm::Optional<Record::Kind> Record::classify(llvm::StringRef Line) {
126  Token Tok = consume<Token>(Line);
127  switch (Tok) {
128  case Token::Module:
129  return Record::Module;
130  case Token::Info:
131  return Record::Info;
132  case Token::File:
133  return Record::File;
134  case Token::Func:
135  return Record::Func;
136  case Token::Public:
137  return Record::Public;
138  case Token::Stack:
139  Tok = consume<Token>(Line);
140  switch (Tok) {
141  case Token::CFI:
142  return Record::StackCFI;
143  case Token::Win:
144  return Record::StackWin;
145  default:
146  return llvm::None;
147  }
148 
149  case Token::Unknown:
150  // Optimistically assume that any unrecognised token means this is a line
151  // record, those don't have a special keyword and start directly with a
152  // hex number.
153  return Record::Line;
154 
155  case Token::CodeID:
156  case Token::CFI:
157  case Token::Init:
158  case Token::Win:
159  // These should never appear at the start of a valid record.
160  return llvm::None;
161  }
162  llvm_unreachable("Fully covered switch above!");
163 }
164 
165 llvm::Optional<ModuleRecord> ModuleRecord::parse(llvm::StringRef Line) {
166  // MODULE Linux x86_64 E5894855C35DCCCCCCCCCCCCCCCCCCCC0 a.out
167  if (consume<Token>(Line) != Token::Module)
168  return llvm::None;
169 
170  llvm::Triple::OSType OS = consume<llvm::Triple::OSType>(Line);
171  if (OS == llvm::Triple::UnknownOS)
172  return llvm::None;
173 
174  llvm::Triple::ArchType Arch = consume<llvm::Triple::ArchType>(Line);
175  if (Arch == llvm::Triple::UnknownArch)
176  return llvm::None;
177 
178  llvm::StringRef Str;
179  std::tie(Str, Line) = getToken(Line);
180  UUID ID = parseModuleId(OS, Str);
181  if (!ID)
182  return llvm::None;
183 
184  return ModuleRecord(OS, Arch, std::move(ID));
185 }
186 
187 llvm::raw_ostream &breakpad::operator<<(llvm::raw_ostream &OS,
188  const ModuleRecord &R) {
189  return OS << "MODULE " << llvm::Triple::getOSTypeName(R.OS) << " "
190  << llvm::Triple::getArchTypeName(R.Arch) << " "
191  << R.ID.GetAsString();
192 }
193 
194 llvm::Optional<InfoRecord> InfoRecord::parse(llvm::StringRef Line) {
195  // INFO CODE_ID 554889E55DC3CCCCCCCCCCCCCCCCCCCC [a.exe]
196  if (consume<Token>(Line) != Token::Info)
197  return llvm::None;
198 
199  if (consume<Token>(Line) != Token::CodeID)
200  return llvm::None;
201 
202  llvm::StringRef Str;
203  std::tie(Str, Line) = getToken(Line);
204  // If we don't have any text following the code ID (e.g. on linux), we should
205  // use this as the UUID. Otherwise, we should revert back to the module ID.
206  UUID ID;
207  if (Line.trim().empty()) {
208  if (Str.empty() || !ID.SetFromStringRef(Str))
209  return llvm::None;
210  }
211  return InfoRecord(std::move(ID));
212 }
213 
214 llvm::raw_ostream &breakpad::operator<<(llvm::raw_ostream &OS,
215  const InfoRecord &R) {
216  return OS << "INFO CODE_ID " << R.ID.GetAsString();
217 }
218 
219 llvm::Optional<FileRecord> FileRecord::parse(llvm::StringRef Line) {
220  // FILE number name
221  if (consume<Token>(Line) != Token::File)
222  return llvm::None;
223 
224  llvm::StringRef Str;
225  size_t Number;
226  std::tie(Str, Line) = getToken(Line);
227  if (!to_integer(Str, Number))
228  return llvm::None;
229 
230  llvm::StringRef Name = Line.trim();
231  if (Name.empty())
232  return llvm::None;
233 
234  return FileRecord(Number, Name);
235 }
236 
237 llvm::raw_ostream &breakpad::operator<<(llvm::raw_ostream &OS,
238  const FileRecord &R) {
239  return OS << "FILE " << R.Number << " " << R.Name;
240 }
241 
242 static bool parsePublicOrFunc(llvm::StringRef Line, bool &Multiple,
244  lldb::addr_t &ParamSize, llvm::StringRef &Name) {
245  // PUBLIC [m] address param_size name
246  // or
247  // FUNC [m] address size param_size name
248 
249  Token Tok = Size ? Token::Func : Token::Public;
250 
251  if (consume<Token>(Line) != Tok)
252  return false;
253 
254  llvm::StringRef Str;
255  std::tie(Str, Line) = getToken(Line);
256  Multiple = Str == "m";
257 
258  if (Multiple)
259  std::tie(Str, Line) = getToken(Line);
260  if (!to_integer(Str, Address, 16))
261  return false;
262 
263  if (Tok == Token::Func) {
264  std::tie(Str, Line) = getToken(Line);
265  if (!to_integer(Str, *Size, 16))
266  return false;
267  }
268 
269  std::tie(Str, Line) = getToken(Line);
270  if (!to_integer(Str, ParamSize, 16))
271  return false;
272 
273  Name = Line.trim();
274  if (Name.empty())
275  return false;
276 
277  return true;
278 }
279 
280 llvm::Optional<FuncRecord> FuncRecord::parse(llvm::StringRef Line) {
281  bool Multiple;
282  lldb::addr_t Address, Size, ParamSize;
283  llvm::StringRef Name;
284 
285  if (parsePublicOrFunc(Line, Multiple, Address, &Size, ParamSize, Name))
286  return FuncRecord(Multiple, Address, Size, ParamSize, Name);
287 
288  return llvm::None;
289 }
290 
291 bool breakpad::operator==(const FuncRecord &L, const FuncRecord &R) {
292  return L.Multiple == R.Multiple && L.Address == R.Address &&
293  L.Size == R.Size && L.ParamSize == R.ParamSize && L.Name == R.Name;
294 }
295 llvm::raw_ostream &breakpad::operator<<(llvm::raw_ostream &OS,
296  const FuncRecord &R) {
297  return OS << llvm::formatv("FUNC {0}{1:x-} {2:x-} {3:x-} {4}",
298  R.Multiple ? "m " : "", R.Address, R.Size,
299  R.ParamSize, R.Name);
300 }
301 
302 llvm::Optional<LineRecord> LineRecord::parse(llvm::StringRef Line) {
304  llvm::StringRef Str;
305  std::tie(Str, Line) = getToken(Line);
306  if (!to_integer(Str, Address, 16))
307  return llvm::None;
308 
309  lldb::addr_t Size;
310  std::tie(Str, Line) = getToken(Line);
311  if (!to_integer(Str, Size, 16))
312  return llvm::None;
313 
314  uint32_t LineNum;
315  std::tie(Str, Line) = getToken(Line);
316  if (!to_integer(Str, LineNum))
317  return llvm::None;
318 
319  size_t FileNum;
320  std::tie(Str, Line) = getToken(Line);
321  if (!to_integer(Str, FileNum))
322  return llvm::None;
323 
324  return LineRecord(Address, Size, LineNum, FileNum);
325 }
326 
327 bool breakpad::operator==(const LineRecord &L, const LineRecord &R) {
328  return L.Address == R.Address && L.Size == R.Size && L.LineNum == R.LineNum &&
329  L.FileNum == R.FileNum;
330 }
331 llvm::raw_ostream &breakpad::operator<<(llvm::raw_ostream &OS,
332  const LineRecord &R) {
333  return OS << llvm::formatv("{0:x-} {1:x-} {2} {3}", R.Address, R.Size,
334  R.LineNum, R.FileNum);
335 }
336 
337 llvm::Optional<PublicRecord> PublicRecord::parse(llvm::StringRef Line) {
338  bool Multiple;
339  lldb::addr_t Address, ParamSize;
340  llvm::StringRef Name;
341 
342  if (parsePublicOrFunc(Line, Multiple, Address, nullptr, ParamSize, Name))
343  return PublicRecord(Multiple, Address, ParamSize, Name);
344 
345  return llvm::None;
346 }
347 
349  return L.Multiple == R.Multiple && L.Address == R.Address &&
350  L.ParamSize == R.ParamSize && L.Name == R.Name;
351 }
352 llvm::raw_ostream &breakpad::operator<<(llvm::raw_ostream &OS,
353  const PublicRecord &R) {
354  return OS << llvm::formatv("PUBLIC {0}{1:x-} {2:x-} {3}",
355  R.Multiple ? "m " : "", R.Address, R.ParamSize,
356  R.Name);
357 }
358 
359 llvm::Optional<StackCFIRecord> StackCFIRecord::parse(llvm::StringRef Line) {
360  // STACK CFI INIT address size reg1: expr1 reg2: expr2 ...
361  // or
362  // STACK CFI address reg1: expr1 reg2: expr2 ...
363  // No token in exprN ends with a colon.
364 
365  if (consume<Token>(Line) != Token::Stack)
366  return llvm::None;
367  if (consume<Token>(Line) != Token::CFI)
368  return llvm::None;
369 
370  llvm::StringRef Str;
371  std::tie(Str, Line) = getToken(Line);
372 
373  bool IsInitRecord = stringTo<Token>(Str) == Token::Init;
374  if (IsInitRecord)
375  std::tie(Str, Line) = getToken(Line);
376 
378  if (!to_integer(Str, Address, 16))
379  return llvm::None;
380 
381  llvm::Optional<lldb::addr_t> Size;
382  if (IsInitRecord) {
383  Size.emplace();
384  std::tie(Str, Line) = getToken(Line);
385  if (!to_integer(Str, *Size, 16))
386  return llvm::None;
387  }
388 
389  return StackCFIRecord(Address, Size, Line.trim());
390 }
391 
393  return L.Address == R.Address && L.Size == R.Size &&
394  L.UnwindRules == R.UnwindRules;
395 }
396 
397 llvm::raw_ostream &breakpad::operator<<(llvm::raw_ostream &OS,
398  const StackCFIRecord &R) {
399  OS << "STACK CFI ";
400  if (R.Size)
401  OS << "INIT ";
402  OS << llvm::formatv("{0:x-} ", R.Address);
403  if (R.Size)
404  OS << llvm::formatv("{0:x-} ", *R.Size);
405  return OS << " " << R.UnwindRules;
406 }
407 
408 llvm::Optional<StackWinRecord> StackWinRecord::parse(llvm::StringRef Line) {
409  // STACK WIN type rva code_size prologue_size epilogue_size parameter_size
410  // saved_register_size local_size max_stack_size has_program_string
411  // program_string_OR_allocates_base_pointer
412 
413  if (consume<Token>(Line) != Token::Stack)
414  return llvm::None;
415  if (consume<Token>(Line) != Token::Win)
416  return llvm::None;
417 
418  llvm::StringRef Str;
419  uint8_t Type;
420  std::tie(Str, Line) = getToken(Line);
421  // Right now we only support the "FrameData" frame type.
422  if (!to_integer(Str, Type) || FrameType(Type) != FrameType::FrameData)
423  return llvm::None;
424 
425  lldb::addr_t RVA;
426  std::tie(Str, Line) = getToken(Line);
427  if (!to_integer(Str, RVA, 16))
428  return llvm::None;
429 
430  lldb::addr_t CodeSize;
431  std::tie(Str, Line) = getToken(Line);
432  if (!to_integer(Str, CodeSize, 16))
433  return llvm::None;
434 
435  // Skip fields which we aren't using right now.
436  std::tie(Str, Line) = getToken(Line); // prologue_size
437  std::tie(Str, Line) = getToken(Line); // epilogue_size
438 
439  lldb::addr_t ParameterSize;
440  std::tie(Str, Line) = getToken(Line);
441  if (!to_integer(Str, ParameterSize, 16))
442  return llvm::None;
443 
444  lldb::addr_t SavedRegisterSize;
445  std::tie(Str, Line) = getToken(Line);
446  if (!to_integer(Str, SavedRegisterSize, 16))
447  return llvm::None;
448 
449  lldb::addr_t LocalSize;
450  std::tie(Str, Line) = getToken(Line);
451  if (!to_integer(Str, LocalSize, 16))
452  return llvm::None;
453 
454  std::tie(Str, Line) = getToken(Line); // max_stack_size
455 
456  uint8_t HasProgramString;
457  std::tie(Str, Line) = getToken(Line);
458  if (!to_integer(Str, HasProgramString))
459  return llvm::None;
460  // FrameData records should always have a program string.
461  if (!HasProgramString)
462  return llvm::None;
463 
464  return StackWinRecord(RVA, CodeSize, ParameterSize, SavedRegisterSize,
465  LocalSize, Line.trim());
466 }
467 
469  return L.RVA == R.RVA && L.CodeSize == R.CodeSize &&
470  L.ParameterSize == R.ParameterSize &&
473 }
474 
475 llvm::raw_ostream &breakpad::operator<<(llvm::raw_ostream &OS,
476  const StackWinRecord &R) {
477  return OS << llvm::formatv(
478  "STACK WIN 4 {0:x-} {1:x-} ? ? {2} {3} {4} ? 1 {5}", R.RVA,
480  R.ProgramString);
481 }
482 
483 llvm::StringRef breakpad::toString(Record::Kind K) {
484  switch (K) {
485  case Record::Module:
486  return "MODULE";
487  case Record::Info:
488  return "INFO";
489  case Record::File:
490  return "FILE";
491  case Record::Func:
492  return "FUNC";
493  case Record::Line:
494  return "LINE";
495  case Record::Public:
496  return "PUBLIC";
497  case Record::StackCFI:
498  return "STACK CFI";
499  case Record::StackWin:
500  return "STACK WIN";
501  }
502  llvm_unreachable("Unknown record kind!");
503 }
static T consume(llvm::StringRef &Str)
A class that represents a running process on the host machine.
static UUID parseModuleId(llvm::Triple::OSType os, llvm::StringRef str)
llvm::Optional< lldb::addr_t > Size
static llvm::Optional< StackCFIRecord > parse(llvm::StringRef Line)
static llvm::Optional< InfoRecord > parse(llvm::StringRef Line)
static llvm::Optional< PublicRecord > parse(llvm::StringRef Line)
llvm::raw_ostream & operator<<(llvm::raw_ostream &OS, Record::Kind K)
static llvm::Optional< FileRecord > parse(llvm::StringRef Line)
An abstract base class for files.
Definition: File.h:34
static llvm::Optional< ModuleRecord > parse(llvm::StringRef Line)
std::string GetAsString(llvm::StringRef separator="-") const
Definition: UUID.cpp:38
static llvm::Optional< FuncRecord > parse(llvm::StringRef Line)
llvm::StringRef toString(Record::Kind K)
A class that describes an executable image and its associated object and symbol files.
Definition: Module.h:75
Token stringTo< Token >(llvm::StringRef Str)
static T stringTo(llvm::StringRef Str)
static char ID
static llvm::Optional< Kind > classify(llvm::StringRef Line)
Attempt to guess the kind of the record present in the argument without doing a full parse...
A section + offset based address class.
Definition: Address.h:59
static llvm::Optional< LineRecord > parse(llvm::StringRef Line)
static UUID fromData(const void *bytes, uint32_t num_bytes)
Creates a UUID from the data pointed to by the bytes argument.
Definition: UUID.h:28
bool operator==(const ModuleRecord &L, const ModuleRecord &R)
uint64_t addr_t
Definition: lldb-types.h:83
unsigned char uuid_t[16]
static llvm::Optional< StackWinRecord > parse(llvm::StringRef Line)
static bool parsePublicOrFunc(llvm::StringRef Line, bool &Multiple, lldb::addr_t &Address, lldb::addr_t *Size, lldb::addr_t &ParamSize, llvm::StringRef &Name)
bool SetFromStringRef(llvm::StringRef str)
Definition: UUID.cpp:86
static constexpr size_t hex_digits()
Return the number of hex digits needed to encode an (POD) object of a given type. ...