LLDB mainline
BreakpadRecords.cpp
Go to the documentation of this file.
1//===-- BreakpadRecords.cpp -----------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
10#include "lldb/lldb-defines.h"
11#include "llvm/ADT/StringExtras.h"
12#include "llvm/ADT/StringSwitch.h"
13#include "llvm/Support/Endian.h"
14#include "llvm/Support/FormatVariadic.h"
15#include <optional>
16
17using namespace lldb_private;
18using namespace lldb_private::breakpad;
19
namespace {
/// Keywords recognised in breakpad records. Unknown is the value used for any
/// string that is not one of the known keywords.
enum class Token {
  Unknown,
  Module,
  Info,
  CodeID,
  File,
  Func,
  Inline,
  InlineOrigin,
  Public,
  Stack,
  CFI,
  Init,
  Win,
};
} // namespace
37
38template<typename T>
39static T stringTo(llvm::StringRef Str);
40
41template <> Token stringTo<Token>(llvm::StringRef Str) {
42 return llvm::StringSwitch<Token>(Str)
43 .Case("MODULE", Token::Module)
44 .Case("INFO", Token::Info)
45 .Case("CODE_ID", Token::CodeID)
46 .Case("FILE", Token::File)
47 .Case("FUNC", Token::Func)
48 .Case("INLINE", Token::Inline)
49 .Case("INLINE_ORIGIN", Token::InlineOrigin)
50 .Case("PUBLIC", Token::Public)
51 .Case("STACK", Token::Stack)
52 .Case("CFI", Token::CFI)
53 .Case("INIT", Token::Init)
54 .Case("WIN", Token::Win)
55 .Default(Token::Unknown);
56}
57
58template <>
59llvm::Triple::OSType stringTo<llvm::Triple::OSType>(llvm::StringRef Str) {
60 using llvm::Triple;
61 return llvm::StringSwitch<Triple::OSType>(Str)
62 .Case("Linux", Triple::Linux)
63 .Case("mac", Triple::MacOSX)
64 .Case("windows", Triple::Win32)
65 .Default(Triple::UnknownOS);
66}
67
68template <>
69llvm::Triple::ArchType stringTo<llvm::Triple::ArchType>(llvm::StringRef Str) {
70 using llvm::Triple;
71 return llvm::StringSwitch<Triple::ArchType>(Str)
72 .Case("arm", Triple::arm)
73 .Cases("arm64", "arm64e", Triple::aarch64)
74 .Case("mips", Triple::mips)
75 .Case("msp430", Triple::msp430)
76 .Case("ppc", Triple::ppc)
77 .Case("ppc64", Triple::ppc64)
78 .Case("s390", Triple::systemz)
79 .Case("sparc", Triple::sparc)
80 .Case("sparcv9", Triple::sparcv9)
81 .Case("x86", Triple::x86)
82 .Cases("x86_64", "x86_64h", Triple::x86_64)
83 .Default(Triple::UnknownArch);
84}
85
86template<typename T>
87static T consume(llvm::StringRef &Str) {
88 llvm::StringRef Token;
89 std::tie(Token, Str) = getToken(Str);
90 return stringTo<T>(Token);
91}
92
/// Return the number of hex digits needed to encode a (POD) object of a given
/// type: two digits per byte.
template <typename T> static constexpr size_t hex_digits() {
  return 2 * sizeof(T);
}
98
99static UUID parseModuleId(llvm::Triple::OSType os, llvm::StringRef str) {
100 struct data_t {
101 using uuid_t = uint8_t[16];
102 uuid_t uuid;
103 llvm::support::ubig32_t age;
104 } data;
105 static_assert(sizeof(data) == 20);
106 // The textual module id encoding should be between 33 and 40 bytes long,
107 // depending on the size of the age field, which is of variable length.
108 // The first three chunks of the id are encoded in big endian, so we need to
109 // byte-swap those.
110 if (str.size() <= hex_digits<data_t::uuid_t>() ||
111 str.size() > hex_digits<data_t>())
112 return UUID();
113 if (!all_of(str, llvm::isHexDigit))
114 return UUID();
115
116 llvm::StringRef uuid_str = str.take_front(hex_digits<data_t::uuid_t>());
117 llvm::StringRef age_str = str.drop_front(hex_digits<data_t::uuid_t>());
118
119 llvm::copy(fromHex(uuid_str), data.uuid);
120 uint32_t age;
121 bool success = to_integer(age_str, age, 16);
122 assert(success);
124 data.age = age;
125
126 // On non-windows, the age field should always be zero, so we don't include to
127 // match the native uuid format of these platforms.
128 return UUID(&data, os == llvm::Triple::Win32 ? sizeof(data)
129 : sizeof(data.uuid));
130}
131
132std::optional<Record::Kind> Record::classify(llvm::StringRef Line) {
133 Token Tok = consume<Token>(Line);
134 switch (Tok) {
135 case Token::Module:
136 return Record::Module;
137 case Token::Info:
138 return Record::Info;
139 case Token::File:
140 return Record::File;
141 case Token::Func:
142 return Record::Func;
143 case Token::Public:
144 return Record::Public;
145 case Token::Stack:
146 Tok = consume<Token>(Line);
147 switch (Tok) {
148 case Token::CFI:
149 return Record::StackCFI;
150 case Token::Win:
151 return Record::StackWin;
152 default:
153 return std::nullopt;
154 }
155 case Token::Inline:
156 return Record::Inline;
157 case Token::InlineOrigin:
159 case Token::Unknown:
160 // Optimistically assume that any unrecognised token means this is a line
161 // record, those don't have a special keyword and start directly with a
162 // hex number.
163 return Record::Line;
164
165 case Token::CodeID:
166 case Token::CFI:
167 case Token::Init:
168 case Token::Win:
169 // These should never appear at the start of a valid record.
170 return std::nullopt;
171 }
172 llvm_unreachable("Fully covered switch above!");
173}
174
175std::optional<ModuleRecord> ModuleRecord::parse(llvm::StringRef Line) {
176 // MODULE Linux x86_64 E5894855C35DCCCCCCCCCCCCCCCCCCCC0 a.out
177 if (consume<Token>(Line) != Token::Module)
178 return std::nullopt;
179
180 llvm::Triple::OSType OS = consume<llvm::Triple::OSType>(Line);
181 if (OS == llvm::Triple::UnknownOS)
182 return std::nullopt;
183
184 llvm::Triple::ArchType Arch = consume<llvm::Triple::ArchType>(Line);
185 if (Arch == llvm::Triple::UnknownArch)
186 return std::nullopt;
187
188 llvm::StringRef Str;
189 std::tie(Str, Line) = getToken(Line);
190 UUID ID = parseModuleId(OS, Str);
191 if (!ID)
192 return std::nullopt;
193
194 return ModuleRecord(OS, Arch, std::move(ID));
195}
196
197llvm::raw_ostream &breakpad::operator<<(llvm::raw_ostream &OS,
198 const ModuleRecord &R) {
199 return OS << "MODULE " << llvm::Triple::getOSTypeName(R.OS) << " "
200 << llvm::Triple::getArchTypeName(R.Arch) << " "
201 << R.ID.GetAsString();
202}
203
204std::optional<InfoRecord> InfoRecord::parse(llvm::StringRef Line) {
205 // INFO CODE_ID 554889E55DC3CCCCCCCCCCCCCCCCCCCC [a.exe]
206 if (consume<Token>(Line) != Token::Info)
207 return std::nullopt;
208
209 if (consume<Token>(Line) != Token::CodeID)
210 return std::nullopt;
211
212 llvm::StringRef Str;
213 std::tie(Str, Line) = getToken(Line);
214 // If we don't have any text following the code ID (e.g. on linux), we should
215 // use this as the UUID. Otherwise, we should revert back to the module ID.
216 UUID ID;
217 if (Line.trim().empty()) {
218 if (Str.empty() || !ID.SetFromStringRef(Str))
219 return std::nullopt;
220 }
221 return InfoRecord(std::move(ID));
222}
223
224llvm::raw_ostream &breakpad::operator<<(llvm::raw_ostream &OS,
225 const InfoRecord &R) {
226 return OS << "INFO CODE_ID " << R.ID.GetAsString();
227}
228
229template <typename T>
230static std::optional<T> parseNumberName(llvm::StringRef Line, Token TokenType) {
231 // TOKEN number name
232 if (consume<Token>(Line) != TokenType)
233 return std::nullopt;
234
235 llvm::StringRef Str;
236 size_t Number;
237 std::tie(Str, Line) = getToken(Line);
238 if (!to_integer(Str, Number))
239 return std::nullopt;
240
241 llvm::StringRef Name = Line.trim();
242 if (Name.empty())
243 return std::nullopt;
244
245 return T(Number, Name);
246}
247
248std::optional<FileRecord> FileRecord::parse(llvm::StringRef Line) {
249 // FILE number name
250 return parseNumberName<FileRecord>(Line, Token::File);
251}
252
253llvm::raw_ostream &breakpad::operator<<(llvm::raw_ostream &OS,
254 const FileRecord &R) {
255 return OS << "FILE " << R.Number << " " << R.Name;
256}
257
258std::optional<InlineOriginRecord>
259InlineOriginRecord::parse(llvm::StringRef Line) {
260 // INLINE_ORIGIN number name
261 return parseNumberName<InlineOriginRecord>(Line, Token::InlineOrigin);
262}
263
264llvm::raw_ostream &breakpad::operator<<(llvm::raw_ostream &OS,
265 const InlineOriginRecord &R) {
266 return OS << "INLINE_ORIGIN " << R.Number << " " << R.Name;
267}
268
269static bool parsePublicOrFunc(llvm::StringRef Line, bool &Multiple,
271 lldb::addr_t &ParamSize, llvm::StringRef &Name) {
272 // PUBLIC [m] address param_size name
273 // or
274 // FUNC [m] address size param_size name
275
276 Token Tok = Size ? Token::Func : Token::Public;
277
278 if (consume<Token>(Line) != Tok)
279 return false;
280
281 llvm::StringRef Str;
282 std::tie(Str, Line) = getToken(Line);
283 Multiple = Str == "m";
284
285 if (Multiple)
286 std::tie(Str, Line) = getToken(Line);
287 if (!to_integer(Str, Address, 16))
288 return false;
289
290 if (Tok == Token::Func) {
291 std::tie(Str, Line) = getToken(Line);
292 if (!to_integer(Str, *Size, 16))
293 return false;
294 }
295
296 std::tie(Str, Line) = getToken(Line);
297 if (!to_integer(Str, ParamSize, 16))
298 return false;
299
300 Name = Line.trim();
301 if (Name.empty())
302 return false;
303
304 return true;
305}
306
307std::optional<FuncRecord> FuncRecord::parse(llvm::StringRef Line) {
308 bool Multiple;
310 llvm::StringRef Name;
311
314
315 return std::nullopt;
316}
317
319 return L.Multiple == R.Multiple && L.Address == R.Address &&
320 L.Size == R.Size && L.ParamSize == R.ParamSize && L.Name == R.Name;
321}
322llvm::raw_ostream &breakpad::operator<<(llvm::raw_ostream &OS,
323 const FuncRecord &R) {
324 return OS << llvm::formatv("FUNC {0}{1:x-} {2:x-} {3:x-} {4}",
325 R.Multiple ? "m " : "", R.Address, R.Size,
326 R.ParamSize, R.Name);
327}
328
329std::optional<InlineRecord> InlineRecord::parse(llvm::StringRef Line) {
330 // INLINE inline_nest_level call_site_line call_site_file_num origin_num
331 // [address size]+
332 if (consume<Token>(Line) != Token::Inline)
333 return std::nullopt;
334
335 llvm::SmallVector<llvm::StringRef> Tokens;
336 SplitString(Line, Tokens, " ");
337 if (Tokens.size() < 6 || Tokens.size() % 2 == 1)
338 return std::nullopt;
339
340 size_t InlineNestLevel;
341 uint32_t CallSiteLineNum;
342 size_t CallSiteFileNum;
343 size_t OriginNum;
344 if (!(to_integer(Tokens[0], InlineNestLevel) &&
345 to_integer(Tokens[1], CallSiteLineNum) &&
346 to_integer(Tokens[2], CallSiteFileNum) &&
347 to_integer(Tokens[3], OriginNum)))
348 return std::nullopt;
349
352 for (size_t i = 4; i < Tokens.size(); i += 2) {
354 if (!to_integer(Tokens[i], Address, 16))
355 return std::nullopt;
356 lldb::addr_t Size;
357 if (!to_integer(Tokens[i + 1].trim(), Size, 16))
358 return std::nullopt;
359 Record.Ranges.emplace_back(Address, Size);
360 }
361 return Record;
362}
363
365 return L.InlineNestLevel == R.InlineNestLevel &&
368 L.Ranges == R.Ranges;
369}
370
371llvm::raw_ostream &breakpad::operator<<(llvm::raw_ostream &OS,
372 const InlineRecord &R) {
373 OS << llvm::formatv("INLINE {0} {1} {2} {3}", R.InlineNestLevel,
375 for (const auto &range : R.Ranges) {
376 OS << llvm::formatv(" {0:x-} {1:x-}", range.first, range.second);
377 }
378 return OS;
379}
380
381std::optional<LineRecord> LineRecord::parse(llvm::StringRef Line) {
383 llvm::StringRef Str;
384 std::tie(Str, Line) = getToken(Line);
385 if (!to_integer(Str, Address, 16))
386 return std::nullopt;
387
389 std::tie(Str, Line) = getToken(Line);
390 if (!to_integer(Str, Size, 16))
391 return std::nullopt;
392
393 uint32_t LineNum;
394 std::tie(Str, Line) = getToken(Line);
395 if (!to_integer(Str, LineNum))
396 return std::nullopt;
397
398 size_t FileNum;
399 std::tie(Str, Line) = getToken(Line);
400 if (!to_integer(Str, FileNum))
401 return std::nullopt;
402
404}
405
407 return L.Address == R.Address && L.Size == R.Size && L.LineNum == R.LineNum &&
408 L.FileNum == R.FileNum;
409}
410llvm::raw_ostream &breakpad::operator<<(llvm::raw_ostream &OS,
411 const LineRecord &R) {
412 return OS << llvm::formatv("{0:x-} {1:x-} {2} {3}", R.Address, R.Size,
413 R.LineNum, R.FileNum);
414}
415
416std::optional<PublicRecord> PublicRecord::parse(llvm::StringRef Line) {
417 bool Multiple;
419 llvm::StringRef Name;
420
423
424 return std::nullopt;
425}
426
428 return L.Multiple == R.Multiple && L.Address == R.Address &&
429 L.ParamSize == R.ParamSize && L.Name == R.Name;
430}
431llvm::raw_ostream &breakpad::operator<<(llvm::raw_ostream &OS,
432 const PublicRecord &R) {
433 return OS << llvm::formatv("PUBLIC {0}{1:x-} {2:x-} {3}",
434 R.Multiple ? "m " : "", R.Address, R.ParamSize,
435 R.Name);
436}
437
438std::optional<StackCFIRecord> StackCFIRecord::parse(llvm::StringRef Line) {
439 // STACK CFI INIT address size reg1: expr1 reg2: expr2 ...
440 // or
441 // STACK CFI address reg1: expr1 reg2: expr2 ...
442 // No token in exprN ends with a colon.
443
444 if (consume<Token>(Line) != Token::Stack)
445 return std::nullopt;
446 if (consume<Token>(Line) != Token::CFI)
447 return std::nullopt;
448
449 llvm::StringRef Str;
450 std::tie(Str, Line) = getToken(Line);
451
452 bool IsInitRecord = stringTo<Token>(Str) == Token::Init;
453 if (IsInitRecord)
454 std::tie(Str, Line) = getToken(Line);
455
457 if (!to_integer(Str, Address, 16))
458 return std::nullopt;
459
460 std::optional<lldb::addr_t> Size;
461 if (IsInitRecord) {
462 Size.emplace();
463 std::tie(Str, Line) = getToken(Line);
464 if (!to_integer(Str, *Size, 16))
465 return std::nullopt;
466 }
467
468 return StackCFIRecord(Address, Size, Line.trim());
469}
470
472 return L.Address == R.Address && L.Size == R.Size &&
474}
475
476llvm::raw_ostream &breakpad::operator<<(llvm::raw_ostream &OS,
477 const StackCFIRecord &R) {
478 OS << "STACK CFI ";
479 if (R.Size)
480 OS << "INIT ";
481 OS << llvm::formatv("{0:x-} ", R.Address);
482 if (R.Size)
483 OS << llvm::formatv("{0:x-} ", *R.Size);
484 return OS << " " << R.UnwindRules;
485}
486
487std::optional<StackWinRecord> StackWinRecord::parse(llvm::StringRef Line) {
488 // STACK WIN type rva code_size prologue_size epilogue_size parameter_size
489 // saved_register_size local_size max_stack_size has_program_string
490 // program_string_OR_allocates_base_pointer
491
492 if (consume<Token>(Line) != Token::Stack)
493 return std::nullopt;
494 if (consume<Token>(Line) != Token::Win)
495 return std::nullopt;
496
497 llvm::StringRef Str;
498 uint8_t Type;
499 std::tie(Str, Line) = getToken(Line);
500 // Right now we only support the "FrameData" frame type.
501 if (!to_integer(Str, Type) || FrameType(Type) != FrameType::FrameData)
502 return std::nullopt;
503
505 std::tie(Str, Line) = getToken(Line);
506 if (!to_integer(Str, RVA, 16))
507 return std::nullopt;
508
510 std::tie(Str, Line) = getToken(Line);
511 if (!to_integer(Str, CodeSize, 16))
512 return std::nullopt;
513
514 // Skip fields which we aren't using right now.
515 std::tie(Str, Line) = getToken(Line); // prologue_size
516 std::tie(Str, Line) = getToken(Line); // epilogue_size
517
519 std::tie(Str, Line) = getToken(Line);
520 if (!to_integer(Str, ParameterSize, 16))
521 return std::nullopt;
522
524 std::tie(Str, Line) = getToken(Line);
525 if (!to_integer(Str, SavedRegisterSize, 16))
526 return std::nullopt;
527
529 std::tie(Str, Line) = getToken(Line);
530 if (!to_integer(Str, LocalSize, 16))
531 return std::nullopt;
532
533 std::tie(Str, Line) = getToken(Line); // max_stack_size
534
535 uint8_t HasProgramString;
536 std::tie(Str, Line) = getToken(Line);
537 if (!to_integer(Str, HasProgramString))
538 return std::nullopt;
539 // FrameData records should always have a program string.
540 if (!HasProgramString)
541 return std::nullopt;
542
544 LocalSize, Line.trim());
545}
546
548 return L.RVA == R.RVA && L.CodeSize == R.CodeSize &&
552}
553
554llvm::raw_ostream &breakpad::operator<<(llvm::raw_ostream &OS,
555 const StackWinRecord &R) {
556 return OS << llvm::formatv(
557 "STACK WIN 4 {0:x-} {1:x-} ? ? {2} {3} {4} ? 1 {5}", R.RVA,
559 R.ProgramString);
560}
561
563 switch (K) {
564 case Record::Module:
565 return "MODULE";
566 case Record::Info:
567 return "INFO";
568 case Record::File:
569 return "FILE";
570 case Record::Func:
571 return "FUNC";
572 case Record::Inline:
573 return "INLINE";
575 return "INLINE_ORIGIN";
576 case Record::Line:
577 return "LINE";
578 case Record::Public:
579 return "PUBLIC";
580 case Record::StackCFI:
581 return "STACK CFI";
582 case Record::StackWin:
583 return "STACK WIN";
584 }
585 llvm_unreachable("Unknown record kind!");
586}
unsigned char uuid_t[16]
static T consume(llvm::StringRef &Str)
static UUID parseModuleId(llvm::Triple::OSType os, llvm::StringRef str)
static constexpr size_t hex_digits()
Return the number of hex digits needed to encode an (POD) object of a given type.
llvm::Triple::ArchType stringTo< llvm::Triple::ArchType >(llvm::StringRef Str)
static T stringTo(llvm::StringRef Str)
llvm::Triple::OSType stringTo< llvm::Triple::OSType >(llvm::StringRef Str)
static bool parsePublicOrFunc(llvm::StringRef Line, bool &Multiple, lldb::addr_t &Address, lldb::addr_t *Size, lldb::addr_t &ParamSize, llvm::StringRef &Name)
Token stringTo< Token >(llvm::StringRef Str)
static std::optional< T > parseNumberName(llvm::StringRef Line, Token TokenType)
A section + offset based address class.
Definition: Address.h:62
An abstract base class for files.
Definition: File.h:36
A class that describes an executable image and its associated object and symbol files.
Definition: Module.h:88
bool SetFromStringRef(llvm::StringRef str)
Definition: UUID.cpp:97
std::string GetAsString(llvm::StringRef separator="-") const
Definition: UUID.cpp:49
static std::optional< FileRecord > parse(llvm::StringRef Line)
static std::optional< FuncRecord > parse(llvm::StringRef Line)
static std::optional< InfoRecord > parse(llvm::StringRef Line)
static std::optional< InlineOriginRecord > parse(llvm::StringRef Line)
static std::optional< InlineRecord > parse(llvm::StringRef Line)
std::vector< std::pair< lldb::addr_t, lldb::addr_t > > Ranges
static std::optional< LineRecord > parse(llvm::StringRef Line)
static std::optional< ModuleRecord > parse(llvm::StringRef Line)
static std::optional< PublicRecord > parse(llvm::StringRef Line)
static std::optional< Kind > classify(llvm::StringRef Line)
Attempt to guess the kind of the record present in the argument without doing a full parse.
std::optional< lldb::addr_t > Size
static std::optional< StackCFIRecord > parse(llvm::StringRef Line)
static std::optional< StackWinRecord > parse(llvm::StringRef Line)
#define UNUSED_IF_ASSERT_DISABLED(x)
Definition: lldb-defines.h:140
llvm::raw_ostream & operator<<(llvm::raw_ostream &OS, Record::Kind K)
bool operator==(const ModuleRecord &L, const ModuleRecord &R)
llvm::StringRef toString(Record::Kind K)
A class that represents a running process on the host machine.
uint64_t addr_t
Definition: lldb-types.h:80