LLDB mainline
Mangled.cpp
Go to the documentation of this file.
1//===-- Mangled.cpp -------------------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "lldb/Core/Mangled.h"
10
17#include "lldb/Utility/Log.h"
19#include "lldb/Utility/Stream.h"
21
22#include "llvm/ADT/StringExtras.h"
23#include "llvm/ADT/StringRef.h"
24#include "llvm/Demangle/Demangle.h"
25#include "llvm/Support/Compiler.h"
26
27#include <mutex>
28#include <string>
29#include <string_view>
30#include <utility>
31
32#include <cstdlib>
33#include <cstring>
34using namespace lldb_private;
35
36static inline bool cstring_is_mangled(llvm::StringRef s) {
38}
39
40#pragma mark Mangled
41
43 if (name.empty())
45
46 if (name.starts_with("?"))
48
49 if (name.starts_with("_R"))
51
52 if (name.starts_with("_D")) {
53 // A dlang mangled name begins with `_D`, followed by a numeric length. One
54 // known exception is the symbol `_Dmain`.
55 // See `SymbolName` and `LName` in
56 // https://dlang.org/spec/abi.html#name_mangling
57 llvm::StringRef buf = name.drop_front(2);
58 if (!buf.empty() && (llvm::isDigit(buf.front()) || name == "_Dmain"))
60 }
61
62 if (name.starts_with("_Z"))
64
65 // ___Z is a clang extension of block invocations
66 if (name.starts_with("___Z"))
68
69 // Swift's older style of mangling used "_T" as a mangling prefix. This can
70 // lead to false positives with other symbols that just so happen to start
71 // with "_T". To minimize the chance of that happening, we only return true
72 // for select old-style swift mangled names. The known cases are ObjC classes
73 // and protocols. Classes are either prefixed with "_TtC" or "_TtGC".
74 // Protocols are prefixed with "_TtP".
75 if (name.starts_with("_TtC") || name.starts_with("_TtGC") ||
76 name.starts_with("_TtP"))
78
79 // Swift 4.2 used "$S" and "_$S".
80 // Swift 5 and onward uses "$s" and "_$s".
81 // Swift also uses "@__swiftmacro_" as a prefix for mangling filenames.
82 // Embedded Swift introduced "$e" and "_$e" as Swift mangling prefixes.
83 if (name.starts_with("$S") || name.starts_with("_$S") ||
84 name.starts_with("$s") || name.starts_with("_$s") ||
85 name.starts_with("$e") || name.starts_with("_$e") ||
86 name.starts_with("@__swiftmacro_"))
88
90}
91
92Mangled::Mangled(ConstString s) : m_mangled(), m_demangled() {
93 if (s)
94 SetValue(s);
95}
96
97Mangled::Mangled(llvm::StringRef name) {
98 if (!name.empty())
99 SetValue(ConstString(name));
100}
101
102// Convert to bool operator. This allows code to check any Mangled objects
103// to see if they contain anything valid using code such as:
104//
105// Mangled mangled(...);
106// if (mangled)
107// { ...
108Mangled::operator bool() const { return m_mangled || m_demangled; }
109
110// Clear the mangled and demangled values.
114}
115
116// Compare the string values.
117int Mangled::Compare(const Mangled &a, const Mangled &b) {
120}
121
123 if (name) {
124 if (cstring_is_mangled(name.GetStringRef())) {
126 m_mangled = name;
127 } else {
128 m_demangled = name;
130 }
131 } else {
134 }
135}
136
137// Local helpers for different demangling implementations.
138static char *GetMSVCDemangledStr(llvm::StringRef M) {
139 char *demangled_cstr = llvm::microsoftDemangle(
140 M, nullptr, nullptr,
141 llvm::MSDemangleFlags(
142 llvm::MSDF_NoAccessSpecifier | llvm::MSDF_NoCallingConvention |
143 llvm::MSDF_NoMemberType | llvm::MSDF_NoVariableType));
144
145 if (Log *log = GetLog(LLDBLog::Demangle)) {
146 if (demangled_cstr && demangled_cstr[0])
147 LLDB_LOGF(log, "demangled msvc: %s -> \"%s\"", M.data(), demangled_cstr);
148 else
149 LLDB_LOGF(log, "demangled msvc: %s -> error", M.data());
150 }
151
152 return demangled_cstr;
153}
154
155static char *GetItaniumDemangledStr(const char *M) {
156 char *demangled_cstr = nullptr;
157
158 llvm::ItaniumPartialDemangler ipd;
159 bool err = ipd.partialDemangle(M);
160 if (!err) {
161 // Default buffer and size (will realloc in case it's too small).
162 size_t demangled_size = 80;
163 demangled_cstr = static_cast<char *>(std::malloc(demangled_size));
164 demangled_cstr = ipd.finishDemangle(demangled_cstr, &demangled_size);
165
166 assert(demangled_cstr &&
167 "finishDemangle must always succeed if partialDemangle did");
168 assert(demangled_cstr[demangled_size - 1] == '\0' &&
169 "Expected demangled_size to return length including trailing null");
170 }
171
172 if (Log *log = GetLog(LLDBLog::Demangle)) {
173 if (demangled_cstr)
174 LLDB_LOGF(log, "demangled itanium: %s -> \"%s\"", M, demangled_cstr);
175 else
176 LLDB_LOGF(log, "demangled itanium: %s -> error: failed to demangle", M);
177 }
178
179 return demangled_cstr;
180}
181
182static char *GetRustV0DemangledStr(llvm::StringRef M) {
183 char *demangled_cstr = llvm::rustDemangle(M);
184
185 if (Log *log = GetLog(LLDBLog::Demangle)) {
186 if (demangled_cstr && demangled_cstr[0])
187 LLDB_LOG(log, "demangled rustv0: {0} -> \"{1}\"", M, demangled_cstr);
188 else
189 LLDB_LOG(log, "demangled rustv0: {0} -> error: failed to demangle",
190 static_cast<std::string_view>(M));
191 }
192
193 return demangled_cstr;
194}
195
196static char *GetDLangDemangledStr(llvm::StringRef M) {
197 char *demangled_cstr = llvm::dlangDemangle(M);
198
199 if (Log *log = GetLog(LLDBLog::Demangle)) {
200 if (demangled_cstr && demangled_cstr[0])
201 LLDB_LOG(log, "demangled dlang: {0} -> \"{1}\"", M, demangled_cstr);
202 else
203 LLDB_LOG(log, "demangled dlang: {0} -> error: failed to demangle",
204 static_cast<std::string_view>(M));
205 }
206
207 return demangled_cstr;
208}
209
210// Explicit demangling for scheduled requests during batch processing. This
211// makes use of ItaniumPartialDemangler's rich demangle info
213 SkipMangledNameFn *skip_mangled_name) {
214 // Others are not meant to arrive here. ObjC names or C's main() for example
215 // have their names stored in m_demangled, while m_mangled is empty.
216 assert(m_mangled);
217
218 // Check whether or not we are interested in this name at all.
220 if (skip_mangled_name && skip_mangled_name(m_mangled.GetStringRef(), scheme))
221 return false;
222
223 switch (scheme) {
225 // The current mangled_name_filter would allow llvm_unreachable here.
226 return false;
227
229 // We want the rich mangling info here, so we don't care whether or not
230 // there is a demangled string in the pool already.
231 return context.FromItaniumName(m_mangled);
232
233 case eManglingSchemeMSVC: {
234 // We have no rich mangling for MSVC-mangled names yet, so first try to
235 // demangle it if necessary.
237 if (char *d = GetMSVCDemangledStr(m_mangled)) {
238 // Without the rich mangling info we have to demangle the full name.
239 // Copy it to string pool and connect the counterparts to accelerate
240 // later access in GetDemangledName().
242 m_mangled);
243 ::free(d);
244 } else {
246 }
247 }
248
249 if (m_demangled.IsEmpty()) {
250 // Cannot demangle it, so don't try parsing.
251 return false;
252 } else {
253 // Demangled successfully, we can try and parse it with
254 // CPlusPlusLanguage::MethodName.
255 return context.FromCxxMethodName(m_demangled);
256 }
257 }
258
260 case eManglingSchemeD:
262 // Rich demangling scheme is not supported
263 return false;
264 }
265 llvm_unreachable("Fully covered switch above!");
266}
267
268// Generate the demangled name on demand using this accessor. Code in this
269// class will need to use this accessor if it wishes to decode the demangled
270// name. The result is cached and will be kept until a new string value is
271// supplied to this object, or until the end of the object's lifetime.
273 // Check to make sure we have a valid mangled name and that we haven't
274 // already decoded our mangled name.
275 if (m_mangled && m_demangled.IsNull()) {
276 // Don't bother running anything that isn't mangled
277 const char *mangled_name = m_mangled.GetCString();
278 ManglingScheme mangling_scheme =
280 if (mangling_scheme != eManglingSchemeNone &&
282 // We didn't already mangle this name, demangle it and if all goes well
283 // add it to our map.
284 char *demangled_name = nullptr;
285 switch (mangling_scheme) {
287 demangled_name = GetMSVCDemangledStr(mangled_name);
288 break;
290 demangled_name = GetItaniumDemangledStr(mangled_name);
291 break;
292 }
294 demangled_name = GetRustV0DemangledStr(m_mangled);
295 break;
296 case eManglingSchemeD:
297 demangled_name = GetDLangDemangledStr(m_mangled);
298 break;
300 // Demangling a swift name requires the swift compiler. This is
301 // explicitly unsupported on llvm.org.
302 break;
304 llvm_unreachable("eManglingSchemeNone was handled already");
305 }
306 if (demangled_name) {
308 llvm::StringRef(demangled_name), m_mangled);
309 free(demangled_name);
310 }
311 }
312 if (m_demangled.IsNull()) {
313 // Set the demangled string to the empty string to indicate we tried to
314 // parse it once and failed.
316 }
317 }
318
319 return m_demangled;
320}
321
324 return lang->GetDisplayDemangledName(*this);
325 return GetDemangledName();
326}
327
328bool Mangled::NameMatches(const RegularExpression &regex) const {
329 if (m_mangled && regex.Execute(m_mangled.GetStringRef()))
330 return true;
331
332 ConstString demangled = GetDemangledName();
333 return demangled && regex.Execute(demangled.GetStringRef());
334}
335
336// Get the demangled name if there is one, else return the mangled name.
338 if (preference == ePreferMangled && m_mangled)
339 return m_mangled;
340
341 // Call the accessor to make sure we get a demangled name in case it hasn't
342 // been demangled yet...
343 ConstString demangled = GetDemangledName();
344
345 if (preference == ePreferDemangledWithoutArguments) {
347 return lang->GetDemangledFunctionNameWithoutArguments(*this);
348 }
349 }
350 if (preference == ePreferDemangled) {
351 if (demangled)
352 return demangled;
353 return m_mangled;
354 }
355 return demangled;
356}
357
358// Dump a Mangled object to stream "s". We don't force our demangled name to be
359// computed currently (we don't use the accessor).
360void Mangled::Dump(Stream *s) const {
361 if (m_mangled) {
362 *s << ", mangled = " << m_mangled;
363 }
364 if (m_demangled) {
365 const char *demangled = m_demangled.AsCString();
366 s->Printf(", demangled = %s", demangled[0] ? demangled : "<error>");
367 }
368}
369
370// Dumps a debug version of this string with extra object and state information
371// to stream "s".
373 s->Printf("%*p: Mangled mangled = ", static_cast<int>(sizeof(void *) * 2),
374 static_cast<const void *>(this));
376 s->Printf(", demangled = ");
378}
379
380// Return the size in bytes that this object takes in memory. The size includes
381// the size of the objects it owns, and not the strings that it references
382// because they are shared strings.
383size_t Mangled::MemorySize() const {
385}
386
387// We "guess" the language because we can't determine a symbol's language from
388// its name. For example, a Pascal symbol can be mangled using the C++
389// Itanium scheme, and defined in a compilation unit within the same module as
390// other C++ units. In addition, different targets could have different ways
391// of mangling names from a given language, likewise the compilation units
392// within those targets.
395 // Ask each language plugin to check if the mangled name belongs to it.
396 Language::ForEach([this, &result](Language *l) {
397 if (l->SymbolNameFitsToLanguage(*this)) {
398 result = l->GetLanguageType();
399 return false;
400 }
401 return true;
402 });
403 return result;
404}
405
406// Dump OBJ to the supplied stream S.
407Stream &operator<<(Stream &s, const Mangled &obj) {
408 if (obj.GetMangledName())
409 s << "mangled = '" << obj.GetMangledName() << "'";
410
411 ConstString demangled = obj.GetDemangledName();
412 if (demangled)
413 s << ", demangled = '" << demangled << '\'';
414 else
415 s << ", demangled = <error>";
416 return s;
417}
418
419// When encoding Mangled objects we can get away with encoding as little
420// information as is required. The enumeration below helps us to efficiently
421// encode Mangled objects.
423 /// If the Mangled object has neither a mangled name or demangled name we can
424 /// encode the object with one zero byte using the Empty enumeration.
425 Empty = 0u,
426 /// If the Mangled object has only a demangled name and no mangled named, we
427 /// can encode only the demangled name.
429 /// If the mangle name can calculate the demangled name (it is the
430 /// mangled/demangled counterpart), then we only need to encode the mangled
431 /// name as the demangled name can be recomputed.
433 /// If we have a Mangled object with two different names that are not related
434 /// then we need to save both strings. This can happen if we have a name that
435 /// isn't a true mangled name, but we want to be able to lookup a symbol by
436 /// name and type in the symbol table. We do this for Objective C symbols like
437 /// "OBJC_CLASS_$_NSValue" where the mangled named will be set to
438 /// "OBJC_CLASS_$_NSValue" and the demangled name will be manually set to
439 /// "NSValue". If we tried to demangled the name "OBJC_CLASS_$_NSValue" it
440 /// would fail, but in these cases we want these unrelated names to be
441 /// preserved.
444
445bool Mangled::Decode(const DataExtractor &data, lldb::offset_t *offset_ptr,
446 const StringTableReader &strtab) {
449 MangledEncoding encoding = (MangledEncoding)data.GetU8(offset_ptr);
450 switch (encoding) {
451 case Empty:
452 return true;
453
454 case DemangledOnly:
455 m_demangled.SetString(strtab.Get(data.GetU32(offset_ptr)));
456 return true;
457
458 case MangledOnly:
459 m_mangled.SetString(strtab.Get(data.GetU32(offset_ptr)));
460 return true;
461
463 m_mangled.SetString(strtab.Get(data.GetU32(offset_ptr)));
464 m_demangled.SetString(strtab.Get(data.GetU32(offset_ptr)));
465 return true;
466 }
467 return false;
468}
469/// The encoding format for the Mangled object is as follows:
470///
471/// uint8_t encoding;
472/// char str1[]; (only if DemangledOnly, MangledOnly)
473/// char str2[]; (only if MangledAndDemangled)
474///
475/// The strings are stored as NULL terminated UTF8 strings and str1 and str2
476/// are only saved if we need them based on the encoding.
477///
478/// Some mangled names have a mangled name that can be demangled by the built
479/// in demanglers. These kinds of mangled objects know when the mangled and
480/// demangled names are the counterparts for each other. This is done because
481/// demangling is very expensive and avoiding demangling the same name twice
482/// saves us a lot of compute time. For these kinds of names we only need to
483/// save the mangled name and have the encoding set to "MangledOnly".
484///
485/// If a mangled object has only a demangled name, then we save only that string
486/// and have the encoding set to "DemangledOnly".
487///
488/// Some mangled objects have both mangled and demangled names, but the
489/// demangled name can not be computed from the mangled name. This is often used
490/// for runtime names, like Objective C runtime V2 and V3 names. Both these
491/// names must be saved and the encoding is set to "MangledAndDemangled".
492///
493/// For a Mangled object with no names, we only need to set the encoding to
494/// "Empty" and not store any string values.
495void Mangled::Encode(DataEncoder &file, ConstStringTable &strtab) const {
496 MangledEncoding encoding = Empty;
497 if (m_mangled) {
498 encoding = MangledOnly;
499 if (m_demangled) {
500 // We have both mangled and demangled names. If the demangled name is the
501 // counterpart of the mangled name, then we only need to save the mangled
502 // named. If they are different, we need to save both.
503 ConstString s;
505 encoding = MangledAndDemangled;
506 }
507 } else if (m_demangled) {
508 encoding = DemangledOnly;
509 }
510 file.AppendU8(encoding);
511 switch (encoding) {
512 case Empty:
513 break;
514 case DemangledOnly:
515 file.AppendU32(strtab.Add(m_demangled));
516 break;
517 case MangledOnly:
518 file.AppendU32(strtab.Add(m_mangled));
519 break;
521 file.AppendU32(strtab.Add(m_mangled));
522 file.AppendU32(strtab.Add(m_demangled));
523 break;
524 }
525}
#define LLDB_LOG(log,...)
The LLDB_LOG* macros defined below are the way to emit log messages.
Definition: Log.h:369
#define LLDB_LOGF(log,...)
Definition: Log.h:376
static char * GetDLangDemangledStr(llvm::StringRef M)
Definition: Mangled.cpp:196
static char * GetRustV0DemangledStr(llvm::StringRef M)
Definition: Mangled.cpp:182
MangledEncoding
Definition: Mangled.cpp:422
@ MangledAndDemangled
If we have a Mangled object with two different names that are not related then we need to save both s...
Definition: Mangled.cpp:442
@ DemangledOnly
If the Mangled object has only a demangled name and no mangled named, we can encode only the demangle...
Definition: Mangled.cpp:428
@ Empty
If the Mangled object has neither a mangled name or demangled name we can encode the object with one ...
Definition: Mangled.cpp:425
@ MangledOnly
If the mangle name can calculate the demangled name (it is the mangled/demangled counterpart),...
Definition: Mangled.cpp:432
static bool cstring_is_mangled(llvm::StringRef s)
Definition: Mangled.cpp:36
static char * GetItaniumDemangledStr(const char *M)
Definition: Mangled.cpp:155
static char * GetMSVCDemangledStr(llvm::StringRef M)
Definition: Mangled.cpp:138
Many cache files require string tables to store data efficiently.
uint32_t Add(ConstString s)
Add a string into the string table.
A uniqued constant string class.
Definition: ConstString.h:40
bool GetMangledCounterpart(ConstString &counterpart) const
Retrieve the mangled or demangled counterpart for a mangled or demangled ConstString.
size_t MemorySize() const
Get the memory cost of this object.
Definition: ConstString.h:397
bool IsNull() const
Test for null string.
Definition: ConstString.h:311
void SetCString(const char *cstr)
Set the C string value.
static int Compare(ConstString lhs, ConstString rhs, const bool case_sensitive=true)
Compare two string objects.
const char * AsCString(const char *value_if_empty=nullptr) const
Get the string value as a C string.
Definition: ConstString.h:188
void DumpDebug(Stream *s) const
Dump the object debug description to a stream.
bool IsEmpty() const
Test for empty string.
Definition: ConstString.h:304
llvm::StringRef GetStringRef() const
Get the string value as a llvm::StringRef.
Definition: ConstString.h:197
void SetString(llvm::StringRef s)
void Clear()
Clear this object's state.
Definition: ConstString.h:232
const char * GetCString() const
Get the string value as a C string.
Definition: ConstString.h:216
void SetStringWithMangledCounterpart(llvm::StringRef demangled, ConstString mangled)
Set the C string value and its mangled counterpart.
A binary data encoding class.
Definition: DataEncoder.h:42
void AppendU32(uint32_t value)
void AppendU8(uint8_t value)
Append a unsigned integer to the end of the owned data.
A data extractor class.
Definition: DataExtractor.h:48
uint32_t GetU32(lldb::offset_t *offset_ptr) const
Extract a uint32_t value from *offset_ptr.
uint8_t GetU8(lldb::offset_t *offset_ptr) const
Extract a uint8_t value from *offset_ptr.
static Language * FindPlugin(lldb::LanguageType language)
Definition: Language.cpp:84
static void ForEach(std::function< bool(Language *)> callback)
Definition: Language.cpp:131
A class that handles mangled names.
Definition: Mangled.h:33
void Encode(DataEncoder &encoder, ConstStringTable &strtab) const
Encode this object into a data encoder object.
Definition: Mangled.cpp:495
bool NameMatches(ConstString name) const
Check if "name" matches either the mangled or demangled name.
Definition: Mangled.h:171
static int Compare(const Mangled &lhs, const Mangled &rhs)
Compare the mangled string values.
Definition: Mangled.cpp:117
@ ePreferDemangledWithoutArguments
Definition: Mangled.h:38
Mangled()=default
Default constructor.
void DumpDebug(Stream *s) const
Dump a debug description of this object to a Stream s.
Definition: Mangled.cpp:372
static Mangled::ManglingScheme GetManglingScheme(llvm::StringRef const name)
Try to identify the mangling scheme used.
Definition: Mangled.cpp:42
size_t MemorySize() const
Get the memory cost of this object.
Definition: Mangled.cpp:383
bool GetRichManglingInfo(RichManglingContext &context, SkipMangledNameFn *skip_mangled_name)
Get rich mangling information.
Definition: Mangled.cpp:212
ConstString GetDemangledName() const
Demangled name get accessor.
Definition: Mangled.cpp:272
lldb::LanguageType GuessLanguage() const
Try to guess the language from the mangling.
Definition: Mangled.cpp:393
bool Decode(const DataExtractor &data, lldb::offset_t *offset_ptr, const StringTableReader &strtab)
Decode a serialized version of this object from data.
Definition: Mangled.cpp:445
bool(llvm::StringRef, ManglingScheme) SkipMangledNameFn
Function signature for filtering mangled names.
Definition: Mangled.h:214
ConstString & GetMangledName()
Mangled name get accessor.
Definition: Mangled.h:145
void SetValue(ConstString name)
Set the string value in this object.
Definition: Mangled.cpp:122
ConstString GetName(NamePreference preference=ePreferDemangled) const
Best name get accessor.
Definition: Mangled.cpp:337
ConstString m_mangled
Mangled member variables.
Definition: Mangled.h:280
ConstString m_demangled
Mutable so we can get it on demand with a const version of this object.
Definition: Mangled.h:281
ConstString GetDisplayDemangledName() const
Display demangled name get accessor.
Definition: Mangled.cpp:322
void Dump(Stream *s) const
Dump a description of this object to a Stream s.
Definition: Mangled.cpp:360
void Clear()
Clear the mangled and demangled values.
Definition: Mangled.cpp:111
bool Execute(llvm::StringRef string, llvm::SmallVectorImpl< llvm::StringRef > *matches=nullptr) const
Execute a regular expression match using the compiled regular expression that is already in this obje...
Uniform wrapper for access to rich mangling information from different providers.
bool FromItaniumName(ConstString mangled)
Use the ItaniumPartialDemangler to obtain rich mangling information from the given mangled name.
bool FromCxxMethodName(ConstString demangled)
Use the legacy language parser implementation to obtain rich mangling information from the given dema...
A stream class that can stream formatted output to a file.
Definition: Stream.h:28
size_t Printf(const char *format,...) __attribute__((format(printf
Output printf formatted output to the stream.
Definition: Stream.cpp:134
Many cache files require string tables to store data efficiently.
llvm::StringRef Get(uint32_t offset) const
A class that represents a running process on the host machine.
Log * GetLog(Cat mask)
Retrieve the Log object for the channel associated with the given log enum.
Definition: Log.h:332
Stream & operator<<(Stream &s, const Mangled &obj)
uint64_t offset_t
Definition: lldb-types.h:85
LanguageType
Programming language type.
@ eLanguageTypeUnknown
Unknown or invalid language value.