LLDB mainline
Mangled.cpp
Go to the documentation of this file.
1//===-- Mangled.cpp -------------------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "lldb/Core/Mangled.h"
10
17#include "lldb/Utility/Log.h"
19#include "lldb/Utility/Stream.h"
21
22#include "llvm/ADT/StringExtras.h"
23#include "llvm/ADT/StringRef.h"
24#include "llvm/Demangle/Demangle.h"
25#include "llvm/Support/Compiler.h"
26
27#include <mutex>
28#include <string>
29#include <string_view>
30#include <utility>
31
32#include <cstdlib>
33#include <cstring>
34using namespace lldb_private;
35
36static inline bool cstring_is_mangled(llvm::StringRef s) {
38}
39
40#pragma mark Mangled
41
43 if (name.empty())
45
46 if (name.starts_with("?"))
48
49 if (name.starts_with("_R"))
51
52 if (name.starts_with("_D")) {
53 // A dlang mangled name begins with `_D`, followed by a numeric length. One
54 // known exception is the symbol `_Dmain`.
55 // See `SymbolName` and `LName` in
56 // https://dlang.org/spec/abi.html#name_mangling
57 llvm::StringRef buf = name.drop_front(2);
58 if (!buf.empty() && (llvm::isDigit(buf.front()) || name == "_Dmain"))
60 }
61
62 if (name.starts_with("_Z"))
64
65 // ___Z is a clang extension of block invocations
66 if (name.starts_with("___Z"))
68
69 // Swift's older style of mangling used "_T" as a mangling prefix. This can
70 // lead to false positives with other symbols that just so happen to start
71 // with "_T". To minimize the chance of that happening, we only return true
72 // for select old-style swift mangled names. The known cases are ObjC classes
73 // and protocols. Classes are either prefixed with "_TtC" or "_TtGC".
74 // Protocols are prefixed with "_TtP".
75 if (name.starts_with("_TtC") || name.starts_with("_TtGC") ||
76 name.starts_with("_TtP"))
78
79 // Swift 4.2 used "$S" and "_$S".
80 // Swift 5 and onward uses "$s" and "_$s".
81 // Swift also uses "@__swiftmacro_" as a prefix for mangling filenames.
82 if (name.starts_with("$S") || name.starts_with("_$S") ||
83 name.starts_with("$s") || name.starts_with("_$s") ||
84 name.starts_with("@__swiftmacro_"))
86
88}
89
90Mangled::Mangled(ConstString s) : m_mangled(), m_demangled() {
91 if (s)
92 SetValue(s);
93}
94
95Mangled::Mangled(llvm::StringRef name) {
96 if (!name.empty())
97 SetValue(ConstString(name));
98}
99
100// Convert to bool operator. This allows code to check any Mangled objects
101// to see if they contain anything valid using code such as:
102//
103// Mangled mangled(...);
104// if (mangled)
105// { ...
106Mangled::operator bool() const { return m_mangled || m_demangled; }
107
108// Clear the mangled and demangled values.
112}
113
114// Compare the string values.
115int Mangled::Compare(const Mangled &a, const Mangled &b) {
118}
119
121 if (name) {
122 if (cstring_is_mangled(name.GetStringRef())) {
124 m_mangled = name;
125 } else {
126 m_demangled = name;
128 }
129 } else {
132 }
133}
134
135// Local helpers for different demangling implementations.
136static char *GetMSVCDemangledStr(llvm::StringRef M) {
137 char *demangled_cstr = llvm::microsoftDemangle(
138 M, nullptr, nullptr,
139 llvm::MSDemangleFlags(
140 llvm::MSDF_NoAccessSpecifier | llvm::MSDF_NoCallingConvention |
141 llvm::MSDF_NoMemberType | llvm::MSDF_NoVariableType));
142
143 if (Log *log = GetLog(LLDBLog::Demangle)) {
144 if (demangled_cstr && demangled_cstr[0])
145 LLDB_LOGF(log, "demangled msvc: %s -> \"%s\"", M.data(), demangled_cstr);
146 else
147 LLDB_LOGF(log, "demangled msvc: %s -> error", M.data());
148 }
149
150 return demangled_cstr;
151}
152
153static char *GetItaniumDemangledStr(const char *M) {
154 char *demangled_cstr = nullptr;
155
156 llvm::ItaniumPartialDemangler ipd;
157 bool err = ipd.partialDemangle(M);
158 if (!err) {
159 // Default buffer and size (will realloc in case it's too small).
160 size_t demangled_size = 80;
161 demangled_cstr = static_cast<char *>(std::malloc(demangled_size));
162 demangled_cstr = ipd.finishDemangle(demangled_cstr, &demangled_size);
163
164 assert(demangled_cstr &&
165 "finishDemangle must always succeed if partialDemangle did");
166 assert(demangled_cstr[demangled_size - 1] == '\0' &&
167 "Expected demangled_size to return length including trailing null");
168 }
169
170 if (Log *log = GetLog(LLDBLog::Demangle)) {
171 if (demangled_cstr)
172 LLDB_LOGF(log, "demangled itanium: %s -> \"%s\"", M, demangled_cstr);
173 else
174 LLDB_LOGF(log, "demangled itanium: %s -> error: failed to demangle", M);
175 }
176
177 return demangled_cstr;
178}
179
180static char *GetRustV0DemangledStr(llvm::StringRef M) {
181 char *demangled_cstr = llvm::rustDemangle(M);
182
183 if (Log *log = GetLog(LLDBLog::Demangle)) {
184 if (demangled_cstr && demangled_cstr[0])
185 LLDB_LOG(log, "demangled rustv0: {0} -> \"{1}\"", M, demangled_cstr);
186 else
187 LLDB_LOG(log, "demangled rustv0: {0} -> error: failed to demangle",
188 static_cast<std::string_view>(M));
189 }
190
191 return demangled_cstr;
192}
193
194static char *GetDLangDemangledStr(llvm::StringRef M) {
195 char *demangled_cstr = llvm::dlangDemangle(M);
196
197 if (Log *log = GetLog(LLDBLog::Demangle)) {
198 if (demangled_cstr && demangled_cstr[0])
199 LLDB_LOG(log, "demangled dlang: {0} -> \"{1}\"", M, demangled_cstr);
200 else
201 LLDB_LOG(log, "demangled dlang: {0} -> error: failed to demangle",
202 static_cast<std::string_view>(M));
203 }
204
205 return demangled_cstr;
206}
207
208// Explicit demangling for scheduled requests during batch processing. This
209// makes use of ItaniumPartialDemangler's rich demangle info
211 SkipMangledNameFn *skip_mangled_name) {
212 // Others are not meant to arrive here. ObjC names or C's main() for example
213 // have their names stored in m_demangled, while m_mangled is empty.
214 assert(m_mangled);
215
216 // Check whether or not we are interested in this name at all.
218 if (skip_mangled_name && skip_mangled_name(m_mangled.GetStringRef(), scheme))
219 return false;
220
221 switch (scheme) {
223 // The current mangled_name_filter would allow llvm_unreachable here.
224 return false;
225
227 // We want the rich mangling info here, so we don't care whether or not
228 // there is a demangled string in the pool already.
229 return context.FromItaniumName(m_mangled);
230
231 case eManglingSchemeMSVC: {
232 // We have no rich mangling for MSVC-mangled names yet, so first try to
233 // demangle it if necessary.
235 if (char *d = GetMSVCDemangledStr(m_mangled)) {
236 // Without the rich mangling info we have to demangle the full name.
237 // Copy it to string pool and connect the counterparts to accelerate
238 // later access in GetDemangledName().
240 m_mangled);
241 ::free(d);
242 } else {
244 }
245 }
246
247 if (m_demangled.IsEmpty()) {
248 // Cannot demangle it, so don't try parsing.
249 return false;
250 } else {
251 // Demangled successfully, we can try and parse it with
252 // CPlusPlusLanguage::MethodName.
253 return context.FromCxxMethodName(m_demangled);
254 }
255 }
256
258 case eManglingSchemeD:
260 // Rich demangling scheme is not supported
261 return false;
262 }
263 llvm_unreachable("Fully covered switch above!");
264}
265
266// Generate the demangled name on demand using this accessor. Code in this
267// class will need to use this accessor if it wishes to decode the demangled
268// name. The result is cached and will be kept until a new string value is
269// supplied to this object, or until the end of the object's lifetime.
271 // Check to make sure we have a valid mangled name and that we haven't
272 // already decoded our mangled name.
273 if (m_mangled && m_demangled.IsNull()) {
274 // Don't bother running anything that isn't mangled
275 const char *mangled_name = m_mangled.GetCString();
276 ManglingScheme mangling_scheme =
278 if (mangling_scheme != eManglingSchemeNone &&
280 // We didn't already mangle this name, demangle it and if all goes well
281 // add it to our map.
282 char *demangled_name = nullptr;
283 switch (mangling_scheme) {
285 demangled_name = GetMSVCDemangledStr(mangled_name);
286 break;
288 demangled_name = GetItaniumDemangledStr(mangled_name);
289 break;
290 }
292 demangled_name = GetRustV0DemangledStr(m_mangled);
293 break;
294 case eManglingSchemeD:
295 demangled_name = GetDLangDemangledStr(m_mangled);
296 break;
298 // Demangling a swift name requires the swift compiler. This is
299 // explicitly unsupported on llvm.org.
300 break;
302 llvm_unreachable("eManglingSchemeNone was handled already");
303 }
304 if (demangled_name) {
306 llvm::StringRef(demangled_name), m_mangled);
307 free(demangled_name);
308 }
309 }
310 if (m_demangled.IsNull()) {
311 // Set the demangled string to the empty string to indicate we tried to
312 // parse it once and failed.
314 }
315 }
316
317 return m_demangled;
318}
319
322 return lang->GetDisplayDemangledName(*this);
323 return GetDemangledName();
324}
325
326bool Mangled::NameMatches(const RegularExpression &regex) const {
327 if (m_mangled && regex.Execute(m_mangled.GetStringRef()))
328 return true;
329
330 ConstString demangled = GetDemangledName();
331 return demangled && regex.Execute(demangled.GetStringRef());
332}
333
334// Get the demangled name if there is one, else return the mangled name.
336 if (preference == ePreferMangled && m_mangled)
337 return m_mangled;
338
339 // Call the accessor to make sure we get a demangled name in case it hasn't
340 // been demangled yet...
341 ConstString demangled = GetDemangledName();
342
343 if (preference == ePreferDemangledWithoutArguments) {
345 return lang->GetDemangledFunctionNameWithoutArguments(*this);
346 }
347 }
348 if (preference == ePreferDemangled) {
349 if (demangled)
350 return demangled;
351 return m_mangled;
352 }
353 return demangled;
354}
355
356// Dump a Mangled object to stream "s". We don't force our demangled name to be
357// computed currently (we don't use the accessor).
358void Mangled::Dump(Stream *s) const {
359 if (m_mangled) {
360 *s << ", mangled = " << m_mangled;
361 }
362 if (m_demangled) {
363 const char *demangled = m_demangled.AsCString();
364 s->Printf(", demangled = %s", demangled[0] ? demangled : "<error>");
365 }
366}
367
368// Dumps a debug version of this string with extra object and state information
369// to stream "s".
371 s->Printf("%*p: Mangled mangled = ", static_cast<int>(sizeof(void *) * 2),
372 static_cast<const void *>(this));
374 s->Printf(", demangled = ");
376}
377
378// Return the size in byte that this object takes in memory. The size includes
379// the size of the objects it owns, and not the strings that it references
380// because they are shared strings.
381size_t Mangled::MemorySize() const {
383}
384
385// We "guess" the language because we can't determine a symbol's language from
386// its name. For example, a Pascal symbol can be mangled using the C++
387// Itanium scheme, and defined in a compilation unit within the same module as
388// other C++ units. In addition, different targets could have different ways
389// of mangling names from a given language, likewise the compilation units
390// within those targets.
393 // Ask each language plugin to check if the mangled name belongs to it.
394 Language::ForEach([this, &result](Language *l) {
395 if (l->SymbolNameFitsToLanguage(*this)) {
396 result = l->GetLanguageType();
397 return false;
398 }
399 return true;
400 });
401 return result;
402}
403
404// Dump OBJ to the supplied stream S.
405Stream &operator<<(Stream &s, const Mangled &obj) {
406 if (obj.GetMangledName())
407 s << "mangled = '" << obj.GetMangledName() << "'";
408
409 ConstString demangled = obj.GetDemangledName();
410 if (demangled)
411 s << ", demangled = '" << demangled << '\'';
412 else
413 s << ", demangled = <error>";
414 return s;
415}
416
417// When encoding Mangled objects we can get away with encoding as little
418// information as is required. The enumeration below helps us to efficiently
419// encode Mangled objects.
421 /// If the Mangled object has neither a mangled name nor a demangled name we can
422 /// encode the object with one zero byte using the Empty enumeration.
423 Empty = 0u,
424 /// If the Mangled object has only a demangled name and no mangled name, we
425 /// can encode only the demangled name.
427 /// If the mangled name can calculate the demangled name (it is the
428 /// mangled/demangled counterpart), then we only need to encode the mangled
429 /// name as the demangled name can be recomputed.
431 /// If we have a Mangled object with two different names that are not related
432 /// then we need to save both strings. This can happen if we have a name that
433 /// isn't a true mangled name, but we want to be able to lookup a symbol by
434 /// name and type in the symbol table. We do this for Objective C symbols like
435 /// "OBJC_CLASS_$_NSValue" where the mangled name will be set to
436 /// "OBJC_CLASS_$_NSValue" and the demangled name will be manually set to
437 /// "NSValue". If we tried to demangle the name "OBJC_CLASS_$_NSValue" it
438 /// would fail, but in these cases we want these unrelated names to be
439 /// preserved.
442
443bool Mangled::Decode(const DataExtractor &data, lldb::offset_t *offset_ptr,
444 const StringTableReader &strtab) {
447 MangledEncoding encoding = (MangledEncoding)data.GetU8(offset_ptr);
448 switch (encoding) {
449 case Empty:
450 return true;
451
452 case DemangledOnly:
453 m_demangled.SetString(strtab.Get(data.GetU32(offset_ptr)));
454 return true;
455
456 case MangledOnly:
457 m_mangled.SetString(strtab.Get(data.GetU32(offset_ptr)));
458 return true;
459
461 m_mangled.SetString(strtab.Get(data.GetU32(offset_ptr)));
462 m_demangled.SetString(strtab.Get(data.GetU32(offset_ptr)));
463 return true;
464 }
465 return false;
466}
467/// The encoding format for the Mangled object is as follows:
468///
469/// uint8_t encoding;
470/// char str1[]; (only if DemangledOnly, MangledOnly)
471/// char str2[]; (only if MangledAndDemangled)
472///
473/// The strings are stored as NULL terminated UTF8 strings and str1 and str2
474/// are only saved if we need them based on the encoding.
475///
476/// Some mangled names have a mangled name that can be demangled by the built
477/// in demanglers. These kinds of mangled objects know when the mangled and
478/// demangled names are the counterparts for each other. This is done because
479/// demangling is very expensive and avoiding demangling the same name twice
480/// saves us a lot of compute time. For these kinds of names we only need to
481/// save the mangled name and have the encoding set to "MangledOnly".
482///
483/// If a mangled object has only a demangled name, then we save only that string
484/// and have the encoding set to "DemangledOnly".
485///
486/// Some mangled objects have both mangled and demangled names, but the
487/// demangled name can not be computed from the mangled name. This is often used
488/// for runtime names, like Objective C runtime V2 and V3 names. Both these
489/// names must be saved and the encoding is set to "MangledAndDemangled".
490///
491/// For a Mangled object with no names, we only need to set the encoding to
492/// "Empty" and not store any string values.
493void Mangled::Encode(DataEncoder &file, ConstStringTable &strtab) const {
494 MangledEncoding encoding = Empty;
495 if (m_mangled) {
496 encoding = MangledOnly;
497 if (m_demangled) {
498 // We have both mangled and demangled names. If the demangled name is the
499 // counterpart of the mangled name, then we only need to save the mangled
500 // named. If they are different, we need to save both.
501 ConstString s;
503 encoding = MangledAndDemangled;
504 }
505 } else if (m_demangled) {
506 encoding = DemangledOnly;
507 }
508 file.AppendU8(encoding);
509 switch (encoding) {
510 case Empty:
511 break;
512 case DemangledOnly:
513 file.AppendU32(strtab.Add(m_demangled));
514 break;
515 case MangledOnly:
516 file.AppendU32(strtab.Add(m_mangled));
517 break;
519 file.AppendU32(strtab.Add(m_mangled));
520 file.AppendU32(strtab.Add(m_demangled));
521 break;
522 }
523}
#define LLDB_LOG(log,...)
The LLDB_LOG* macros defined below are the way to emit log messages.
Definition: Log.h:359
#define LLDB_LOGF(log,...)
Definition: Log.h:366
static char * GetDLangDemangledStr(llvm::StringRef M)
Definition: Mangled.cpp:194
static char * GetRustV0DemangledStr(llvm::StringRef M)
Definition: Mangled.cpp:180
MangledEncoding
Definition: Mangled.cpp:420
@ MangledAndDemangled
If we have a Mangled object with two different names that are not related then we need to save both s...
Definition: Mangled.cpp:440
@ DemangledOnly
If the Mangled object has only a demangled name and no mangled named, we can encode only the demangle...
Definition: Mangled.cpp:426
@ Empty
If the Mangled object has neither a mangled name or demangled name we can encode the object with one ...
Definition: Mangled.cpp:423
@ MangledOnly
If the mangle name can calculate the demangled name (it is the mangled/demangled counterpart),...
Definition: Mangled.cpp:430
static bool cstring_is_mangled(llvm::StringRef s)
Definition: Mangled.cpp:36
static char * GetItaniumDemangledStr(const char *M)
Definition: Mangled.cpp:153
static char * GetMSVCDemangledStr(llvm::StringRef M)
Definition: Mangled.cpp:136
Many cache files require string tables to store data efficiently.
uint32_t Add(ConstString s)
Add a string into the string table.
A uniqued constant string class.
Definition: ConstString.h:40
bool GetMangledCounterpart(ConstString &counterpart) const
Retrieve the mangled or demangled counterpart for a mangled or demangled ConstString.
size_t MemorySize() const
Get the memory cost of this object.
Definition: ConstString.h:397
bool IsNull() const
Test for null string.
Definition: ConstString.h:311
void SetCString(const char *cstr)
Set the C string value.
static int Compare(ConstString lhs, ConstString rhs, const bool case_sensitive=true)
Compare two string objects.
const char * AsCString(const char *value_if_empty=nullptr) const
Get the string value as a C string.
Definition: ConstString.h:188
void DumpDebug(Stream *s) const
Dump the object debug description to a stream.
bool IsEmpty() const
Test for empty string.
Definition: ConstString.h:304
llvm::StringRef GetStringRef() const
Get the string value as a llvm::StringRef.
Definition: ConstString.h:197
void SetString(llvm::StringRef s)
void Clear()
Clear this object's state.
Definition: ConstString.h:232
const char * GetCString() const
Get the string value as a C string.
Definition: ConstString.h:216
void SetStringWithMangledCounterpart(llvm::StringRef demangled, ConstString mangled)
Set the C string value and its mangled counterpart.
An binary data encoding class.
Definition: DataEncoder.h:42
void AppendU32(uint32_t value)
void AppendU8(uint8_t value)
Append a unsigned integer to the end of the owned data.
An data extractor class.
Definition: DataExtractor.h:48
uint32_t GetU32(lldb::offset_t *offset_ptr) const
Extract a uint32_t value from *offset_ptr.
uint8_t GetU8(lldb::offset_t *offset_ptr) const
Extract a uint8_t value from *offset_ptr.
static Language * FindPlugin(lldb::LanguageType language)
Definition: Language.cpp:84
static void ForEach(std::function< bool(Language *)> callback)
Definition: Language.cpp:131
A class that handles mangled names.
Definition: Mangled.h:33
void Encode(DataEncoder &encoder, ConstStringTable &strtab) const
Encode this object into a data encoder object.
Definition: Mangled.cpp:493
bool NameMatches(ConstString name) const
Check if "name" matches either the mangled or demangled name.
Definition: Mangled.h:171
static int Compare(const Mangled &lhs, const Mangled &rhs)
Compare the mangled string values.
Definition: Mangled.cpp:115
@ ePreferDemangledWithoutArguments
Definition: Mangled.h:38
Mangled()=default
Default constructor.
void DumpDebug(Stream *s) const
Dump a debug description of this object to a Stream s.
Definition: Mangled.cpp:370
static Mangled::ManglingScheme GetManglingScheme(llvm::StringRef const name)
Try to identify the mangling scheme used.
Definition: Mangled.cpp:42
size_t MemorySize() const
Get the memory cost of this object.
Definition: Mangled.cpp:381
bool GetRichManglingInfo(RichManglingContext &context, SkipMangledNameFn *skip_mangled_name)
Get rich mangling information.
Definition: Mangled.cpp:210
ConstString GetDemangledName() const
Demangled name get accessor.
Definition: Mangled.cpp:270
lldb::LanguageType GuessLanguage() const
Try to guess the language from the mangling.
Definition: Mangled.cpp:391
bool Decode(const DataExtractor &data, lldb::offset_t *offset_ptr, const StringTableReader &strtab)
Decode a serialized version of this object from data.
Definition: Mangled.cpp:443
bool(llvm::StringRef, ManglingScheme) SkipMangledNameFn
Function signature for filtering mangled names.
Definition: Mangled.h:214
ConstString & GetMangledName()
Mangled name get accessor.
Definition: Mangled.h:145
void SetValue(ConstString name)
Set the string value in this object.
Definition: Mangled.cpp:120
ConstString GetName(NamePreference preference=ePreferDemangled) const
Best name get accessor.
Definition: Mangled.cpp:335
ConstString m_mangled
Mangled member variables.
Definition: Mangled.h:280
ConstString m_demangled
Mutable so we can get it on demand with a const version of this object.
Definition: Mangled.h:281
ConstString GetDisplayDemangledName() const
Display demangled name get accessor.
Definition: Mangled.cpp:320
void Dump(Stream *s) const
Dump a description of this object to a Stream s.
Definition: Mangled.cpp:358
void Clear()
Clear the mangled and demangled values.
Definition: Mangled.cpp:109
bool Execute(llvm::StringRef string, llvm::SmallVectorImpl< llvm::StringRef > *matches=nullptr) const
Execute a regular expression match using the compiled regular expression that is already in this obje...
Uniform wrapper for access to rich mangling information from different providers.
bool FromItaniumName(ConstString mangled)
Use the ItaniumPartialDemangler to obtain rich mangling information from the given mangled name.
bool FromCxxMethodName(ConstString demangled)
Use the legacy language parser implementation to obtain rich mangling information from the given dema...
A stream class that can stream formatted output to a file.
Definition: Stream.h:28
size_t Printf(const char *format,...) __attribute__((format(printf
Output printf formatted output to the stream.
Definition: Stream.cpp:134
Many cache files require string tables to store data efficiently.
llvm::StringRef Get(uint32_t offset) const
A class that represents a running process on the host machine.
Log * GetLog(Cat mask)
Retrieve the Log object for the channel associated with the given log enum.
Definition: Log.h:331
Stream & operator<<(Stream &s, const Mangled &obj)
uint64_t offset_t
Definition: lldb-types.h:85
LanguageType
Programming language type.
@ eLanguageTypeUnknown
Unknown or invalid language value.