LLDB mainline
Mangled.cpp
Go to the documentation of this file.
1//===-- Mangled.cpp -------------------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "lldb/Core/Mangled.h"
10
17#include "lldb/Utility/Log.h"
19#include "lldb/Utility/Stream.h"
21
22#include "llvm/ADT/StringRef.h"
23#include "llvm/Demangle/Demangle.h"
24#include "llvm/Support/Compiler.h"
25
26#include <mutex>
27#include <string>
28#include <string_view>
29#include <utility>
30
31#include <cstdlib>
32#include <cstring>
33using namespace lldb_private;
34
35static inline bool cstring_is_mangled(llvm::StringRef s) {
37}
38
39#pragma mark Mangled
40
42 if (name.empty())
44
45 if (name.starts_with("?"))
47
48 if (name.starts_with("_R"))
50
51 if (name.starts_with("_D"))
53
54 if (name.starts_with("_Z"))
56
57 // ___Z is a clang extension of block invocations
58 if (name.starts_with("___Z"))
60
61 // Swift's older style of mangling used "_T" as a mangling prefix. This can
62 // lead to false positives with other symbols that just so happen to start
63 // with "_T". To minimize the chance of that happening, we only return true
64 // for select old-style swift mangled names. The known cases are ObjC classes
65 // and protocols. Classes are either prefixed with "_TtC" or "_TtGC".
66 // Protocols are prefixed with "_TtP".
67 if (name.starts_with("_TtC") || name.starts_with("_TtGC") ||
68 name.starts_with("_TtP"))
70
71 // Swift 4.2 used "$S" and "_$S".
72 // Swift 5 and onward uses "$s" and "_$s".
73 // Swift also uses "@__swiftmacro_" as a prefix for mangling filenames.
74 if (name.starts_with("$S") || name.starts_with("_$S") ||
75 name.starts_with("$s") || name.starts_with("_$s") ||
76 name.starts_with("@__swiftmacro_"))
78
80}
81
82Mangled::Mangled(ConstString s) : m_mangled(), m_demangled() {
83 if (s)
84 SetValue(s);
85}
86
87Mangled::Mangled(llvm::StringRef name) {
88 if (!name.empty())
89 SetValue(ConstString(name));
90}
91
92// Convert to bool operator. This allows code to check any Mangled objects
93// to see if they contain anything valid using code such as:
94//
95// Mangled mangled(...);
96// if (mangled)
97// { ...
98Mangled::operator bool() const { return m_mangled || m_demangled; }
99
100// Clear the mangled and demangled values.
104}
105
106// Compare the string values.
107int Mangled::Compare(const Mangled &a, const Mangled &b) {
110}
111
113 if (name) {
114 if (cstring_is_mangled(name.GetStringRef())) {
116 m_mangled = name;
117 } else {
118 m_demangled = name;
120 }
121 } else {
124 }
125}
126
127// Local helpers for different demangling implementations.
128static char *GetMSVCDemangledStr(llvm::StringRef M) {
129 char *demangled_cstr = llvm::microsoftDemangle(
130 M, nullptr, nullptr,
131 llvm::MSDemangleFlags(
132 llvm::MSDF_NoAccessSpecifier | llvm::MSDF_NoCallingConvention |
133 llvm::MSDF_NoMemberType | llvm::MSDF_NoVariableType));
134
135 if (Log *log = GetLog(LLDBLog::Demangle)) {
136 if (demangled_cstr && demangled_cstr[0])
137 LLDB_LOGF(log, "demangled msvc: %s -> \"%s\"", M.data(), demangled_cstr);
138 else
139 LLDB_LOGF(log, "demangled msvc: %s -> error", M.data());
140 }
141
142 return demangled_cstr;
143}
144
145static char *GetItaniumDemangledStr(const char *M) {
146 char *demangled_cstr = nullptr;
147
148 llvm::ItaniumPartialDemangler ipd;
149 bool err = ipd.partialDemangle(M);
150 if (!err) {
151 // Default buffer and size (will realloc in case it's too small).
152 size_t demangled_size = 80;
153 demangled_cstr = static_cast<char *>(std::malloc(demangled_size));
154 demangled_cstr = ipd.finishDemangle(demangled_cstr, &demangled_size);
155
156 assert(demangled_cstr &&
157 "finishDemangle must always succeed if partialDemangle did");
158 assert(demangled_cstr[demangled_size - 1] == '\0' &&
159 "Expected demangled_size to return length including trailing null");
160 }
161
162 if (Log *log = GetLog(LLDBLog::Demangle)) {
163 if (demangled_cstr)
164 LLDB_LOGF(log, "demangled itanium: %s -> \"%s\"", M, demangled_cstr);
165 else
166 LLDB_LOGF(log, "demangled itanium: %s -> error: failed to demangle", M);
167 }
168
169 return demangled_cstr;
170}
171
172static char *GetRustV0DemangledStr(llvm::StringRef M) {
173 char *demangled_cstr = llvm::rustDemangle(M);
174
175 if (Log *log = GetLog(LLDBLog::Demangle)) {
176 if (demangled_cstr && demangled_cstr[0])
177 LLDB_LOG(log, "demangled rustv0: {0} -> \"{1}\"", M, demangled_cstr);
178 else
179 LLDB_LOG(log, "demangled rustv0: {0} -> error: failed to demangle",
180 static_cast<std::string_view>(M));
181 }
182
183 return demangled_cstr;
184}
185
186static char *GetDLangDemangledStr(llvm::StringRef M) {
187 char *demangled_cstr = llvm::dlangDemangle(M);
188
189 if (Log *log = GetLog(LLDBLog::Demangle)) {
190 if (demangled_cstr && demangled_cstr[0])
191 LLDB_LOG(log, "demangled dlang: {0} -> \"{1}\"", M, demangled_cstr);
192 else
193 LLDB_LOG(log, "demangled dlang: {0} -> error: failed to demangle",
194 static_cast<std::string_view>(M));
195 }
196
197 return demangled_cstr;
198}
199
200// Explicit demangling for scheduled requests during batch processing. This
201// makes use of ItaniumPartialDemangler's rich demangle info
203 SkipMangledNameFn *skip_mangled_name) {
204 // Others are not meant to arrive here. ObjC names or C's main() for example
205 // have their names stored in m_demangled, while m_mangled is empty.
206 assert(m_mangled);
207
208 // Check whether or not we are interested in this name at all.
210 if (skip_mangled_name && skip_mangled_name(m_mangled.GetStringRef(), scheme))
211 return false;
212
213 switch (scheme) {
215 // The current mangled_name_filter would allow llvm_unreachable here.
216 return false;
217
219 // We want the rich mangling info here, so we don't care whether or not
220 // there is a demangled string in the pool already.
221 return context.FromItaniumName(m_mangled);
222
223 case eManglingSchemeMSVC: {
224 // We have no rich mangling for MSVC-mangled names yet, so first try to
225 // demangle it if necessary.
227 if (char *d = GetMSVCDemangledStr(m_mangled)) {
228 // Without the rich mangling info we have to demangle the full name.
229 // Copy it to string pool and connect the counterparts to accelerate
230 // later access in GetDemangledName().
232 m_mangled);
233 ::free(d);
234 } else {
236 }
237 }
238
239 if (m_demangled.IsEmpty()) {
240 // Cannot demangle it, so don't try parsing.
241 return false;
242 } else {
243 // Demangled successfully, we can try and parse it with
244 // CPlusPlusLanguage::MethodName.
245 return context.FromCxxMethodName(m_demangled);
246 }
247 }
248
250 case eManglingSchemeD:
252 // Rich demangling scheme is not supported
253 return false;
254 }
255 llvm_unreachable("Fully covered switch above!");
256}
257
258// Generate the demangled name on demand using this accessor. Code in this
259// class will need to use this accessor if it wishes to decode the demangled
260// name. The result is cached and will be kept until a new string value is
261// supplied to this object, or until the end of the object's lifetime.
263 // Check to make sure we have a valid mangled name and that we haven't
264 // already decoded our mangled name.
265 if (m_mangled && m_demangled.IsNull()) {
266 // Don't bother running anything that isn't mangled
267 const char *mangled_name = m_mangled.GetCString();
268 ManglingScheme mangling_scheme =
270 if (mangling_scheme != eManglingSchemeNone &&
272 // We didn't already demangle this name, demangle it and if all goes well
273 // add it to our map.
274 char *demangled_name = nullptr;
275 switch (mangling_scheme) {
277 demangled_name = GetMSVCDemangledStr(mangled_name);
278 break;
280 demangled_name = GetItaniumDemangledStr(mangled_name);
281 break;
282 }
284 demangled_name = GetRustV0DemangledStr(m_mangled);
285 break;
286 case eManglingSchemeD:
287 demangled_name = GetDLangDemangledStr(m_mangled);
288 break;
290 // Demangling a swift name requires the swift compiler. This is
291 // explicitly unsupported on llvm.org.
292 break;
294 llvm_unreachable("eManglingSchemeNone was handled already");
295 }
296 if (demangled_name) {
298 llvm::StringRef(demangled_name), m_mangled);
299 free(demangled_name);
300 }
301 }
302 if (m_demangled.IsNull()) {
303 // Set the demangled string to the empty string to indicate we tried to
304 // parse it once and failed.
306 }
307 }
308
309 return m_demangled;
310}
311
313 return GetDemangledName();
314}
315
316bool Mangled::NameMatches(const RegularExpression &regex) const {
317 if (m_mangled && regex.Execute(m_mangled.GetStringRef()))
318 return true;
319
320 ConstString demangled = GetDemangledName();
321 return demangled && regex.Execute(demangled.GetStringRef());
322}
323
324// Get the demangled name if there is one, else return the mangled name.
326 if (preference == ePreferMangled && m_mangled)
327 return m_mangled;
328
329 // Call the accessor to make sure we get a demangled name in case it hasn't
330 // been demangled yet...
331 ConstString demangled = GetDemangledName();
332
333 if (preference == ePreferDemangledWithoutArguments) {
335 return lang->GetDemangledFunctionNameWithoutArguments(*this);
336 }
337 }
338 if (preference == ePreferDemangled) {
339 if (demangled)
340 return demangled;
341 return m_mangled;
342 }
343 return demangled;
344}
345
346// Dump a Mangled object to stream "s". We don't force our demangled name to be
347// computed currently (we don't use the accessor).
348void Mangled::Dump(Stream *s) const {
349 if (m_mangled) {
350 *s << ", mangled = " << m_mangled;
351 }
352 if (m_demangled) {
353 const char *demangled = m_demangled.AsCString();
354 s->Printf(", demangled = %s", demangled[0] ? demangled : "<error>");
355 }
356}
357
358// Dumps a debug version of this string with extra object and state information
359// to stream "s".
361 s->Printf("%*p: Mangled mangled = ", static_cast<int>(sizeof(void *) * 2),
362 static_cast<const void *>(this));
364 s->Printf(", demangled = ");
366}
367
368// Return the size in bytes that this object takes in memory. The size includes
369// the size of the objects it owns, and not the strings that it references
370// because they are shared strings.
371size_t Mangled::MemorySize() const {
373}
374
375// We "guess" the language because we can't determine a symbol's language from
376// its name. For example, a Pascal symbol can be mangled using the C++
377// Itanium scheme, and defined in a compilation unit within the same module as
378// other C++ units. In addition, different targets could have different ways
379// of mangling names from a given language, likewise the compilation units
380// within those targets.
383 // Ask each language plugin to check if the mangled name belongs to it.
384 Language::ForEach([this, &result](Language *l) {
385 if (l->SymbolNameFitsToLanguage(*this)) {
386 result = l->GetLanguageType();
387 return false;
388 }
389 return true;
390 });
391 return result;
392}
393
394// Dump OBJ to the supplied stream S.
395Stream &operator<<(Stream &s, const Mangled &obj) {
396 if (obj.GetMangledName())
397 s << "mangled = '" << obj.GetMangledName() << "'";
398
399 ConstString demangled = obj.GetDemangledName();
400 if (demangled)
401 s << ", demangled = '" << demangled << '\'';
402 else
403 s << ", demangled = <error>";
404 return s;
405}
406
407// When encoding Mangled objects we can get away with encoding as little
408// information as is required. The enumeration below helps us to efficiently
409// encode Mangled objects.
411 /// If the Mangled object has neither a mangled name nor a demangled name we can
412 /// encode the object with one zero byte using the Empty enumeration.
413 Empty = 0u,
414 /// If the Mangled object has only a demangled name and no mangled name, we
415 /// can encode only the demangled name.
417 /// If the mangle name can calculate the demangled name (it is the
418 /// mangled/demangled counterpart), then we only need to encode the mangled
419 /// name as the demangled name can be recomputed.
421 /// If we have a Mangled object with two different names that are not related
422 /// then we need to save both strings. This can happen if we have a name that
423 /// isn't a true mangled name, but we want to be able to lookup a symbol by
424 /// name and type in the symbol table. We do this for Objective C symbols like
425 /// "OBJC_CLASS_$_NSValue" where the mangled name will be set to
426 /// "OBJC_CLASS_$_NSValue" and the demangled name will be manually set to
427 /// "NSValue". If we tried to demangle the name "OBJC_CLASS_$_NSValue" it
428 /// would fail, but in these cases we want these unrelated names to be
429 /// preserved.
432
433bool Mangled::Decode(const DataExtractor &data, lldb::offset_t *offset_ptr,
434 const StringTableReader &strtab) {
437 MangledEncoding encoding = (MangledEncoding)data.GetU8(offset_ptr);
438 switch (encoding) {
439 case Empty:
440 return true;
441
442 case DemangledOnly:
443 m_demangled.SetString(strtab.Get(data.GetU32(offset_ptr)));
444 return true;
445
446 case MangledOnly:
447 m_mangled.SetString(strtab.Get(data.GetU32(offset_ptr)));
448 return true;
449
451 m_mangled.SetString(strtab.Get(data.GetU32(offset_ptr)));
452 m_demangled.SetString(strtab.Get(data.GetU32(offset_ptr)));
453 return true;
454 }
455 return false;
456}
457/// The encoding format for the Mangled object is as follows:
458///
459/// uint8_t encoding;
460/// char str1[]; (only if DemangledOnly, MangledOnly)
461/// char str2[]; (only if MangledAndDemangled)
462///
463/// The strings are stored as NULL terminated UTF8 strings and str1 and str2
464/// are only saved if we need them based on the encoding.
465///
466/// Some mangled names have a mangled name that can be demangled by the built
467/// in demanglers. These kinds of mangled objects know when the mangled and
468/// demangled names are the counterparts for each other. This is done because
469/// demangling is very expensive and avoiding demangling the same name twice
470/// saves us a lot of compute time. For these kinds of names we only need to
471/// save the mangled name and have the encoding set to "MangledOnly".
472///
473/// If a mangled object has only a demangled name, then we save only that string
474/// and have the encoding set to "DemangledOnly".
475///
476/// Some mangled objects have both mangled and demangled names, but the
477/// demangled name can not be computed from the mangled name. This is often used
478/// for runtime names, like Objective C runtime V2 and V3 names. Both these
479/// names must be saved and the encoding is set to "MangledAndDemangled".
480///
481/// For a Mangled object with no names, we only need to set the encoding to
482/// "Empty" and not store any string values.
483void Mangled::Encode(DataEncoder &file, ConstStringTable &strtab) const {
484 MangledEncoding encoding = Empty;
485 if (m_mangled) {
486 encoding = MangledOnly;
487 if (m_demangled) {
488 // We have both mangled and demangled names. If the demangled name is the
489 // counterpart of the mangled name, then we only need to save the mangled
490 // name. If they are different, we need to save both.
491 ConstString s;
493 encoding = MangledAndDemangled;
494 }
495 } else if (m_demangled) {
496 encoding = DemangledOnly;
497 }
498 file.AppendU8(encoding);
499 switch (encoding) {
500 case Empty:
501 break;
502 case DemangledOnly:
503 file.AppendU32(strtab.Add(m_demangled));
504 break;
505 case MangledOnly:
506 file.AppendU32(strtab.Add(m_mangled));
507 break;
509 file.AppendU32(strtab.Add(m_mangled));
510 file.AppendU32(strtab.Add(m_demangled));
511 break;
512 }
513}
#define LLDB_LOG(log,...)
The LLDB_LOG* macros defined below are the way to emit log messages.
Definition: Log.h:342
#define LLDB_LOGF(log,...)
Definition: Log.h:349
static char * GetDLangDemangledStr(llvm::StringRef M)
Definition: Mangled.cpp:186
static char * GetRustV0DemangledStr(llvm::StringRef M)
Definition: Mangled.cpp:172
MangledEncoding
Definition: Mangled.cpp:410
@ MangledAndDemangled
If we have a Mangled object with two different names that are not related then we need to save both s...
Definition: Mangled.cpp:430
@ DemangledOnly
If the Mangled object has only a demangled name and no mangled named, we can encode only the demangle...
Definition: Mangled.cpp:416
@ Empty
If the Mangled object has neither a mangled name or demangled name we can encode the object with one ...
Definition: Mangled.cpp:413
@ MangledOnly
If the mangle name can calculate the demangled name (it is the mangled/demangled counterpart),...
Definition: Mangled.cpp:420
static bool cstring_is_mangled(llvm::StringRef s)
Definition: Mangled.cpp:35
static char * GetItaniumDemangledStr(const char *M)
Definition: Mangled.cpp:145
static char * GetMSVCDemangledStr(llvm::StringRef M)
Definition: Mangled.cpp:128
Many cache files require string tables to store data efficiently.
uint32_t Add(ConstString s)
Add a string into the string table.
A uniqued constant string class.
Definition: ConstString.h:40
bool GetMangledCounterpart(ConstString &counterpart) const
Retrieve the mangled or demangled counterpart for a mangled or demangled ConstString.
size_t MemorySize() const
Get the memory cost of this object.
Definition: ConstString.h:395
bool IsNull() const
Test for null string.
Definition: ConstString.h:309
void SetCString(const char *cstr)
Set the C string value.
static int Compare(ConstString lhs, ConstString rhs, const bool case_sensitive=true)
Compare two string objects.
const char * AsCString(const char *value_if_empty=nullptr) const
Get the string value as a C string.
Definition: ConstString.h:188
void DumpDebug(Stream *s) const
Dump the object debug description to a stream.
bool IsEmpty() const
Test for empty string.
Definition: ConstString.h:302
llvm::StringRef GetStringRef() const
Get the string value as a llvm::StringRef.
Definition: ConstString.h:197
void SetString(llvm::StringRef s)
void Clear()
Clear this object's state.
Definition: ConstString.h:230
const char * GetCString() const
Get the string value as a C string.
Definition: ConstString.h:214
void SetStringWithMangledCounterpart(llvm::StringRef demangled, ConstString mangled)
Set the C string value and its mangled counterpart.
An binary data encoding class.
Definition: DataEncoder.h:42
void AppendU32(uint32_t value)
void AppendU8(uint8_t value)
Append a unsigned integer to the end of the owned data.
An data extractor class.
Definition: DataExtractor.h:48
uint32_t GetU32(lldb::offset_t *offset_ptr) const
Extract a uint32_t value from *offset_ptr.
uint8_t GetU8(lldb::offset_t *offset_ptr) const
Extract a uint8_t value from *offset_ptr.
static Language * FindPlugin(lldb::LanguageType language)
Definition: Language.cpp:83
static void ForEach(std::function< bool(Language *)> callback)
Definition: Language.cpp:130
A class that handles mangled names.
Definition: Mangled.h:33
void Encode(DataEncoder &encoder, ConstStringTable &strtab) const
Encode this object into a data encoder object.
Definition: Mangled.cpp:483
bool NameMatches(ConstString name) const
Check if "name" matches either the mangled or demangled name.
Definition: Mangled.h:171
static int Compare(const Mangled &lhs, const Mangled &rhs)
Compare the mangled string values.
Definition: Mangled.cpp:107
@ ePreferDemangledWithoutArguments
Definition: Mangled.h:38
Mangled()=default
Default constructor.
void DumpDebug(Stream *s) const
Dump a debug description of this object to a Stream s.
Definition: Mangled.cpp:360
static Mangled::ManglingScheme GetManglingScheme(llvm::StringRef const name)
Try to identify the mangling scheme used.
Definition: Mangled.cpp:41
size_t MemorySize() const
Get the memory cost of this object.
Definition: Mangled.cpp:371
bool GetRichManglingInfo(RichManglingContext &context, SkipMangledNameFn *skip_mangled_name)
Get rich mangling information.
Definition: Mangled.cpp:202
ConstString GetDemangledName() const
Demangled name get accessor.
Definition: Mangled.cpp:262
lldb::LanguageType GuessLanguage() const
Try to guess the language from the mangling.
Definition: Mangled.cpp:381
bool Decode(const DataExtractor &data, lldb::offset_t *offset_ptr, const StringTableReader &strtab)
Decode a serialized version of this object from data.
Definition: Mangled.cpp:433
bool(llvm::StringRef, ManglingScheme) SkipMangledNameFn
Function signature for filtering mangled names.
Definition: Mangled.h:214
ConstString & GetMangledName()
Mangled name get accessor.
Definition: Mangled.h:145
void SetValue(ConstString name)
Set the string value in this object.
Definition: Mangled.cpp:112
ConstString GetName(NamePreference preference=ePreferDemangled) const
Best name get accessor.
Definition: Mangled.cpp:325
ConstString m_mangled
Mangled member variables.
Definition: Mangled.h:280
ConstString m_demangled
Mutable so we can get it on demand with a const version of this object.
Definition: Mangled.h:281
ConstString GetDisplayDemangledName() const
Display demangled name get accessor.
Definition: Mangled.cpp:312
void Dump(Stream *s) const
Dump a description of this object to a Stream s.
Definition: Mangled.cpp:348
void Clear()
Clear the mangled and demangled values.
Definition: Mangled.cpp:101
bool Execute(llvm::StringRef string, llvm::SmallVectorImpl< llvm::StringRef > *matches=nullptr) const
Execute a regular expression match using the compiled regular expression that is already in this obje...
Uniform wrapper for access to rich mangling information from different providers.
bool FromItaniumName(ConstString mangled)
Use the ItaniumPartialDemangler to obtain rich mangling information from the given mangled name.
bool FromCxxMethodName(ConstString demangled)
Use the legacy language parser implementation to obtain rich mangling information from the given dema...
A stream class that can stream formatted output to a file.
Definition: Stream.h:28
size_t Printf(const char *format,...) __attribute__((format(printf
Output printf formatted output to the stream.
Definition: Stream.cpp:134
Many cache files require string tables to store data efficiently.
llvm::StringRef Get(uint32_t offset) const
A class that represents a running process on the host machine.
Definition: SBAttachInfo.h:14
Log * GetLog(Cat mask)
Retrieve the Log object for the channel associated with the given log enum.
Definition: Log.h:314
Stream & operator<<(Stream &s, const Mangled &obj)
uint64_t offset_t
Definition: lldb-types.h:83
LanguageType
Programming language type.
@ eLanguageTypeUnknown
Unknown or invalid language value.