LLDB mainline
Mangled.cpp
Go to the documentation of this file.
1//===-- Mangled.cpp -------------------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "lldb/Core/Mangled.h"
10
17#include "lldb/Utility/Log.h"
19#include "lldb/Utility/Stream.h"
21
22#include "llvm/ADT/StringRef.h"
23#include "llvm/Demangle/Demangle.h"
24#include "llvm/Support/Compiler.h"
25
26#include <mutex>
27#include <string>
28#include <string_view>
29#include <utility>
30
31#include <cstdlib>
32#include <cstring>
33using namespace lldb_private;
34
35static inline bool cstring_is_mangled(llvm::StringRef s) {
37}
38
39#pragma mark Mangled
40
42 if (name.empty())
44
45 if (name.startswith("?"))
47
48 if (name.startswith("_R"))
50
51 if (name.startswith("_D"))
53
54 if (name.startswith("_Z"))
56
57 // ___Z is a clang extension of block invocations
58 if (name.startswith("___Z"))
60
61 // Swift's older style of mangling used "_T" as a mangling prefix. This can
62 // lead to false positives with other symbols that just so happen to start
63 // with "_T". To minimize the chance of that happening, we only return true
64 // for select old-style swift mangled names. The known cases are ObjC classes
65 // and protocols. Classes are either prefixed with "_TtC" or "_TtGC".
66 // Protocols are prefixed with "_TtP".
67 if (name.startswith("_TtC") || name.startswith("_TtGC") ||
68 name.startswith("_TtP"))
70
71 // Swift 4.2 used "$S" and "_$S".
72 // Swift 5 and onward uses "$s" and "_$s".
73 // Swift also uses "@__swiftmacro_" as a prefix for mangling filenames.
74 if (name.startswith("$S") || name.startswith("_$S") ||
75 name.startswith("$s") || name.startswith("_$s") ||
76 name.startswith("@__swiftmacro_"))
78
80}
81
82Mangled::Mangled(ConstString s) : m_mangled(), m_demangled() {
83 if (s)
84 SetValue(s);
85}
86
87Mangled::Mangled(llvm::StringRef name) {
88 if (!name.empty())
89 SetValue(ConstString(name));
90}
91
92// Convert to bool operator. This allows code to check any Mangled objects
93// to see if they contain anything valid using code such as:
94//
95// Mangled mangled(...);
96// if (mangled)
97// { ...
98Mangled::operator bool() const { return m_mangled || m_demangled; }
99
100// Clear the mangled and demangled values.
104}
105
106// Compare the string values.
107int Mangled::Compare(const Mangled &a, const Mangled &b) {
110}
111
113 if (name) {
114 if (cstring_is_mangled(name.GetStringRef())) {
116 m_mangled = name;
117 } else {
118 m_demangled = name;
120 }
121 } else {
124 }
125}
126
127// Local helpers for different demangling implementations.
128static char *GetMSVCDemangledStr(std::string_view M) {
129 char *demangled_cstr = llvm::microsoftDemangle(
130 M, nullptr, nullptr,
131 llvm::MSDemangleFlags(
132 llvm::MSDF_NoAccessSpecifier | llvm::MSDF_NoCallingConvention |
133 llvm::MSDF_NoMemberType | llvm::MSDF_NoVariableType));
134
135 if (Log *log = GetLog(LLDBLog::Demangle)) {
136 if (demangled_cstr && demangled_cstr[0])
137 LLDB_LOGF(log, "demangled msvc: %s -> \"%s\"", M.data(), demangled_cstr);
138 else
139 LLDB_LOGF(log, "demangled msvc: %s -> error", M.data());
140 }
141
142 return demangled_cstr;
143}
144
145static char *GetItaniumDemangledStr(const char *M) {
146 char *demangled_cstr = nullptr;
147
148 llvm::ItaniumPartialDemangler ipd;
149 bool err = ipd.partialDemangle(M);
150 if (!err) {
151 // Default buffer and size (will realloc in case it's too small).
152 size_t demangled_size = 80;
153 demangled_cstr = static_cast<char *>(std::malloc(demangled_size));
154 demangled_cstr = ipd.finishDemangle(demangled_cstr, &demangled_size);
155
156 assert(demangled_cstr &&
157 "finishDemangle must always succeed if partialDemangle did");
158 assert(demangled_cstr[demangled_size - 1] == '\0' &&
159 "Expected demangled_size to return length including trailing null");
160 }
161
162 if (Log *log = GetLog(LLDBLog::Demangle)) {
163 if (demangled_cstr)
164 LLDB_LOGF(log, "demangled itanium: %s -> \"%s\"", M, demangled_cstr);
165 else
166 LLDB_LOGF(log, "demangled itanium: %s -> error: failed to demangle", M);
167 }
168
169 return demangled_cstr;
170}
171
172static char *GetRustV0DemangledStr(std::string_view M) {
173 char *demangled_cstr = llvm::rustDemangle(M);
174
175 if (Log *log = GetLog(LLDBLog::Demangle)) {
176 if (demangled_cstr && demangled_cstr[0])
177 LLDB_LOG(log, "demangled rustv0: {0} -> \"{1}\"", M, demangled_cstr);
178 else
179 LLDB_LOG(log, "demangled rustv0: {0} -> error: failed to demangle", M);
180 }
181
182 return demangled_cstr;
183}
184
185static char *GetDLangDemangledStr(std::string_view M) {
186 char *demangled_cstr = llvm::dlangDemangle(M);
187
188 if (Log *log = GetLog(LLDBLog::Demangle)) {
189 if (demangled_cstr && demangled_cstr[0])
190 LLDB_LOG(log, "demangled dlang: {0} -> \"{1}\"", M, demangled_cstr);
191 else
192 LLDB_LOG(log, "demangled dlang: {0} -> error: failed to demangle", M);
193 }
194
195 return demangled_cstr;
196}
197
198// Explicit demangling for scheduled requests during batch processing. This
199// makes use of ItaniumPartialDemangler's rich demangle info
201 SkipMangledNameFn *skip_mangled_name) {
202 // Others are not meant to arrive here. ObjC names or C's main() for example
203 // have their names stored in m_demangled, while m_mangled is empty.
204 assert(m_mangled);
205
206 // Check whether or not we are interested in this name at all.
208 if (skip_mangled_name && skip_mangled_name(m_mangled.GetStringRef(), scheme))
209 return false;
210
211 switch (scheme) {
213 // The current mangled_name_filter would allow llvm_unreachable here.
214 return false;
215
217 // We want the rich mangling info here, so we don't care whether or not
218 // there is a demangled string in the pool already.
219 return context.FromItaniumName(m_mangled);
220
221 case eManglingSchemeMSVC: {
222 // We have no rich mangling for MSVC-mangled names yet, so first try to
223 // demangle it if necessary.
225 if (char *d = GetMSVCDemangledStr(m_mangled)) {
226 // Without the rich mangling info we have to demangle the full name.
227 // Copy it to string pool and connect the counterparts to accelerate
228 // later access in GetDemangledName().
230 m_mangled);
231 ::free(d);
232 } else {
234 }
235 }
236
237 if (m_demangled.IsEmpty()) {
238 // Cannot demangle it, so don't try parsing.
239 return false;
240 } else {
241 // Demangled successfully, we can try and parse it with
242 // CPlusPlusLanguage::MethodName.
243 return context.FromCxxMethodName(m_demangled);
244 }
245 }
246
248 case eManglingSchemeD:
250 // Rich demangling scheme is not supported
251 return false;
252 }
253 llvm_unreachable("Fully covered switch above!");
254}
255
256// Generate the demangled name on demand using this accessor. Code in this
257// class will need to use this accessor if it wishes to decode the demangled
258// name. The result is cached and will be kept until a new string value is
259// supplied to this object, or until the end of the object's lifetime.
261 // Check to make sure we have a valid mangled name and that we haven't
262 // already decoded our mangled name.
263 if (m_mangled && m_demangled.IsNull()) {
264 // Don't bother running anything that isn't mangled
265 const char *mangled_name = m_mangled.GetCString();
266 ManglingScheme mangling_scheme =
268 if (mangling_scheme != eManglingSchemeNone &&
270 // We didn't already demangle this name, so demangle it and, if all goes well,
271 // add it to our map.
272 char *demangled_name = nullptr;
273 switch (mangling_scheme) {
275 demangled_name = GetMSVCDemangledStr(mangled_name);
276 break;
278 demangled_name = GetItaniumDemangledStr(mangled_name);
279 break;
280 }
282 demangled_name = GetRustV0DemangledStr(m_mangled);
283 break;
284 case eManglingSchemeD:
285 demangled_name = GetDLangDemangledStr(m_mangled);
286 break;
288 // Demangling a swift name requires the swift compiler. This is
289 // explicitly unsupported on llvm.org.
290 break;
292 llvm_unreachable("eManglingSchemeNone was handled already");
293 }
294 if (demangled_name) {
296 llvm::StringRef(demangled_name), m_mangled);
297 free(demangled_name);
298 }
299 }
300 if (m_demangled.IsNull()) {
301 // Set the demangled string to the empty string to indicate we tried to
302 // parse it once and failed.
304 }
305 }
306
307 return m_demangled;
308}
309
311 return GetDemangledName();
312}
313
314bool Mangled::NameMatches(const RegularExpression &regex) const {
315 if (m_mangled && regex.Execute(m_mangled.GetStringRef()))
316 return true;
317
318 ConstString demangled = GetDemangledName();
319 return demangled && regex.Execute(demangled.GetStringRef());
320}
321
322// Get the demangled name if there is one, else return the mangled name.
324 if (preference == ePreferMangled && m_mangled)
325 return m_mangled;
326
327 // Call the accessor to make sure we get a demangled name in case it hasn't
328 // been demangled yet...
329 ConstString demangled = GetDemangledName();
330
331 if (preference == ePreferDemangledWithoutArguments) {
333 return lang->GetDemangledFunctionNameWithoutArguments(*this);
334 }
335 }
336 if (preference == ePreferDemangled) {
337 if (demangled)
338 return demangled;
339 return m_mangled;
340 }
341 return demangled;
342}
343
344// Dump a Mangled object to stream "s". We don't force our demangled name to be
345// computed currently (we don't use the accessor).
346void Mangled::Dump(Stream *s) const {
347 if (m_mangled) {
348 *s << ", mangled = " << m_mangled;
349 }
350 if (m_demangled) {
351 const char *demangled = m_demangled.AsCString();
352 s->Printf(", demangled = %s", demangled[0] ? demangled : "<error>");
353 }
354}
355
356// Dumps a debug version of this string with extra object and state information
357// to stream "s".
359 s->Printf("%*p: Mangled mangled = ", static_cast<int>(sizeof(void *) * 2),
360 static_cast<const void *>(this));
362 s->Printf(", demangled = ");
364}
365
366// Return the size in bytes that this object takes in memory. The size includes
367// the size of the objects it owns, and not the strings that it references
368// because they are shared strings.
369size_t Mangled::MemorySize() const {
371}
372
373// We "guess" the language because we can't determine a symbol's language from
374// its name. For example, a Pascal symbol can be mangled using the C++
375// Itanium scheme, and defined in a compilation unit within the same module as
376// other C++ units. In addition, different targets could have different ways
377// of mangling names from a given language, likewise the compilation units
378// within those targets.
381 // Ask each language plugin to check if the mangled name belongs to it.
382 Language::ForEach([this, &result](Language *l) {
383 if (l->SymbolNameFitsToLanguage(*this)) {
384 result = l->GetLanguageType();
385 return false;
386 }
387 return true;
388 });
389 return result;
390}
391
392// Dump OBJ to the supplied stream S.
393Stream &operator<<(Stream &s, const Mangled &obj) {
394 if (obj.GetMangledName())
395 s << "mangled = '" << obj.GetMangledName() << "'";
396
397 ConstString demangled = obj.GetDemangledName();
398 if (demangled)
399 s << ", demangled = '" << demangled << '\'';
400 else
401 s << ", demangled = <error>";
402 return s;
403}
404
405// When encoding Mangled objects we can get away with encoding as little
406// information as is required. The enumeration below helps us to efficiently
407// encode Mangled objects.
409 /// If the Mangled object has neither a mangled name nor a demangled name we can
410 /// encode the object with one zero byte using the Empty enumeration.
411 Empty = 0u,
412 /// If the Mangled object has only a demangled name and no mangled name, we
413 /// can encode only the demangled name.
415 /// If the demangled name can be calculated from the mangled name (it is the
416 /// mangled/demangled counterpart), then we only need to encode the mangled
417 /// name as the demangled name can be recomputed.
419 /// If we have a Mangled object with two different names that are not related
420 /// then we need to save both strings. This can happen if we have a name that
421 /// isn't a true mangled name, but we want to be able to lookup a symbol by
422 /// name and type in the symbol table. We do this for Objective C symbols like
423 /// "OBJC_CLASS_$_NSValue" where the mangled name will be set to
424 /// "OBJC_CLASS_$_NSValue" and the demangled name will be manually set to
425 /// "NSValue". If we tried to demangle the name "OBJC_CLASS_$_NSValue" it
426 /// would fail, but in these cases we want these unrelated names to be
427 /// preserved.
430
431bool Mangled::Decode(const DataExtractor &data, lldb::offset_t *offset_ptr,
432 const StringTableReader &strtab) {
435 MangledEncoding encoding = (MangledEncoding)data.GetU8(offset_ptr);
436 switch (encoding) {
437 case Empty:
438 return true;
439
440 case DemangledOnly:
441 m_demangled.SetString(strtab.Get(data.GetU32(offset_ptr)));
442 return true;
443
444 case MangledOnly:
445 m_mangled.SetString(strtab.Get(data.GetU32(offset_ptr)));
446 return true;
447
449 m_mangled.SetString(strtab.Get(data.GetU32(offset_ptr)));
450 m_demangled.SetString(strtab.Get(data.GetU32(offset_ptr)));
451 return true;
452 }
453 return false;
454}
455/// The encoding format for the Mangled object is as follows:
456///
457/// uint8_t encoding;
458/// uint32_t str1; (string table offset; present unless the encoding is Empty)
459/// uint32_t str2; (string table offset; only if MangledAndDemangled)
460///
461/// The strings are stored in the string table as NULL terminated UTF8 strings;
462/// only their offsets are written here, and only if the encoding needs them.
463///
464/// Some mangled names have a mangled name that can be demangled by the built
465/// in demanglers. These kinds of mangled objects know when the mangled and
466/// demangled names are the counterparts for each other. This is done because
467/// demangling is very expensive and avoiding demangling the same name twice
468/// saves us a lot of compute time. For these kinds of names we only need to
469/// save the mangled name and have the encoding set to "MangledOnly".
470///
471/// If a mangled object has only a demangled name, then we save only that string
472/// and have the encoding set to "DemangledOnly".
473///
474/// Some mangled objects have both mangled and demangled names, but the
475/// demangled name can not be computed from the mangled name. This is often used
476/// for runtime names, like Objective C runtime V2 and V3 names. Both these
477/// names must be saved and the encoding is set to "MangledAndDemangled".
478///
479/// For a Mangled object with no names, we only need to set the encoding to
480/// "Empty" and not store any string values.
481void Mangled::Encode(DataEncoder &file, ConstStringTable &strtab) const {
482 MangledEncoding encoding = Empty;
483 if (m_mangled) {
484 encoding = MangledOnly;
485 if (m_demangled) {
486 // We have both mangled and demangled names. If the demangled name is the
487 // counterpart of the mangled name, then we only need to save the mangled
488 // name. If they are different, we need to save both.
489 ConstString s;
491 encoding = MangledAndDemangled;
492 }
493 } else if (m_demangled) {
494 encoding = DemangledOnly;
495 }
496 file.AppendU8(encoding);
497 switch (encoding) {
498 case Empty:
499 break;
500 case DemangledOnly:
501 file.AppendU32(strtab.Add(m_demangled));
502 break;
503 case MangledOnly:
504 file.AppendU32(strtab.Add(m_mangled));
505 break;
507 file.AppendU32(strtab.Add(m_mangled));
508 file.AppendU32(strtab.Add(m_demangled));
509 break;
510 }
511}
#define LLDB_LOG(log,...)
The LLDB_LOG* macros defined below are the way to emit log messages.
Definition: Log.h:342
#define LLDB_LOGF(log,...)
Definition: Log.h:349
static char * GetRustV0DemangledStr(std::string_view M)
Definition: Mangled.cpp:172
static char * GetDLangDemangledStr(std::string_view M)
Definition: Mangled.cpp:185
MangledEncoding
Definition: Mangled.cpp:408
@ MangledAndDemangled
If we have a Mangled object with two different names that are not related then we need to save both s...
Definition: Mangled.cpp:428
@ DemangledOnly
If the Mangled object has only a demangled name and no mangled named, we can encode only the demangle...
Definition: Mangled.cpp:414
@ Empty
If the Mangled object has neither a mangled name or demangled name we can encode the object with one ...
Definition: Mangled.cpp:411
@ MangledOnly
If the mangle name can calculate the demangled name (it is the mangled/demangled counterpart),...
Definition: Mangled.cpp:418
static char * GetMSVCDemangledStr(std::string_view M)
Definition: Mangled.cpp:128
static bool cstring_is_mangled(llvm::StringRef s)
Definition: Mangled.cpp:35
static char * GetItaniumDemangledStr(const char *M)
Definition: Mangled.cpp:145
Many cache files require string tables to store data efficiently.
uint32_t Add(ConstString s)
Add a string into the string table.
A uniqued constant string class.
Definition: ConstString.h:40
bool GetMangledCounterpart(ConstString &counterpart) const
Retrieve the mangled or demangled counterpart for a mangled or demangled ConstString.
size_t MemorySize() const
Get the memory cost of this object.
Definition: ConstString.h:386
bool IsNull() const
Test for null string.
Definition: ConstString.h:300
void SetCString(const char *cstr)
Set the C string value.
static int Compare(ConstString lhs, ConstString rhs, const bool case_sensitive=true)
Compare two string objects.
const char * AsCString(const char *value_if_empty=nullptr) const
Get the string value as a C string.
Definition: ConstString.h:182
void DumpDebug(Stream *s) const
Dump the object debug description to a stream.
bool IsEmpty() const
Test for empty string.
Definition: ConstString.h:293
llvm::StringRef GetStringRef() const
Get the string value as a llvm::StringRef.
Definition: ConstString.h:191
void SetString(llvm::StringRef s)
void Clear()
Clear this object's state.
Definition: ConstString.h:221
const char * GetCString() const
Get the string value as a C string.
Definition: ConstString.h:205
void SetStringWithMangledCounterpart(llvm::StringRef demangled, ConstString mangled)
Set the C string value and its mangled counterpart.
An binary data encoding class.
Definition: DataEncoder.h:42
void AppendU32(uint32_t value)
void AppendU8(uint8_t value)
Append a unsigned integer to the end of the owned data.
An data extractor class.
Definition: DataExtractor.h:48
uint32_t GetU32(lldb::offset_t *offset_ptr) const
Extract a uint32_t value from *offset_ptr.
uint8_t GetU8(lldb::offset_t *offset_ptr) const
Extract a uint8_t value from *offset_ptr.
static Language * FindPlugin(lldb::LanguageType language)
Definition: Language.cpp:53
static void ForEach(std::function< bool(Language *)> callback)
Definition: Language.cpp:100
A class that handles mangled names.
Definition: Mangled.h:33
void Encode(DataEncoder &encoder, ConstStringTable &strtab) const
Encode this object into a data encoder object.
Definition: Mangled.cpp:481
bool NameMatches(ConstString name) const
Check if "name" matches either the mangled or demangled name.
Definition: Mangled.h:171
static int Compare(const Mangled &lhs, const Mangled &rhs)
Compare the mangled string values.
Definition: Mangled.cpp:107
@ ePreferDemangledWithoutArguments
Definition: Mangled.h:38
Mangled()=default
Default constructor.
void DumpDebug(Stream *s) const
Dump a debug description of this object to a Stream s.
Definition: Mangled.cpp:358
static Mangled::ManglingScheme GetManglingScheme(llvm::StringRef const name)
Try to identify the mangling scheme used.
Definition: Mangled.cpp:41
size_t MemorySize() const
Get the memory cost of this object.
Definition: Mangled.cpp:369
bool GetRichManglingInfo(RichManglingContext &context, SkipMangledNameFn *skip_mangled_name)
Get rich mangling information.
Definition: Mangled.cpp:200
ConstString GetDemangledName() const
Demangled name get accessor.
Definition: Mangled.cpp:260
lldb::LanguageType GuessLanguage() const
Try to guess the language from the mangling.
Definition: Mangled.cpp:379
bool Decode(const DataExtractor &data, lldb::offset_t *offset_ptr, const StringTableReader &strtab)
Decode a serialized version of this object from data.
Definition: Mangled.cpp:431
bool(llvm::StringRef, ManglingScheme) SkipMangledNameFn
Function signature for filtering mangled names.
Definition: Mangled.h:214
ConstString & GetMangledName()
Mangled name get accessor.
Definition: Mangled.h:145
void SetValue(ConstString name)
Set the string value in this object.
Definition: Mangled.cpp:112
ConstString GetName(NamePreference preference=ePreferDemangled) const
Best name get accessor.
Definition: Mangled.cpp:323
ConstString m_mangled
Mangled member variables.
Definition: Mangled.h:280
ConstString m_demangled
Mutable so we can get it on demand with a const version of this object.
Definition: Mangled.h:281
ConstString GetDisplayDemangledName() const
Display demangled name get accessor.
Definition: Mangled.cpp:310
void Dump(Stream *s) const
Dump a description of this object to a Stream s.
Definition: Mangled.cpp:346
void Clear()
Clear the mangled and demangled values.
Definition: Mangled.cpp:101
bool Execute(llvm::StringRef string, llvm::SmallVectorImpl< llvm::StringRef > *matches=nullptr) const
Execute a regular expression match using the compiled regular expression that is already in this obje...
Uniform wrapper for access to rich mangling information from different providers.
bool FromItaniumName(ConstString mangled)
Use the ItaniumPartialDemangler to obtain rich mangling information from the given mangled name.
bool FromCxxMethodName(ConstString demangled)
Use the legacy language parser implementation to obtain rich mangling information from the given dema...
A stream class that can stream formatted output to a file.
Definition: Stream.h:28
size_t Printf(const char *format,...) __attribute__((format(printf
Output printf formatted output to the stream.
Definition: Stream.cpp:107
Many cache files require string tables to store data efficiently.
llvm::StringRef Get(uint32_t offset) const
A class that represents a running process on the host machine.
Definition: SBAttachInfo.h:14
Log * GetLog(Cat mask)
Retrieve the Log object for the channel associated with the given log enum.
Definition: Log.h:314
Stream & operator<<(Stream &s, const Mangled &obj)
uint64_t offset_t
Definition: lldb-types.h:83
LanguageType
Programming language type.
@ eLanguageTypeUnknown
Unknown or invalid language value.