LLDB mainline
CPlusPlusNameParser.cpp
Go to the documentation of this file.
1//===-- CPlusPlusNameParser.cpp -------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
10
11#include "clang/Basic/IdentifierTable.h"
12#include "clang/Basic/TokenKinds.h"
13#include "llvm/ADT/StringMap.h"
14#include "llvm/Support/Threading.h"
15#include <optional>
16
17using namespace lldb;
18using namespace lldb_private;
21namespace tok = clang::tok;
22
/// Parses the token stream as a complete function definition.
///
/// Three interpretations are attempted in order:
///  1. a function without a return type      -> ParseFunctionImpl(false)
///  2. a function returning a function pointer -> ParseFuncPtr(true)
///  3. a function with an ordinary return type -> ParseFunctionImpl(true)
/// Interpretations 1 and 3 are accepted only when no tokens are left over.
///
/// \return The parsed function, or std::nullopt if no interpretation fits.
//
// NOTE(review): the listing numbers embedded in this extract jump from 23 to
// 25, so a statement may be missing at the top of the body -- confirm against
// the upstream file before relying on this copy.
23std::optional<ParsedFunction> CPlusPlusNameParser::ParseAsFunctionDefinition() {
25 std::optional<ParsedFunction> result(std::nullopt);
26
27 // Try to parse the name as function without a return type specified e.g.
28 // main(int, char*[])
29 {
 // NOTE(review): start_position is never Remove()d in this scope; presumably
 // the Bookmark rewinds the parser when it is destroyed, so that the next
 // attempt starts from the same token -- confirm against the Bookmark class.
30 Bookmark start_position = SetBookmark();
31 result = ParseFunctionImpl(false);
 // A match only counts if it consumed the entire input.
32 if (result && !HasMoreTokens())
33 return result;
34 }
35
36 // Try to parse the name as function with function pointer return type e.g.
37 // void (*get_func(const char*))()
 // NOTE(review): unlike the other two attempts, this result is accepted
 // without a HasMoreTokens() check.
38 result = ParseFuncPtr(true);
39 if (result)
40 return result;
41
42 // Finally try to parse the name as a function with non-function return type
43 // e.g. int main(int, char*[])
44 result = ParseFunctionImpl(true);
 // Trailing tokens mean the input was not a single function definition.
45 if (HasMoreTokens())
46 return std::nullopt;
47 return result;
48}
49
/// Parses the token stream as a (possibly qualified) name with nothing else
/// following it.
///
/// \return A ParsedName whose basename and context hold the text of the
///         ranges reported by ParseFullNameImpl(), or std::nullopt when the
///         name cannot be parsed or tokens remain after it.
//
// NOTE(review): the listing numbers embedded in this extract jump from 50 to
// 52, so a statement may be missing at the top of the body -- confirm against
// the upstream file before relying on this copy.
50std::optional<ParsedName> CPlusPlusNameParser::ParseAsFullName() {
52 std::optional<ParsedNameRanges> name_ranges = ParseFullNameImpl();
53 if (!name_ranges)
54 return std::nullopt;
 // Reject input that has anything after the name.
55 if (HasMoreTokens())
56 return std::nullopt;
57 ParsedName result;
 // Convert the token ranges into slices of the original text.
58 result.basename = GetTextForRange(name_ranges->basename_range);
59 result.context = GetTextForRange(name_ranges->context_range);
60 return result;
61}
62
64 return m_next_token_index < m_tokens.size();
65}
66
68
70
71bool CPlusPlusNameParser::ConsumeToken(tok::TokenKind kind) {
72 if (!HasMoreTokens())
73 return false;
74
75 if (!Peek().is(kind))
76 return false;
77
78 Advance();
79 return true;
80}
81
82template <typename... Ts> bool CPlusPlusNameParser::ConsumeToken(Ts... kinds) {
83 if (!HasMoreTokens())
84 return false;
85
86 if (!Peek().isOneOf(kinds...))
87 return false;
88
89 Advance();
90 return true;
91}
92
95}
96
98
100 assert(HasMoreTokens());
102}
103
104std::optional<ParsedFunction>
106 Bookmark start_position = SetBookmark();
107
108 ParsedFunction result;
109 if (expect_return_type) {
110 size_t return_start = GetCurrentPosition();
111 // Consume return type if it's expected.
112 if (!ConsumeToken(tok::kw_auto) && !ConsumeTypename())
113 return std::nullopt;
114
115 size_t return_end = GetCurrentPosition();
116 result.return_type = GetTextForRange(Range(return_start, return_end));
117 }
118
119 auto maybe_name = ParseFullNameImpl();
120 if (!maybe_name) {
121 return std::nullopt;
122 }
123
124 size_t argument_start = GetCurrentPosition();
125 if (!ConsumeArguments()) {
126 return std::nullopt;
127 }
128
129 size_t qualifiers_start = GetCurrentPosition();
131 size_t end_position = GetCurrentPosition();
132
133 result.name.basename = GetTextForRange(maybe_name->basename_range);
134 result.name.context = GetTextForRange(maybe_name->context_range);
135 result.arguments = GetTextForRange(Range(argument_start, qualifiers_start));
136 result.qualifiers = GetTextForRange(Range(qualifiers_start, end_position));
137 start_position.Remove();
138 return result;
139}
140
/// Parses a function definition whose return type is a (possibly nested)
/// function pointer, peeling one pointer layer per recursion level.
///
/// \param expect_return_type
///     When true, a leading typename is consumed first. The recursive call
///     passes false because only pointer layers remain at that point.
/// \return The ParsedFunction produced by ParseFunctionImpl(false) for the
///         innermost function, or std::nullopt if the tokens do not have the
///         expected shape.
//
// NOTE(review): the listing numbers embedded in this extract skip 182 and 211
// (inside the two success paths below); a statement may be missing at each
// spot -- confirm against the upstream file before relying on this copy.
141std::optional<ParsedFunction>
142CPlusPlusNameParser::ParseFuncPtr(bool expect_return_type) {
143 // This function parses a function definition
144 // that returns a pointer type.
145 // E.g., double (*(*func(long))(int))(float)
146
147 // Step 1:
148 // Remove the return type of the innermost
149 // function pointer type.
150 //
151 // Leaves us with:
152 // (*(*func(long))(int))(float)
 // NOTE(review): presumably the Bookmark rewinds the parser on every early
 // return below unless Remove() was called -- confirm against Bookmark.
153 Bookmark start_position = SetBookmark();
154 if (expect_return_type) {
155 // Consume return type.
156 if (!ConsumeTypename())
157 return std::nullopt;
158 }
159
160 // Step 2:
161 //
162 // Skip a pointer and parenthesis pair.
163 //
164 // Leaves us with:
165 // (*func(long))(int))(float)
166 if (!ConsumeToken(tok::l_paren))
167 return std::nullopt;
 // At least one pointer/reference/cv token is required after the '('.
168 if (!ConsumePtrsAndRefs())
169 return std::nullopt;
170
171 // Step 3:
172 //
173 // Consume inner function name. This will fail unless
174 // we stripped all the pointers on the left hand side
175 // of the function name.
176 {
177 Bookmark before_inner_function_pos = SetBookmark();
178 auto maybe_inner_function_name = ParseFunctionImpl(false);
 // Success requires the inner function, the ')' closing this pointer layer
 // and the argument list of the pointed-to function type, in that order.
179 if (maybe_inner_function_name)
180 if (ConsumeToken(tok::r_paren))
181 if (ConsumeArguments()) {
183 start_position.Remove();
184 before_inner_function_pos.Remove();
185 return maybe_inner_function_name;
186 }
187 }
188
189 // Step 4:
190 //
191 // Parse the remaining string as a function pointer again.
192 // This time don't consume the inner-most typename since
193 // we're left with pointers only. This will strip another
194 // layer of pointers until we're left with the innermost
195 // function name/argument. I.e., func(long))(int))(float)
196 //
197 // Once we successfully stripped all pointers and gotten
198 // the innermost function name from ParseFunctionImpl above,
199 // we consume a single ')' and the arguments '(...)' that follows.
200 //
201 // Leaves us with:
202 // )(float)
203 //
204 // This is the remnant of the outer function pointers' arguments.
205 // Unwinding the recursive calls will remove the remaining
206 // arguments.
207 auto maybe_inner_function_ptr_name = ParseFuncPtr(false);
 // Each recursion level closes its own layer: one ')' plus one '(...)'.
208 if (maybe_inner_function_ptr_name)
209 if (ConsumeToken(tok::r_paren))
210 if (ConsumeArguments()) {
212 start_position.Remove();
213 return maybe_inner_function_ptr_name;
214 }
215
216 return std::nullopt;
217}
218
220 return ConsumeBrackets(tok::l_paren, tok::r_paren);
221}
222
224 Bookmark start_position = SetBookmark();
225 if (!HasMoreTokens() || Peek().getKind() != tok::less)
226 return false;
227 Advance();
228
229 // Consuming template arguments is a bit trickier than consuming function
230 // arguments, because '<' '>' brackets are not always trivially balanced. In
231 // some rare cases tokens '<' and '>' can appear inside template arguments as
232 // arithmetic or shift operators not as template brackets. Examples:
233 // std::enable_if<(10u)<(64), bool>
234 // f<A<operator<(X,Y)::Subclass>>
235 // Fortunately, the compiler makes sure that truly ambiguous uses of '>'
236 // are enclosed within '()' brackets.
237 int template_counter = 1;
238 bool can_open_template = false;
239 while (HasMoreTokens() && template_counter > 0) {
240 tok::TokenKind kind = Peek().getKind();
241 switch (kind) {
242 case tok::greatergreater:
243 template_counter -= 2;
244 can_open_template = false;
245 Advance();
246 break;
247 case tok::greater:
248 --template_counter;
249 can_open_template = false;
250 Advance();
251 break;
252 case tok::less:
253 // '<' is an attempt to open a subtemplate. Check whether the parser is at
254 // a point where that is actually possible; otherwise it is just part of
255 // an expression like 'sizeof(T)<(10)'. No need to do the same for '>'
256 // because the compiler makes sure that '>' is always surrounded by
257 // brackets to avoid ambiguity.
258 if (can_open_template)
259 ++template_counter;
260 can_open_template = false;
261 Advance();
262 break;
263 case tok::kw_operator: // C++ operator overloading.
264 if (!ConsumeOperator())
265 return false;
266 can_open_template = true;
267 break;
268 case tok::raw_identifier:
269 can_open_template = true;
270 Advance();
271 break;
272 case tok::l_square:
273 // Handle templates tagged with an ABI tag.
274 // An example demangled/prettified version is:
275 // func[abi:tag1][abi:tag2]<type[abi:tag3]>(int)
276 if (ConsumeAbiTag())
277 can_open_template = true;
278 else if (ConsumeBrackets(tok::l_square, tok::r_square))
279 can_open_template = false;
280 else
281 return false;
282 break;
283 case tok::l_paren:
284 if (!ConsumeArguments())
285 return false;
286 can_open_template = false;
287 break;
288 default:
289 can_open_template = false;
290 Advance();
291 break;
292 }
293 }
294
295 if (template_counter != 0) {
296 return false;
297 }
298 start_position.Remove();
299 return true;
300}
301
303 Bookmark start_position = SetBookmark();
304 if (!ConsumeToken(tok::l_square))
305 return false;
306
307 if (HasMoreTokens() && Peek().is(tok::raw_identifier) &&
308 Peek().getRawIdentifier() == "abi")
309 Advance();
310 else
311 return false;
312
313 if (!ConsumeToken(tok::colon))
314 return false;
315
316 // Consume the actual tag string (and allow some special characters)
317 while (ConsumeToken(tok::raw_identifier, tok::comma, tok::period,
318 tok::numeric_constant))
319 ;
320
321 if (!ConsumeToken(tok::r_square))
322 return false;
323
324 start_position.Remove();
325 return true;
326}
327
329 Bookmark start_position = SetBookmark();
330 if (!ConsumeToken(tok::l_paren)) {
331 return false;
332 }
333 constexpr llvm::StringLiteral g_anonymous("anonymous");
334 if (HasMoreTokens() && Peek().is(tok::raw_identifier) &&
335 Peek().getRawIdentifier() == g_anonymous) {
336 Advance();
337 } else {
338 return false;
339 }
340
341 if (!ConsumeToken(tok::kw_namespace)) {
342 return false;
343 }
344
345 if (!ConsumeToken(tok::r_paren)) {
346 return false;
347 }
348 start_position.Remove();
349 return true;
350}
351
353 Bookmark start_position = SetBookmark();
354 if (!ConsumeToken(tok::l_brace)) {
355 return false;
356 }
357 constexpr llvm::StringLiteral g_lambda("lambda");
358 if (HasMoreTokens() && Peek().is(tok::raw_identifier) &&
359 Peek().getRawIdentifier() == g_lambda) {
360 // Put the matched brace back so we can use ConsumeBrackets
361 TakeBack();
362 } else {
363 return false;
364 }
365
366 if (!ConsumeBrackets(tok::l_brace, tok::r_brace)) {
367 return false;
368 }
369
370 start_position.Remove();
371 return true;
372}
373
375 tok::TokenKind right) {
376 Bookmark start_position = SetBookmark();
377 if (!HasMoreTokens() || Peek().getKind() != left)
378 return false;
379 Advance();
380
381 int counter = 1;
382 while (HasMoreTokens() && counter > 0) {
383 tok::TokenKind kind = Peek().getKind();
384 if (kind == right)
385 --counter;
386 else if (kind == left)
387 ++counter;
388 Advance();
389 }
390
391 assert(counter >= 0);
392 if (counter > 0) {
393 return false;
394 }
395 start_position.Remove();
396 return true;
397}
398
400 Bookmark start_position = SetBookmark();
401 if (!ConsumeToken(tok::kw_operator))
402 return false;
403
404 if (!HasMoreTokens()) {
405 return false;
406 }
407
408 const auto &token = Peek();
409
410 // When clang generates debug info it adds template parameters to names.
411 // Since clang doesn't add a space between the name and the template parameter
412 // in some cases we are not generating valid C++ names e.g.:
413 //
414 // operator<<A::B>
415 //
416 // In some of these cases we will not parse them correctly. This fixes the
417 // issue by detecting this case and inserting tok::less in place of
418 // tok::lessless and returning successfully that we consumed the operator.
419 if (token.getKind() == tok::lessless) {
420 // Make sure we have more tokens before attempting to look ahead one more.
421 if (m_next_token_index + 1 < m_tokens.size()) {
422 // Look at the token that follows the '<<'.
423 clang::Token n_token = m_tokens[m_next_token_index + 1];
424 // If we find ( or < then this is indeed operator<< no need for fix.
425 if (n_token.getKind() != tok::l_paren && n_token.getKind() != tok::less) {
426 clang::Token tmp_tok;
427 tmp_tok.startToken();
428 tmp_tok.setLength(1);
429 tmp_tok.setLocation(token.getLocation().getLocWithOffset(1));
430 tmp_tok.setKind(tok::less);
431
432 m_tokens[m_next_token_index] = tmp_tok;
433
434 start_position.Remove();
435 return true;
436 }
437 }
438 }
439
440 switch (token.getKind()) {
441 case tok::kw_new:
442 case tok::kw_delete:
443 // This is 'new' or 'delete' operators.
444 Advance();
445 // Check for array new/delete.
446 if (HasMoreTokens() && Peek().is(tok::l_square)) {
447 // Consume the '[' and ']'.
448 if (!ConsumeBrackets(tok::l_square, tok::r_square))
449 return false;
450 }
451 break;
452
453#define OVERLOADED_OPERATOR(Name, Spelling, Token, Unary, Binary, MemberOnly) \
454 case tok::Token: \
455 Advance(); \
456 break;
457#define OVERLOADED_OPERATOR_MULTI(Name, Spelling, Unary, Binary, MemberOnly)
458#include "clang/Basic/OperatorKinds.def"
459#undef OVERLOADED_OPERATOR
460#undef OVERLOADED_OPERATOR_MULTI
461
462 case tok::l_paren:
463 // Call operator consume '(' ... ')'.
464 if (ConsumeBrackets(tok::l_paren, tok::r_paren))
465 break;
466 return false;
467
468 case tok::l_square:
469 // This is a [] operator.
470 // Consume the '[' and ']'.
471 if (ConsumeBrackets(tok::l_square, tok::r_square))
472 break;
473 return false;
474
475 default:
476 // This might be a cast operator.
477 if (ConsumeTypename())
478 break;
479 return false;
480 }
481 start_position.Remove();
482 return true;
483}
484
486 while (ConsumeToken(tok::kw_const, tok::kw_volatile))
487 ;
488}
489
491 while (ConsumeToken(tok::kw_const, tok::kw_volatile, tok::amp, tok::ampamp))
492 ;
493}
494
496 bool result = false;
497 bool continue_parsing = true;
498 // Built-in types can be made of a few keywords like 'unsigned long long
499 // int'. This function consumes all built-in type keywords without checking
500 // if they make sense like 'unsigned char void'.
501 while (continue_parsing && HasMoreTokens()) {
502 switch (Peek().getKind()) {
503 case tok::kw_short:
504 case tok::kw_long:
505 case tok::kw___int64:
506 case tok::kw___int128:
507 case tok::kw_signed:
508 case tok::kw_unsigned:
509 case tok::kw_void:
510 case tok::kw_char:
511 case tok::kw_int:
512 case tok::kw_half:
513 case tok::kw_float:
514 case tok::kw_double:
515 case tok::kw___float128:
516 case tok::kw_wchar_t:
517 case tok::kw_bool:
518 case tok::kw_char16_t:
519 case tok::kw_char32_t:
520 result = true;
521 Advance();
522 break;
523 default:
524 continue_parsing = false;
525 break;
526 }
527 }
528 return result;
529}
530
532 // Ignoring result.
534}
535
537 bool found = false;
539 while (ConsumeToken(tok::star, tok::amp, tok::ampamp, tok::kw_const,
540 tok::kw_volatile)) {
541 found = true;
543 }
544 return found;
545}
546
548 Bookmark start_position = SetBookmark();
549 if (!ConsumeToken(tok::kw_decltype))
550 return false;
551
552 if (!ConsumeArguments())
553 return false;
554
555 start_position.Remove();
556 return true;
557}
558
560 Bookmark start_position = SetBookmark();
562 if (!ConsumeBuiltinType() && !ConsumeDecltype()) {
563 if (!ParseFullNameImpl())
564 return false;
565 }
567 start_position.Remove();
568 return true;
569}
570
571std::optional<CPlusPlusNameParser::ParsedNameRanges>
573 // Name parsing state machine.
574 enum class State {
575 Beginning, // start of the name
576 AfterTwoColons, // right after ::
577 AfterIdentifier, // right after alphanumerical identifier ([a-z0-9_]+)
578 AfterTemplate, // right after template brackets (<something>)
579 AfterOperator, // right after name of C++ operator
580 };
581
582 Bookmark start_position = SetBookmark();
583 State state = State::Beginning;
584 bool continue_parsing = true;
585 std::optional<size_t> last_coloncolon_position;
586
587 while (continue_parsing && HasMoreTokens()) {
588 const auto &token = Peek();
589 switch (token.getKind()) {
590 case tok::raw_identifier: // Just a name.
591 if (state != State::Beginning && state != State::AfterTwoColons) {
592 continue_parsing = false;
593 break;
594 }
595 Advance();
596 state = State::AfterIdentifier;
597 break;
598 case tok::l_square: {
599 // Handles types or functions that were tagged
600 // with, e.g.,
601 // [[gnu::abi_tag("tag1","tag2")]] func()
602 // and demangled/prettified into:
603 // func[abi:tag1][abi:tag2]()
604
605 // ABI tags only appear after a method or type name
606 const bool valid_state =
607 state == State::AfterIdentifier || state == State::AfterOperator;
608 if (!valid_state || !ConsumeAbiTag()) {
609 continue_parsing = false;
610 }
611
612 break;
613 }
614 case tok::l_paren: {
615 if (state == State::Beginning || state == State::AfterTwoColons) {
616 // (anonymous namespace)
618 state = State::AfterIdentifier;
619 break;
620 }
621 }
622
623 // Type declared inside a function 'func()::Type'
624 if (state != State::AfterIdentifier && state != State::AfterTemplate &&
625 state != State::AfterOperator) {
626 continue_parsing = false;
627 break;
628 }
629 Bookmark l_paren_position = SetBookmark();
630 // Consume the '(' ... ') [const]'.
631 if (!ConsumeArguments()) {
632 continue_parsing = false;
633 break;
634 }
636
637 // Consume '::'
638 size_t coloncolon_position = GetCurrentPosition();
639 if (!ConsumeToken(tok::coloncolon)) {
640 continue_parsing = false;
641 break;
642 }
643 l_paren_position.Remove();
644 last_coloncolon_position = coloncolon_position;
645 state = State::AfterTwoColons;
646 break;
647 }
648 case tok::l_brace:
649 if (state == State::Beginning || state == State::AfterTwoColons) {
650 if (ConsumeLambda()) {
651 state = State::AfterIdentifier;
652 break;
653 }
654 }
655 continue_parsing = false;
656 break;
657 case tok::coloncolon: // Type nesting delimiter.
658 if (state != State::Beginning && state != State::AfterIdentifier &&
659 state != State::AfterTemplate) {
660 continue_parsing = false;
661 break;
662 }
663 last_coloncolon_position = GetCurrentPosition();
664 Advance();
665 state = State::AfterTwoColons;
666 break;
667 case tok::less: // Template brackets.
668 if (state != State::AfterIdentifier && state != State::AfterOperator) {
669 continue_parsing = false;
670 break;
671 }
672 if (!ConsumeTemplateArgs()) {
673 continue_parsing = false;
674 break;
675 }
676 state = State::AfterTemplate;
677 break;
678 case tok::kw_operator: // C++ operator overloading.
679 if (state != State::Beginning && state != State::AfterTwoColons) {
680 continue_parsing = false;
681 break;
682 }
683 if (!ConsumeOperator()) {
684 continue_parsing = false;
685 break;
686 }
687 state = State::AfterOperator;
688 break;
689 case tok::tilde: // Destructor.
690 if (state != State::Beginning && state != State::AfterTwoColons) {
691 continue_parsing = false;
692 break;
693 }
694 Advance();
695 if (ConsumeToken(tok::raw_identifier)) {
696 state = State::AfterIdentifier;
697 } else {
698 TakeBack();
699 continue_parsing = false;
700 }
701 break;
702 default:
703 continue_parsing = false;
704 break;
705 }
706 }
707
708 if (state == State::AfterIdentifier || state == State::AfterOperator ||
709 state == State::AfterTemplate) {
710 ParsedNameRanges result;
711 if (last_coloncolon_position) {
712 result.context_range =
713 Range(start_position.GetSavedPosition(), *last_coloncolon_position);
714 result.basename_range =
715 Range(*last_coloncolon_position + 1, GetCurrentPosition());
716 } else {
717 result.basename_range =
718 Range(start_position.GetSavedPosition(), GetCurrentPosition());
719 }
720 start_position.Remove();
721 return result;
722 } else {
723 return std::nullopt;
724 }
725}
726
727llvm::StringRef CPlusPlusNameParser::GetTextForRange(const Range &range) {
728 if (range.empty())
729 return llvm::StringRef();
730 assert(range.begin_index < range.end_index);
731 assert(range.begin_index < m_tokens.size());
732 assert(range.end_index <= m_tokens.size());
733 clang::Token &first_token = m_tokens[range.begin_index];
734 clang::Token &last_token = m_tokens[range.end_index - 1];
735 clang::SourceLocation start_loc = first_token.getLocation();
736 clang::SourceLocation end_loc = last_token.getLocation();
737 unsigned start_pos = start_loc.getRawEncoding();
738 unsigned end_pos = end_loc.getRawEncoding() + last_token.getLength();
739 return m_text.take_front(end_pos).drop_front(start_pos);
740}
741
742static const clang::LangOptions &GetLangOptions() {
743 static clang::LangOptions g_options;
744 static llvm::once_flag g_once_flag;
745 llvm::call_once(g_once_flag, []() {
746 g_options.LineComment = true;
747 g_options.C99 = true;
748 g_options.C11 = true;
749 g_options.CPlusPlus = true;
750 g_options.CPlusPlus11 = true;
751 g_options.CPlusPlus14 = true;
752 g_options.CPlusPlus17 = true;
753 g_options.CPlusPlus20 = true;
754 });
755 return g_options;
756}
757
/// Returns the table that maps a keyword's spelling to its tok::kw_* kind.
///
/// The map is a function-local static, so it is built on the first call and
/// reused afterwards. The tokenizer loop later in this file looks raw
/// identifiers up in it to re-tag them as keywords.
///
/// \return A reference to the shared keyword map.
758static const llvm::StringMap<tok::TokenKind> &GetKeywordsMap() {
759 static llvm::StringMap<tok::TokenKind> g_map{
 // X-macro: each KEYWORD(Name, Flags) entry in TokenKinds.def expands to one
 // {"Name", tok::kw_Name} initializer; the Flags argument is not used.
760#define KEYWORD(Name, Flags) {llvm::StringRef(#Name), tok::kw_##Name},
761#include "clang/Basic/TokenKinds.def"
 // Undefine right away so the helper macro does not leak past this table.
762#undef KEYWORD
763 };
764 return g_map;
765}
766
768 if (m_text.empty())
769 return;
770 clang::Lexer lexer(clang::SourceLocation(), GetLangOptions(), m_text.data(),
771 m_text.data(), m_text.data() + m_text.size());
772 const auto &kw_map = GetKeywordsMap();
773 clang::Token token;
774 for (lexer.LexFromRawLexer(token); !token.is(clang::tok::eof);
775 lexer.LexFromRawLexer(token)) {
776 if (token.is(clang::tok::raw_identifier)) {
777 auto it = kw_map.find(token.getRawIdentifier());
778 if (it != kw_map.end()) {
779 token.setKind(it->getValue());
780 }
781 }
782
783 m_tokens.push_back(token);
784 }
785}
static const clang::LangOptions & GetLangOptions()
static const llvm::StringMap< tok::TokenKind > & GetKeywordsMap()
std::optional< ParsedNameRanges > ParseFullNameImpl()
bool ConsumeToken(clang::tok::TokenKind kind)
std::optional< ParsedFunction > ParseFunctionImpl(bool expect_return_type)
std::optional< ParsedFunction > ParseAsFunctionDefinition()
std::optional< ParsedName > ParseAsFullName()
bool ConsumeAbiTag()
Consumes ABI tags enclosed within '[abi:' ... ']'.
bool ConsumeBrackets(clang::tok::TokenKind left, clang::tok::TokenKind right)
llvm::StringRef GetTextForRange(const Range &range)
std::optional< ParsedFunction > ParseFuncPtr(bool expect_return_type)
llvm::SmallVector< clang::Token, 30 > m_tokens
A class that represents a running process on the host machine.
Definition: SBAddress.h:15