LLDB mainline
DILParser.cpp
Go to the documentation of this file.
1//===-- DILParser.cpp -----------------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7// This implements the recursive descent parser for the Data Inspection
8// Language (DIL), and its helper functions, which will eventually underlie the
9// 'frame variable' command. The language that this parser recognizes is
10// described in lldb/docs/dil-expr-lang.ebnf
11//
12//===----------------------------------------------------------------------===//
13
21#include "llvm/ADT/StringRef.h"
22#include "llvm/Support/FormatAdapters.h"
23#include <cstdlib>
24#include <limits.h>
25#include <memory>
26#include <sstream>
27#include <string>
28
29namespace lldb_private::dil {
30
32 const std::string &message, uint32_t loc,
33 uint16_t err_len)
34 : ErrorInfo(make_error_code(std::errc::invalid_argument)) {
36 FileSpec{}, /*line=*/1, static_cast<uint16_t>(loc + 1),
37 err_len, false, /*in_user_input=*/true};
38 std::string rendered_msg =
39 llvm::formatv("<user expression 0>:1:{0}: {1}\n 1 | {2}\n | ^",
40 loc + 1, message, expr);
41 m_detail.source_location = sloc;
43 m_detail.message = message;
44 m_detail.rendered = std::move(rendered_msg);
45}
46
47llvm::Expected<ASTNodeUP>
48DILParser::Parse(llvm::StringRef dil_input_expr, DILLexer lexer,
49 std::shared_ptr<StackFrame> frame_sp,
50 lldb::DynamicValueType use_dynamic, bool use_synthetic,
51 bool fragile_ivar, bool check_ptr_vs_member) {
52 llvm::Error error = llvm::Error::success();
53 DILParser parser(dil_input_expr, lexer, frame_sp, use_dynamic, use_synthetic,
54 fragile_ivar, check_ptr_vs_member, error);
55
56 ASTNodeUP node_up = parser.Run();
57 assert(node_up && "ASTNodeUP must not contain a nullptr");
58
59 if (error)
60 return error;
61
62 return node_up;
63}
64
65DILParser::DILParser(llvm::StringRef dil_input_expr, DILLexer lexer,
66 std::shared_ptr<StackFrame> frame_sp,
67 lldb::DynamicValueType use_dynamic, bool use_synthetic,
68 bool fragile_ivar, bool check_ptr_vs_member,
69 llvm::Error &error)
70 : m_ctx_scope(frame_sp), m_input_expr(dil_input_expr),
71 m_dil_lexer(std::move(lexer)), m_error(error), m_use_dynamic(use_dynamic),
72 m_use_synthetic(use_synthetic), m_fragile_ivar(fragile_ivar),
73 m_check_ptr_vs_member(check_ptr_vs_member) {}
74
77
79
80 return expr;
81}
82
83// Parse an expression.
84//
85// expression:
86// cast_expression
87//
89
90// Parse a cast_expression.
91//
92// cast_expression:
93// unary_expression
94// "(" type_id ")" cast_expression
95
97 if (!CurToken().Is(Token::l_paren))
98 return ParseUnaryExpression();
99
100 // This could be a type cast, try parsing the contents as a type declaration.
101 Token token = CurToken();
102 uint32_t loc = token.GetLocation();
103
104 // Enable lexer backtracking, so that we can rollback in case it's not
105 // actually a type declaration.
106
107 // Start tentative parsing (save token location/idx, for possible rollback).
108 uint32_t save_token_idx = m_dil_lexer.GetCurrentTokenIdx();
109
110 // Consume the token only after enabling the backtracking.
111 m_dil_lexer.Advance();
112
113 // Try parsing the type declaration. If the returned value is not valid,
114 // then we should rollback and try parsing the expression.
115 auto type_id = ParseTypeId();
116 if (type_id) {
117 // Successfully parsed the type declaration. Commit the backtracked
118 // tokens and parse the cast_expression.
119
120 if (!type_id.value().IsValid())
121 return std::make_unique<ErrorNode>();
122
124 m_dil_lexer.Advance();
125 auto rhs = ParseCastExpression();
126 assert(rhs && "ASTNodeUP must not contain a nullptr");
127 return std::make_unique<CastNode>(loc, type_id.value(), std::move(rhs),
129 }
130
131 // Failed to parse the contents of the parentheses as a type declaration.
132 // Rollback the lexer and try parsing it as unary_expression.
133 TentativeParsingRollback(save_token_idx);
134
135 return ParseUnaryExpression();
136}
137
138// Parse a unary_expression.
139//
140// unary_expression:
141// postfix_expression
142// unary_operator cast_expression
143//
144// unary_operator:
145// "&"
146// "*"
147// "+"
148// "-"
149//
151 if (CurToken().IsOneOf(
153 Token token = CurToken();
154 uint32_t loc = token.GetLocation();
155 m_dil_lexer.Advance();
156 auto rhs = ParseCastExpression();
157 assert(rhs && "ASTNodeUP must not contain a nullptr");
158 switch (token.GetKind()) {
159 case Token::star:
160 return std::make_unique<UnaryOpNode>(loc, UnaryOpKind::Deref,
161 std::move(rhs));
162 case Token::amp:
163 return std::make_unique<UnaryOpNode>(loc, UnaryOpKind::AddrOf,
164 std::move(rhs));
165 case Token::minus:
166 return std::make_unique<UnaryOpNode>(loc, UnaryOpKind::Minus,
167 std::move(rhs));
168 case Token::plus:
169 return std::make_unique<UnaryOpNode>(loc, UnaryOpKind::Plus,
170 std::move(rhs));
171 default:
172 llvm_unreachable("invalid token kind");
173 }
174 }
175 return ParsePostfixExpression();
176}
177
178// Parse a postfix_expression.
179//
180// postfix_expression:
181// primary_expression
182// postfix_expression "[" expression "]"
183// postfix_expression "[" expression ":" expression "]"
184// postfix_expression "." id_expression
185// postfix_expression "->" id_expression
186//
189 assert(lhs && "ASTNodeUP must not contain a nullptr");
190 while (CurToken().IsOneOf({Token::l_square, Token::period, Token::arrow})) {
191 uint32_t loc = CurToken().GetLocation();
192 Token token = CurToken();
193 switch (token.GetKind()) {
194 case Token::l_square: {
195 m_dil_lexer.Advance();
196 ASTNodeUP index = ParseExpression();
197 assert(index && "ASTNodeUP must not contain a nullptr");
198 if (CurToken().GetKind() == Token::colon) {
199 m_dil_lexer.Advance();
200 ASTNodeUP last_index = ParseExpression();
201 assert(last_index && "ASTNodeUP must not contain a nullptr");
202 lhs = std::make_unique<BitFieldExtractionNode>(
203 loc, std::move(lhs), std::move(index), std::move(last_index));
204 } else if (CurToken().GetKind() == Token::minus) {
205 BailOut("use of '-' for bitfield range is deprecated; use ':' instead",
206 CurToken().GetLocation(), CurToken().GetSpelling().length());
207 return std::make_unique<ErrorNode>();
208 } else {
209 lhs = std::make_unique<ArraySubscriptNode>(loc, std::move(lhs),
210 std::move(index));
211 }
213 m_dil_lexer.Advance();
214 break;
215 }
216 case Token::period:
217 case Token::arrow: {
218 m_dil_lexer.Advance();
219 Token member_token = CurToken();
220 std::string member_id = ParseIdExpression();
221 lhs = std::make_unique<MemberOfNode>(
222 member_token.GetLocation(), std::move(lhs),
223 token.GetKind() == Token::arrow, member_id);
224 break;
225 }
226 default:
227 llvm_unreachable("invalid token");
228 }
229 }
230
231 return lhs;
232}
233
234// Parse a primary_expression.
235//
236// primary_expression:
237// numeric_literal
238// boolean_literal
239// id_expression
240// "(" expression ")"
241//
244 return ParseNumericLiteral();
245 if (CurToken().IsOneOf({Token::kw_true, Token::kw_false}))
246 return ParseBooleanLiteral();
247 if (CurToken().IsOneOf(
249 // Save the source location for the diagnostics message.
250 uint32_t loc = CurToken().GetLocation();
251 std::string identifier = ParseIdExpression();
252
253 if (!identifier.empty())
254 return std::make_unique<IdentifierNode>(loc, identifier);
255 }
256
257 if (CurToken().Is(Token::l_paren)) {
258 m_dil_lexer.Advance();
259 auto expr = ParseExpression();
261 m_dil_lexer.Advance();
262 return expr;
263 }
264
265 BailOut(llvm::formatv("Unexpected token: {0}", CurToken()),
266 CurToken().GetLocation(), CurToken().GetSpelling().length());
267 return std::make_unique<ErrorNode>();
268}
269
270// Parse nested_name_specifier.
271//
272// nested_name_specifier:
273// type_name "::"
274// namespace_name "::"
275// nested_name_specifier identifier "::"
276//
278 // The first token in nested_name_specifier is always an identifier, or
279 // '(anonymous namespace)'.
280 switch (CurToken().GetKind()) {
281 case Token::l_paren: {
282 // Anonymous namespaces need to be treated specially: They are
283 // represented by the string '(anonymous namespace)', which has a
284 // space in it (throwing off normal parsing) and is not actually
285 // proper C++. Check to see if we're looking at
286 // '(anonymous namespace)::...'
287
288 // Look for all the pieces, in order:
289 // l_paren 'anonymous' 'namespace' r_paren coloncolon
290 if (m_dil_lexer.LookAhead(1).Is(Token::identifier) &&
291 (m_dil_lexer.LookAhead(1).GetSpelling() == "anonymous") &&
292 m_dil_lexer.LookAhead(2).Is(Token::identifier) &&
293 (m_dil_lexer.LookAhead(2).GetSpelling() == "namespace") &&
294 m_dil_lexer.LookAhead(3).Is(Token::r_paren) &&
295 m_dil_lexer.LookAhead(4).Is(Token::coloncolon)) {
296 m_dil_lexer.Advance(4);
297
299 m_dil_lexer.Advance();
300 if (!CurToken().Is(Token::identifier) && !CurToken().Is(Token::l_paren)) {
301 BailOut("Expected an identifier or anonymous namespace, but not found.",
302 CurToken().GetLocation(), CurToken().GetSpelling().length());
303 }
304 // Continue parsing the nested_namespace_specifier.
305 std::string identifier2 = ParseNestedNameSpecifier();
306
307 return "(anonymous namespace)::" + identifier2;
308 }
309
310 return "";
311 } // end of special handling for '(anonymous namespace)'
312 case Token::identifier: {
313 // If the next token is scope ("::"), then this is indeed a
314 // nested_name_specifier
315 if (m_dil_lexer.LookAhead(1).Is(Token::coloncolon)) {
316 // This nested_name_specifier is a single identifier.
317 std::string identifier = CurToken().GetSpelling();
318 m_dil_lexer.Advance(1);
320 m_dil_lexer.Advance();
321 // Continue parsing the nested_name_specifier.
322 return identifier + "::" + ParseNestedNameSpecifier();
323 }
324
325 return "";
326 }
327 default:
328 return "";
329 }
330}
331
332// Parse a type_id.
333//
334// type_id:
335// type_specifier_seq [abstract_declarator]
336//
337// type_specifier_seq:
338// type_specifier [type_specifier]
339//
340// type_specifier:
341// ["::"] [nested_name_specifier] type_name // not handled for now!
342// builtin_typename
343//
344std::optional<CompilerType> DILParser::ParseTypeId() {
345 CompilerType type;
346 // For now only allow builtin types -- will expand add to this later.
347 auto maybe_builtin_type = ParseBuiltinType();
348 if (maybe_builtin_type) {
349 type = *maybe_builtin_type;
350 } else
351 return {};
352
353 //
354 // abstract_declarator:
355 // ptr_operator [abstract_declarator]
356 //
357 std::vector<Token> ptr_operators;
358 while (CurToken().IsOneOf({Token::star, Token::amp})) {
359 Token tok = CurToken();
360 ptr_operators.push_back(std::move(tok));
361 m_dil_lexer.Advance();
362 }
363 type = ResolveTypeDeclarators(type, ptr_operators);
364
365 return type;
366}
367
368// Parse a built-in type
369//
370// builtin_typename:
371// identifer_seq
372//
373// identifier_seq
374// identifer [identifier_seq]
375//
376// A built-in type can be a single identifier or a space-separated
377// list of identifiers (e.g. "short" or "long long").
378std::optional<CompilerType> DILParser::ParseBuiltinType() {
379 std::string type_name = "";
380 uint32_t save_token_idx = m_dil_lexer.GetCurrentTokenIdx();
381 bool first_word = true;
382 while (CurToken().GetKind() == Token::identifier) {
383 if (CurToken().GetSpelling() == "const" ||
384 CurToken().GetSpelling() == "volatile")
385 continue;
386 if (!first_word)
387 type_name.push_back(' ');
388 else
389 first_word = false;
390 type_name.append(CurToken().GetSpelling());
391 m_dil_lexer.Advance();
392 }
393
394 if (type_name.size() > 0) {
395 lldb::TargetSP target_sp = m_ctx_scope->CalculateTarget();
396 ConstString const_type_name(type_name.c_str());
397 for (auto type_system_sp : target_sp->GetScratchTypeSystems())
398 if (auto compiler_type =
399 type_system_sp->GetBuiltinTypeByName(const_type_name))
400 return compiler_type;
401 }
402
403 TentativeParsingRollback(save_token_idx);
404 return {};
405}
406
407// Parse an id_expression.
408//
409// id_expression:
410// unqualified_id
411// qualified_id
412//
413// qualified_id:
414// ["::"] [nested_name_specifier] unqualified_id
415// ["::"] identifier
416//
417// identifier:
418// ? Token::identifier ?
419//
421 // Try parsing optional global scope operator.
422 bool global_scope = false;
423 if (CurToken().Is(Token::coloncolon)) {
424 global_scope = true;
425 m_dil_lexer.Advance();
426 }
427
428 // Try parsing optional nested_name_specifier.
429 std::string nested_name_specifier = ParseNestedNameSpecifier();
430
431 // If nested_name_specifier is present, then it's qualified_id production.
432 // Follow the first production rule.
433 if (!nested_name_specifier.empty()) {
434 // Parse unqualified_id and construct a fully qualified id expression.
435 auto unqualified_id = ParseUnqualifiedId();
436
437 return llvm::formatv("{0}{1}{2}", global_scope ? "::" : "",
438 nested_name_specifier, unqualified_id);
439 }
440
441 if (!CurToken().Is(Token::identifier))
442 return "";
443
444 // No nested_name_specifier, but with global scope -- this is also a
445 // qualified_id production. Follow the second production rule.
446 if (global_scope) {
448 std::string identifier = CurToken().GetSpelling();
449 m_dil_lexer.Advance();
450 return llvm::formatv("{0}{1}", global_scope ? "::" : "", identifier);
451 }
452
453 // This is unqualified_id production.
454 return ParseUnqualifiedId();
455}
456
457// Parse an unqualified_id.
458//
459// unqualified_id:
460// identifier
461//
462// identifier:
463// ? Token::identifier ?
464//
467 std::string identifier = CurToken().GetSpelling();
468 m_dil_lexer.Advance();
469 return identifier;
470}
471
474 const std::vector<Token> &ptr_operators) {
475 // Resolve pointers/references.
476 for (Token tk : ptr_operators) {
477 uint32_t loc = tk.GetLocation();
478 if (tk.GetKind() == Token::star) {
479 // Pointers to reference types are forbidden.
480 if (type.IsReferenceType()) {
481 BailOut(llvm::formatv("'type name' declared as a pointer to a "
482 "reference of type {0}",
483 type.TypeDescription()),
484 loc, CurToken().GetSpelling().length());
485 return {};
486 }
487 // Get pointer type for the base type: e.g. int* -> int**.
488 type = type.GetPointerType();
489
490 } else if (tk.GetKind() == Token::amp) {
491 // References to references are forbidden.
492 // FIXME: In future we may want to allow rvalue references (i.e. &&).
493 if (type.IsReferenceType()) {
494 BailOut("type name declared as a reference to a reference", loc,
495 CurToken().GetSpelling().length());
496 return {};
497 }
498 // Get reference type for the base type: e.g. int -> int&.
499 type = type.GetLValueReferenceType();
500 }
501 }
502
503 return type;
504}
505
506// Parse a boolean_literal.
507//
508// boolean_literal:
509// "true"
510// "false"
511//
513 ExpectOneOf(std::vector<Token::Kind>{Token::kw_true, Token::kw_false});
514 uint32_t loc = CurToken().GetLocation();
515 bool literal_value = CurToken().Is(Token::kw_true);
516 m_dil_lexer.Advance();
517 return std::make_unique<BooleanLiteralNode>(loc, literal_value);
518}
519
520void DILParser::BailOut(const std::string &error, uint32_t loc,
521 uint16_t err_len) {
522 if (m_error)
523 // If error is already set, then the parser is in the "bail-out" mode. Don't
524 // do anything and keep the original error.
525 return;
526
527 m_error =
528 llvm::make_error<DILDiagnosticError>(m_input_expr, error, loc, err_len);
529 // Advance the lexer token index to the end of the lexed tokens vector.
530 m_dil_lexer.ResetTokenIdx(m_dil_lexer.NumLexedTokens() - 1);
531}
532
533// Parse a numeric_literal.
534//
535// numeric_literal:
536// ? Token::integer_constant ?
537// ? Token::floating_constant ?
538//
540 ASTNodeUP numeric_constant;
542 numeric_constant = ParseIntegerLiteral();
543 else
544 numeric_constant = ParseFloatingPointLiteral();
545 if (numeric_constant->GetKind() == NodeKind::eErrorNode) {
546 BailOut(llvm::formatv("Failed to parse token as numeric-constant: {0}",
547 CurToken()),
548 CurToken().GetLocation(), CurToken().GetSpelling().length());
549 return numeric_constant;
550 }
551 m_dil_lexer.Advance();
552 return numeric_constant;
553}
554
556 Token token = CurToken();
557 auto spelling = token.GetSpelling();
558 llvm::StringRef spelling_ref = spelling;
559
560 auto radix = llvm::getAutoSenseRadix(spelling_ref);
562 bool is_unsigned = false;
563 if (spelling_ref.consume_back_insensitive("u"))
564 is_unsigned = true;
565 if (spelling_ref.consume_back_insensitive("ll"))
567 else if (spelling_ref.consume_back_insensitive("l"))
569 // Suffix 'u' can be specified only once, before or after 'l'
570 if (!is_unsigned && spelling_ref.consume_back_insensitive("u"))
571 is_unsigned = true;
572
573 llvm::APInt raw_value;
574 if (!spelling_ref.getAsInteger(radix, raw_value))
575 return std::make_unique<IntegerLiteralNode>(token.GetLocation(), raw_value,
576 radix, is_unsigned, type);
577 return std::make_unique<ErrorNode>();
578}
579
581 Token token = CurToken();
582 auto spelling = token.GetSpelling();
583 llvm::StringRef spelling_ref = spelling;
584
585 llvm::APFloat raw_float(llvm::APFloat::IEEEdouble());
586 if (spelling_ref.consume_back_insensitive("f"))
587 raw_float = llvm::APFloat(llvm::APFloat::IEEEsingle());
588
589 auto StatusOrErr = raw_float.convertFromString(
590 spelling_ref, llvm::APFloat::rmNearestTiesToEven);
591 if (!errorToBool(StatusOrErr.takeError()))
592 return std::make_unique<FloatLiteralNode>(token.GetLocation(), raw_float);
593 return std::make_unique<ErrorNode>();
594}
595
597 if (CurToken().IsNot(kind)) {
598 BailOut(llvm::formatv("expected {0}, got: {1}", kind, CurToken()),
599 CurToken().GetLocation(), CurToken().GetSpelling().length());
600 }
601}
602
603void DILParser::ExpectOneOf(std::vector<Token::Kind> kinds_vec) {
604 if (!CurToken().IsOneOf(kinds_vec)) {
605 BailOut(llvm::formatv("expected any of ({0}), got: {1}",
606 llvm::iterator_range(kinds_vec), CurToken()),
607 CurToken().GetLocation(), CurToken().GetSpelling().length());
608 }
609}
610
611} // namespace lldb_private::dil
static llvm::raw_ostream & error(Stream &strm)
uint32_t GetKind(uint32_t data)
Return the type kind encoded in the given data.
Generic representation of a type in a programming language.
CompilerType GetPointerType() const
Return a new CompilerType that is a pointer to this type.
CompilerType GetLValueReferenceType() const
Return a new CompilerType that is a L value reference to this type if this type is valid and the type...
bool IsReferenceType(CompilerType *pointee_type=nullptr, bool *is_rvalue=nullptr) const
A uniqued constant string class.
Definition ConstString.h:40
A file utility class.
Definition FileSpec.h:57
DILDiagnosticError(DiagnosticDetail detail)
Definition DILParser.h:41
std::string message() const override
Definition DILParser.h:56
Class for doing the simple lexing required by DIL.
Definition DILLexer.h:73
void Expect(Token::Kind kind)
static llvm::Expected< ASTNodeUP > Parse(llvm::StringRef dil_input_expr, DILLexer lexer, std::shared_ptr< StackFrame > frame_sp, lldb::DynamicValueType use_dynamic, bool use_synthetic, bool fragile_ivar, bool check_ptr_vs_member)
Definition DILParser.cpp:48
std::optional< CompilerType > ParseTypeId()
void TentativeParsingRollback(uint32_t saved_idx)
Definition DILParser.h:115
ASTNodeUP ParseFloatingPointLiteral()
DILParser(llvm::StringRef dil_input_expr, DILLexer lexer, std::shared_ptr< StackFrame > frame_sp, lldb::DynamicValueType use_dynamic, bool use_synthetic, bool fragile_ivar, bool check_ptr_vs_member, llvm::Error &error)
Definition DILParser.cpp:65
void ExpectOneOf(std::vector< Token::Kind > kinds_vec)
std::optional< CompilerType > ParseBuiltinType()
std::shared_ptr< StackFrame > m_ctx_scope
Definition DILParser.h:126
void BailOut(const std::string &error, uint32_t loc, uint16_t err_len)
CompilerType ResolveTypeDeclarators(CompilerType type, const std::vector< Token > &ptr_operators)
lldb::DynamicValueType m_use_dynamic
Definition DILParser.h:135
llvm::StringRef m_input_expr
Definition DILParser.h:128
std::string ParseNestedNameSpecifier()
Class defining the tokens generated by the DIL lexer and used by the DIL parser.
Definition DILLexer.h:24
bool Is(Kind kind) const
Definition DILLexer.h:54
uint32_t GetLocation() const
Definition DILLexer.h:62
Kind GetKind() const
Definition DILLexer.h:50
std::string GetSpelling() const
Definition DILLexer.h:52
@ eNone
Invalid promotion type (results in error).
Definition DILAST.h:46
std::unique_ptr< ASTNode > ASTNodeUP
Definition DILAST.h:79
std::shared_ptr< lldb_private::Target > TargetSP
A source location consisting of a file name and position.