LLDB mainline
DILParser.cpp
Go to the documentation of this file.
1//===-- DILParser.cpp -----------------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7// This implements the recursive descent parser for the Data Inspection
8// Language (DIL), and its helper functions, which will eventually underlie the
9// 'frame variable' command. The language that this parser recognizes is
10// described in lldb/docs/dil-expr-lang.ebnf
11//
12//===----------------------------------------------------------------------===//
13
21#include "llvm/ADT/StringRef.h"
22#include "llvm/Support/FormatAdapters.h"
23#include <cstdlib>
24#include <limits.h>
25#include <memory>
26#include <sstream>
27#include <string>
28
29namespace lldb_private::dil {
30
32 const std::string &message, uint32_t loc,
33 uint16_t err_len)
34 : ErrorInfo(make_error_code(std::errc::invalid_argument)) {
36 FileSpec{}, /*line=*/1, static_cast<uint16_t>(loc + 1),
37 err_len, false, /*in_user_input=*/true};
38 std::string rendered_msg =
39 llvm::formatv("<user expression 0>:1:{0}: {1}\n 1 | {2}\n | ^",
40 loc + 1, message, expr);
41 m_detail.source_location = sloc;
43 m_detail.message = message;
44 m_detail.rendered = std::move(rendered_msg);
45}
46
47llvm::Expected<ASTNodeUP>
48DILParser::Parse(llvm::StringRef dil_input_expr, DILLexer lexer,
49 std::shared_ptr<StackFrame> frame_sp,
50 lldb::DynamicValueType use_dynamic, bool use_synthetic,
51 bool fragile_ivar, bool check_ptr_vs_member) {
52 llvm::Error error = llvm::Error::success();
53 DILParser parser(dil_input_expr, lexer, frame_sp, use_dynamic, use_synthetic,
54 fragile_ivar, check_ptr_vs_member, error);
55
56 ASTNodeUP node_up = parser.Run();
57
58 if (error)
59 return error;
60
61 return node_up;
62}
63
64DILParser::DILParser(llvm::StringRef dil_input_expr, DILLexer lexer,
65 std::shared_ptr<StackFrame> frame_sp,
66 lldb::DynamicValueType use_dynamic, bool use_synthetic,
67 bool fragile_ivar, bool check_ptr_vs_member,
68 llvm::Error &error)
69 : m_ctx_scope(frame_sp), m_input_expr(dil_input_expr),
70 m_dil_lexer(std::move(lexer)), m_error(error), m_use_dynamic(use_dynamic),
71 m_use_synthetic(use_synthetic), m_fragile_ivar(fragile_ivar),
72 m_check_ptr_vs_member(check_ptr_vs_member) {}
73
76
78
79 return expr;
80}
81
82// Parse an expression.
83//
84// expression:
85// cast_expression
86//
88
89// Parse a cast_expression.
90//
91// cast_expression:
92// unary_expression
93// "(" type_id ")" cast_expression
94
96 if (!CurToken().Is(Token::l_paren))
97 return ParseUnaryExpression();
98
99 // This could be a type cast, try parsing the contents as a type declaration.
100 Token token = CurToken();
101 uint32_t loc = token.GetLocation();
102
103 // Enable lexer backtracking, so that we can rollback in case it's not
104 // actually a type declaration.
105
106 // Start tentative parsing (save token location/idx, for possible rollback).
107 uint32_t save_token_idx = m_dil_lexer.GetCurrentTokenIdx();
108
109 // Consume the token only after enabling the backtracking.
110 m_dil_lexer.Advance();
111
112 // Try parsing the type declaration. If the returned value is not valid,
113 // then we should rollback and try parsing the expression.
114 auto type_id = ParseTypeId();
115 if (type_id) {
116 // Successfully parsed the type declaration. Commit the backtracked
117 // tokens and parse the cast_expression.
118
119 if (!type_id.value().IsValid())
120 return std::make_unique<ErrorNode>();
121
123 m_dil_lexer.Advance();
124 auto rhs = ParseCastExpression();
125
126 return std::make_unique<CastNode>(loc, type_id.value(), std::move(rhs),
128 }
129
130 // Failed to parse the contents of the parentheses as a type declaration.
131 // Rollback the lexer and try parsing it as unary_expression.
132 TentativeParsingRollback(save_token_idx);
133
134 return ParseUnaryExpression();
135}
136
137// Parse a unary_expression.
138//
139// unary_expression:
140// postfix_expression
141// unary_operator cast_expression
142//
143// unary_operator:
144// "&"
145// "*"
146// "+"
147// "-"
148//
150 if (CurToken().IsOneOf(
152 Token token = CurToken();
153 uint32_t loc = token.GetLocation();
154 m_dil_lexer.Advance();
155 auto rhs = ParseCastExpression();
156 switch (token.GetKind()) {
157 case Token::star:
158 return std::make_unique<UnaryOpNode>(loc, UnaryOpKind::Deref,
159 std::move(rhs));
160 case Token::amp:
161 return std::make_unique<UnaryOpNode>(loc, UnaryOpKind::AddrOf,
162 std::move(rhs));
163 case Token::minus:
164 return std::make_unique<UnaryOpNode>(loc, UnaryOpKind::Minus,
165 std::move(rhs));
166 case Token::plus:
167 return std::make_unique<UnaryOpNode>(loc, UnaryOpKind::Plus,
168 std::move(rhs));
169 default:
170 llvm_unreachable("invalid token kind");
171 }
172 }
173 return ParsePostfixExpression();
174}
175
176// Parse a postfix_expression.
177//
178// postfix_expression:
179// primary_expression
180// postfix_expression "[" integer_literal "]"
181// postfix_expression "[" integer_literal "-" integer_literal "]"
182// postfix_expression "." id_expression
183// postfix_expression "->" id_expression
184//
187 while (CurToken().IsOneOf({Token::l_square, Token::period, Token::arrow})) {
188 uint32_t loc = CurToken().GetLocation();
189 Token token = CurToken();
190 switch (token.GetKind()) {
191 case Token::l_square: {
192 m_dil_lexer.Advance();
193 std::optional<int64_t> index = ParseIntegerConstant();
194 if (!index) {
195 BailOut(
196 llvm::formatv("failed to parse integer constant: {0}", CurToken()),
197 CurToken().GetLocation(), CurToken().GetSpelling().length());
198 return std::make_unique<ErrorNode>();
199 }
200 if (CurToken().GetKind() == Token::minus) {
201 m_dil_lexer.Advance();
202 std::optional<int64_t> last_index = ParseIntegerConstant();
203 if (!last_index) {
204 BailOut(llvm::formatv("failed to parse integer constant: {0}",
205 CurToken()),
206 CurToken().GetLocation(), CurToken().GetSpelling().length());
207 return std::make_unique<ErrorNode>();
208 }
209 lhs = std::make_unique<BitFieldExtractionNode>(
210 loc, std::move(lhs), std::move(*index), std::move(*last_index));
211 } else {
212 lhs = std::make_unique<ArraySubscriptNode>(loc, std::move(lhs),
213 std::move(*index));
214 }
216 m_dil_lexer.Advance();
217 break;
218 }
219 case Token::period:
220 case Token::arrow: {
221 m_dil_lexer.Advance();
222 Token member_token = CurToken();
223 std::string member_id = ParseIdExpression();
224 lhs = std::make_unique<MemberOfNode>(
225 member_token.GetLocation(), std::move(lhs),
226 token.GetKind() == Token::arrow, member_id);
227 break;
228 }
229 default:
230 llvm_unreachable("invalid token");
231 }
232 }
233
234 return lhs;
235}
236
237// Parse a primary_expression.
238//
239// primary_expression:
240// numeric_literal
241// boolean_literal
242// id_expression
243// "(" expression ")"
244//
247 return ParseNumericLiteral();
248 if (CurToken().IsOneOf({Token::kw_true, Token::kw_false}))
249 return ParseBooleanLiteral();
250 if (CurToken().IsOneOf(
252 // Save the source location for the diagnostics message.
253 uint32_t loc = CurToken().GetLocation();
254 std::string identifier = ParseIdExpression();
255
256 if (!identifier.empty())
257 return std::make_unique<IdentifierNode>(loc, identifier);
258 }
259
260 if (CurToken().Is(Token::l_paren)) {
261 m_dil_lexer.Advance();
262 auto expr = ParseExpression();
264 m_dil_lexer.Advance();
265 return expr;
266 }
267
268 BailOut(llvm::formatv("Unexpected token: {0}", CurToken()),
269 CurToken().GetLocation(), CurToken().GetSpelling().length());
270 return std::make_unique<ErrorNode>();
271}
272
273// Parse nested_name_specifier.
274//
275// nested_name_specifier:
276// type_name "::"
277// namespace_name "::"
278// nested_name_specifier identifier "::"
279//
281 // The first token in nested_name_specifier is always an identifier, or
282 // '(anonymous namespace)'.
283 switch (CurToken().GetKind()) {
284 case Token::l_paren: {
285 // Anonymous namespaces need to be treated specially: They are
286 // represented by the string '(anonymous namespace)', which has a
287 // space in it (throwing off normal parsing) and is not actually
288 // proper C++. Check to see if we're looking at
289 // '(anonymous namespace)::...'
290
291 // Look for all the pieces, in order:
292 // l_paren 'anonymous' 'namespace' r_paren coloncolon
293 if (m_dil_lexer.LookAhead(1).Is(Token::identifier) &&
294 (m_dil_lexer.LookAhead(1).GetSpelling() == "anonymous") &&
295 m_dil_lexer.LookAhead(2).Is(Token::identifier) &&
296 (m_dil_lexer.LookAhead(2).GetSpelling() == "namespace") &&
297 m_dil_lexer.LookAhead(3).Is(Token::r_paren) &&
298 m_dil_lexer.LookAhead(4).Is(Token::coloncolon)) {
299 m_dil_lexer.Advance(4);
300
302 m_dil_lexer.Advance();
303 if (!CurToken().Is(Token::identifier) && !CurToken().Is(Token::l_paren)) {
304 BailOut("Expected an identifier or anonymous namespace, but not found.",
305 CurToken().GetLocation(), CurToken().GetSpelling().length());
306 }
307 // Continue parsing the nested_name_specifier.
308 std::string identifier2 = ParseNestedNameSpecifier();
309
310 return "(anonymous namespace)::" + identifier2;
311 }
312
313 return "";
314 } // end of special handling for '(anonymous namespace)'
315 case Token::identifier: {
316 // If the next token is scope ("::"), then this is indeed a
317 // nested_name_specifier
318 if (m_dil_lexer.LookAhead(1).Is(Token::coloncolon)) {
319 // This nested_name_specifier is a single identifier.
320 std::string identifier = CurToken().GetSpelling();
321 m_dil_lexer.Advance(1);
323 m_dil_lexer.Advance();
324 // Continue parsing the nested_name_specifier.
325 return identifier + "::" + ParseNestedNameSpecifier();
326 }
327
328 return "";
329 }
330 default:
331 return "";
332 }
333}
334
335// Parse a type_id.
336//
337// type_id:
338// type_specifier_seq [abstract_declarator]
339//
340// type_specifier_seq:
341// type_specifier [type_specifier]
342//
343// type_specifier:
344// ["::"] [nested_name_specifier] type_name // not handled for now!
345// builtin_typename
346//
347std::optional<CompilerType> DILParser::ParseTypeId() {
348 CompilerType type;
349 // For now only allow builtin types -- will expand add to this later.
350 auto maybe_builtin_type = ParseBuiltinType();
351 if (maybe_builtin_type) {
352 type = *maybe_builtin_type;
353 } else
354 return {};
355
356 //
357 // abstract_declarator:
358 // ptr_operator [abstract_declarator]
359 //
360 std::vector<Token> ptr_operators;
361 while (CurToken().IsOneOf({Token::star, Token::amp})) {
362 Token tok = CurToken();
363 ptr_operators.push_back(std::move(tok));
364 m_dil_lexer.Advance();
365 }
366 type = ResolveTypeDeclarators(type, ptr_operators);
367
368 return type;
369}
370
371// Parse a built-in type
372//
373// builtin_typename:
374// identifier_seq
375//
376// identifier_seq:
377// identifier [identifier_seq]
378//
379// A built-in type can be a single identifier or a space-separated
380// list of identifiers (e.g. "short" or "long long").
381std::optional<CompilerType> DILParser::ParseBuiltinType() {
382 std::string type_name = "";
383 uint32_t save_token_idx = m_dil_lexer.GetCurrentTokenIdx();
384 bool first_word = true;
385 while (CurToken().GetKind() == Token::identifier) {
386 if (CurToken().GetSpelling() == "const" ||
387 CurToken().GetSpelling() == "volatile")
388 continue;
389 if (!first_word)
390 type_name.push_back(' ');
391 else
392 first_word = false;
393 type_name.append(CurToken().GetSpelling());
394 m_dil_lexer.Advance();
395 }
396
397 if (type_name.size() > 0) {
398 lldb::TargetSP target_sp = m_ctx_scope->CalculateTarget();
399 ConstString const_type_name(type_name.c_str());
400 for (auto type_system_sp : target_sp->GetScratchTypeSystems())
401 if (auto compiler_type =
402 type_system_sp->GetBuiltinTypeByName(const_type_name))
403 return compiler_type;
404 }
405
406 TentativeParsingRollback(save_token_idx);
407 return {};
408}
409
410// Parse an id_expression.
411//
412// id_expression:
413// unqualified_id
414// qualified_id
415//
416// qualified_id:
417// ["::"] [nested_name_specifier] unqualified_id
418// ["::"] identifier
419//
420// identifier:
421// ? Token::identifier ?
422//
424 // Try parsing optional global scope operator.
425 bool global_scope = false;
426 if (CurToken().Is(Token::coloncolon)) {
427 global_scope = true;
428 m_dil_lexer.Advance();
429 }
430
431 // Try parsing optional nested_name_specifier.
432 std::string nested_name_specifier = ParseNestedNameSpecifier();
433
434 // If nested_name_specifier is present, then it's qualified_id production.
435 // Follow the first production rule.
436 if (!nested_name_specifier.empty()) {
437 // Parse unqualified_id and construct a fully qualified id expression.
438 auto unqualified_id = ParseUnqualifiedId();
439
440 return llvm::formatv("{0}{1}{2}", global_scope ? "::" : "",
441 nested_name_specifier, unqualified_id);
442 }
443
444 if (!CurToken().Is(Token::identifier))
445 return "";
446
447 // No nested_name_specifier, but with global scope -- this is also a
448 // qualified_id production. Follow the second production rule.
449 if (global_scope) {
451 std::string identifier = CurToken().GetSpelling();
452 m_dil_lexer.Advance();
453 return llvm::formatv("{0}{1}", global_scope ? "::" : "", identifier);
454 }
455
456 // This is unqualified_id production.
457 return ParseUnqualifiedId();
458}
459
460// Parse an unqualified_id.
461//
462// unqualified_id:
463// identifier
464//
465// identifier:
466// ? Token::identifier ?
467//
470 std::string identifier = CurToken().GetSpelling();
471 m_dil_lexer.Advance();
472 return identifier;
473}
474
477 const std::vector<Token> &ptr_operators) {
478 // Resolve pointers/references.
479 for (Token tk : ptr_operators) {
480 uint32_t loc = tk.GetLocation();
481 if (tk.GetKind() == Token::star) {
482 // Pointers to reference types are forbidden.
483 if (type.IsReferenceType()) {
484 BailOut(llvm::formatv("'type name' declared as a pointer to a "
485 "reference of type {0}",
486 type.TypeDescription()),
487 loc, CurToken().GetSpelling().length());
488 return {};
489 }
490 // Get pointer type for the base type: e.g. int* -> int**.
491 type = type.GetPointerType();
492
493 } else if (tk.GetKind() == Token::amp) {
494 // References to references are forbidden.
495 // FIXME: In future we may want to allow rvalue references (i.e. &&).
496 if (type.IsReferenceType()) {
497 BailOut("type name declared as a reference to a reference", loc,
498 CurToken().GetSpelling().length());
499 return {};
500 }
501 // Get reference type for the base type: e.g. int -> int&.
502 type = type.GetLValueReferenceType();
503 }
504 }
505
506 return type;
507}
508
509// Parse a boolean_literal.
510//
511// boolean_literal:
512// "true"
513// "false"
514//
516 ExpectOneOf(std::vector<Token::Kind>{Token::kw_true, Token::kw_false});
517 uint32_t loc = CurToken().GetLocation();
518 bool literal_value = CurToken().Is(Token::kw_true);
519 m_dil_lexer.Advance();
520 return std::make_unique<BooleanLiteralNode>(loc, literal_value);
521}
522
523void DILParser::BailOut(const std::string &error, uint32_t loc,
524 uint16_t err_len) {
525 if (m_error)
526 // If error is already set, then the parser is in the "bail-out" mode. Don't
527 // do anything and keep the original error.
528 return;
529
530 m_error =
531 llvm::make_error<DILDiagnosticError>(m_input_expr, error, loc, err_len);
532 // Advance the lexer token index to the end of the lexed tokens vector.
533 m_dil_lexer.ResetTokenIdx(m_dil_lexer.NumLexedTokens() - 1);
534}
535
536// FIXME: Remove this once subscript operator uses ScalarLiteralNode.
537// Parse an integer_literal.
538//
539// integer_literal:
540// ? Integer constant ?
541//
542std::optional<int64_t> DILParser::ParseIntegerConstant() {
543 std::string number_spelling;
544 if (CurToken().GetKind() == Token::minus) {
545 // StringRef::getAsInteger<>() can parse negative numbers.
546 // FIXME: Remove this once unary minus operator is added.
547 number_spelling = "-";
548 m_dil_lexer.Advance();
549 }
550 number_spelling.append(CurToken().GetSpelling());
551 llvm::StringRef spelling_ref = number_spelling;
552 int64_t raw_value;
553 if (!spelling_ref.getAsInteger<int64_t>(0, raw_value)) {
554 m_dil_lexer.Advance();
555 return raw_value;
556 }
557
558 return std::nullopt;
559}
560
561// Parse a numeric_literal.
562//
563// numeric_literal:
564// ? Token::integer_constant ?
565// ? Token::floating_constant ?
566//
568 ASTNodeUP numeric_constant;
570 numeric_constant = ParseIntegerLiteral();
571 else
572 numeric_constant = ParseFloatingPointLiteral();
573 if (!numeric_constant) {
574 BailOut(llvm::formatv("Failed to parse token as numeric-constant: {0}",
575 CurToken()),
576 CurToken().GetLocation(), CurToken().GetSpelling().length());
577 return std::make_unique<ErrorNode>();
578 }
579 m_dil_lexer.Advance();
580 return numeric_constant;
581}
582
584 Token token = CurToken();
585 auto spelling = token.GetSpelling();
586 llvm::StringRef spelling_ref = spelling;
587
588 auto radix = llvm::getAutoSenseRadix(spelling_ref);
590 bool is_unsigned = false;
591 if (spelling_ref.consume_back_insensitive("u"))
592 is_unsigned = true;
593 if (spelling_ref.consume_back_insensitive("ll"))
595 else if (spelling_ref.consume_back_insensitive("l"))
597 // Suffix 'u' can be specified only once, before or after 'l'
598 if (!is_unsigned && spelling_ref.consume_back_insensitive("u"))
599 is_unsigned = true;
600
601 llvm::APInt raw_value;
602 if (!spelling_ref.getAsInteger(radix, raw_value))
603 return std::make_unique<IntegerLiteralNode>(token.GetLocation(), raw_value,
604 radix, is_unsigned, type);
605 return nullptr;
606}
607
609 Token token = CurToken();
610 auto spelling = token.GetSpelling();
611 llvm::StringRef spelling_ref = spelling;
612
613 llvm::APFloat raw_float(llvm::APFloat::IEEEdouble());
614 if (spelling_ref.consume_back_insensitive("f"))
615 raw_float = llvm::APFloat(llvm::APFloat::IEEEsingle());
616
617 auto StatusOrErr = raw_float.convertFromString(
618 spelling_ref, llvm::APFloat::rmNearestTiesToEven);
619 if (!errorToBool(StatusOrErr.takeError()))
620 return std::make_unique<FloatLiteralNode>(token.GetLocation(), raw_float);
621 return nullptr;
622}
623
625 if (CurToken().IsNot(kind)) {
626 BailOut(llvm::formatv("expected {0}, got: {1}", kind, CurToken()),
627 CurToken().GetLocation(), CurToken().GetSpelling().length());
628 }
629}
630
631void DILParser::ExpectOneOf(std::vector<Token::Kind> kinds_vec) {
632 if (!CurToken().IsOneOf(kinds_vec)) {
633 BailOut(llvm::formatv("expected any of ({0}), got: {1}",
634 llvm::iterator_range(kinds_vec), CurToken()),
635 CurToken().GetLocation(), CurToken().GetSpelling().length());
636 }
637}
638
639} // namespace lldb_private::dil
static llvm::raw_ostream & error(Stream &strm)
uint32_t GetKind(uint32_t data)
Return the type kind encoded in the given data.
Generic representation of a type in a programming language.
CompilerType GetPointerType() const
Return a new CompilerType that is a pointer to this type.
CompilerType GetLValueReferenceType() const
Return a new CompilerType that is a L value reference to this type if this type is valid and the type...
bool IsReferenceType(CompilerType *pointee_type=nullptr, bool *is_rvalue=nullptr) const
A uniqued constant string class.
Definition ConstString.h:40
A file utility class.
Definition FileSpec.h:57
DILDiagnosticError(DiagnosticDetail detail)
Definition DILParser.h:41
std::string message() const override
Definition DILParser.h:56
Class for doing the simple lexing required by DIL.
Definition DILLexer.h:72
void Expect(Token::Kind kind)
static llvm::Expected< ASTNodeUP > Parse(llvm::StringRef dil_input_expr, DILLexer lexer, std::shared_ptr< StackFrame > frame_sp, lldb::DynamicValueType use_dynamic, bool use_synthetic, bool fragile_ivar, bool check_ptr_vs_member)
Definition DILParser.cpp:48
std::optional< CompilerType > ParseTypeId()
void TentativeParsingRollback(uint32_t saved_idx)
Definition DILParser.h:116
ASTNodeUP ParseFloatingPointLiteral()
DILParser(llvm::StringRef dil_input_expr, DILLexer lexer, std::shared_ptr< StackFrame > frame_sp, lldb::DynamicValueType use_dynamic, bool use_synthetic, bool fragile_ivar, bool check_ptr_vs_member, llvm::Error &error)
Definition DILParser.cpp:64
void ExpectOneOf(std::vector< Token::Kind > kinds_vec)
std::optional< CompilerType > ParseBuiltinType()
std::shared_ptr< StackFrame > m_ctx_scope
Definition DILParser.h:127
void BailOut(const std::string &error, uint32_t loc, uint16_t err_len)
CompilerType ResolveTypeDeclarators(CompilerType type, const std::vector< Token > &ptr_operators)
std::optional< int64_t > ParseIntegerConstant()
lldb::DynamicValueType m_use_dynamic
Definition DILParser.h:136
llvm::StringRef m_input_expr
Definition DILParser.h:129
std::string ParseNestedNameSpecifier()
Class defining the tokens generated by the DIL lexer and used by the DIL parser.
Definition DILLexer.h:24
bool Is(Kind kind) const
Definition DILLexer.h:53
uint32_t GetLocation() const
Definition DILLexer.h:61
Kind GetKind() const
Definition DILLexer.h:49
std::string GetSpelling() const
Definition DILLexer.h:51
@ eNone
Type promotion casting.
Definition DILAST.h:46
std::unique_ptr< ASTNode > ASTNodeUP
Definition DILAST.h:79
std::shared_ptr< lldb_private::Target > TargetSP
A source location consisting of a file name and position.