LLDB mainline
DILParser.cpp
Go to the documentation of this file.
1//===-- DILParser.cpp -----------------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7// This implements the recursive descent parser for the Data Inspection
8// Language (DIL), and its helper functions, which will eventually underlie the
9// 'frame variable' command. The language that this parser recognizes is
10// described in lldb/docs/dil-expr-lang.ebnf
11//
12//===----------------------------------------------------------------------===//
13
19#include "llvm/ADT/StringRef.h"
20#include "llvm/Support/FormatAdapters.h"
21#include <cstdlib>
22#include <limits.h>
23#include <memory>
24#include <sstream>
25#include <string>
26
27namespace lldb_private::dil {
28
30 const std::string &message, uint32_t loc,
31 uint16_t err_len)
32 : ErrorInfo(make_error_code(std::errc::invalid_argument)) {
34 FileSpec{}, /*line=*/1, static_cast<uint16_t>(loc + 1),
35 err_len, false, /*in_user_input=*/true};
36 std::string rendered_msg =
37 llvm::formatv("<user expression 0>:1:{0}: {1}\n 1 | {2}\n | ^",
38 loc + 1, message, expr);
39 m_detail.source_location = sloc;
41 m_detail.message = message;
42 m_detail.rendered = std::move(rendered_msg);
43}
44
45llvm::Expected<ASTNodeUP>
46DILParser::Parse(llvm::StringRef dil_input_expr, DILLexer lexer,
47 std::shared_ptr<StackFrame> frame_sp,
48 lldb::DynamicValueType use_dynamic, bool use_synthetic,
49 bool fragile_ivar, bool check_ptr_vs_member) {
50 llvm::Error error = llvm::Error::success();
51 DILParser parser(dil_input_expr, lexer, frame_sp, use_dynamic, use_synthetic,
52 fragile_ivar, check_ptr_vs_member, error);
53
54 ASTNodeUP node_up = parser.Run();
55
56 if (error)
57 return error;
58
59 return node_up;
60}
61
62DILParser::DILParser(llvm::StringRef dil_input_expr, DILLexer lexer,
63 std::shared_ptr<StackFrame> frame_sp,
64 lldb::DynamicValueType use_dynamic, bool use_synthetic,
65 bool fragile_ivar, bool check_ptr_vs_member,
66 llvm::Error &error)
67 : m_ctx_scope(frame_sp), m_input_expr(dil_input_expr),
68 m_dil_lexer(std::move(lexer)), m_error(error), m_use_dynamic(use_dynamic),
69 m_use_synthetic(use_synthetic), m_fragile_ivar(fragile_ivar),
70 m_check_ptr_vs_member(check_ptr_vs_member) {}
71
74
76
77 return expr;
78}
79
80// Parse an expression.
81//
82// expression:
83// unary_expression
84//
86
87// Parse a unary_expression.
88//
89// unary_expression:
90// postfix_expression
91// unary_operator expression
92//
93// unary_operator:
94// "&"
95// "*"
96//
98 if (CurToken().IsOneOf({Token::amp, Token::star})) {
99 Token token = CurToken();
100 uint32_t loc = token.GetLocation();
101 m_dil_lexer.Advance();
102 auto rhs = ParseExpression();
103 switch (token.GetKind()) {
104 case Token::star:
105 return std::make_unique<UnaryOpNode>(loc, UnaryOpKind::Deref,
106 std::move(rhs));
107 case Token::amp:
108 return std::make_unique<UnaryOpNode>(loc, UnaryOpKind::AddrOf,
109 std::move(rhs));
110
111 default:
112 llvm_unreachable("invalid token kind");
113 }
114 }
115 return ParsePostfixExpression();
116}
117
118// Parse a postfix_expression.
119//
120// postfix_expression:
121// primary_expression
122// postfix_expression "[" integer_literal "]"
123// postfix_expression "[" integer_literal "-" integer_literal "]"
124// postfix_expression "." id_expression
125// postfix_expression "->" id_expression
126//
129 while (CurToken().IsOneOf({Token::l_square, Token::period, Token::arrow})) {
130 uint32_t loc = CurToken().GetLocation();
131 Token token = CurToken();
132 switch (token.GetKind()) {
133 case Token::l_square: {
134 m_dil_lexer.Advance();
135 std::optional<int64_t> index = ParseIntegerConstant();
136 if (!index) {
137 BailOut(
138 llvm::formatv("failed to parse integer constant: {0}", CurToken()),
139 CurToken().GetLocation(), CurToken().GetSpelling().length());
140 return std::make_unique<ErrorNode>();
141 }
142 if (CurToken().GetKind() == Token::minus) {
143 m_dil_lexer.Advance();
144 std::optional<int64_t> last_index = ParseIntegerConstant();
145 if (!last_index) {
146 BailOut(llvm::formatv("failed to parse integer constant: {0}",
147 CurToken()),
148 CurToken().GetLocation(), CurToken().GetSpelling().length());
149 return std::make_unique<ErrorNode>();
150 }
151 lhs = std::make_unique<BitFieldExtractionNode>(
152 loc, std::move(lhs), std::move(*index), std::move(*last_index));
153 } else {
154 lhs = std::make_unique<ArraySubscriptNode>(loc, std::move(lhs),
155 std::move(*index));
156 }
158 m_dil_lexer.Advance();
159 break;
160 }
161 case Token::period:
162 case Token::arrow: {
163 m_dil_lexer.Advance();
164 Token member_token = CurToken();
165 std::string member_id = ParseIdExpression();
166 lhs = std::make_unique<MemberOfNode>(
167 member_token.GetLocation(), std::move(lhs),
168 token.GetKind() == Token::arrow, member_id);
169 break;
170 }
171 default:
172 llvm_unreachable("invalid token");
173 }
174 }
175
176 return lhs;
177}
178
179// Parse a primary_expression.
180//
181// primary_expression:
182// numeric_literal
183// boolean_literal
184// id_expression
185// "(" expression ")"
186//
189 return ParseNumericLiteral();
190 if (CurToken().IsOneOf({Token::kw_true, Token::kw_false}))
191 return ParseBooleanLiteral();
192 if (CurToken().IsOneOf(
194 // Save the source location for the diagnostics message.
195 uint32_t loc = CurToken().GetLocation();
196 std::string identifier = ParseIdExpression();
197
198 if (!identifier.empty())
199 return std::make_unique<IdentifierNode>(loc, identifier);
200 }
201
202 if (CurToken().Is(Token::l_paren)) {
203 m_dil_lexer.Advance();
204 auto expr = ParseExpression();
206 m_dil_lexer.Advance();
207 return expr;
208 }
209
210 BailOut(llvm::formatv("Unexpected token: {0}", CurToken()),
211 CurToken().GetLocation(), CurToken().GetSpelling().length());
212 return std::make_unique<ErrorNode>();
213}
214
215// Parse nested_name_specifier.
216//
217// nested_name_specifier:
218// type_name "::"
219// namespace_name "::"
220// nested_name_specifier identifier "::"
221//
223 // The first token in nested_name_specifier is always an identifier, or
224 // '(anonymous namespace)'.
225 switch (CurToken().GetKind()) {
226 case Token::l_paren: {
227 // Anonymous namespaces need to be treated specially: They are
228 // represented by the string '(anonymous namespace)', which has a
229 // space in it (throwing off normal parsing) and is not actually
230 // proper C++. Check to see if we're looking at
231 // '(anonymous namespace)::...'
232
233 // Look for all the pieces, in order:
234 // l_paren 'anonymous' 'namespace' r_paren coloncolon
235 if (m_dil_lexer.LookAhead(1).Is(Token::identifier) &&
236 (m_dil_lexer.LookAhead(1).GetSpelling() == "anonymous") &&
237 m_dil_lexer.LookAhead(2).Is(Token::identifier) &&
238 (m_dil_lexer.LookAhead(2).GetSpelling() == "namespace") &&
239 m_dil_lexer.LookAhead(3).Is(Token::r_paren) &&
240 m_dil_lexer.LookAhead(4).Is(Token::coloncolon)) {
241 m_dil_lexer.Advance(4);
242
244 m_dil_lexer.Advance();
245 if (!CurToken().Is(Token::identifier) && !CurToken().Is(Token::l_paren)) {
246 BailOut("Expected an identifier or anonymous namespace, but not found.",
247 CurToken().GetLocation(), CurToken().GetSpelling().length());
248 }
249 // Continue parsing the nested_name_specifier.
250 std::string identifier2 = ParseNestedNameSpecifier();
251
252 return "(anonymous namespace)::" + identifier2;
253 }
254
255 return "";
256 } // end of special handling for '(anonymous namespace)'
257 case Token::identifier: {
258 // If the next token is scope ("::"), then this is indeed a
259 // nested_name_specifier
260 if (m_dil_lexer.LookAhead(1).Is(Token::coloncolon)) {
261 // This nested_name_specifier is a single identifier.
262 std::string identifier = CurToken().GetSpelling();
263 m_dil_lexer.Advance(1);
265 m_dil_lexer.Advance();
266 // Continue parsing the nested_name_specifier.
267 return identifier + "::" + ParseNestedNameSpecifier();
268 }
269
270 return "";
271 }
272 default:
273 return "";
274 }
275}
276
277// Parse an id_expression.
278//
279// id_expression:
280// unqualified_id
281// qualified_id
282//
283// qualified_id:
284// ["::"] [nested_name_specifier] unqualified_id
285// ["::"] identifier
286//
287// identifier:
288// ? Token::identifier ?
289//
291 // Try parsing optional global scope operator.
292 bool global_scope = false;
293 if (CurToken().Is(Token::coloncolon)) {
294 global_scope = true;
295 m_dil_lexer.Advance();
296 }
297
298 // Try parsing optional nested_name_specifier.
299 std::string nested_name_specifier = ParseNestedNameSpecifier();
300
301 // If nested_name_specifier is present, then it's qualified_id production.
302 // Follow the first production rule.
303 if (!nested_name_specifier.empty()) {
304 // Parse unqualified_id and construct a fully qualified id expression.
305 auto unqualified_id = ParseUnqualifiedId();
306
307 return llvm::formatv("{0}{1}{2}", global_scope ? "::" : "",
308 nested_name_specifier, unqualified_id);
309 }
310
311 if (!CurToken().Is(Token::identifier))
312 return "";
313
314 // No nested_name_specifier, but with global scope -- this is also a
315 // qualified_id production. Follow the second production rule.
316 if (global_scope) {
318 std::string identifier = CurToken().GetSpelling();
319 m_dil_lexer.Advance();
320 return llvm::formatv("{0}{1}", global_scope ? "::" : "", identifier);
321 }
322
323 // This is unqualified_id production.
324 return ParseUnqualifiedId();
325}
326
327// Parse an unqualified_id.
328//
329// unqualified_id:
330// identifier
331//
332// identifier:
333// ? Token::identifier ?
334//
337 std::string identifier = CurToken().GetSpelling();
338 m_dil_lexer.Advance();
339 return identifier;
340}
341
342// Parse a boolean_literal.
343//
344// boolean_literal:
345// "true"
346// "false"
347//
349 ExpectOneOf(std::vector<Token::Kind>{Token::kw_true, Token::kw_false});
350 uint32_t loc = CurToken().GetLocation();
351 bool literal_value = CurToken().Is(Token::kw_true);
352 m_dil_lexer.Advance();
353 return std::make_unique<BooleanLiteralNode>(loc, literal_value);
354}
355
356void DILParser::BailOut(const std::string &error, uint32_t loc,
357 uint16_t err_len) {
358 if (m_error)
359 // If error is already set, then the parser is in the "bail-out" mode. Don't
360 // do anything and keep the original error.
361 return;
362
363 m_error =
364 llvm::make_error<DILDiagnosticError>(m_input_expr, error, loc, err_len);
365 // Advance the lexer token index to the end of the lexed tokens vector.
366 m_dil_lexer.ResetTokenIdx(m_dil_lexer.NumLexedTokens() - 1);
367}
368
369// FIXME: Remove this once subscript operator uses ScalarLiteralNode.
370// Parse a integer_literal.
371//
372// integer_literal:
373// ? Integer constant ?
374//
375std::optional<int64_t> DILParser::ParseIntegerConstant() {
376 std::string number_spelling;
377 if (CurToken().GetKind() == Token::minus) {
378 // StringRef::getAsInteger<>() can parse negative numbers.
379 // FIXME: Remove this once unary minus operator is added.
380 number_spelling = "-";
381 m_dil_lexer.Advance();
382 }
383 number_spelling.append(CurToken().GetSpelling());
384 llvm::StringRef spelling_ref = number_spelling;
385 int64_t raw_value;
386 if (!spelling_ref.getAsInteger<int64_t>(0, raw_value)) {
387 m_dil_lexer.Advance();
388 return raw_value;
389 }
390
391 return std::nullopt;
392}
393
394// Parse a numeric_literal.
395//
396// numeric_literal:
397// ? Token::integer_constant ?
398// ? Token::floating_constant ?
399//
401 ASTNodeUP numeric_constant;
403 numeric_constant = ParseIntegerLiteral();
404 else
405 numeric_constant = ParseFloatingPointLiteral();
406 if (!numeric_constant) {
407 BailOut(llvm::formatv("Failed to parse token as numeric-constant: {0}",
408 CurToken()),
409 CurToken().GetLocation(), CurToken().GetSpelling().length());
410 return std::make_unique<ErrorNode>();
411 }
412 m_dil_lexer.Advance();
413 return numeric_constant;
414}
415
417 Token token = CurToken();
418 auto spelling = token.GetSpelling();
419 llvm::StringRef spelling_ref = spelling;
420
421 auto radix = llvm::getAutoSenseRadix(spelling_ref);
423 bool is_unsigned = false;
424 if (spelling_ref.consume_back_insensitive("u"))
425 is_unsigned = true;
426 if (spelling_ref.consume_back_insensitive("ll"))
428 else if (spelling_ref.consume_back_insensitive("l"))
430 // Suffix 'u' can be specified only once, before or after 'l'
431 if (!is_unsigned && spelling_ref.consume_back_insensitive("u"))
432 is_unsigned = true;
433
434 llvm::APInt raw_value;
435 if (!spelling_ref.getAsInteger(radix, raw_value))
436 return std::make_unique<IntegerLiteralNode>(token.GetLocation(), raw_value,
437 radix, is_unsigned, type);
438 return nullptr;
439}
440
442 Token token = CurToken();
443 auto spelling = token.GetSpelling();
444 llvm::StringRef spelling_ref = spelling;
445
446 llvm::APFloat raw_float(llvm::APFloat::IEEEdouble());
447 if (spelling_ref.consume_back_insensitive("f"))
448 raw_float = llvm::APFloat(llvm::APFloat::IEEEsingle());
449
450 auto StatusOrErr = raw_float.convertFromString(
451 spelling_ref, llvm::APFloat::rmNearestTiesToEven);
452 if (!errorToBool(StatusOrErr.takeError()))
453 return std::make_unique<FloatLiteralNode>(token.GetLocation(), raw_float);
454 return nullptr;
455}
456
458 if (CurToken().IsNot(kind)) {
459 BailOut(llvm::formatv("expected {0}, got: {1}", kind, CurToken()),
460 CurToken().GetLocation(), CurToken().GetSpelling().length());
461 }
462}
463
464void DILParser::ExpectOneOf(std::vector<Token::Kind> kinds_vec) {
465 if (!CurToken().IsOneOf(kinds_vec)) {
466 BailOut(llvm::formatv("expected any of ({0}), got: {1}",
467 llvm::iterator_range(kinds_vec), CurToken()),
468 CurToken().GetLocation(), CurToken().GetSpelling().length());
469 }
470}
471
472} // namespace lldb_private::dil
static llvm::raw_ostream & error(Stream &strm)
uint32_t GetKind(uint32_t data)
Return the type kind encoded in the given data.
A file utility class.
Definition FileSpec.h:57
DILDiagnosticError(DiagnosticDetail detail)
Definition DILParser.h:41
std::string message() const override
Definition DILParser.h:56
Class for doing the simple lexing required by DIL.
Definition DILLexer.h:72
void Expect(Token::Kind kind)
static llvm::Expected< ASTNodeUP > Parse(llvm::StringRef dil_input_expr, DILLexer lexer, std::shared_ptr< StackFrame > frame_sp, lldb::DynamicValueType use_dynamic, bool use_synthetic, bool fragile_ivar, bool check_ptr_vs_member)
Definition DILParser.cpp:46
ASTNodeUP ParseFloatingPointLiteral()
DILParser(llvm::StringRef dil_input_expr, DILLexer lexer, std::shared_ptr< StackFrame > frame_sp, lldb::DynamicValueType use_dynamic, bool use_synthetic, bool fragile_ivar, bool check_ptr_vs_member, llvm::Error &error)
Definition DILParser.cpp:62
void ExpectOneOf(std::vector< Token::Kind > kinds_vec)
std::shared_ptr< StackFrame > m_ctx_scope
Definition DILParser.h:121
void BailOut(const std::string &error, uint32_t loc, uint16_t err_len)
std::optional< int64_t > ParseIntegerConstant()
lldb::DynamicValueType m_use_dynamic
Definition DILParser.h:130
llvm::StringRef m_input_expr
Definition DILParser.h:123
std::string ParseNestedNameSpecifier()
Class defining the tokens generated by the DIL lexer and used by the DIL parser.
Definition DILLexer.h:24
bool Is(Kind kind) const
Definition DILLexer.h:53
uint32_t GetLocation() const
Definition DILLexer.h:61
Kind GetKind() const
Definition DILLexer.h:49
std::string GetSpelling() const
Definition DILLexer.h:51
std::unique_ptr< ASTNode > ASTNodeUP
Definition DILAST.h:68
A source location consisting of a file name and position.