LLDB mainline
DILLexer.cpp
Go to the documentation of this file.
1//===-- DILLexer.cpp ------------------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7// This implements the lexer for the Data Inspection Language (DIL), and its
8// helper functions, which will eventually underlie the 'frame variable'
9// command. The language whose tokens this lexer recognizes is described in
10// lldb/docs/dil-expr-lang.ebnf
11//
12//===----------------------------------------------------------------------===//
13
15#include "lldb/Utility/Status.h"
17#include "llvm/ADT/StringSwitch.h"
18
19namespace lldb_private::dil {
20
21llvm::StringRef Token::GetTokenName(Kind kind) {
22 switch (kind) {
23 case Kind::amp:
24 return "amp";
25 case Kind::arrow:
26 return "arrow";
27 case Kind::colon:
28 return "colon";
30 return "coloncolon";
31 case Kind::equal:
32 return "equal";
33 case Kind::eof:
34 return "eof";
36 return "float_constant";
38 return "greatergreater";
40 return "identifier";
42 return "integer_constant";
43 case Kind::kw_false:
44 return "false";
45 case Kind::kw_true:
46 return "true";
47 case Kind::l_paren:
48 return "l_paren";
49 case Kind::l_square:
50 return "l_square";
51 case Kind::lessless:
52 return "lessless";
53 case Kind::minus:
54 return "minus";
56 return "minusequal";
57 case Token::percent:
58 return "percent";
59 case Kind::period:
60 return "period";
61 case Kind::plus:
62 return "plus";
63 case Kind::plusequal:
64 return "plusequal";
65 case Kind::r_paren:
66 return "r_paren";
67 case Kind::r_square:
68 return "r_square";
69 case Token::slash:
70 return "slash";
71 case Token::star:
72 return "star";
73 }
74 llvm_unreachable("Unknown token name");
75}
76
/// Returns true if \p c lies in 'a'..'z' or in 'A'..'Z'.
static bool IsLetter(char c) {
  if (c >= 'a' && c <= 'z')
    return true;
  return c >= 'A' && c <= 'Z';
}
80
/// Returns true if \p c is one of the decimal digits '0'..'9'.
static bool IsDigit(char c) {
  const bool below_zero = c < '0';
  const bool above_nine = c > '9';
  return !(below_zero || above_nine);
}
82
83// A word starts with a letter, underscore, or dollar sign, followed by
84// letters ('a'..'z','A'..'Z'), digits ('0'..'9'), and/or underscores.
85static std::optional<llvm::StringRef> IsWord(llvm::StringRef expr,
86 llvm::StringRef &remainder) {
87 // Find the longest prefix consisting of letters, digits, underscors and
88 // '$'. If it doesn't start with a digit, then it's a word.
89 llvm::StringRef candidate = remainder.take_while(
90 [](char c) { return IsDigit(c) || IsLetter(c) || c == '_' || c == '$'; });
91 if (candidate.empty() || IsDigit(candidate[0]))
92 return std::nullopt;
93 remainder = remainder.drop_front(candidate.size());
94 return candidate;
95}
96
97static bool IsNumberBodyChar(char ch) {
98 return IsDigit(ch) || IsLetter(ch) || ch == '.';
99}
100
101static std::optional<llvm::StringRef> IsNumber(llvm::StringRef &remainder,
102 bool &isFloat) {
103 llvm::StringRef tail = remainder;
104 llvm::StringRef body = tail.take_while(IsNumberBodyChar);
105 size_t dots = body.count('.');
106 if (dots > 1 || dots == body.size())
107 return std::nullopt;
108 if (IsDigit(body.front()) || (body[0] == '.' && IsDigit(body[1]))) {
109 isFloat = dots == 1;
110 tail = tail.drop_front(body.size());
111 bool isHex = body.contains_insensitive('x');
112 bool hasExp = !isHex && body.contains_insensitive('e');
113 bool hasHexExp = isHex && body.contains_insensitive('p');
114 if (hasExp || hasHexExp) {
115 isFloat = true; // This marks numbers like 0x1p1 and 1e1 as float
116 if (body.ends_with_insensitive("e") || body.ends_with_insensitive("p"))
117 if (tail.consume_front("+") || tail.consume_front("-"))
118 tail = tail.drop_while(IsNumberBodyChar);
119 }
120 size_t number_length = remainder.size() - tail.size();
121 llvm::StringRef number = remainder.take_front(number_length);
122 remainder = remainder.drop_front(number_length);
123 return number;
124 }
125 return std::nullopt;
126}
127
128static llvm::Error IsNotAllowedByMode(llvm::StringRef expr, Token token,
129 lldb::DILMode mode) {
130 switch (mode) {
132 if (!token.IsOneOf({Token::identifier, Token::period, Token::eof})) {
133 return llvm::make_error<DILDiagnosticError>(
134 expr, llvm::formatv("{0} is not allowed in DIL simple mode", token),
135 token.GetLocation());
136 }
137 break;
139 if (!token.IsOneOf({Token::identifier, Token::integer_constant,
140 Token::period, Token::arrow, Token::star, Token::amp,
141 Token::l_square, Token::r_square, Token::eof})) {
142 return llvm::make_error<DILDiagnosticError>(
143 expr, llvm::formatv("{0} is not allowed in DIL legacy mode", token),
144 token.GetLocation());
145 }
146 break;
148 break;
149 }
150 return llvm::Error::success();
151}
152
153llvm::Expected<DILLexer> DILLexer::Create(llvm::StringRef expr,
154 lldb::DILMode mode) {
155 std::vector<Token> tokens;
156 llvm::StringRef remainder = expr;
157 do {
158 if (llvm::Expected<Token> t = Lex(expr, remainder)) {
159 Token token = *t;
160 if (llvm::Error error = IsNotAllowedByMode(expr, token, mode))
161 return error;
162 tokens.push_back(std::move(token));
163 } else {
164 return t.takeError();
165 }
166 } while (tokens.back().GetKind() != Token::eof);
167 return DILLexer(expr, std::move(tokens));
168}
169
170llvm::Expected<Token> DILLexer::Lex(llvm::StringRef expr,
171 llvm::StringRef &remainder) {
172 // Skip over whitespace (spaces).
173 remainder = remainder.ltrim();
174 llvm::StringRef::iterator cur_pos = remainder.begin();
175
176 // Check to see if we've reached the end of our input string.
177 if (remainder.empty())
178 return Token(Token::eof, "", (uint32_t)expr.size());
179
180 uint32_t position = cur_pos - expr.begin();
181 bool isFloat = false;
182 std::optional<llvm::StringRef> maybe_number = IsNumber(remainder, isFloat);
183 if (maybe_number) {
184 auto kind = isFloat ? Token::float_constant : Token::integer_constant;
185 return Token(kind, maybe_number->str(), position);
186 }
187 std::optional<llvm::StringRef> maybe_word = IsWord(expr, remainder);
188 if (maybe_word) {
189 llvm::StringRef word = *maybe_word;
190 Token::Kind kind = llvm::StringSwitch<Token::Kind>(word)
191 .Case("false", Token::kw_false)
192 .Case("true", Token::kw_true)
193 .Default(Token::identifier);
194 return Token(kind, word.str(), position);
195 }
196
197 // IMPORTANT: If two or more tokens share the same prefix, the tokens need to
198 // be ordered longest-to-shortest in the list below. E.g. '::' must come
199 // before ':', and '+=' must come before '+'.
200 constexpr std::pair<Token::Kind, const char *> operators[] = {
201 {Token::arrow, "->"},
202 {Token::coloncolon, "::"},
203 {Token::greatergreater, ">>"},
204 {Token::lessless, "<<"},
205 {Token::minusequal, "-="},
206 {Token::plusequal, "+="},
207 {Token::amp, "&"},
208 {Token::colon, ":"},
209 {Token::equal, "="},
210 {Token::l_paren, "("},
211 {Token::l_square, "["},
212 {Token::minus, "-"},
213 {Token::percent, "%"},
214 {Token::period, "."},
215 {Token::plus, "+"},
216 {Token::r_paren, ")"},
217 {Token::r_square, "]"},
218 {Token::slash, "/"},
219 {Token::star, "*"},
220 };
221 for (auto [kind, str] : operators) {
222 if (remainder.consume_front(str))
223 return Token(kind, str, position);
224 }
225
226 // Unrecognized character(s) in string; unable to lex it.
227 return llvm::make_error<DILDiagnosticError>(expr, "unrecognized token",
228 position);
229}
230
231} // namespace lldb_private::dil
static llvm::raw_ostream & error(Stream &strm)
static llvm::Expected< DILLexer > Create(llvm::StringRef expr, lldb::DILMode mode=lldb::eDILModeFull)
Lexes all the tokens in expr and calls the private constructor with the lexed tokens.
Definition DILLexer.cpp:153
DILLexer(llvm::StringRef dil_expr, std::vector< Token > lexed_tokens)
Definition DILLexer.h:124
static llvm::Expected< Token > Lex(llvm::StringRef expr, llvm::StringRef &remainder)
Definition DILLexer.cpp:170
Class defining the tokens generated by the DIL lexer and used by the DIL parser.
Definition DILLexer.h:25
static llvm::StringRef GetTokenName(Kind kind)
Definition DILLexer.cpp:21
uint32_t GetLocation() const
Definition DILLexer.h:70
bool IsOneOf(llvm::ArrayRef< Kind > kinds) const
Definition DILLexer.h:66
static std::optional< llvm::StringRef > IsWord(llvm::StringRef expr, llvm::StringRef &remainder)
Definition DILLexer.cpp:85
static bool IsNumberBodyChar(char ch)
Definition DILLexer.cpp:97
static llvm::Error IsNotAllowedByMode(llvm::StringRef expr, Token token, lldb::DILMode mode)
Definition DILLexer.cpp:128
static bool IsLetter(char c)
Definition DILLexer.cpp:77
static std::optional< llvm::StringRef > IsNumber(llvm::StringRef &remainder, bool &isFloat)
Definition DILLexer.cpp:101
static bool IsDigit(char c)
Definition DILLexer.cpp:81
DILMode
Data Inspection Language (DIL) evaluation modes.
@ eDILModeFull
Allowed: everything supported by DIL.
@ eDILModeLegacy
Allowed: identifiers, integers, operators: '.', '->', '*', '&', '[]'.
@ eDILModeSimple
Allowed: identifiers, operators: '.'.