LLDB mainline
DILLexer.cpp
Go to the documentation of this file.
1//===-- DILLexer.cpp ------------------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
// This implements the lexer for the Data Inspection Language (DIL), and its
// helper functions, which will eventually underlie the 'frame variable'
// command. The language that this lexer tokenizes is described in
// lldb/docs/dil-expr-lang.ebnf
11//
12//===----------------------------------------------------------------------===//
13
15#include "lldb/Utility/Status.h"
17#include "llvm/ADT/StringSwitch.h"
18
19namespace lldb_private::dil {
20
21llvm::StringRef Token::GetTokenName(Kind kind) {
22 switch (kind) {
23 case Kind::amp:
24 return "amp";
25 case Kind::arrow:
26 return "arrow";
27 case Kind::colon:
28 return "colon";
30 return "coloncolon";
31 case Kind::eof:
32 return "eof";
34 return "float_constant";
36 return "identifier";
38 return "integer_constant";
39 case Kind::kw_false:
40 return "false";
41 case Kind::kw_true:
42 return "true";
43 case Kind::l_paren:
44 return "l_paren";
45 case Kind::l_square:
46 return "l_square";
47 case Kind::minus:
48 return "minus";
49 case Kind::period:
50 return "period";
51 case Kind::plus:
52 return "plus";
53 case Kind::r_paren:
54 return "r_paren";
55 case Kind::r_square:
56 return "r_square";
57 case Token::star:
58 return "star";
59 }
60 llvm_unreachable("Unknown token name");
61}
62
/// Returns true if \p c is an ASCII letter ('a'..'z' or 'A'..'Z').
static bool IsLetter(char c) {
  const bool is_lower = 'a' <= c && c <= 'z';
  const bool is_upper = 'A' <= c && c <= 'Z';
  return is_lower || is_upper;
}
66
/// Returns true if \p c is an ASCII decimal digit ('0'..'9').
static bool IsDigit(char c) { return '0' <= c && c <= '9'; }
68
69// A word starts with a letter, underscore, or dollar sign, followed by
70// letters ('a'..'z','A'..'Z'), digits ('0'..'9'), and/or underscores.
71static std::optional<llvm::StringRef> IsWord(llvm::StringRef expr,
72 llvm::StringRef &remainder) {
73 // Find the longest prefix consisting of letters, digits, underscors and
74 // '$'. If it doesn't start with a digit, then it's a word.
75 llvm::StringRef candidate = remainder.take_while(
76 [](char c) { return IsDigit(c) || IsLetter(c) || c == '_' || c == '$'; });
77 if (candidate.empty() || IsDigit(candidate[0]))
78 return std::nullopt;
79 remainder = remainder.drop_front(candidate.size());
80 return candidate;
81}
82
83static bool IsNumberBodyChar(char ch) {
84 return IsDigit(ch) || IsLetter(ch) || ch == '.';
85}
86
87static std::optional<llvm::StringRef> IsNumber(llvm::StringRef &remainder,
88 bool &isFloat) {
89 llvm::StringRef tail = remainder;
90 llvm::StringRef body = tail.take_while(IsNumberBodyChar);
91 size_t dots = body.count('.');
92 if (dots > 1 || dots == body.size())
93 return std::nullopt;
94 if (IsDigit(body.front()) || (body[0] == '.' && IsDigit(body[1]))) {
95 isFloat = dots == 1;
96 tail = tail.drop_front(body.size());
97 bool isHex = body.contains_insensitive('x');
98 bool hasExp = !isHex && body.contains_insensitive('e');
99 bool hasHexExp = isHex && body.contains_insensitive('p');
100 if (hasExp || hasHexExp) {
101 isFloat = true; // This marks numbers like 0x1p1 and 1e1 as float
102 if (body.ends_with_insensitive("e") || body.ends_with_insensitive("p"))
103 if (tail.consume_front("+") || tail.consume_front("-"))
104 tail = tail.drop_while(IsNumberBodyChar);
105 }
106 size_t number_length = remainder.size() - tail.size();
107 llvm::StringRef number = remainder.take_front(number_length);
108 remainder = remainder.drop_front(number_length);
109 return number;
110 }
111 return std::nullopt;
112}
113
114llvm::Expected<DILLexer> DILLexer::Create(llvm::StringRef expr) {
115 std::vector<Token> tokens;
116 llvm::StringRef remainder = expr;
117 do {
118 if (llvm::Expected<Token> t = Lex(expr, remainder)) {
119 tokens.push_back(std::move(*t));
120 } else {
121 return t.takeError();
122 }
123 } while (tokens.back().GetKind() != Token::eof);
124 return DILLexer(expr, std::move(tokens));
125}
126
127llvm::Expected<Token> DILLexer::Lex(llvm::StringRef expr,
128 llvm::StringRef &remainder) {
129 // Skip over whitespace (spaces).
130 remainder = remainder.ltrim();
131 llvm::StringRef::iterator cur_pos = remainder.begin();
132
133 // Check to see if we've reached the end of our input string.
134 if (remainder.empty())
135 return Token(Token::eof, "", (uint32_t)expr.size());
136
137 uint32_t position = cur_pos - expr.begin();
138 bool isFloat = false;
139 std::optional<llvm::StringRef> maybe_number = IsNumber(remainder, isFloat);
140 if (maybe_number) {
141 auto kind = isFloat ? Token::float_constant : Token::integer_constant;
142 return Token(kind, maybe_number->str(), position);
143 }
144 std::optional<llvm::StringRef> maybe_word = IsWord(expr, remainder);
145 if (maybe_word) {
146 llvm::StringRef word = *maybe_word;
147 Token::Kind kind = llvm::StringSwitch<Token::Kind>(word)
148 .Case("false", Token::kw_false)
149 .Case("true", Token::kw_true)
150 .Default(Token::identifier);
151 return Token(kind, word.str(), position);
152 }
153
154 constexpr std::pair<Token::Kind, const char *> operators[] = {
155 {Token::amp, "&"}, {Token::arrow, "->"}, {Token::coloncolon, "::"},
156 {Token::colon, ":"}, {Token::l_paren, "("}, {Token::l_square, "["},
157 {Token::minus, "-"}, {Token::period, "."}, {Token::plus, "+"},
158 {Token::r_paren, ")"}, {Token::r_square, "]"}, {Token::star, "*"},
159 };
160 for (auto [kind, str] : operators) {
161 if (remainder.consume_front(str))
162 return Token(kind, str, position);
163 }
164
165 // Unrecognized character(s) in string; unable to lex it.
166 return llvm::make_error<DILDiagnosticError>(expr, "unrecognized token",
167 position);
168}
169
170} // namespace lldb_private::dil
DILLexer(llvm::StringRef dil_expr, std::vector< Token > lexed_tokens)
Definition DILLexer.h:115
static llvm::Expected< DILLexer > Create(llvm::StringRef expr)
Lexes all the tokens in expr and calls the private constructor with the lexed tokens.
Definition DILLexer.cpp:114
static llvm::Expected< Token > Lex(llvm::StringRef expr, llvm::StringRef &remainder)
Definition DILLexer.cpp:127
Class defining the tokens generated by the DIL lexer and used by the DIL parser.
Definition DILLexer.h:24
static llvm::StringRef GetTokenName(Kind kind)
Definition DILLexer.cpp:21
static std::optional< llvm::StringRef > IsWord(llvm::StringRef expr, llvm::StringRef &remainder)
Definition DILLexer.cpp:71
static bool IsNumberBodyChar(char ch)
Definition DILLexer.cpp:83
static bool IsLetter(char c)
Definition DILLexer.cpp:63
static std::optional< llvm::StringRef > IsNumber(llvm::StringRef &remainder, bool &isFloat)
Definition DILLexer.cpp:87
static bool IsDigit(char c)
Definition DILLexer.cpp:67