LLDB mainline
ClangHighlighter.cpp
Go to the documentation of this file.
1//===-- ClangHighlighter.cpp ----------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "ClangHighlighter.h"
10
16
17#include "clang/Basic/FileManager.h"
18#include "clang/Basic/SourceManager.h"
19#include "clang/Lex/Lexer.h"
20#include "llvm/ADT/StringSet.h"
21#include "llvm/Support/MemoryBuffer.h"
22#include <optional>
23
24using namespace lldb_private;
25
27
28bool ClangHighlighter::isKeyword(llvm::StringRef token) const {
29 return keywords.contains(token);
30}
31
33#define KEYWORD(X, N) keywords.insert(#X);
34#include "clang/Basic/TokenKinds.def"
35}
36
37/// Determines which style should be applied to the given token.
38/// \param highlighter
39/// The current highlighter that should use the style.
40/// \param token
41/// The current token.
42/// \param tok_str
43/// The string in the source code the token represents.
44/// \param options
45/// The style we use for coloring the source code.
46/// \param in_pp_directive
47/// If we are currently in a preprocessor directive. NOTE: This is
48/// passed by reference and will be updated if the current token starts
49/// or ends a preprocessor directive.
50/// \return
51/// The ColorStyle that should be applied to the token.
54 const clang::Token &token, llvm::StringRef tok_str,
55 const HighlightStyle &options, bool &in_pp_directive) {
56 using namespace clang;
57
58 if (token.is(tok::comment)) {
59 // If we were in a preprocessor directive before, we now left it.
60 in_pp_directive = false;
61 return options.comment;
62 } else if (in_pp_directive || token.getKind() == tok::hash) {
63 // Let's assume that the rest of the line is a PP directive.
64 in_pp_directive = true;
65 // Preprocessor directives are hard to match, so we have to hack this in.
66 return options.pp_directive;
67 } else if (tok::isStringLiteral(token.getKind()))
68 return options.string_literal;
69 else if (tok::isLiteral(token.getKind()))
70 return options.scalar_literal;
71 else if (highlighter.isKeyword(tok_str))
72 return options.keyword;
73 else
74 switch (token.getKind()) {
75 case tok::raw_identifier:
76 case tok::identifier:
77 return options.identifier;
78 case tok::l_brace:
79 case tok::r_brace:
80 return options.braces;
81 case tok::l_square:
82 case tok::r_square:
83 return options.square_brackets;
84 case tok::l_paren:
85 case tok::r_paren:
86 return options.parentheses;
87 case tok::comma:
88 return options.comma;
89 case tok::coloncolon:
90 case tok::colon:
91 return options.colon;
92
93 case tok::amp:
94 case tok::ampamp:
95 case tok::ampequal:
96 case tok::star:
97 case tok::starequal:
98 case tok::plus:
99 case tok::plusplus:
100 case tok::plusequal:
101 case tok::minus:
102 case tok::arrow:
103 case tok::minusminus:
104 case tok::minusequal:
105 case tok::tilde:
106 case tok::exclaim:
107 case tok::exclaimequal:
108 case tok::slash:
109 case tok::slashequal:
110 case tok::percent:
111 case tok::percentequal:
112 case tok::less:
113 case tok::lessless:
114 case tok::lessequal:
115 case tok::lesslessequal:
116 case tok::spaceship:
117 case tok::greater:
118 case tok::greatergreater:
119 case tok::greaterequal:
120 case tok::greatergreaterequal:
121 case tok::caret:
122 case tok::caretequal:
123 case tok::pipe:
124 case tok::pipepipe:
125 case tok::pipeequal:
126 case tok::question:
127 case tok::equal:
128 case tok::equalequal:
129 return options.operators;
130 default:
131 break;
132 }
134}
135
137 llvm::StringRef line,
138 std::optional<size_t> cursor_pos,
139 llvm::StringRef previous_lines,
140 Stream &result) const {
141 using namespace clang;
142
143 FileSystemOptions file_opts;
144 FileManager file_mgr(file_opts,
145 FileSystem::Instance().GetVirtualFileSystem());
146
147 // The line might end in a backslash which would cause Clang to drop the
148 // backslash and the terminating new line. This makes sense when parsing C++,
149 // but when highlighting we care about preserving the backslash/newline. To
150 // not lose this information we remove the new line here so that Clang knows
151 // this is just a single line we are highlighting. We add back the newline
152 // after tokenizing.
153 llvm::StringRef line_ending = "";
154 // There are a few legal line endings Clang recognizes and we need to
155 // temporarily remove from the string.
156 if (line.consume_back("\r\n"))
157 line_ending = "\r\n";
158 else if (line.consume_back("\n"))
159 line_ending = "\n";
160 else if (line.consume_back("\r"))
161 line_ending = "\r";
162
163 unsigned line_number = previous_lines.count('\n') + 1U;
164
165 // Let's build the actual source code Clang needs and setup some utility
166 // objects.
167 std::string full_source = previous_lines.str() + line.str();
168 DiagnosticOptions diags_opts;
169 DiagnosticsEngine diags(DiagnosticIDs::create(), diags_opts);
170 clang::SourceManager SM(diags, file_mgr);
171 auto buf = llvm::MemoryBuffer::getMemBuffer(full_source);
172
173 FileID FID = SM.createFileID(buf->getMemBufferRef());
174
175 // Let's just enable the latest ObjC and C++ which should get most tokens
176 // right.
177 LangOptions Opts;
178 Opts.ObjC = true;
179 // FIXME: This should probably set CPlusPlus, CPlusPlus11, ... too
180 Opts.CPlusPlus17 = true;
181 Opts.LineComment = true;
182
183 Lexer lex(FID, buf->getMemBufferRef(), SM, Opts);
184 // The lexer should keep whitespace around.
185 lex.SetKeepWhitespaceMode(true);
186
187 // Keeps track if we have entered a PP directive.
188 bool in_pp_directive = false;
189
190 // True once we actually lexed the user provided line.
191 bool found_user_line = false;
192
193 // True if we already highlighted the token under the cursor, false otherwise.
194 bool highlighted_cursor = false;
195 Token token;
196 bool exit = false;
197 while (!exit) {
198 // Returns true if this is the last token we get from the lexer.
199 exit = lex.LexFromRawLexer(token);
200
201 bool invalid = false;
202 unsigned current_line_number =
203 SM.getSpellingLineNumber(token.getLocation(), &invalid);
204 if (current_line_number != line_number)
205 continue;
206 found_user_line = true;
207
208 // We don't need to print any tokens without a spelling line number.
209 if (invalid)
210 continue;
211
212 // Same as above but with the column number.
213 invalid = false;
214 unsigned start = SM.getSpellingColumnNumber(token.getLocation(), &invalid);
215 if (invalid)
216 continue;
217 // Column numbers start at 1, but indexes in our string start at 0.
218 --start;
219
220 // Annotations don't have a length, so let's skip them.
221 if (token.isAnnotation())
222 continue;
223
224 // Extract the token string from our source code.
225 llvm::StringRef tok_str = line.substr(start, token.getLength());
226
227 // If the token is just an empty string, we can skip all the work below.
228 if (tok_str.empty())
229 continue;
230
231 // If the cursor is inside this token, we have to apply the 'selected'
232 // highlight style before applying the actual token color.
233 llvm::StringRef to_print = tok_str;
234 StreamString storage;
235 auto end = start + token.getLength();
236 if (cursor_pos && end > *cursor_pos && !highlighted_cursor) {
237 highlighted_cursor = true;
238 options.selected.Apply(storage, tok_str);
239 to_print = storage.GetString();
240 }
241
242 // See how we are supposed to highlight this token.
244 determineClangStyle(*this, token, tok_str, options, in_pp_directive);
245
246 color.Apply(result, to_print);
247 }
248
249 // Add the line ending we trimmed before tokenizing.
250 result << line_ending;
251
252 // If we went over the whole file but couldn't find our own file, then
253 // somehow our setup was wrong. When we're in release mode we just give the
254 // user the normal line and pretend we don't know how to highlight it. In
255 // debug mode we bail out with an assert as this should never happen.
256 if (!found_user_line) {
257 result << line;
258 assert(false && "We couldn't find the user line in the input file?");
259 }
260}
261
263 if (Language::LanguageIsCFamily(language))
264 return new ClangHighlighter();
265 return nullptr;
266}
267
272
static HighlightStyle::ColorStyle determineClangStyle(const ClangHighlighter &highlighter, const clang::Token &token, llvm::StringRef tok_str, const HighlightStyle &options, bool &in_pp_directive)
Determines which style should be applied to the given token.
#define LLDB_PLUGIN_DEFINE_ADV(ClassName, PluginName)
static llvm::StringRef GetPluginNameStatic()
static Highlighter * CreateInstance(lldb::LanguageType language)
void Highlight(const HighlightStyle &options, llvm::StringRef line, std::optional< size_t > cursor_pos, llvm::StringRef previous_lines, Stream &s) const override
Highlights the given line.
bool isKeyword(llvm::StringRef token) const
Returns true if the given string represents a keyword in any Clang supported language.
static FileSystem & Instance()
A pair of strings that should be placed around a certain token.
Definition Highlighter.h:30
void Apply(Stream &s, llvm::StringRef value) const
Applies this style to the given value.
static bool LanguageIsCFamily(lldb::LanguageType language)
Equivalent to LanguageIsC||LanguageIsObjC||LanguageIsCPlusPlus.
Definition Language.cpp:379
static bool RegisterPlugin(llvm::StringRef name, llvm::StringRef description, ABICreateInstance create_callback)
static bool UnregisterPlugin(ABICreateInstance create_callback)
llvm::StringRef GetString() const
A stream class that can stream formatted output to a file.
Definition Stream.h:28
A class that represents a running process on the host machine.
LanguageType
Programming language type.
Represents style that the highlighter should apply to the given source code.
Definition Highlighter.h:25
ColorStyle square_brackets
Matches '[' or ']'.
Definition Highlighter.h:82
ColorStyle comment
Matches any comments in the language.
Definition Highlighter.h:69
ColorStyle braces
Matches '{' or '}'.
Definition Highlighter.h:80
ColorStyle scalar_literal
Matches scalar value literals like '42' or '0.1'.
Definition Highlighter.h:65
ColorStyle comma
Matches commas: ','.
Definition Highlighter.h:71
ColorStyle pp_directive
Matches directives to a preprocessor (if the language has any).
Definition Highlighter.h:89
ColorStyle operators
Matches operators like '+', '-', '%', '&', '='.
Definition Highlighter.h:77
ColorStyle string_literal
Matches any string or character literals in the language: "foo" or 'f'.
Definition Highlighter.h:63
ColorStyle keyword
Matches all reserved keywords in the language.
Definition Highlighter.h:67
ColorStyle parentheses
Matches '(' or ')'.
Definition Highlighter.h:84
ColorStyle selected
The style for the token which is below the cursor of the user.
Definition Highlighter.h:58
ColorStyle identifier
Matches identifiers of variables or functions.
Definition Highlighter.h:61
ColorStyle colon
Matches one colon: ':'.
Definition Highlighter.h:73