LLDB  mainline
ClangHighlighter.cpp
Go to the documentation of this file.
1 //===-- ClangHighlighter.cpp ----------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "ClangHighlighter.h"
10 
11 #include "lldb/Host/FileSystem.h"
12 #include "lldb/Target/Language.h"
15 
16 #include "clang/Basic/FileManager.h"
17 #include "clang/Basic/SourceManager.h"
18 #include "clang/Lex/Lexer.h"
19 #include "llvm/ADT/StringSet.h"
20 #include "llvm/Support/MemoryBuffer.h"
21 #include <optional>
22 
23 using namespace lldb_private;
24 
25 bool ClangHighlighter::isKeyword(llvm::StringRef token) const {
26  return keywords.find(token) != keywords.end();
27 }
28 
30 #define KEYWORD(X, N) keywords.insert(#X);
31 #include "clang/Basic/TokenKinds.def"
32 }
33 
34 /// Determines which style should be applied to the given token.
35 /// \param highlighter
36 /// The current highlighter that should use the style.
37 /// \param token
38 /// The current token.
39 /// \param tok_str
40 /// The string in the source code the token represents.
41 /// \param options
42 /// The style we use for coloring the source code.
43 /// \param in_pp_directive
44 /// If we are currently in a preprocessor directive. NOTE: This is
45 /// passed by reference and will be updated if the current token starts
46 /// or ends a preprocessor directive.
47 /// \return
48 /// The ColorStyle that should be applied to the token.
51  const clang::Token &token, llvm::StringRef tok_str,
52  const HighlightStyle &options, bool &in_pp_directive) {
53  using namespace clang;
54 
55  if (token.is(tok::comment)) {
56  // If we were in a preprocessor directive before, we now left it.
57  in_pp_directive = false;
58  return options.comment;
59  } else if (in_pp_directive || token.getKind() == tok::hash) {
60  // Let's assume that the rest of the line is a PP directive.
61  in_pp_directive = true;
62  // Preprocessor directives are hard to match, so we have to hack this in.
63  return options.pp_directive;
64  } else if (tok::isStringLiteral(token.getKind()))
65  return options.string_literal;
66  else if (tok::isLiteral(token.getKind()))
67  return options.scalar_literal;
68  else if (highlighter.isKeyword(tok_str))
69  return options.keyword;
70  else
71  switch (token.getKind()) {
72  case tok::raw_identifier:
73  case tok::identifier:
74  return options.identifier;
75  case tok::l_brace:
76  case tok::r_brace:
77  return options.braces;
78  case tok::l_square:
79  case tok::r_square:
80  return options.square_brackets;
81  case tok::l_paren:
82  case tok::r_paren:
83  return options.parentheses;
84  case tok::comma:
85  return options.comma;
86  case tok::coloncolon:
87  case tok::colon:
88  return options.colon;
89 
90  case tok::amp:
91  case tok::ampamp:
92  case tok::ampequal:
93  case tok::star:
94  case tok::starequal:
95  case tok::plus:
96  case tok::plusplus:
97  case tok::plusequal:
98  case tok::minus:
99  case tok::arrow:
100  case tok::minusminus:
101  case tok::minusequal:
102  case tok::tilde:
103  case tok::exclaim:
104  case tok::exclaimequal:
105  case tok::slash:
106  case tok::slashequal:
107  case tok::percent:
108  case tok::percentequal:
109  case tok::less:
110  case tok::lessless:
111  case tok::lessequal:
112  case tok::lesslessequal:
113  case tok::spaceship:
114  case tok::greater:
115  case tok::greatergreater:
116  case tok::greaterequal:
117  case tok::greatergreaterequal:
118  case tok::caret:
119  case tok::caretequal:
120  case tok::pipe:
121  case tok::pipepipe:
122  case tok::pipeequal:
123  case tok::question:
124  case tok::equal:
125  case tok::equalequal:
126  return options.operators;
127  default:
128  break;
129  }
131 }
132 
134  llvm::StringRef line,
135  std::optional<size_t> cursor_pos,
136  llvm::StringRef previous_lines,
137  Stream &result) const {
138  using namespace clang;
139 
140  FileSystemOptions file_opts;
141  FileManager file_mgr(file_opts,
142  FileSystem::Instance().GetVirtualFileSystem());
143 
144  // The line might end in a backslash which would cause Clang to drop the
145  // backslash and the terminating new line. This makes sense when parsing C++,
146  // but when highlighting we care about preserving the backslash/newline. To
147  // not lose this information we remove the new line here so that Clang knows
148  // this is just a single line we are highlighting. We add back the newline
149  // after tokenizing.
150  llvm::StringRef line_ending = "";
151  // There are a few legal line endings Clang recognizes and we need to
152  // temporarily remove from the string.
153  if (line.consume_back("\r\n"))
154  line_ending = "\r\n";
155  else if (line.consume_back("\n"))
156  line_ending = "\n";
157  else if (line.consume_back("\r"))
158  line_ending = "\r";
159 
160  unsigned line_number = previous_lines.count('\n') + 1U;
161 
162  // Let's build the actual source code Clang needs and setup some utility
163  // objects.
164  std::string full_source = previous_lines.str() + line.str();
165  llvm::IntrusiveRefCntPtr<DiagnosticIDs> diag_ids(new DiagnosticIDs());
166  llvm::IntrusiveRefCntPtr<DiagnosticOptions> diags_opts(
167  new DiagnosticOptions());
168  DiagnosticsEngine diags(diag_ids, diags_opts);
169  clang::SourceManager SM(diags, file_mgr);
170  auto buf = llvm::MemoryBuffer::getMemBuffer(full_source);
171 
172  FileID FID = SM.createFileID(buf->getMemBufferRef());
173 
174  // Let's just enable the latest ObjC and C++ which should get most tokens
175  // right.
176  LangOptions Opts;
177  Opts.ObjC = true;
178  // FIXME: This should probably set CPlusPlus, CPlusPlus11, ... too
179  Opts.CPlusPlus17 = true;
180  Opts.LineComment = true;
181 
182  Lexer lex(FID, buf->getMemBufferRef(), SM, Opts);
183  // The lexer should keep whitespace around.
184  lex.SetKeepWhitespaceMode(true);
185 
186  // Keeps track if we have entered a PP directive.
187  bool in_pp_directive = false;
188 
189  // True once we actually lexed the user provided line.
190  bool found_user_line = false;
191 
192  // True if we already highlighted the token under the cursor, false otherwise.
193  bool highlighted_cursor = false;
194  Token token;
195  bool exit = false;
196  while (!exit) {
197  // Returns true if this is the last token we get from the lexer.
198  exit = lex.LexFromRawLexer(token);
199 
200  bool invalid = false;
201  unsigned current_line_number =
202  SM.getSpellingLineNumber(token.getLocation(), &invalid);
203  if (current_line_number != line_number)
204  continue;
205  found_user_line = true;
206 
207  // We don't need to print any tokens without a spelling line number.
208  if (invalid)
209  continue;
210 
211  // Same as above but with the column number.
212  invalid = false;
213  unsigned start = SM.getSpellingColumnNumber(token.getLocation(), &invalid);
214  if (invalid)
215  continue;
216  // Column numbers start at 1, but indexes in our string start at 0.
217  --start;
218 
219  // Annotations don't have a length, so let's skip them.
220  if (token.isAnnotation())
221  continue;
222 
223  // Extract the token string from our source code.
224  llvm::StringRef tok_str = line.substr(start, token.getLength());
225 
226  // If the token is just an empty string, we can skip all the work below.
227  if (tok_str.empty())
228  continue;
229 
230  // If the cursor is inside this token, we have to apply the 'selected'
231  // highlight style before applying the actual token color.
232  llvm::StringRef to_print = tok_str;
233  StreamString storage;
234  auto end = start + token.getLength();
235  if (cursor_pos && end > *cursor_pos && !highlighted_cursor) {
236  highlighted_cursor = true;
237  options.selected.Apply(storage, tok_str);
238  to_print = storage.GetString();
239  }
240 
241  // See how we are supposed to highlight this token.
243  determineClangStyle(*this, token, tok_str, options, in_pp_directive);
244 
245  color.Apply(result, to_print);
246  }
247 
248  // Add the line ending we trimmed before tokenizing.
249  result << line_ending;
250 
251  // If we went over the whole file but couldn't find our own file, then
252  // somehow our setup was wrong. When we're in release mode we just give the
253  // user the normal line and pretend we don't know how to highlight it. In
254  // debug mode we bail out with an assert as this should never happen.
255  if (!found_user_line) {
256  result << line;
257  assert(false && "We couldn't find the user line in the input file?");
258  }
259 }
FileSystem.h
lldb_private::ClangHighlighter::keywords
llvm::StringSet keywords
Definition: ClangHighlighter.h:21
lldb_private::HighlightStyle::selected
ColorStyle selected
The style for the token which is below the cursor of the user.
Definition: Highlighter.h:53
lldb_private::HighlightStyle::square_brackets
ColorStyle square_brackets
Matches '[' or ']'.
Definition: Highlighter.h:77
determineClangStyle
static HighlightStyle::ColorStyle determineClangStyle(const ClangHighlighter &highlighter, const clang::Token &token, llvm::StringRef tok_str, const HighlightStyle &options, bool &in_pp_directive)
Determines which style should be applied to the given token.
Definition: ClangHighlighter.cpp:50
lldb_private::HighlightStyle::scalar_literal
ColorStyle scalar_literal
Matches scalar value literals like '42' or '0.1'.
Definition: Highlighter.h:60
lldb_private::HighlightStyle::braces
ColorStyle braces
Matches '{' or '}'.
Definition: Highlighter.h:75
lldb_private::Stream
Definition: Stream.h:28
Language.h
lldb_private::StreamString::GetString
llvm::StringRef GetString() const
Definition: StreamString.cpp:51
lldb_private::HighlightStyle::keyword
ColorStyle keyword
Matches all reserved keywords in the language.
Definition: Highlighter.h:62
lldb_private::ClangHighlighter::Highlight
void Highlight(const HighlightStyle &options, llvm::StringRef line, std::optional< size_t > cursor_pos, llvm::StringRef previous_lines, Stream &s) const override
Highlights the given line.
Definition: ClangHighlighter.cpp:133
lldb_private::HighlightStyle::colon
ColorStyle colon
Matches one colon: ':'.
Definition: Highlighter.h:68
lldb_private::HighlightStyle::comment
ColorStyle comment
Matches any comments in the language.
Definition: Highlighter.h:64
StreamString.h
lldb_private::ClangHighlighter::ClangHighlighter
ClangHighlighter()
Definition: ClangHighlighter.cpp:29
lldb_private::ClangHighlighter::isKeyword
bool isKeyword(llvm::StringRef token) const
Returns true if the given string represents a keyword in any Clang-supported language.
Definition: ClangHighlighter.cpp:25
lldb_private::StreamString
Definition: StreamString.h:23
lldb_private::HighlightStyle::parentheses
ColorStyle parentheses
Matches '(' or ')'.
Definition: Highlighter.h:79
ClangHighlighter.h
string
string(SUBSTRING ${p} 10 -1 pStripped) if($
Definition: Plugins/CMakeLists.txt:40
lldb_private::HighlightStyle::string_literal
ColorStyle string_literal
Matches any string or character literals in the language: "foo" or 'f'.
Definition: Highlighter.h:58
AnsiTerminal.h
lldb_private::HighlightStyle::identifier
ColorStyle identifier
Matches identifiers of variables or functions.
Definition: Highlighter.h:56
lldb_private::HighlightStyle
Represents style that the highlighter should apply to the given source code.
Definition: Highlighter.h:24
clang
Definition: ASTResultSynthesizer.h:15
lldb_private::FileSystem::Instance
static FileSystem & Instance()
Definition: common/FileSystem.cpp:47
lldb_private
A class that represents a running process on the host machine.
Definition: SBCommandInterpreterRunOptions.h:16
lldb_private::ClangHighlighter
Definition: ClangHighlighter.h:20
lldb_private::HighlightStyle::ColorStyle
A pair of strings that should be placed around a certain token.
Definition: Highlighter.h:29
lldb_private::HighlightStyle::pp_directive
ColorStyle pp_directive
Matches directives to a preprocessor (if the language has any).
Definition: Highlighter.h:84
lldb_private::HighlightStyle::comma
ColorStyle comma
Matches commas: ','.
Definition: Highlighter.h:66
lldb_private::HighlightStyle::ColorStyle::Apply
void Apply(Stream &s, llvm::StringRef value) const
Applies this style to the given value.
Definition: Highlighter.cpp:19
lldb_private::HighlightStyle::operators
ColorStyle operators
Matches operators like '+', '-', '%', '&', '='.
Definition: Highlighter.h:72