LLDB  mainline
ClangHighlighter.cpp
Go to the documentation of this file.
1 //===-- ClangHighlighter.cpp ------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "ClangHighlighter.h"
10 
11 #include "lldb/Host/FileSystem.h"
12 #include "lldb/Target/Language.h"
15 
16 #include "clang/Basic/SourceManager.h"
17 #include "clang/Lex/Lexer.h"
18 #include "llvm/ADT/StringSet.h"
19 #include "llvm/Support/MemoryBuffer.h"
20 
21 using namespace lldb_private;
22 
23 bool ClangHighlighter::isKeyword(llvm::StringRef token) const {
24  return keywords.find(token) != keywords.end();
25 }
26 
28 #define KEYWORD(X, N) keywords.insert(#X);
29 #include "clang/Basic/TokenKinds.def"
30 }
31 
32 /// Determines which style should be applied to the given token.
33 /// \param highlighter
34 /// The current highlighter that should use the style.
35 /// \param token
36 /// The current token.
37 /// \param tok_str
38 /// The string in the source code the token represents.
39 /// \param options
40 /// The style we use for coloring the source code.
41 /// \param in_pp_directive
42 /// If we are currently in a preprocessor directive. NOTE: This is
43 /// passed by reference and will be updated if the current token starts
44 /// or ends a preprocessor directive.
45 /// \return
46 /// The ColorStyle that should be applied to the token.
49  const clang::Token &token, llvm::StringRef tok_str,
50  const HighlightStyle &options, bool &in_pp_directive) {
51  using namespace clang;
52 
53  if (token.is(tok::comment)) {
54  // If we were in a preprocessor directive before, we now left it.
55  in_pp_directive = false;
56  return options.comment;
57  } else if (in_pp_directive || token.getKind() == tok::hash) {
58  // Let's assume that the rest of the line is a PP directive.
59  in_pp_directive = true;
60  // Preprocessor directives are hard to match, so we have to hack this in.
61  return options.pp_directive;
62  } else if (tok::isStringLiteral(token.getKind()))
63  return options.string_literal;
64  else if (tok::isLiteral(token.getKind()))
65  return options.scalar_literal;
66  else if (highlighter.isKeyword(tok_str))
67  return options.keyword;
68  else
69  switch (token.getKind()) {
70  case tok::raw_identifier:
71  case tok::identifier:
72  return options.identifier;
73  case tok::l_brace:
74  case tok::r_brace:
75  return options.braces;
76  case tok::l_square:
77  case tok::r_square:
78  return options.square_brackets;
79  case tok::l_paren:
80  case tok::r_paren:
81  return options.parentheses;
82  case tok::comma:
83  return options.comma;
84  case tok::coloncolon:
85  case tok::colon:
86  return options.colon;
87 
88  case tok::amp:
89  case tok::ampamp:
90  case tok::ampequal:
91  case tok::star:
92  case tok::starequal:
93  case tok::plus:
94  case tok::plusplus:
95  case tok::plusequal:
96  case tok::minus:
97  case tok::arrow:
98  case tok::minusminus:
99  case tok::minusequal:
100  case tok::tilde:
101  case tok::exclaim:
102  case tok::exclaimequal:
103  case tok::slash:
104  case tok::slashequal:
105  case tok::percent:
106  case tok::percentequal:
107  case tok::less:
108  case tok::lessless:
109  case tok::lessequal:
110  case tok::lesslessequal:
111  case tok::spaceship:
112  case tok::greater:
113  case tok::greatergreater:
114  case tok::greaterequal:
115  case tok::greatergreaterequal:
116  case tok::caret:
117  case tok::caretequal:
118  case tok::pipe:
119  case tok::pipepipe:
120  case tok::pipeequal:
121  case tok::question:
122  case tok::equal:
123  case tok::equalequal:
124  return options.operators;
125  default:
126  break;
127  }
129 }
130 
132  llvm::StringRef line,
133  llvm::Optional<size_t> cursor_pos,
134  llvm::StringRef previous_lines,
135  Stream &result) const {
136  using namespace clang;
137 
138  FileSystemOptions file_opts;
139  FileManager file_mgr(file_opts,
140  FileSystem::Instance().GetVirtualFileSystem());
141 
142  unsigned line_number = previous_lines.count('\n') + 1U;
143 
144  // Let's build the actual source code Clang needs and setup some utility
145  // objects.
146  std::string full_source = previous_lines.str() + line.str();
147  llvm::IntrusiveRefCntPtr<DiagnosticIDs> diag_ids(new DiagnosticIDs());
148  llvm::IntrusiveRefCntPtr<DiagnosticOptions> diags_opts(
149  new DiagnosticOptions());
150  DiagnosticsEngine diags(diag_ids, diags_opts);
151  clang::SourceManager SM(diags, file_mgr);
152  auto buf = llvm::MemoryBuffer::getMemBuffer(full_source);
153 
154  FileID FID = SM.createFileID(clang::SourceManager::Unowned, buf.get());
155 
156  // Let's just enable the latest ObjC and C++ which should get most tokens
157  // right.
158  LangOptions Opts;
159  Opts.ObjC = true;
160  // FIXME: This should probably set CPlusPlus, CPlusPlus11, ... too
161  Opts.CPlusPlus17 = true;
162  Opts.LineComment = true;
163 
164  Lexer lex(FID, buf.get(), SM, Opts);
165  // The lexer should keep whitespace around.
166  lex.SetKeepWhitespaceMode(true);
167 
168  // Keeps track if we have entered a PP directive.
169  bool in_pp_directive = false;
170 
171  // True once we actually lexed the user provided line.
172  bool found_user_line = false;
173 
174  // True if we already highlighted the token under the cursor, false otherwise.
175  bool highlighted_cursor = false;
176  Token token;
177  bool exit = false;
178  while (!exit) {
179  // Returns true if this is the last token we get from the lexer.
180  exit = lex.LexFromRawLexer(token);
181 
182  bool invalid = false;
183  unsigned current_line_number =
184  SM.getSpellingLineNumber(token.getLocation(), &invalid);
185  if (current_line_number != line_number)
186  continue;
187  found_user_line = true;
188 
189  // We don't need to print any tokens without a spelling line number.
190  if (invalid)
191  continue;
192 
193  // Same as above but with the column number.
194  invalid = false;
195  unsigned start = SM.getSpellingColumnNumber(token.getLocation(), &invalid);
196  if (invalid)
197  continue;
198  // Column numbers start at 1, but indexes in our string start at 0.
199  --start;
200 
201  // Annotations don't have a length, so let's skip them.
202  if (token.isAnnotation())
203  continue;
204 
205  // Extract the token string from our source code.
206  llvm::StringRef tok_str = line.substr(start, token.getLength());
207 
208  // If the token is just an empty string, we can skip all the work below.
209  if (tok_str.empty())
210  continue;
211 
212  // If the cursor is inside this token, we have to apply the 'selected'
213  // highlight style before applying the actual token color.
214  llvm::StringRef to_print = tok_str;
215  StreamString storage;
216  auto end = start + token.getLength();
217  if (cursor_pos && end > *cursor_pos && !highlighted_cursor) {
218  highlighted_cursor = true;
219  options.selected.Apply(storage, tok_str);
220  to_print = storage.GetString();
221  }
222 
223  // See how we are supposed to highlight this token.
225  determineClangStyle(*this, token, tok_str, options, in_pp_directive);
226 
227  color.Apply(result, to_print);
228  }
229 
230  // If we went over the whole file but couldn't find our own file, then
231  // somehow our setup was wrong. When we're in release mode we just give the
232  // user the normal line and pretend we don't know how to highlight it. In
233  // debug mode we bail out with an assert as this should never happen.
234  if (!found_user_line) {
235  result << line;
236  assert(false && "We couldn't find the user line in the input file?");
237  }
238 }
Enumerations for broadcasting.
Definition: SBLaunchInfo.h:14
A stream class that can stream formatted output to a file.
Definition: Stream.h:28
A pair of strings that should be placed around a certain token.
Definition: Highlighter.h:28
ColorStyle string_literal
Matches any string or character literals in the language: "foo" or 'f'.
Definition: Highlighter.h:59
ColorStyle parentheses
Matches '(' or ')'.
Definition: Highlighter.h:80
ColorStyle operators
Matches operators like '+', '-', '%', '&', '='.
Definition: Highlighter.h:73
static FileSystem & Instance()
ColorStyle comma
Matches commas: ','.
Definition: Highlighter.h:67
bool isKeyword(llvm::StringRef token) const
Returns true if the given string represents a keyword in any Clang-supported language.
void Apply(Stream &s, llvm::StringRef value) const
Applies this style to the given value.
Definition: Highlighter.cpp:17
llvm::StringRef GetString() const
ColorStyle square_brackets
Matches '[' or ']'.
Definition: Highlighter.h:78
ColorStyle selected
The style for the token which is below the cursor of the user.
Definition: Highlighter.h:54
ColorStyle keyword
Matches all reserved keywords in the language.
Definition: Highlighter.h:63
Represents style that the highlighter should apply to the given source code.
Definition: Highlighter.h:23
ColorStyle pp_directive
Matches directives to a preprocessor (if the language has any).
Definition: Highlighter.h:85
ColorStyle identifier
Matches identifiers of variables or functions.
Definition: Highlighter.h:57
ColorStyle colon
Matches one colon: ':'.
Definition: Highlighter.h:69
ColorStyle comment
Matches any comments in the language.
Definition: Highlighter.h:65
ColorStyle braces
Matches '{' or '}'.
Definition: Highlighter.h:76
void Highlight(const HighlightStyle &options, llvm::StringRef line, llvm::Optional< size_t > cursor_pos, llvm::StringRef previous_lines, Stream &s) const override
Highlights the given line.
static HighlightStyle::ColorStyle determineClangStyle(const ClangHighlighter &highlighter, const clang::Token &token, llvm::StringRef tok_str, const HighlightStyle &options, bool &in_pp_directive)
Determines which style should be applied to the given token.
ColorStyle scalar_literal
Matches scalar value literals like '42' or '0.1'.
Definition: Highlighter.h:61