LLDB mainline
TreeSitterHighlighter.cpp
Go to the documentation of this file.
1//===----------------------------------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
11#include "lldb/Utility/Log.h"
13#include "llvm/ADT/StringRef.h"
14#include "llvm/ADT/StringSwitch.h"
15
16using namespace lldb_private;
17
// NOTE(review): the signature line of this definition is missing from this
// listing; judging by the cleanup below this is presumably the TSState
// destructor -- confirm against the real file.
// Releases the tree-sitter query first and then the parser, skipping whichever
// handle is null.
19 if (query)
20 ts_query_delete(query);
21 if (parser)
22 ts_parser_delete(parser);
23}
24
25TreeSitterHighlighter::TSState::operator bool() const {
26 return parser && query;
27}
28
// Body of GetTSState() -- the signature line is missing from this listing.
// Lazily creates the tree-sitter state: a parser configured with GetLanguage()
// and a query built from GetHighlightQuery(). The state is stored in
// m_ts_state and a reference to it is returned.
//
// m_ts_state is engaged before any of the fallible steps run, so a failed
// attempt is cached as well: later calls return that same state without
// retrying.
30 if (m_ts_state)
31 return *m_ts_state;
32
// NOTE(review): `log` is used below but its declaration is not visible here;
// presumably a line obtaining it (e.g. through GetLog) was dropped from this
// listing -- confirm against the real file.
34
35 m_ts_state.emplace();
36 m_ts_state->parser = ts_parser_new();
37 if (!m_ts_state->parser) {
38 LLDB_LOG(log, "Creating tree-sitter parser failed for {0}", GetName());
39 return *m_ts_state;
40 }
41
// A missing language and a language rejected by the parser are reported the
// same way. The state keeps its parser but gets no query here.
42 const TSLanguage *language = GetLanguage();
43 if (!language || !ts_parser_set_language(m_ts_state->parser, language)) {
44 LLDB_LOG(log, "Creating tree-sitter language failed for {0}", GetName());
45 return *m_ts_state;
46 }
47
// Build the highlight query; error_offset and error_type are filled in by
// ts_query_new and only used for the log message below.
48 llvm::StringRef query_source = GetHighlightQuery();
49 uint32_t error_offset = 0;
50 TSQueryError error_type = TSQueryErrorNone;
51 m_ts_state->query = ts_query_new(language, query_source.data(),
52 static_cast<uint32_t>(query_source.size()),
53 &error_offset, &error_type);
54 if (!m_ts_state->query || error_type != TSQueryErrorNone) {
// Log at most 64 characters of the query text starting at the error offset.
55 LLDB_LOG(log,
56 "Creating tree-sitter query failed for {0} with error {1}: {2}",
57 GetName(), error_type, query_source.substr(error_offset, 64));
58 // If we have an error but a valid query, we need to reset the object to
59 // (1) avoid it looking valid and (2) release the parser.
60 m_ts_state.emplace();
61 }
62
63 return *m_ts_state;
64}
65
67TreeSitterHighlighter::GetStyleForCapture(llvm::StringRef capture_name,
68 const HighlightStyle &options) const {
69 return llvm::StringSwitch<HighlightStyle::ColorStyle>(capture_name)
70 .Case("comment", options.comment)
71 .Case("keyword", options.keyword)
72 .Case("operator", options.operators)
73 .Case("type", options.keyword)
74 .Case("punctuation.delimiter.comma", options.comma)
75 .Case("punctuation.delimiter.colon", options.colon)
76 .Case("punctuation.delimiter.semicolon", options.semicolons)
77 .Case("punctuation.bracket.square", options.square_brackets)
78 .Cases({"keyword.directive", "preproc"}, options.pp_directive)
79 .Cases({"string", "string.literal"}, options.string_literal)
80 .Cases({"number", "number.literal", "constant.numeric"},
81 options.scalar_literal)
82 .Cases({"identifier", "variable", "function"}, options.identifier)
83 .Cases({"punctuation.bracket.curly", "punctuation.brace"}, options.braces)
84 .Cases({"punctuation.bracket.round", "punctuation.bracket",
85 "punctuation.paren"},
86 options.parentheses)
87 .Default({});
88}
89
91 const HighlightStyle &options, llvm::StringRef text, uint32_t start_byte,
92 uint32_t end_byte, const HighlightStyle::ColorStyle &style,
93 std::optional<size_t> cursor_pos, bool &highlighted_cursor,
94 Stream &s) const {
95
96 if (start_byte >= end_byte || start_byte >= text.size())
97 return;
98
99 end_byte = std::min(end_byte, static_cast<uint32_t>(text.size()));
100
101 llvm::StringRef range = text.substr(start_byte, end_byte - start_byte);
102
103 auto print = [&](llvm::StringRef str) {
104 if (style)
105 style.Apply(s, str);
106 else
107 s << str;
108 };
109
110 // Check if cursor is within this range.
111 if (cursor_pos && *cursor_pos >= start_byte && *cursor_pos < end_byte &&
112 !highlighted_cursor) {
113 highlighted_cursor = true;
114
115 // Split range around cursor position.
116 const size_t cursor_in_range = *cursor_pos - start_byte;
117
118 // Print everything before the cursor.
119 if (cursor_in_range > 0) {
120 llvm::StringRef before = range.substr(0, cursor_in_range);
121 print(before);
122 }
123
124 // Print the cursor itself.
125 if (cursor_in_range < range.size()) {
126 StreamString cursor_str;
127 llvm::StringRef cursor_char = range.substr(cursor_in_range, 1);
128 if (style)
129 style.Apply(cursor_str, cursor_char);
130 else
131 cursor_str << cursor_char;
132 options.selected.Apply(s, cursor_str.GetString());
133 }
134
135 // Print everything after the cursor.
136 if (cursor_in_range + 1 < range.size()) {
137 llvm::StringRef after = range.substr(cursor_in_range + 1);
138 print(after);
139 }
140 } else {
141 // No cursor in this range, apply style directly.
142 print(range);
143 }
144}
145
147 llvm::StringRef line,
148 std::optional<size_t> cursor_pos,
149 llvm::StringRef previous_lines,
150 Stream &s) const {
151 auto unformatted = [&]() -> void { s << line; };
152
153 TSState &ts_state = GetTSState();
154 if (!ts_state)
155 return unformatted();
156
157 std::string source = previous_lines.str() + line.str();
158 TSTree *tree =
159 ts_parser_parse_string(ts_state.parser, nullptr, source.c_str(),
160 static_cast<uint32_t>(source.size()));
161 if (!tree)
162 return unformatted();
163
164 TSQueryCursor *cursor = ts_query_cursor_new();
165 assert(cursor);
166
167 llvm::scope_exit delete_cusor([&] { ts_query_cursor_delete(cursor); });
168
169 TSNode root_node = ts_tree_root_node(tree);
170 ts_query_cursor_exec(cursor, ts_state.query, root_node);
171
172 // Collect all matches and their byte ranges.
173 std::vector<HLRange> highlights;
174 TSQueryMatch match;
175 uint32_t capture_index;
176 while (ts_query_cursor_next_capture(cursor, &match, &capture_index)) {
177 TSQueryCapture capture = match.captures[capture_index];
178
179 uint32_t capture_name_len = 0;
180 const char *capture_name = ts_query_capture_name_for_id(
181 ts_state.query, capture.index, &capture_name_len);
182
184 llvm::StringRef(capture_name, capture_name_len), options);
185
186 TSNode node = capture.node;
187 uint32_t start = ts_node_start_byte(node);
188 uint32_t end = ts_node_end_byte(node);
189
190 if (style && start < end)
191 highlights.push_back({start, end, style});
192 }
193
194 std::sort(highlights.begin(), highlights.end(),
195 [](const HLRange &a, const HLRange &b) {
196 if (a.start_byte != b.start_byte)
197 return a.start_byte < b.start_byte;
198 // Prefer shorter matches. For example, if we have an expression
199 // consisting of a variable and a property, we want to highlight
200 // them as individual components.
201 return (b.end_byte - b.start_byte) > (a.end_byte - a.start_byte);
202 });
203
204 uint32_t current_pos = 0;
205 bool highlighted_cursor = false;
206
207 for (const auto &h : highlights) {
208 // Skip over highlights that start before our current position, which means
209 // there's overlap.
210 if (h.start_byte < current_pos)
211 continue;
212
213 // Output any unhighlighted text before this highlight.
214 if (current_pos < h.start_byte) {
215 HighlightRange(options, line, current_pos, h.start_byte, {}, cursor_pos,
216 highlighted_cursor, s);
217 current_pos = h.start_byte;
218 }
219
220 // Output the highlighted range.
221 HighlightRange(options, line, h.start_byte, h.end_byte, h.style, cursor_pos,
222 highlighted_cursor, s);
223 current_pos = h.end_byte;
224 }
225
226 // Output any remaining unhighlighted text.
227 if (current_pos < line.size()) {
228 HighlightRange(options, line, current_pos,
229 static_cast<uint32_t>(line.size()), {}, cursor_pos,
230 highlighted_cursor, s);
231 }
232}
#define LLDB_LOG(log,...)
The LLDB_LOG* macros defined below are the way to emit log messages.
Definition Log.h:369
A pair of strings that should be placed around a certain token.
Definition Highlighter.h:30
void Apply(Stream &s, llvm::StringRef value) const
Applies this style to the given value.
virtual llvm::StringRef GetName() const =0
Returns a human readable name for the selected highlighter.
llvm::StringRef GetString() const
A stream class that can stream formatted output to a file.
Definition Stream.h:28
virtual llvm::StringRef GetHighlightQuery() const =0
Returns the tree-sitter highlight query for this language.
virtual const TSLanguage * GetLanguage() const =0
Returns the tree-sitter language for this highlighter.
void Highlight(const HighlightStyle &options, llvm::StringRef line, std::optional< size_t > cursor_pos, llvm::StringRef previous_lines, Stream &s) const override
Highlights a single line of code using tree-sitter parsing.
void HighlightRange(const HighlightStyle &options, llvm::StringRef text, uint32_t start_byte, uint32_t end_byte, const HighlightStyle::ColorStyle &style, std::optional< size_t > cursor_pos, bool &highlighted_cursor, Stream &s) const
Applies syntax highlighting to a range of text.
TSState & GetTSState() const
Lazily creates a tree-sitter state (TSState).
HighlightStyle::ColorStyle GetStyleForCapture(llvm::StringRef capture_name, const HighlightStyle &options) const
Maps a tree-sitter capture name to a HighlightStyle color.
A class that represents a running process on the host machine.
Log * GetLog(Cat mask)
Retrieve the Log object for the channel associated with the given log enum.
Definition Log.h:332
Represents style that the highlighter should apply to the given source code.
Definition Highlighter.h:25
ColorStyle square_brackets
Matches '[' or ']'.
Definition Highlighter.h:82
ColorStyle comment
Matches any comments in the language.
Definition Highlighter.h:69
ColorStyle semicolons
Matches any semicolon: ';'.
Definition Highlighter.h:75
ColorStyle braces
Matches '{' or '}'.
Definition Highlighter.h:80
ColorStyle scalar_literal
Matches scalar value literals like '42' or '0.1'.
Definition Highlighter.h:65
ColorStyle comma
Matches commas: ','.
Definition Highlighter.h:71
ColorStyle pp_directive
Matches directives to a preprocessor (if the language has any).
Definition Highlighter.h:89
ColorStyle operators
Matches operators like '+', '-', '%', '&', '='.
Definition Highlighter.h:77
ColorStyle string_literal
Matches any string or character literals in the language: "foo" or 'f'.
Definition Highlighter.h:63
ColorStyle keyword
Matches all reserved keywords in the language.
Definition Highlighter.h:67
ColorStyle parentheses
Matches '(' or ')'.
Definition Highlighter.h:84
ColorStyle selected
The style for the token which is below the cursor of the user.
Definition Highlighter.h:58
ColorStyle identifier
Matches identifiers to variable or functions.
Definition Highlighter.h:61
ColorStyle colon
Matches one colon: ':'.
Definition Highlighter.h:73