LLDB  mainline
StringExtractor.cpp
Go to the documentation of this file.
1 //===-- StringExtractor.cpp -----------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
#include "llvm/ADT/StringExtras.h"

#include <tuple>

#include <cctype>
#include <cstdint>
#include <cstdlib>
#include <cstring>
17 
// Map a single ASCII hexadecimal digit to its numeric value (0-15).
// Both upper- and lower-case letters are accepted. Any other character
// yields -1.
static inline int xdigit_to_sint(char ch) {
  if ('0' <= ch && ch <= '9')
    return ch - '0';
  if ('a' <= ch && ch <= 'f')
    return ch - 'a' + 10;
  if ('A' <= ch && ch <= 'F')
    return ch - 'A' + 10;
  return -1;
}
27 
28 // StringExtractor constructor
30 
31 StringExtractor::StringExtractor(llvm::StringRef packet_str) : m_packet() {
32  m_packet.assign(packet_str.begin(), packet_str.end());
33 }
34 
35 StringExtractor::StringExtractor(const char *packet_cstr) : m_packet() {
36  if (packet_cstr)
37  m_packet.assign(packet_cstr);
38 }
39 
40 // Destructor
42 
43 char StringExtractor::GetChar(char fail_value) {
44  if (m_index < m_packet.size()) {
45  char ch = m_packet[m_index];
46  ++m_index;
47  return ch;
48  }
50  return fail_value;
51 }
52 
53 // If a pair of valid hex digits exist at the head of the StringExtractor they
54 // are decoded into an unsigned byte and returned by this function
55 //
56 // If there is not a pair of valid hex digits at the head of the
57 // StringExtractor, it is left unchanged and -1 is returned
59  SkipSpaces();
60  if (GetBytesLeft() < 2) {
61  return -1;
62  }
63  const int hi_nibble = xdigit_to_sint(m_packet[m_index]);
64  const int lo_nibble = xdigit_to_sint(m_packet[m_index + 1]);
65  if (hi_nibble == -1 || lo_nibble == -1) {
66  return -1;
67  }
68  m_index += 2;
69  return static_cast<uint8_t>((hi_nibble << 4) + lo_nibble);
70 }
71 
72 // Extract an unsigned character from two hex ASCII chars in the packet string,
73 // or return fail_value on failure
74 uint8_t StringExtractor::GetHexU8(uint8_t fail_value, bool set_eof_on_fail) {
75  // On success, fail_value will be overwritten with the next character in the
76  // stream
77  GetHexU8Ex(fail_value, set_eof_on_fail);
78  return fail_value;
79 }
80 
81 bool StringExtractor::GetHexU8Ex(uint8_t &ch, bool set_eof_on_fail) {
82  int byte = DecodeHexU8();
83  if (byte == -1) {
84  if (set_eof_on_fail || m_index >= m_packet.size())
86  // ch should not be changed in case of failure
87  return false;
88  }
89  ch = static_cast<uint8_t>(byte);
90  return true;
91 }
92 
93 uint32_t StringExtractor::GetU32(uint32_t fail_value, int base) {
94  if (m_index < m_packet.size()) {
95  char *end = nullptr;
96  const char *start = m_packet.c_str();
97  const char *cstr = start + m_index;
98  uint32_t result = static_cast<uint32_t>(::strtoul(cstr, &end, base));
99 
100  if (end && end != cstr) {
101  m_index = end - start;
102  return result;
103  }
104  }
105  return fail_value;
106 }
107 
108 int32_t StringExtractor::GetS32(int32_t fail_value, int base) {
109  if (m_index < m_packet.size()) {
110  char *end = nullptr;
111  const char *start = m_packet.c_str();
112  const char *cstr = start + m_index;
113  int32_t result = static_cast<int32_t>(::strtol(cstr, &end, base));
114 
115  if (end && end != cstr) {
116  m_index = end - start;
117  return result;
118  }
119  }
120  return fail_value;
121 }
122 
123 uint64_t StringExtractor::GetU64(uint64_t fail_value, int base) {
124  if (m_index < m_packet.size()) {
125  char *end = nullptr;
126  const char *start = m_packet.c_str();
127  const char *cstr = start + m_index;
128  uint64_t result = ::strtoull(cstr, &end, base);
129 
130  if (end && end != cstr) {
131  m_index = end - start;
132  return result;
133  }
134  }
135  return fail_value;
136 }
137 
138 int64_t StringExtractor::GetS64(int64_t fail_value, int base) {
139  if (m_index < m_packet.size()) {
140  char *end = nullptr;
141  const char *start = m_packet.c_str();
142  const char *cstr = start + m_index;
143  int64_t result = ::strtoll(cstr, &end, base);
144 
145  if (end && end != cstr) {
146  m_index = end - start;
147  return result;
148  }
149  }
150  return fail_value;
151 }
152 
154  uint32_t fail_value) {
155  uint32_t result = 0;
156  uint32_t nibble_count = 0;
157 
158  SkipSpaces();
159  if (little_endian) {
160  uint32_t shift_amount = 0;
161  while (m_index < m_packet.size() && ::isxdigit(m_packet[m_index])) {
162  // Make sure we don't exceed the size of a uint32_t...
163  if (nibble_count >= (sizeof(uint32_t) * 2)) {
165  return fail_value;
166  }
167 
168  uint8_t nibble_lo;
169  uint8_t nibble_hi = xdigit_to_sint(m_packet[m_index]);
170  ++m_index;
171  if (m_index < m_packet.size() && ::isxdigit(m_packet[m_index])) {
172  nibble_lo = xdigit_to_sint(m_packet[m_index]);
173  ++m_index;
174  result |= (static_cast<uint32_t>(nibble_hi) << (shift_amount + 4));
175  result |= (static_cast<uint32_t>(nibble_lo) << shift_amount);
176  nibble_count += 2;
177  shift_amount += 8;
178  } else {
179  result |= (static_cast<uint32_t>(nibble_hi) << shift_amount);
180  nibble_count += 1;
181  shift_amount += 4;
182  }
183  }
184  } else {
185  while (m_index < m_packet.size() && ::isxdigit(m_packet[m_index])) {
186  // Make sure we don't exceed the size of a uint32_t...
187  if (nibble_count >= (sizeof(uint32_t) * 2)) {
189  return fail_value;
190  }
191 
192  uint8_t nibble = xdigit_to_sint(m_packet[m_index]);
193  // Big Endian
194  result <<= 4;
195  result |= nibble;
196 
197  ++m_index;
198  ++nibble_count;
199  }
200  }
201  return result;
202 }
203 
204 uint64_t StringExtractor::GetHexMaxU64(bool little_endian,
205  uint64_t fail_value) {
206  uint64_t result = 0;
207  uint32_t nibble_count = 0;
208 
209  SkipSpaces();
210  if (little_endian) {
211  uint32_t shift_amount = 0;
212  while (m_index < m_packet.size() && ::isxdigit(m_packet[m_index])) {
213  // Make sure we don't exceed the size of a uint64_t...
214  if (nibble_count >= (sizeof(uint64_t) * 2)) {
216  return fail_value;
217  }
218 
219  uint8_t nibble_lo;
220  uint8_t nibble_hi = xdigit_to_sint(m_packet[m_index]);
221  ++m_index;
222  if (m_index < m_packet.size() && ::isxdigit(m_packet[m_index])) {
223  nibble_lo = xdigit_to_sint(m_packet[m_index]);
224  ++m_index;
225  result |= (static_cast<uint64_t>(nibble_hi) << (shift_amount + 4));
226  result |= (static_cast<uint64_t>(nibble_lo) << shift_amount);
227  nibble_count += 2;
228  shift_amount += 8;
229  } else {
230  result |= (static_cast<uint64_t>(nibble_hi) << shift_amount);
231  nibble_count += 1;
232  shift_amount += 4;
233  }
234  }
235  } else {
236  while (m_index < m_packet.size() && ::isxdigit(m_packet[m_index])) {
237  // Make sure we don't exceed the size of a uint64_t...
238  if (nibble_count >= (sizeof(uint64_t) * 2)) {
240  return fail_value;
241  }
242 
243  uint8_t nibble = xdigit_to_sint(m_packet[m_index]);
244  // Big Endian
245  result <<= 4;
246  result |= nibble;
247 
248  ++m_index;
249  ++nibble_count;
250  }
251  }
252  return result;
253 }
254 
255 bool StringExtractor::ConsumeFront(const llvm::StringRef &str) {
256  llvm::StringRef S = GetStringRef();
257  if (!S.startswith(str))
258  return false;
259  else
260  m_index += str.size();
261  return true;
262 }
263 
264 size_t StringExtractor::GetHexBytes(llvm::MutableArrayRef<uint8_t> dest,
265  uint8_t fail_fill_value) {
266  size_t bytes_extracted = 0;
267  while (!dest.empty() && GetBytesLeft() > 0) {
268  dest[0] = GetHexU8(fail_fill_value);
269  if (!IsGood())
270  break;
271  ++bytes_extracted;
272  dest = dest.drop_front();
273  }
274 
275  if (!dest.empty())
276  ::memset(dest.data(), fail_fill_value, dest.size());
277 
278  return bytes_extracted;
279 }
280 
281 // Decodes all valid hex encoded bytes at the head of the StringExtractor,
282 // limited by dst_len.
283 //
284 // Returns the number of bytes successfully decoded
285 size_t StringExtractor::GetHexBytesAvail(llvm::MutableArrayRef<uint8_t> dest) {
286  size_t bytes_extracted = 0;
287  while (!dest.empty()) {
288  int decode = DecodeHexU8();
289  if (decode == -1)
290  break;
291  dest[0] = static_cast<uint8_t>(decode);
292  dest = dest.drop_front();
293  ++bytes_extracted;
294  }
295  return bytes_extracted;
296 }
297 
299  str.clear();
300  str.reserve(GetBytesLeft() / 2);
301  char ch;
302  while ((ch = GetHexU8()) != '\0')
303  str.append(1, ch);
304  return str.size();
305 }
306 
308  uint32_t nibble_length) {
309  str.clear();
310 
311  uint32_t nibble_count = 0;
312  for (const char *pch = Peek();
313  (nibble_count < nibble_length) && (pch != nullptr);
314  str.append(1, GetHexU8(0, false)), pch = Peek(), nibble_count += 2) {
315  }
316 
317  return str.size();
318 }
319 
321  char terminator) {
322  str.clear();
323  char ch;
324  while ((ch = GetHexU8(0, false)) != '\0')
325  str.append(1, ch);
326  if (Peek() && *Peek() == terminator)
327  return str.size();
328 
329  str.clear();
330  return str.size();
331 }
332 
333 bool StringExtractor::GetNameColonValue(llvm::StringRef &name,
334  llvm::StringRef &value) {
335  // Read something in the form of NNNN:VVVV; where NNNN is any character that
336  // is not a colon, followed by a ':' character, then a value (one or more ';'
337  // chars), followed by a ';'
338  if (m_index >= m_packet.size())
339  return fail();
340 
341  llvm::StringRef view(m_packet);
342  if (view.empty())
343  return fail();
344 
345  llvm::StringRef a, b, c, d;
346  view = view.substr(m_index);
347  std::tie(a, b) = view.split(':');
348  if (a.empty() || b.empty())
349  return fail();
350  std::tie(c, d) = b.split(';');
351  if (b == c && d.empty())
352  return fail();
353 
354  name = a;
355  value = c;
356  if (d.empty())
357  m_index = m_packet.size();
358  else {
359  size_t bytes_consumed = d.data() - view.data();
360  m_index += bytes_consumed;
361  }
362  return true;
363 }
364 
366  const size_t n = m_packet.size();
367  while (m_index < n && llvm::isSpace(m_packet[m_index]))
368  ++m_index;
369 }
StringExtractor::GetBytesLeft
size_t GetBytesLeft()
Definition: StringExtractor.h:52
StringExtractor::ConsumeFront
bool ConsumeFront(const llvm::StringRef &str)
Definition: StringExtractor.cpp:255
StringExtractor::GetStringRef
llvm::StringRef GetStringRef() const
Definition: StringExtractor.h:48
StringExtractor::SkipSpaces
void SkipSpaces()
Definition: StringExtractor.cpp:365
StringExtractor::DecodeHexU8
int DecodeHexU8()
Definition: StringExtractor.cpp:58
StringExtractor::GetHexMaxU32
uint32_t GetHexMaxU32(bool little_endian, uint32_t fail_value)
Definition: StringExtractor.cpp:153
StringExtractor::fail
bool fail()
Definition: StringExtractor.h:107
StringExtractor::Peek
const char * Peek()
Definition: StringExtractor.h:100
StringExtractor::StringExtractor
StringExtractor()
Definition: StringExtractor.cpp:29
StringExtractor::GetHexByteStringTerminatedBy
size_t GetHexByteStringTerminatedBy(std::string &str, char terminator)
Definition: StringExtractor.cpp:320
StringExtractor::GetHexByteStringFixedLength
size_t GetHexByteStringFixedLength(std::string &str, uint32_t nibble_length)
Definition: StringExtractor.cpp:307
StringExtractor::GetHexBytesAvail
size_t GetHexBytesAvail(llvm::MutableArrayRef< uint8_t > dest)
Definition: StringExtractor.cpp:285
StringExtractor::GetChar
char GetChar(char fail_value='\0')
Definition: StringExtractor.cpp:43
string
string(SUBSTRING ${p} 10 -1 pStripped) if($
Definition: Plugins/CMakeLists.txt:40
StringExtractor::GetHexU8
uint8_t GetHexU8(uint8_t fail_value=0, bool set_eof_on_fail=true)
Definition: StringExtractor.cpp:74
StringExtractor::GetNameColonValue
bool GetNameColonValue(llvm::StringRef &name, llvm::StringRef &value)
Definition: StringExtractor.cpp:333
StringExtractor::m_packet
std::string m_packet
The string in which to extract data.
Definition: StringExtractor.h:113
StringExtractor::GetHexByteString
size_t GetHexByteString(std::string &str)
Definition: StringExtractor.cpp:298
StringExtractor::GetU64
uint64_t GetU64(uint64_t fail_value, int base=0)
Definition: StringExtractor.cpp:123
StringExtractor::GetHexMaxU64
uint64_t GetHexMaxU64(bool little_endian, uint64_t fail_value)
Definition: StringExtractor.cpp:204
StringExtractor::GetS64
int64_t GetS64(int64_t fail_value, int base=0)
Definition: StringExtractor.cpp:138
StringExtractor::GetHexU8Ex
bool GetHexU8Ex(uint8_t &ch, bool set_eof_on_fail=true)
Definition: StringExtractor.cpp:81
uint32_t
StringExtractor::m_index
uint64_t m_index
When extracting data from a packet, this index will march along as things get extracted.
Definition: StringExtractor.h:118
StringExtractor.h
StringExtractor::GetS32
int32_t GetS32(int32_t fail_value, int base=0)
Definition: StringExtractor.cpp:108
StringExtractor::~StringExtractor
virtual ~StringExtractor()
StringExtractor::GetHexBytes
size_t GetHexBytes(llvm::MutableArrayRef< uint8_t > dest, uint8_t fail_fill_value)
Definition: StringExtractor.cpp:264
xdigit_to_sint
static int xdigit_to_sint(char ch)
Definition: StringExtractor.cpp:18
StringExtractor::GetU32
uint32_t GetU32(uint32_t fail_value, int base=0)
Definition: StringExtractor.cpp:93
UINT64_MAX
#define UINT64_MAX
Definition: lldb-defines.h:23
StringExtractor::IsGood
bool IsGood() const
Definition: StringExtractor.h:35