LLDB mainline
ObjectFileWasm.cpp
Go to the documentation of this file.
1//===-- ObjectFileWasm.cpp ------------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "ObjectFileWasm.h"
10#include "lldb/Core/Module.h"
13#include "lldb/Core/Section.h"
14#include "lldb/Target/Process.h"
16#include "lldb/Target/Target.h"
19#include "lldb/Utility/Log.h"
20#include "llvm/ADT/ArrayRef.h"
21#include "llvm/ADT/SmallVector.h"
22#include "llvm/ADT/StringRef.h"
23#include "llvm/BinaryFormat/Magic.h"
24#include "llvm/BinaryFormat/Wasm.h"
25#include "llvm/Support/CheckedArithmetic.h"
26#include "llvm/Support/Endian.h"
27#include "llvm/Support/Format.h"
28#include <optional>
29
30using namespace lldb;
31using namespace lldb_private;
32using namespace lldb_private::wasm;
33
35
36static const uint32_t kWasmHeaderSize =
37 sizeof(llvm::wasm::WasmMagic) + sizeof(llvm::wasm::WasmVersion);
38
39/// Helper to read a 32-bit ULEB using LLDB's DataExtractor.
40static inline llvm::Expected<uint32_t> GetULEB32(DataExtractor &data,
41 lldb::offset_t &offset) {
42 const uint64_t value = data.GetULEB128(&offset);
43 if (value > std::numeric_limits<uint32_t>::max())
44 return llvm::createStringError("ULEB exceeds 32 bits");
45 return value;
46}
47
48/// Helper to read a 32-bit ULEB using LLVM's DataExtractor.
49static inline llvm::Expected<uint32_t>
50GetULEB32(llvm::DataExtractor &data, llvm::DataExtractor::Cursor &c) {
51 const uint64_t value = data.getULEB128(c);
52 if (!c)
53 return c.takeError();
54 if (value > std::numeric_limits<uint32_t>::max())
55 return llvm::createStringError("ULEB exceeds 32 bits");
56 return value;
57}
58
59/// Helper to read a Wasm string, whcih is encoded as a vector of UTF-8 codes.
60static inline llvm::Expected<std::string>
61GetWasmString(llvm::DataExtractor &data, llvm::DataExtractor::Cursor &c) {
62 llvm::Expected<uint32_t> len = GetULEB32(data, c);
63 if (!len)
64 return len.takeError();
65
66 llvm::SmallVector<uint8_t, 32> str_storage;
67 data.getU8(c, str_storage, *len);
68 if (!c)
69 return c.takeError();
70
71 return std::string(toStringRef(llvm::ArrayRef(str_storage)));
72}
73
74/// An "init expr" refers to a constant expression used to determine the initial
75/// value of certain elements within a module during instantiation. These
76/// expressions are restricted to operations that can be evaluated at module
77/// instantiation time. Currently we only support simple constant opcodes.
79 lldb::offset_t &offset) {
80 lldb::offset_t init_expr_offset = LLDB_INVALID_OFFSET;
81
82 uint8_t opcode = data.GetU8(&offset);
83 switch (opcode) {
84 case llvm::wasm::WASM_OPCODE_I32_CONST:
85 case llvm::wasm::WASM_OPCODE_I64_CONST:
86 init_expr_offset = data.GetSLEB128(&offset);
87 break;
88 case llvm::wasm::WASM_OPCODE_GLOBAL_GET:
89 init_expr_offset = data.GetULEB128(&offset);
90 break;
91 case llvm::wasm::WASM_OPCODE_F32_CONST:
92 case llvm::wasm::WASM_OPCODE_F64_CONST:
93 // Not a meaningful offset.
94 data.GetFloat(&offset);
95 break;
96 case llvm::wasm::WASM_OPCODE_REF_NULL:
97 // Not a meaningful offset.
98 data.GetULEB128(&offset);
99 break;
100 }
101
102 // Make sure the opcodes we read aren't part of an extended init expr.
103 opcode = data.GetU8(&offset);
104 if (opcode == llvm::wasm::WASM_OPCODE_END)
105 return init_expr_offset;
106
107 // Extended init expressions are not supported, but we still have to parse
108 // them to skip over them and read the next segment.
109 do {
110 opcode = data.GetU8(&offset);
111 } while (opcode != llvm::wasm::WASM_OPCODE_END);
112 return LLDB_INVALID_OFFSET;
113}
114
115/// Checks whether the data buffer starts with a valid Wasm module header.
116static bool ValidateModuleHeader(llvm::ArrayRef<uint8_t> data) {
117 if (data.size() < kWasmHeaderSize)
118 return false;
119
120 if (llvm::identify_magic(toStringRef(data)) != llvm::file_magic::wasm_object)
121 return false;
122
123 const uint8_t *Ptr = data.data() + sizeof(llvm::wasm::WasmMagic);
124
125 uint32_t version = llvm::support::endian::read32le(Ptr);
126 return version == llvm::wasm::WasmVersion;
127}
128
130
136
140
142 DataExtractorSP extractor_sp,
143 offset_t data_offset,
144 const FileSpec *file,
145 offset_t file_offset,
146 offset_t length) {
147 Log *log = GetLog(LLDBLog::Object);
148
149 if (!extractor_sp || !extractor_sp->HasData()) {
150 DataBufferSP data_sp = MapFileData(*file, length, file_offset);
151 if (!data_sp) {
152 LLDB_LOGF(log, "Failed to create ObjectFileWasm instance for file %s",
153 file->GetPath().c_str());
154 return nullptr;
155 }
156 extractor_sp = std::make_shared<DataExtractor>(data_sp);
157 data_offset = 0;
158 }
159
160 assert(extractor_sp);
161 if (!ValidateModuleHeader(extractor_sp->GetData())) {
162 LLDB_LOGF(log,
163 "Failed to create ObjectFileWasm instance: invalid Wasm header");
164 return nullptr;
165 }
166
167 // Update the data to contain the entire file if it doesn't contain it
168 // already.
169 if (extractor_sp->GetByteSize() < length) {
170 DataBufferSP data_sp = MapFileData(*file, length, file_offset);
171 if (!data_sp) {
172 LLDB_LOGF(log,
173 "Failed to create ObjectFileWasm instance: cannot read file %s",
174 file->GetPath().c_str());
175 return nullptr;
176 }
177 extractor_sp = std::make_shared<DataExtractor>(data_sp);
178 data_offset = 0;
179 }
180
181 std::unique_ptr<ObjectFileWasm> objfile_up(new ObjectFileWasm(
182 module_sp, extractor_sp, data_offset, file, file_offset, length));
183 ArchSpec spec = objfile_up->GetArchitecture();
184 if (spec && objfile_up->SetModulesArchitecture(spec)) {
185 LLDB_LOGF(log,
186 "%p ObjectFileWasm::CreateInstance() module = %p (%s), file = %s",
187 static_cast<void *>(objfile_up.get()),
188 static_cast<void *>(objfile_up->GetModule().get()),
189 objfile_up->GetModule()->GetSpecificationDescription().c_str(),
190 file ? file->GetPath().c_str() : "<NULL>");
191 return objfile_up.release();
192 }
193
194 LLDB_LOGF(log, "Failed to create ObjectFileWasm instance");
195 return nullptr;
196}
197
199 WritableDataBufferSP data_sp,
200 const ProcessSP &process_sp,
201 addr_t header_addr) {
202 if (!ValidateModuleHeader(data_sp->GetData()))
203 return nullptr;
204
205 std::unique_ptr<ObjectFileWasm> objfile_up(
206 new ObjectFileWasm(module_sp, data_sp, process_sp, header_addr));
207 ArchSpec spec = objfile_up->GetArchitecture();
208 if (spec && objfile_up->SetModulesArchitecture(spec))
209 return objfile_up.release();
210 return nullptr;
211}
212
214 // Buffer sufficient to read a section header and find the pointer to the next
215 // section.
216 const uint32_t kBufferSize = 1024;
217 DataExtractor section_header_data = ReadImageData(*offset_ptr, kBufferSize);
218
219 llvm::DataExtractor data = section_header_data.GetAsLLVM();
220 llvm::DataExtractor::Cursor c(0);
221
222 // Each section consists of:
223 // - a one-byte section id,
224 // - the u32 size of the contents, in bytes,
225 // - the actual contents.
226 uint8_t section_id = data.getU8(c);
227 uint64_t payload_len = data.getULEB128(c);
228 if (!c)
229 return !llvm::errorToBool(c.takeError());
230
231 if (payload_len > std::numeric_limits<uint32_t>::max())
232 return false;
233
234 if (section_id == llvm::wasm::WASM_SEC_CUSTOM) {
235 // Custom sections have the id 0. Their contents consist of a name
236 // identifying the custom section, followed by an uninterpreted sequence
237 // of bytes.
238 lldb::offset_t prev_offset = c.tell();
239 llvm::Expected<std::string> sect_name = GetWasmString(data, c);
240 if (!sect_name) {
241 LLDB_LOG_ERROR(GetLog(LLDBLog::Object), sect_name.takeError(),
242 "failed to parse section name: {0}");
243 return false;
244 }
245
246 if (payload_len < c.tell() - prev_offset)
247 return false;
248
249 uint32_t section_length = payload_len - (c.tell() - prev_offset);
250 m_sect_infos.push_back(section_info{*offset_ptr + c.tell(), section_length,
251 section_id, ConstString(*sect_name)});
252 *offset_ptr += (c.tell() + section_length);
253 } else if (section_id <= llvm::wasm::WASM_SEC_LAST_KNOWN) {
254 m_sect_infos.push_back(section_info{*offset_ptr + c.tell(),
255 static_cast<uint32_t>(payload_len),
256 section_id, ConstString()});
257 *offset_ptr += (c.tell() + payload_len);
258 } else {
259 // Invalid section id.
260 return false;
261 }
262 return true;
263}
264
267 if (IsInMemory()) {
268 offset += m_memory_addr;
269 }
270
271 while (DecodeNextSection(&offset))
272 ;
273 return true;
274}
275
277 const FileSpec &file, DataExtractorSP &extractor_sp, offset_t data_offset,
278 offset_t file_offset, offset_t length, ModuleSpecList &specs) {
279 if (!ValidateModuleHeader(extractor_sp->GetData())) {
280 return 0;
281 }
282
283 ModuleSpec spec(file, ArchSpec("wasm32-unknown-unknown-wasm"));
284 specs.Append(spec);
285 return 1;
286}
287
289 DataExtractorSP extractor_sp,
290 offset_t data_offset, const FileSpec *file,
291 offset_t offset, offset_t length)
292 : ObjectFile(module_sp, file, offset, length, extractor_sp, data_offset),
293 m_arch("wasm32-unknown-unknown-wasm") {
294 m_data_nsp->SetAddressByteSize(4);
295}
296
298 lldb::WritableDataBufferSP header_data_sp,
299 const lldb::ProcessSP &process_sp,
300 lldb::addr_t header_addr)
301 : ObjectFile(module_sp, process_sp, header_addr,
302 std::make_shared<DataExtractor>(header_data_sp)),
303 m_arch("wasm32-unknown-unknown-wasm") {}
304
306 // We already parsed the header during initialization.
307 return true;
308}
309
314
315static llvm::Expected<uint32_t> ParseImports(DataExtractor &import_data) {
316 // Currently this function just returns the number of imported functions.
317 // If we want to do anything with global names in the future, we'll also
318 // need to know those.
319 llvm::DataExtractor data = import_data.GetAsLLVM();
320 llvm::DataExtractor::Cursor c(0);
321
322 llvm::Expected<uint32_t> count = GetULEB32(data, c);
323 if (!count)
324 return count.takeError();
325
326 uint32_t function_imports = 0;
327 for (uint32_t i = 0; c && i < *count; ++i) {
328 // We don't need module and field names, so we can just get them as raw
329 // strings and discard.
330 llvm::Expected<std::string> module_name = GetWasmString(data, c);
331 if (!module_name)
332 return llvm::joinErrors(
333 llvm::createStringError("failed to parse module name"),
334 module_name.takeError());
335 llvm::Expected<std::string> field_name = GetWasmString(data, c);
336 if (!field_name)
337 return llvm::joinErrors(
338 llvm::createStringError("failed to parse field name"),
339 field_name.takeError());
340
341 uint8_t kind = data.getU8(c);
342 if (kind == llvm::wasm::WASM_EXTERNAL_FUNCTION)
343 function_imports++;
344
345 // For function imports, this is a type index. For others it's different.
346 // We don't need it, just need to parse it to advance the cursor.
347 data.getULEB128(c);
348 }
349
350 if (!c)
351 return c.takeError();
352
353 return function_imports;
354}
355
356static llvm::Expected<std::vector<WasmFunction>>
358 lldb::offset_t offset = 0;
359
360 llvm::Expected<uint32_t> function_count = GetULEB32(data, offset);
361 if (!function_count)
362 return function_count.takeError();
363
364 std::vector<WasmFunction> functions;
365 functions.reserve(*function_count);
366
367 for (uint32_t i = 0; i < *function_count; ++i) {
368 llvm::Expected<uint32_t> function_size = GetULEB32(data, offset);
369 if (!function_size)
370 return function_size.takeError();
371 // llvm-objdump considers the ULEB with the function size to be part of the
372 // function. We can't do that here because that would break symbolic
373 // breakpoints, as that address is never executed.
374 functions.push_back({offset, *function_size});
375
376 std::optional<lldb::offset_t> next_offset =
377 llvm::checkedAddUnsigned<lldb::offset_t>(offset, *function_size);
378 if (!next_offset)
379 return llvm::createStringError("function offset overflows 64 bits");
380 offset = *next_offset;
381 }
382
383 return functions;
384}
385
401
402static llvm::Expected<std::vector<WasmSegment>> ParseData(DataExtractor &data) {
403 lldb::offset_t offset = 0;
404
405 llvm::Expected<uint32_t> segment_count = GetULEB32(data, offset);
406 if (!segment_count)
407 return segment_count.takeError();
408
409 std::vector<WasmSegment> segments;
410 segments.reserve(*segment_count);
411
412 for (uint32_t i = 0; i < *segment_count; ++i) {
413 llvm::Expected<uint32_t> flags = GetULEB32(data, offset);
414 if (!flags)
415 return flags.takeError();
416
417 WasmSegment segment;
418
419 // Data segments have a mode that identifies them as either passive or
420 // active. An active data segment copies its contents into a memory during
421 // instantiation, as specified by a memory index and a constant expression
422 // defining an offset into that memory.
423 segment.type = (*flags & llvm::wasm::WASM_DATA_SEGMENT_IS_PASSIVE)
426
427 if (*flags & llvm::wasm::WASM_DATA_SEGMENT_HAS_MEMINDEX) {
428 assert(segment.type == WasmSegment::Active);
429 llvm::Expected<uint32_t> memidx = GetULEB32(data, offset);
430 if (!memidx)
431 return memidx.takeError();
432 segment.memory_index = *memidx;
433 }
434
435 if (segment.type == WasmSegment::Active)
436 segment.init_expr_offset = GetWasmOffsetFromInitExpr(data, offset);
437
438 llvm::Expected<uint32_t> segment_size = GetULEB32(data, offset);
439 if (!segment_size)
440 return segment_size.takeError();
441
442 segment.section_offset = offset;
443 segment.size = *segment_size;
444 segments.push_back(segment);
445
446 std::optional<lldb::offset_t> next_offset =
447 llvm::checkedAddUnsigned<lldb::offset_t>(offset, *segment_size);
448 if (!next_offset)
449 return llvm::createStringError("segment offset overflows 64 bits");
450 offset = *next_offset;
451 }
452
453 return segments;
454}
455
456static llvm::Expected<std::vector<Symbol>>
457ParseNames(SectionSP code_section_sp, DataExtractor &name_data,
458 const std::vector<WasmFunction> &functions,
459 std::vector<WasmSegment> &segments,
460 uint32_t num_imported_functions) {
461
462 llvm::DataExtractor data = name_data.GetAsLLVM();
463 llvm::DataExtractor::Cursor c(0);
464 std::vector<Symbol> symbols;
465 while (c && c.tell() < data.size()) {
466 const uint8_t type = data.getU8(c);
467 llvm::Expected<uint32_t> size = GetULEB32(data, c);
468 if (!size)
469 return size.takeError();
470
471 switch (type) {
472 case llvm::wasm::WASM_NAMES_FUNCTION: {
473 const uint64_t count = data.getULEB128(c);
474 if (count > std::numeric_limits<uint32_t>::max())
475 return llvm::createStringError("function count overflows uint32_t");
476
477 for (uint64_t i = 0; c && i < count; ++i) {
478 llvm::Expected<uint32_t> idx = GetULEB32(data, c);
479 if (!idx)
480 return idx.takeError();
481 llvm::Expected<std::string> name = GetWasmString(data, c);
482 if (!name)
483 return name.takeError();
484 if (*idx >= num_imported_functions + functions.size())
485 continue;
486
487 if (*idx < num_imported_functions) {
488 symbols.emplace_back(symbols.size(), *name, lldb::eSymbolTypeCode,
489 /*external=*/true, /*is_debug=*/false,
490 /*is_trampoline=*/false,
491 /*is_artificial=*/false,
492 /*section_sp=*/lldb::SectionSP(),
493 /*value=*/0, /*size=*/0,
494 /*size_is_valid=*/false,
495 /*contains_linker_annotations=*/false,
496 /*flags=*/0);
497 } else {
498 const WasmFunction &func = functions[*idx - num_imported_functions];
499 symbols.emplace_back(symbols.size(), *name, lldb::eSymbolTypeCode,
500 /*external=*/false, /*is_debug=*/false,
501 /*is_trampoline=*/false, /*is_artificial=*/false,
502 code_section_sp, func.section_offset, func.size,
503 /*size_is_valid=*/true,
504 /*contains_linker_annotations=*/false,
505 /*flags=*/0);
506 }
507 }
508 } break;
509 case llvm::wasm::WASM_NAMES_DATA_SEGMENT: {
510 llvm::Expected<uint32_t> count = GetULEB32(data, c);
511 if (!count)
512 return count.takeError();
513 for (uint32_t i = 0; c && i < *count; ++i) {
514 llvm::Expected<uint32_t> idx = GetULEB32(data, c);
515 if (!idx)
516 return idx.takeError();
517 llvm::Expected<std::string> name = GetWasmString(data, c);
518 if (!name)
519 return name.takeError();
520 if (*idx >= segments.size())
521 continue;
522 // Update the segment name.
523 segments[i].name = *name;
524 }
525
526 } break;
527 case llvm::wasm::WASM_NAMES_GLOBAL:
528 case llvm::wasm::WASM_NAMES_LOCAL:
529 default:
530 std::optional<lldb::offset_t> offset =
531 llvm::checkedAddUnsigned<lldb::offset_t>(c.tell(), *size);
532 if (!offset)
533 return llvm::createStringError("offset overflows 64 bits");
534 c.seek(*offset);
535 }
536 }
537
538 if (!c)
539 return c.takeError();
540
541 return symbols;
542}
543
545 for (const Symbol &symbol : m_symbols)
546 symtab.AddSymbol(symbol);
547
548 symtab.Finalize();
549 m_symbols.clear();
550}
551
552static SectionType GetSectionTypeFromName(llvm::StringRef Name) {
553 if (Name == "name")
555 if (Name.consume_front(".debug_") || Name.consume_front(".zdebug_"))
557 return eSectionTypeOther;
558}
559
560std::optional<ObjectFileWasm::section_info>
561ObjectFileWasm::GetSectionInfo(uint32_t section_id) {
562 for (const section_info &sect_info : m_sect_infos) {
563 if (sect_info.id == section_id)
564 return sect_info;
565 }
566 return std::nullopt;
567}
568
569std::optional<ObjectFileWasm::section_info>
570ObjectFileWasm::GetSectionInfo(llvm::StringRef section_name) {
571 for (const section_info &sect_info : m_sect_infos) {
572 if (sect_info.name == section_name)
573 return sect_info;
574 }
575 return std::nullopt;
576}
577
578void ObjectFileWasm::CreateSections(SectionList &unified_section_list) {
579 Log *log = GetLog(LLDBLog::Object);
580
581 if (m_sections_up)
582 return;
583
584 m_sections_up = std::make_unique<SectionList>();
585
586 if (m_sect_infos.empty()) {
588 }
589
590 for (const section_info &sect_info : m_sect_infos) {
591 SectionType section_type = eSectionTypeOther;
592 ConstString section_name;
593 offset_t file_offset = sect_info.offset & 0xffffffff;
594 addr_t vm_addr = sect_info.offset;
595 size_t vm_size = sect_info.size;
596
597 if (llvm::wasm::WASM_SEC_CODE == sect_info.id) {
598 section_type = eSectionTypeCode;
599 section_name = ConstString("code");
600
601 // A code address in DWARF for WebAssembly is the offset of an
602 // instruction relative within the Code section of the WebAssembly file.
603 // For this reason Section::GetFileAddress() must return zero for the
604 // Code section.
605 vm_addr = 0;
606 } else {
607 section_type = GetSectionTypeFromName(sect_info.name.GetStringRef());
608 if (section_type == eSectionTypeOther)
609 continue;
610 section_name = sect_info.name;
611 if (!IsInMemory()) {
612 vm_size = 0;
613 vm_addr = 0;
614 }
615 }
616
617 SectionSP section_sp = std::make_shared<Section>(
618 GetModule(), // Module to which this section belongs.
619 this, // ObjectFile to which this section belongs and
620 // should read section data from.
621 section_type, // Section ID.
622 section_name, // Section name.
623 section_type, // Section type.
624 vm_addr, // VM address.
625 vm_size, // VM size in bytes of this section.
626 file_offset, // Offset of this section in the file.
627 sect_info.size, // Size of the section as found in the file.
628 0, // Alignment of the section
629 0, // Flags for this section.
630 1); // Number of host bytes per target byte
631 m_sections_up->AddSection(section_sp);
632 unified_section_list.AddSection(section_sp);
633 }
634
635 // The name section contains names and indexes. First parse the data from the
636 // relevant sections so we can access it by its index.
637 std::vector<WasmFunction> functions;
638 std::vector<WasmSegment> segments;
639
640 // Parse the code section.
641 if (std::optional<section_info> info =
642 GetSectionInfo(llvm::wasm::WASM_SEC_CODE)) {
643 DataExtractor code_data = ReadImageData(info->offset, info->size);
644 llvm::Expected<std::vector<WasmFunction>> maybe_functions =
645 ParseFunctions(code_data);
646 if (!maybe_functions) {
647 LLDB_LOG_ERROR(log, maybe_functions.takeError(),
648 "Failed to parse Wasm code section: {0}");
649 } else {
650 functions = *maybe_functions;
651 }
652 }
653
654 // Parse the import section. The number of functions is needed because the
655 // function index space used in the name section includes imports.
656 if (std::optional<section_info> info =
657 GetSectionInfo(llvm::wasm::WASM_SEC_IMPORT)) {
658 DataExtractor import_data = ReadImageData(info->offset, info->size);
659 llvm::Expected<uint32_t> num_imports = ParseImports(import_data);
660 if (!num_imports) {
661 LLDB_LOG_ERROR(log, num_imports.takeError(),
662 "Failed to parse Wasm import section: {0}");
663 } else {
664 m_num_imported_functions = *num_imports;
665 }
666 }
667
668 // Parse the data section.
669 std::optional<section_info> data_info =
670 GetSectionInfo(llvm::wasm::WASM_SEC_DATA);
671 if (data_info) {
672 DataExtractor data_data = ReadImageData(data_info->offset, data_info->size);
673 llvm::Expected<std::vector<WasmSegment>> maybe_segments =
674 ParseData(data_data);
675 if (!maybe_segments) {
676 LLDB_LOG_ERROR(log, maybe_segments.takeError(),
677 "Failed to parse Wasm data section: {0}");
678 } else {
679 segments = *maybe_segments;
680 }
681 }
682
683 if (std::optional<section_info> info = GetSectionInfo("name")) {
684 DataExtractor names_data = ReadImageData(info->offset, info->size);
685 llvm::Expected<std::vector<Symbol>> symbols = ParseNames(
686 m_sections_up->FindSectionByType(lldb::eSectionTypeCode, false),
687 names_data, functions, segments, m_num_imported_functions);
688 if (!symbols) {
689 LLDB_LOG_ERROR(log, symbols.takeError(),
690 "Failed to parse Wasm names: {0}");
691 } else {
692 m_symbols = *symbols;
693 }
694 }
695
696 lldb::user_id_t segment_id = 0;
697 for (const WasmSegment &segment : segments) {
698 if (segment.type == WasmSegment::Active) {
699 // FIXME: Support segments with a memory index.
700 if (segment.memory_index != 0) {
701 LLDB_LOG(log, "Skipping segment {0}: non-zero memory index is "
702 "currently unsupported");
703 continue;
704 }
705
706 if (segment.init_expr_offset == LLDB_INVALID_OFFSET) {
707 LLDB_LOG(log, "Skipping segment {0}: unsupported init expression");
708 continue;
709 }
710 }
711
712 const lldb::addr_t file_vm_addr =
713 segment.type == WasmSegment::Active
714 ? segment.init_expr_offset
715 : data_info->offset + segment.section_offset;
716 const lldb::offset_t file_offset =
717 data_info->GetFileOffset() + segment.GetFileOffset();
718 SectionSP segment_sp = std::make_shared<Section>(
719 GetModule(),
720 /*obj_file=*/this,
721 ++segment_id << 8, // 1-based segment index, shifted by 8 bits to avoid
722 // collision with section IDs.
723 ConstString(segment.name), eSectionTypeData,
724 /*file_vm_addr=*/file_vm_addr,
725 /*vm_size=*/segment.size,
726 /*file_offset=*/file_offset,
727 /*file_size=*/segment.size,
728 /*log2align=*/0, /*flags=*/0);
729 m_sections_up->AddSection(segment_sp);
730 GetModule()->GetSectionList()->AddSection(segment_sp);
731 }
732}
733
735 bool value_is_offset) {
736 /// In WebAssembly, linear memory is disjointed from code space. The VM can
737 /// load multiple instances of a module, which logically share the same code.
738 /// We represent a wasm32 code address with 64-bits, like:
739 /// 63 32 31 0
740 /// +---------------+---------------+
741 /// + module_id | offset |
742 /// +---------------+---------------+
743 /// where the lower 32 bits represent a module offset (relative to the module
744 /// start not to the beginning of the code section) and the higher 32 bits
745 /// uniquely identify the module in the WebAssembly VM.
746 /// In other words, we assume that each WebAssembly module is loaded by the
747 /// engine at a 64-bit address that starts at the boundary of 4GB pages, like
748 /// 0x0000000400000000 for module_id == 4.
749 /// These 64-bit addresses will be used to request code ranges for a specific
750 /// module from the WebAssembly engine.
751
753 m_memory_addr == load_address);
754
755 ModuleSP module_sp = GetModule();
756 if (!module_sp)
757 return false;
758
760
761 size_t num_loaded_sections = 0;
762 SectionList *section_list = GetSectionList();
763 if (!section_list)
764 return false;
765
766 const size_t num_sections = section_list->GetSize();
767 for (size_t sect_idx = 0; sect_idx < num_sections; ++sect_idx) {
768 SectionSP section_sp(section_list->GetSectionAtIndex(sect_idx));
769 if (target.SetSectionLoadAddress(
770 section_sp, load_address | section_sp->GetFileOffset())) {
771 ++num_loaded_sections;
772 }
773 }
774
775 return num_loaded_sections > 0;
776}
777
779 DataExtractor data;
780 if (m_file) {
781 if (offset < GetByteSize()) {
782 size = std::min(static_cast<uint64_t>(size), GetByteSize() - offset);
783 auto buffer_sp = MapFileData(m_file, size, offset);
784 return DataExtractor(buffer_sp, GetByteOrder(), GetAddressByteSize());
785 }
786 } else {
787 ProcessSP process_sp(m_process_wp.lock());
788 if (process_sp) {
789 auto data_up = std::make_unique<DataBufferHeap>(size, 0);
790 Status readmem_error;
791 size_t bytes_read = process_sp->ReadMemory(
792 offset, data_up->GetBytes(), data_up->GetByteSize(), readmem_error);
793 if (bytes_read > 0) {
794 DataBufferSP buffer_sp(data_up.release());
795 data.SetData(buffer_sp);
796 }
797 } else if (offset < m_data_nsp->GetByteSize()) {
798 size = std::min(static_cast<uint64_t>(size),
799 m_data_nsp->GetByteSize() - offset);
800 return DataExtractor(m_data_nsp->GetDataStart() + offset, size,
802 }
803 }
805 return data;
806}
807
809 static ConstString g_sect_name_external_debug_info("external_debug_info");
810
811 for (const section_info &sect_info : m_sect_infos) {
812 if (g_sect_name_external_debug_info == sect_info.name) {
813 const uint32_t kBufferSize = 1024;
814 DataExtractor section_header_data =
815 ReadImageData(sect_info.offset, kBufferSize);
816
817 llvm::DataExtractor data = section_header_data.GetAsLLVM();
818 llvm::DataExtractor::Cursor c(0);
819 llvm::Expected<std::string> symbols_url = GetWasmString(data, c);
820 if (!symbols_url) {
821 llvm::consumeError(symbols_url.takeError());
822 return std::nullopt;
823 }
824 return FileSpec(*symbols_url);
825 }
826 }
827 return std::nullopt;
828}
829
831 ModuleSP module_sp(GetModule());
832 if (!module_sp)
833 return;
834
835 std::lock_guard<std::recursive_mutex> guard(module_sp->GetMutex());
836
837 llvm::raw_ostream &ostream = s->AsRawOstream();
838 ostream << static_cast<void *>(this) << ": ";
839 s->Indent();
840 ostream << "ObjectFileWasm, file = '";
841 m_file.Dump(ostream);
842 ostream << "', arch = ";
843 ostream << GetArchitecture().GetArchitectureName() << "\n";
844
845 SectionList *sections = GetSectionList();
846 if (sections) {
847 sections->Dump(s->AsRawOstream(), s->GetIndentLevel(), nullptr, true,
848 UINT32_MAX);
849 }
850 ostream << "\n";
851 DumpSectionHeaders(ostream);
852 ostream << "\n";
853}
854
855void ObjectFileWasm::DumpSectionHeader(llvm::raw_ostream &ostream,
856 const section_info &sh) {
857 ostream << llvm::left_justify(sh.name.GetStringRef(), 16) << " "
858 << llvm::format_hex(sh.offset, 10) << " "
859 << llvm::format_hex(sh.size, 10) << " " << llvm::format_hex(sh.id, 6)
860 << "\n";
861}
862
863void ObjectFileWasm::DumpSectionHeaders(llvm::raw_ostream &ostream) {
864 ostream << "Section Headers\n";
865 ostream << "IDX name addr size id\n";
866 ostream << "==== ---------------- ---------- ---------- ------\n";
867
868 uint32_t idx = 0;
869 for (auto pos = m_sect_infos.begin(); pos != m_sect_infos.end();
870 ++pos, ++idx) {
871 ostream << "[" << llvm::format_decimal(idx, 2) << "] ";
873 }
874}
#define LLDB_LOG(log,...)
The LLDB_LOG* macros defined below are the way to emit log messages.
Definition Log.h:369
#define LLDB_LOGF(log,...)
Definition Log.h:376
#define LLDB_LOG_ERROR(log, error,...)
Definition Log.h:392
static SectionType GetSectionTypeFromName(llvm::StringRef Name)
static lldb::offset_t GetWasmOffsetFromInitExpr(DataExtractor &data, lldb::offset_t &offset)
An "init expr" refers to a constant expression used to determine the initial value of certain element...
static llvm::Expected< std::string > GetWasmString(llvm::DataExtractor &data, llvm::DataExtractor::Cursor &c)
Helper to read a Wasm string, which is encoded as a vector of UTF-8 codes.
static llvm::Expected< uint32_t > GetULEB32(DataExtractor &data, lldb::offset_t &offset)
Helper to read a 32-bit ULEB using LLDB's DataExtractor.
static llvm::Expected< uint32_t > ParseImports(DataExtractor &import_data)
static llvm::Expected< std::vector< WasmFunction > > ParseFunctions(DataExtractor &data)
static llvm::Expected< std::vector< Symbol > > ParseNames(SectionSP code_section_sp, DataExtractor &name_data, const std::vector< WasmFunction > &functions, std::vector< WasmSegment > &segments, uint32_t num_imported_functions)
static bool ValidateModuleHeader(llvm::ArrayRef< uint8_t > data)
Checks whether the data buffer starts with a valid Wasm module header.
static const uint32_t kWasmHeaderSize
static llvm::Expected< std::vector< WasmSegment > > ParseData(DataExtractor &data)
#define LLDB_PLUGIN_DEFINE(PluginName)
An architecture specification class.
Definition ArchSpec.h:31
const char * GetArchitectureName() const
Returns a static string representing the current architecture.
Definition ArchSpec.cpp:548
A uniqued constant string class.
Definition ConstString.h:40
llvm::StringRef GetStringRef() const
Get the string value as a llvm::StringRef.
A data extractor class.
uint64_t GetULEB128(lldb::offset_t *offset_ptr) const
Extract an unsigned LEB128 value from *offset_ptr.
float GetFloat(lldb::offset_t *offset_ptr) const
Extract a float from *offset_ptr.
llvm::DataExtractor GetAsLLVM() const
void SetByteOrder(lldb::ByteOrder byte_order)
Set the byte_order value.
lldb::offset_t SetData(const void *bytes, lldb::offset_t length, lldb::ByteOrder byte_order)
Set data with a buffer that is caller owned.
int64_t GetSLEB128(lldb::offset_t *offset_ptr) const
Extract a signed LEB128 value from *offset_ptr.
uint8_t GetU8(lldb::offset_t *offset_ptr) const
Extract a uint8_t value from *offset_ptr.
A file utility class.
Definition FileSpec.h:57
size_t GetPath(char *path, size_t max_path_length, bool denormalize=true) const
Extract the full path to the file.
Definition FileSpec.cpp:374
lldb::ModuleSP GetModule() const
Get const accessor for the module pointer.
void Append(const ModuleSpec &spec)
Definition ModuleSpec.h:341
std::unique_ptr< lldb_private::SectionList > m_sections_up
Definition ObjectFile.h:776
static lldb::DataBufferSP MapFileData(const FileSpec &file, uint64_t Size, uint64_t Offset)
const lldb::addr_t m_memory_addr
Set if the object file only exists in memory.
Definition ObjectFile.h:775
static lldb::SectionType GetDWARFSectionTypeFromName(llvm::StringRef name)
Parses the section type from a section name for DWARF sections.
DataExtractorNSP m_data_nsp
The data for this object file so things can be parsed lazily.
Definition ObjectFile.h:769
virtual SectionList * GetSectionList(bool update_module_section_list=true)
Gets the section list for the currently selected architecture (and object for archives).
ObjectFile(const lldb::ModuleSP &module_sp, const FileSpec *file_spec_ptr, lldb::offset_t file_offset, lldb::offset_t length, lldb::DataExtractorSP extractor_sp, lldb::offset_t data_offset)
Construct with a parent module, offset, and header data.
bool IsInMemory() const
Returns true if the object file exists only in memory.
Definition ObjectFile.h:687
lldb::ProcessWP m_process_wp
Definition ObjectFile.h:773
virtual lldb::addr_t GetByteSize() const
Definition ObjectFile.h:275
static bool RegisterPlugin(llvm::StringRef name, llvm::StringRef description, ABICreateInstance create_callback)
static bool UnregisterPlugin(ABICreateInstance create_callback)
size_t GetSize() const
Definition Section.h:77
size_t AddSection(const lldb::SectionSP &section_sp)
Definition Section.cpp:488
void Dump(llvm::raw_ostream &s, unsigned indent, Target *target, bool show_header, uint32_t depth) const
Definition Section.cpp:650
lldb::SectionSP GetSectionAtIndex(size_t idx) const
Definition Section.cpp:557
An error handling class.
Definition Status.h:118
A stream class that can stream formatted output to a file.
Definition Stream.h:28
llvm::raw_ostream & AsRawOstream()
Returns a raw_ostream that forwards the data to this Stream object.
Definition Stream.h:406
size_t Indent(llvm::StringRef s="")
Indent the current line in the stream.
Definition Stream.cpp:157
unsigned GetIndentLevel() const
Get the current indentation level.
Definition Stream.cpp:187
uint32_t AddSymbol(const Symbol &symbol)
Definition Symtab.cpp:64
bool SetSectionLoadAddress(const lldb::SectionSP &section, lldb::addr_t load_addr, bool warn_multiple=false)
Definition Target.cpp:3334
Generic Wasm object file reader.
ArchSpec GetArchitecture() override
Get the ArchSpec for this object file.
static size_t GetModuleSpecifications(const FileSpec &file, lldb::DataExtractorSP &extractor_sp, lldb::offset_t data_offset, lldb::offset_t file_offset, lldb::offset_t length, ModuleSpecList &specs)
std::optional< FileSpec > GetExternalDebugInfoFileSpec()
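A Wasm module that has external DWARF debug information should contain a custom section named "external_debug_info", whose payload is a UTF-8 encoded string that points to a Wasm module that contains the debug information for this module.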
bool SetLoadAddress(lldb_private::Target &target, lldb::addr_t value, bool value_is_offset) override
Sets the load address for an entire module, assuming a rigid slide of sections, if possible in the implementation.
bool DecodeNextSection(lldb::offset_t *offset_ptr)
Wasm section decoding routines.
lldb::ByteOrder GetByteOrder() const override
Gets whether endian swapping should occur when extracting data from this object file.
void CreateSections(SectionList &unified_section_list) override
ObjectFileWasm(const lldb::ModuleSP &module_sp, lldb::DataExtractorSP extractor_sp, lldb::offset_t data_offset, const FileSpec *file, lldb::offset_t offset, lldb::offset_t length)
std::optional< section_info > GetSectionInfo(uint32_t section_id)
void Dump(Stream *s) override
Dump a description of this object to a Stream.
static llvm::StringRef GetPluginNameStatic()
std::vector< section_info > m_sect_infos
void DumpSectionHeader(llvm::raw_ostream &ostream, const section_info &sh)
Wasm section header dump routines.
void DumpSectionHeaders(llvm::raw_ostream &ostream)
static ObjectFile * CreateInstance(const lldb::ModuleSP &module_sp, lldb::DataExtractorSP extractor_sp, lldb::offset_t data_offset, const FileSpec *file, lldb::offset_t file_offset, lldb::offset_t length)
static char ID
LLVM RTTI support.
void ParseSymtab(lldb_private::Symtab &symtab) override
Parse the symbol table into the provided symbol table object.
uint32_t GetAddressByteSize() const override
Gets the address size in bytes for the current object file.
static ObjectFile * CreateMemoryInstance(const lldb::ModuleSP &module_sp, lldb::WritableDataBufferSP data_sp, const lldb::ProcessSP &process_sp, lldb::addr_t header_addr)
bool ParseHeader() override
ObjectFile Protocol.
static const char * GetPluginDescriptionStatic()
DataExtractor ReadImageData(lldb::offset_t offset, uint32_t size)
Read a range of bytes from the Wasm module.
#define LLDB_INVALID_ADDRESS
#define LLDB_INVALID_OFFSET
#define UINT32_MAX
A class that represents a running process on the host machine.
Log * GetLog(Cat mask)
Retrieve the Log object for the channel associated with the given log enum.
Definition Log.h:332
uint64_t offset_t
Definition lldb-types.h:85
std::shared_ptr< lldb_private::Process > ProcessSP
uint64_t user_id_t
Definition lldb-types.h:82
std::shared_ptr< lldb_private::DataBuffer > DataBufferSP
std::shared_ptr< lldb_private::Section > SectionSP
std::shared_ptr< lldb_private::WritableDataBuffer > WritableDataBufferSP
uint64_t addr_t
Definition lldb-types.h:80
@ eSectionTypeWasmName
std::shared_ptr< lldb_private::DataExtractor > DataExtractorSP
std::shared_ptr< lldb_private::Module > ModuleSP
lldb::offset_t section_offset
SegmentType type
std::string name
uint32_t memory_index
lldb::offset_t section_offset
lldb::offset_t GetFileOffset() const
lldb::offset_t init_expr_offset