LLDB mainline
ObjectFileWasm.cpp
Go to the documentation of this file.
1//===-- ObjectFileWasm.cpp ------------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "ObjectFileWasm.h"
10#include "lldb/Core/Module.h"
13#include "lldb/Core/Section.h"
14#include "lldb/Target/Process.h"
16#include "lldb/Target/Target.h"
19#include "lldb/Utility/Log.h"
20#include "llvm/ADT/ArrayRef.h"
21#include "llvm/ADT/SmallVector.h"
22#include "llvm/ADT/StringRef.h"
23#include "llvm/BinaryFormat/Magic.h"
24#include "llvm/BinaryFormat/Wasm.h"
25#include "llvm/Support/CheckedArithmetic.h"
26#include "llvm/Support/Endian.h"
27#include "llvm/Support/Format.h"
28#include <optional>
29
30using namespace lldb;
31using namespace lldb_private;
32using namespace lldb_private::wasm;
33
35
36static const uint32_t kWasmHeaderSize =
37 sizeof(llvm::wasm::WasmMagic) + sizeof(llvm::wasm::WasmVersion);
38
39/// Helper to read a 32-bit ULEB using LLDB's DataExtractor.
40static inline llvm::Expected<uint32_t> GetULEB32(DataExtractor &data,
41 lldb::offset_t &offset) {
42 const uint64_t value = data.GetULEB128(&offset);
43 if (value > std::numeric_limits<uint32_t>::max())
44 return llvm::createStringError("ULEB exceeds 32 bits");
45 return value;
46}
47
48/// Helper to read a 32-bit ULEB using LLVM's DataExtractor.
49static inline llvm::Expected<uint32_t>
50GetULEB32(llvm::DataExtractor &data, llvm::DataExtractor::Cursor &c) {
51 const uint64_t value = data.getULEB128(c);
52 if (!c)
53 return c.takeError();
54 if (value > std::numeric_limits<uint32_t>::max())
55 return llvm::createStringError("ULEB exceeds 32 bits");
56 return value;
57}
58
59/// Helper to read a Wasm string, whcih is encoded as a vector of UTF-8 codes.
60static inline llvm::Expected<std::string>
61GetWasmString(llvm::DataExtractor &data, llvm::DataExtractor::Cursor &c) {
62 llvm::Expected<uint32_t> len = GetULEB32(data, c);
63 if (!len)
64 return len.takeError();
65
66 llvm::SmallVector<uint8_t, 32> str_storage;
67 data.getU8(c, str_storage, *len);
68 if (!c)
69 return c.takeError();
70
71 return std::string(toStringRef(llvm::ArrayRef(str_storage)));
72}
73
74/// An "init expr" refers to a constant expression used to determine the initial
75/// value of certain elements within a module during instantiation. These
76/// expressions are restricted to operations that can be evaluated at module
77/// instantiation time. Currently we only support simple constant opcodes.
79 lldb::offset_t &offset) {
80 lldb::offset_t init_expr_offset = LLDB_INVALID_OFFSET;
81
82 uint8_t opcode = data.GetU8(&offset);
83 switch (opcode) {
84 case llvm::wasm::WASM_OPCODE_I32_CONST:
85 case llvm::wasm::WASM_OPCODE_I64_CONST:
86 init_expr_offset = data.GetSLEB128(&offset);
87 break;
88 case llvm::wasm::WASM_OPCODE_GLOBAL_GET:
89 init_expr_offset = data.GetULEB128(&offset);
90 break;
91 case llvm::wasm::WASM_OPCODE_F32_CONST:
92 case llvm::wasm::WASM_OPCODE_F64_CONST:
93 // Not a meaningful offset.
94 data.GetFloat(&offset);
95 break;
96 case llvm::wasm::WASM_OPCODE_REF_NULL:
97 // Not a meaningful offset.
98 data.GetULEB128(&offset);
99 break;
100 }
101
102 // Make sure the opcodes we read aren't part of an extended init expr.
103 opcode = data.GetU8(&offset);
104 if (opcode == llvm::wasm::WASM_OPCODE_END)
105 return init_expr_offset;
106
107 // Extended init expressions are not supported, but we still have to parse
108 // them to skip over them and read the next segment.
109 do {
110 opcode = data.GetU8(&offset);
111 } while (opcode != llvm::wasm::WASM_OPCODE_END);
112 return LLDB_INVALID_OFFSET;
113}
114
115/// Checks whether the data buffer starts with a valid Wasm module header.
116static bool ValidateModuleHeader(llvm::ArrayRef<uint8_t> data) {
117 if (data.size() < kWasmHeaderSize)
118 return false;
119
120 if (llvm::identify_magic(toStringRef(data)) != llvm::file_magic::wasm_object)
121 return false;
122
123 const uint8_t *Ptr = data.data() + sizeof(llvm::wasm::WasmMagic);
124
125 uint32_t version = llvm::support::endian::read32le(Ptr);
126 return version == llvm::wasm::WasmVersion;
127}
128
130
136
140
142 DataExtractorSP extractor_sp,
143 offset_t data_offset,
144 const FileSpec *file,
145 offset_t file_offset,
146 offset_t length) {
147 Log *log = GetLog(LLDBLog::Object);
148
149 if (!extractor_sp || !extractor_sp->HasData()) {
150 DataBufferSP data_sp = MapFileData(*file, length, file_offset);
151 if (!data_sp) {
152 LLDB_LOGF(log, "Failed to create ObjectFileWasm instance for file %s",
153 file->GetPath().c_str());
154 return nullptr;
155 }
156 extractor_sp = std::make_shared<DataExtractor>(data_sp);
157 data_offset = 0;
158 }
159
160 assert(extractor_sp);
161 if (!ValidateModuleHeader(extractor_sp->GetData())) {
162 LLDB_LOGF(log,
163 "Failed to create ObjectFileWasm instance: invalid Wasm header");
164 return nullptr;
165 }
166
167 // Update the data to contain the entire file if it doesn't contain it
168 // already.
169 if (extractor_sp->GetByteSize() < length) {
170 DataBufferSP data_sp = MapFileData(*file, length, file_offset);
171 if (!data_sp) {
172 LLDB_LOGF(log,
173 "Failed to create ObjectFileWasm instance: cannot read file %s",
174 file->GetPath().c_str());
175 return nullptr;
176 }
177 extractor_sp = std::make_shared<DataExtractor>(data_sp);
178 data_offset = 0;
179 }
180
181 std::unique_ptr<ObjectFileWasm> objfile_up(new ObjectFileWasm(
182 module_sp, extractor_sp, data_offset, file, file_offset, length));
183 ArchSpec spec = objfile_up->GetArchitecture();
184 if (spec && objfile_up->SetModulesArchitecture(spec)) {
185 LLDB_LOGF(log,
186 "%p ObjectFileWasm::CreateInstance() module = %p (%s), file = %s",
187 static_cast<void *>(objfile_up.get()),
188 static_cast<void *>(objfile_up->GetModule().get()),
189 objfile_up->GetModule()->GetSpecificationDescription().c_str(),
190 file ? file->GetPath().c_str() : "<NULL>");
191 return objfile_up.release();
192 }
193
194 LLDB_LOGF(log, "Failed to create ObjectFileWasm instance");
195 return nullptr;
196}
197
199 WritableDataBufferSP data_sp,
200 const ProcessSP &process_sp,
201 addr_t header_addr) {
202 if (!ValidateModuleHeader(data_sp->GetData()))
203 return nullptr;
204
205 std::unique_ptr<ObjectFileWasm> objfile_up(
206 new ObjectFileWasm(module_sp, data_sp, process_sp, header_addr));
207 ArchSpec spec = objfile_up->GetArchitecture();
208 if (spec && objfile_up->SetModulesArchitecture(spec))
209 return objfile_up.release();
210 return nullptr;
211}
212
214 // Buffer sufficient to read a section header and find the pointer to the next
215 // section.
216 const uint32_t kBufferSize = 1024;
217 DataExtractor section_header_data = ReadImageData(*offset_ptr, kBufferSize);
218
219 llvm::DataExtractor data = section_header_data.GetAsLLVM();
220 llvm::DataExtractor::Cursor c(0);
221
222 // Each section consists of:
223 // - a one-byte section id,
224 // - the u32 size of the contents, in bytes,
225 // - the actual contents.
226 uint8_t section_id = data.getU8(c);
227 uint64_t payload_len = data.getULEB128(c);
228 if (!c)
229 return !llvm::errorToBool(c.takeError());
230
231 if (payload_len > std::numeric_limits<uint32_t>::max())
232 return false;
233
234 if (section_id == llvm::wasm::WASM_SEC_CUSTOM) {
235 // Custom sections have the id 0. Their contents consist of a name
236 // identifying the custom section, followed by an uninterpreted sequence
237 // of bytes.
238 lldb::offset_t prev_offset = c.tell();
239 llvm::Expected<std::string> sect_name = GetWasmString(data, c);
240 if (!sect_name) {
241 LLDB_LOG_ERROR(GetLog(LLDBLog::Object), sect_name.takeError(),
242 "failed to parse section name: {0}");
243 return false;
244 }
245
246 if (payload_len < c.tell() - prev_offset)
247 return false;
248
249 uint32_t section_length = payload_len - (c.tell() - prev_offset);
250 m_sect_infos.push_back(section_info{*offset_ptr + c.tell(), section_length,
251 section_id, ConstString(*sect_name)});
252 *offset_ptr += (c.tell() + section_length);
253 } else if (section_id <= llvm::wasm::WASM_SEC_LAST_KNOWN) {
254 m_sect_infos.push_back(section_info{*offset_ptr + c.tell(),
255 static_cast<uint32_t>(payload_len),
256 section_id, ConstString()});
257 *offset_ptr += (c.tell() + payload_len);
258 } else {
259 // Invalid section id.
260 return false;
261 }
262 return true;
263}
264
267 if (IsInMemory()) {
268 offset += m_memory_addr;
269 }
270
271 while (DecodeNextSection(&offset))
272 ;
273 return true;
274}
275
278 DataExtractorSP &extractor_sp,
279 offset_t file_offset, offset_t length) {
280 if (!ValidateModuleHeader(extractor_sp->GetData()))
281 return {};
282
283 ModuleSpecList specs;
284 specs.Append(ModuleSpec(file, ArchSpec("wasm32-unknown-unknown-wasm")));
285 return specs;
286}
287
289 DataExtractorSP extractor_sp,
290 offset_t data_offset, const FileSpec *file,
291 offset_t offset, offset_t length)
292 : ObjectFile(module_sp, file, offset, length, extractor_sp, data_offset),
293 m_arch("wasm32-unknown-unknown-wasm") {
294 m_data_nsp->SetAddressByteSize(4);
295}
296
298 lldb::WritableDataBufferSP header_data_sp,
299 const lldb::ProcessSP &process_sp,
300 lldb::addr_t header_addr)
301 : ObjectFile(module_sp, process_sp, header_addr,
302 std::make_shared<DataExtractor>(header_data_sp)),
303 m_arch("wasm32-unknown-unknown-wasm") {}
304
306 // We already parsed the header during initialization.
307 return true;
308}
309
311 /// Offset from the section to the start of the function. This points past the
312 /// function size, which some other tools consider part of the function.
314
315 /// Function size, which includes the function header, but not the size ULEB
316 /// that precedes it.
317 uint32_t size = 0;
318
319 /// Offset from section_offset to the first instruction in the function, past
320 /// the local variable declarations.
321 uint32_t code_offset = 0;
322};
323
324static llvm::Expected<uint32_t> ParseImports(DataExtractor &import_data) {
325 // Currently this function just returns the number of imported functions.
326 // If we want to do anything with global names in the future, we'll also
327 // need to know those.
328 llvm::DataExtractor data = import_data.GetAsLLVM();
329 llvm::DataExtractor::Cursor c(0);
330
331 llvm::Expected<uint32_t> count = GetULEB32(data, c);
332 if (!count)
333 return count.takeError();
334
335 uint32_t function_imports = 0;
336 for (uint32_t i = 0; c && i < *count; ++i) {
337 // We don't need module and field names, so we can just get them as raw
338 // strings and discard.
339 llvm::Expected<std::string> module_name = GetWasmString(data, c);
340 if (!module_name)
341 return llvm::joinErrors(
342 llvm::createStringError("failed to parse module name"),
343 module_name.takeError());
344 llvm::Expected<std::string> field_name = GetWasmString(data, c);
345 if (!field_name)
346 return llvm::joinErrors(
347 llvm::createStringError("failed to parse field name"),
348 field_name.takeError());
349
350 uint8_t kind = data.getU8(c);
351 if (kind == llvm::wasm::WASM_EXTERNAL_FUNCTION)
352 function_imports++;
353
354 // For function imports, this is a type index. For others it's different.
355 // We don't need it, just need to parse it to advance the cursor.
356 data.getULEB128(c);
357 }
358
359 if (!c)
360 return c.takeError();
361
362 return function_imports;
363}
364
365/// Get the offset in the function to the first instruction.
366static llvm::Expected<uint32_t> GetFunctionCodeOffset(DataExtractor &data,
367 lldb::offset_t offset) {
368 // Wasm function bodies start with:
369 // [local_count: ULEB128]
370 // [local_decl: {count: ULEB128, type: byte}] × local_count
371 // [instructions...]
372 const lldb::offset_t locals_start = offset;
373 const uint32_t local_count = data.GetULEB128(&offset);
374 for (uint32_t i = 0; i < local_count; ++i) {
375 data.GetULEB128(&offset); // count
376 data.GetU8(&offset); // valtype
377 }
378 return offset - locals_start;
379}
380
381static llvm::Expected<std::vector<WasmFunction>>
383 lldb::offset_t offset = 0;
384
385 llvm::Expected<uint32_t> function_count = GetULEB32(data, offset);
386 if (!function_count)
387 return function_count.takeError();
388
389 std::vector<WasmFunction> functions;
390 functions.reserve(*function_count);
391
392 for (uint32_t i = 0; i < *function_count; ++i) {
393 // llvm-objdump considers the ULEB with the function size to be part of the
394 // function. We can't do that here because that would not match the DWARF,
395 // which considers the function to start with the local variable
396 // declarations (the header).
397 llvm::Expected<uint32_t> function_size = GetULEB32(data, offset);
398 if (!function_size)
399 return function_size.takeError();
400
401 // Functions start with with a number of local variable declarations.
402 // They're part of the function but they're not instructions.
403 llvm::Expected<uint32_t> code_offset = GetFunctionCodeOffset(data, offset);
404 if (!code_offset)
405 return code_offset.takeError();
406
407 functions.push_back({offset, *function_size, *code_offset});
408
409 std::optional<lldb::offset_t> next_offset =
410 llvm::checkedAddUnsigned<lldb::offset_t>(offset, *function_size);
411 if (!next_offset)
412 return llvm::createStringError("function offset overflows 64 bits");
413 offset = *next_offset;
414 }
415
416 return functions;
417}
418
434
435static llvm::Expected<std::vector<WasmSegment>> ParseData(DataExtractor &data) {
436 lldb::offset_t offset = 0;
437
438 llvm::Expected<uint32_t> segment_count = GetULEB32(data, offset);
439 if (!segment_count)
440 return segment_count.takeError();
441
442 std::vector<WasmSegment> segments;
443 segments.reserve(*segment_count);
444
445 for (uint32_t i = 0; i < *segment_count; ++i) {
446 llvm::Expected<uint32_t> flags = GetULEB32(data, offset);
447 if (!flags)
448 return flags.takeError();
449
451
452 // Data segments have a mode that identifies them as either passive or
453 // active. An active data segment copies its contents into a memory during
454 // instantiation, as specified by a memory index and a constant expression
455 // defining an offset into that memory.
456 segment.type = (*flags & llvm::wasm::WASM_DATA_SEGMENT_IS_PASSIVE)
459
460 if (*flags & llvm::wasm::WASM_DATA_SEGMENT_HAS_MEMINDEX) {
461 assert(segment.type == WasmSegment::Active);
462 llvm::Expected<uint32_t> memidx = GetULEB32(data, offset);
463 if (!memidx)
464 return memidx.takeError();
465 segment.memory_index = *memidx;
466 }
467
468 if (segment.type == WasmSegment::Active)
469 segment.init_expr_offset = GetWasmOffsetFromInitExpr(data, offset);
470
471 llvm::Expected<uint32_t> segment_size = GetULEB32(data, offset);
472 if (!segment_size)
473 return segment_size.takeError();
474
475 segment.section_offset = offset;
476 segment.size = *segment_size;
477 segments.push_back(segment);
478
479 std::optional<lldb::offset_t> next_offset =
480 llvm::checkedAddUnsigned<lldb::offset_t>(offset, *segment_size);
481 if (!next_offset)
482 return llvm::createStringError("segment offset overflows 64 bits");
483 offset = *next_offset;
484 }
485
486 return segments;
487}
488
489static llvm::Expected<std::vector<Symbol>>
490ParseNames(SectionSP code_section_sp, DataExtractor &name_data,
491 const std::vector<WasmFunction> &functions,
492 std::vector<WasmSegment> &segments,
493 uint32_t num_imported_functions) {
494
495 llvm::DataExtractor data = name_data.GetAsLLVM();
496 llvm::DataExtractor::Cursor c(0);
497 std::vector<Symbol> symbols;
498 while (c && c.tell() < data.size()) {
499 const uint8_t type = data.getU8(c);
500 llvm::Expected<uint32_t> size = GetULEB32(data, c);
501 if (!size)
502 return size.takeError();
503
504 switch (type) {
505 case llvm::wasm::WASM_NAMES_FUNCTION: {
506 const uint64_t count = data.getULEB128(c);
507 if (count > std::numeric_limits<uint32_t>::max())
508 return llvm::createStringError("function count overflows uint32_t");
509
510 for (uint64_t i = 0; c && i < count; ++i) {
511 llvm::Expected<uint32_t> idx = GetULEB32(data, c);
512 if (!idx)
513 return idx.takeError();
514 llvm::Expected<std::string> name = GetWasmString(data, c);
515 if (!name)
516 return name.takeError();
517 if (*idx >= num_imported_functions + functions.size())
518 continue;
519
520 if (*idx < num_imported_functions) {
521 symbols.emplace_back(symbols.size(), *name, lldb::eSymbolTypeCode,
522 /*external=*/true, /*is_debug=*/false,
523 /*is_trampoline=*/false,
524 /*is_artificial=*/false,
525 /*section_sp=*/lldb::SectionSP(),
526 /*value=*/0, /*size=*/0,
527 /*size_is_valid=*/false,
528 /*contains_linker_annotations=*/false,
529 /*flags=*/0);
530 } else {
531 const WasmFunction &func = functions[*idx - num_imported_functions];
532 symbols.emplace_back(symbols.size(), *name, lldb::eSymbolTypeCode,
533 /*external=*/false, /*is_debug=*/false,
534 /*is_trampoline=*/false, /*is_artificial=*/false,
535 code_section_sp, func.section_offset, func.size,
536 /*size_is_valid=*/true,
537 /*contains_linker_annotations=*/false,
538 /*flags=*/0);
539 if (func.code_offset)
540 symbols.back().SetPrologueByteSize(func.code_offset);
541 }
542 }
543 } break;
544 case llvm::wasm::WASM_NAMES_DATA_SEGMENT: {
545 llvm::Expected<uint32_t> count = GetULEB32(data, c);
546 if (!count)
547 return count.takeError();
548 for (uint32_t i = 0; c && i < *count; ++i) {
549 llvm::Expected<uint32_t> idx = GetULEB32(data, c);
550 if (!idx)
551 return idx.takeError();
552 llvm::Expected<std::string> name = GetWasmString(data, c);
553 if (!name)
554 return name.takeError();
555 if (*idx >= segments.size())
556 continue;
557 // Update the segment name.
558 segments[i].name = *name;
559 }
560
561 } break;
562 case llvm::wasm::WASM_NAMES_GLOBAL:
563 case llvm::wasm::WASM_NAMES_LOCAL:
564 default:
565 std::optional<lldb::offset_t> offset =
566 llvm::checkedAddUnsigned<lldb::offset_t>(c.tell(), *size);
567 if (!offset)
568 return llvm::createStringError("offset overflows 64 bits");
569 c.seek(*offset);
570 }
571 }
572
573 if (!c)
574 return c.takeError();
575
576 return symbols;
577}
578
580 for (const Symbol &symbol : m_symbols)
581 symtab.AddSymbol(symbol);
582
583 symtab.Finalize();
584 m_symbols.clear();
585}
586
587static SectionType GetSectionTypeFromName(llvm::StringRef Name) {
588 if (Name == "name")
590 if (Name.consume_front(".debug_") || Name.consume_front(".zdebug_"))
592 return eSectionTypeOther;
593}
594
595std::optional<ObjectFileWasm::section_info>
596ObjectFileWasm::GetSectionInfo(uint32_t section_id) {
597 for (const section_info &sect_info : m_sect_infos) {
598 if (sect_info.id == section_id)
599 return sect_info;
600 }
601 return std::nullopt;
602}
603
604std::optional<ObjectFileWasm::section_info>
605ObjectFileWasm::GetSectionInfo(llvm::StringRef section_name) {
606 for (const section_info &sect_info : m_sect_infos) {
607 if (sect_info.name == section_name)
608 return sect_info;
609 }
610 return std::nullopt;
611}
612
613void ObjectFileWasm::CreateSections(SectionList &unified_section_list) {
614 Log *log = GetLog(LLDBLog::Object);
615
616 if (m_sections_up)
617 return;
618
619 m_sections_up = std::make_unique<SectionList>();
620
621 if (m_sect_infos.empty()) {
623 }
624
625 for (const section_info &sect_info : m_sect_infos) {
626 SectionType section_type = eSectionTypeOther;
627 ConstString section_name;
628 offset_t file_offset = sect_info.offset & 0xffffffff;
629 addr_t vm_addr = sect_info.offset;
630 size_t vm_size = sect_info.size;
631
632 if (llvm::wasm::WASM_SEC_CODE == sect_info.id) {
633 section_type = eSectionTypeCode;
634 section_name = ConstString("code");
635
636 // A code address in DWARF for WebAssembly is the offset of an
637 // instruction relative within the Code section of the WebAssembly file.
638 // For this reason Section::GetFileAddress() must return zero for the
639 // Code section.
640 vm_addr = 0;
641 } else {
642 section_type = GetSectionTypeFromName(sect_info.name.GetStringRef());
643 if (section_type == eSectionTypeOther)
644 continue;
645 section_name = sect_info.name;
646 if (!IsInMemory()) {
647 vm_size = 0;
648 vm_addr = 0;
649 }
650 }
651
652 SectionSP section_sp = std::make_shared<Section>(
653 GetModule(), // Module to which this section belongs.
654 this, // ObjectFile to which this section belongs and
655 // should read section data from.
656 section_type, // Section ID.
657 section_name, // Section name.
658 section_type, // Section type.
659 vm_addr, // VM address.
660 vm_size, // VM size in bytes of this section.
661 file_offset, // Offset of this section in the file.
662 sect_info.size, // Size of the section as found in the file.
663 0, // Alignment of the section
664 0); // Flags for this section.
665 m_sections_up->AddSection(section_sp);
666 unified_section_list.AddSection(section_sp);
667 }
668
669 // The name section contains names and indexes. First parse the data from the
670 // relevant sections so we can access it by its index.
671 std::vector<WasmFunction> functions;
672 std::vector<WasmSegment> segments;
673
674 // Parse the code section.
675 if (std::optional<section_info> info =
676 GetSectionInfo(llvm::wasm::WASM_SEC_CODE)) {
677 DataExtractor code_data = ReadImageData(info->offset, info->size);
678 llvm::Expected<std::vector<WasmFunction>> maybe_functions =
679 ParseFunctions(code_data);
680 if (!maybe_functions) {
681 LLDB_LOG_ERROR(log, maybe_functions.takeError(),
682 "Failed to parse Wasm code section: {0}");
683 } else {
684 functions = *maybe_functions;
685 }
686 }
687
688 // Parse the import section. The number of functions is needed because the
689 // function index space used in the name section includes imports.
690 if (std::optional<section_info> info =
691 GetSectionInfo(llvm::wasm::WASM_SEC_IMPORT)) {
692 DataExtractor import_data = ReadImageData(info->offset, info->size);
693 llvm::Expected<uint32_t> num_imports = ParseImports(import_data);
694 if (!num_imports) {
695 LLDB_LOG_ERROR(log, num_imports.takeError(),
696 "Failed to parse Wasm import section: {0}");
697 } else {
698 m_num_imported_functions = *num_imports;
699 }
700 }
701
702 // Parse the data section.
703 std::optional<section_info> data_info =
704 GetSectionInfo(llvm::wasm::WASM_SEC_DATA);
705 if (data_info) {
706 DataExtractor data_data = ReadImageData(data_info->offset, data_info->size);
707 llvm::Expected<std::vector<WasmSegment>> maybe_segments =
708 ParseData(data_data);
709 if (!maybe_segments) {
710 LLDB_LOG_ERROR(log, maybe_segments.takeError(),
711 "Failed to parse Wasm data section: {0}");
712 } else {
713 segments = *maybe_segments;
714 }
715 }
716
717 if (std::optional<section_info> info = GetSectionInfo("name")) {
718 DataExtractor names_data = ReadImageData(info->offset, info->size);
719 llvm::Expected<std::vector<Symbol>> symbols = ParseNames(
720 m_sections_up->FindSectionByType(lldb::eSectionTypeCode, false),
721 names_data, functions, segments, m_num_imported_functions);
722 if (!symbols) {
723 LLDB_LOG_ERROR(log, symbols.takeError(),
724 "Failed to parse Wasm names: {0}");
725 } else {
726 m_symbols = *symbols;
727 }
728 }
729
730 lldb::user_id_t segment_id = 0;
731 for (const WasmSegment &segment : segments) {
732 if (segment.type == WasmSegment::Active) {
733 // FIXME: Support segments with a memory index.
734 if (segment.memory_index != 0) {
735 LLDB_LOG(log, "Skipping segment {0}: non-zero memory index is "
736 "currently unsupported");
737 continue;
738 }
739
740 if (segment.init_expr_offset == LLDB_INVALID_OFFSET) {
741 LLDB_LOG(log, "Skipping segment {0}: unsupported init expression");
742 continue;
743 }
744 }
745
746 const lldb::addr_t file_vm_addr =
748 ? segment.init_expr_offset
749 : data_info->offset + segment.section_offset;
750 const lldb::offset_t file_offset =
751 data_info->GetFileOffset() + segment.GetFileOffset();
752 SectionSP segment_sp = std::make_shared<Section>(
753 GetModule(),
754 /*obj_file=*/this,
755 ++segment_id << 8, // 1-based segment index, shifted by 8 bits to avoid
756 // collision with section IDs.
758 /*file_vm_addr=*/file_vm_addr,
759 /*vm_size=*/segment.size,
760 /*file_offset=*/file_offset,
761 /*file_size=*/segment.size,
762 /*log2align=*/0, /*flags=*/0);
763 m_sections_up->AddSection(segment_sp);
764 GetModule()->GetSectionList()->AddSection(segment_sp);
765 }
766}
767
769 bool value_is_offset) {
770 /// In WebAssembly, linear memory is disjointed from code space. The VM can
771 /// load multiple instances of a module, which logically share the same code.
772 /// We represent a wasm32 code address with 64-bits, like:
773 /// 63 32 31 0
774 /// +---------------+---------------+
775 /// + module_id | offset |
776 /// +---------------+---------------+
777 /// where the lower 32 bits represent a module offset (relative to the module
778 /// start not to the beginning of the code section) and the higher 32 bits
779 /// uniquely identify the module in the WebAssembly VM.
780 /// In other words, we assume that each WebAssembly module is loaded by the
781 /// engine at a 64-bit address that starts at the boundary of 4GB pages, like
782 /// 0x0000000400000000 for module_id == 4.
783 /// These 64-bit addresses will be used to request code ranges for a specific
784 /// module from the WebAssembly engine.
785
787 m_memory_addr == load_address);
788
789 ModuleSP module_sp = GetModule();
790 if (!module_sp)
791 return false;
792
794
795 size_t num_loaded_sections = 0;
796 SectionList *section_list = GetSectionList();
797 if (!section_list)
798 return false;
799
800 const size_t num_sections = section_list->GetSize();
801 for (size_t sect_idx = 0; sect_idx < num_sections; ++sect_idx) {
802 SectionSP section_sp(section_list->GetSectionAtIndex(sect_idx));
803 if (target.SetSectionLoadAddress(
804 section_sp, load_address | section_sp->GetFileOffset())) {
805 ++num_loaded_sections;
806 }
807 }
808
809 return num_loaded_sections > 0;
810}
811
813 DataExtractor data;
814 if (m_file) {
815 if (offset < GetByteSize()) {
816 size = std::min(static_cast<uint64_t>(size), GetByteSize() - offset);
817 auto buffer_sp = MapFileData(m_file, size, offset);
818 return DataExtractor(buffer_sp, GetByteOrder(), GetAddressByteSize());
819 }
820 } else {
821 ProcessSP process_sp(m_process_wp.lock());
822 if (process_sp) {
823 auto data_up = std::make_unique<DataBufferHeap>(size, 0);
824 Status readmem_error;
825 size_t bytes_read = process_sp->ReadMemory(
826 offset, data_up->GetBytes(), data_up->GetByteSize(), readmem_error);
827 if (bytes_read > 0) {
828 DataBufferSP buffer_sp(data_up.release());
829 data.SetData(buffer_sp);
830 }
831 } else if (offset < m_data_nsp->GetByteSize()) {
832 size = std::min(static_cast<uint64_t>(size),
833 m_data_nsp->GetByteSize() - offset);
834 return DataExtractor(m_data_nsp->GetDataStart() + offset, size,
836 }
837 }
839 return data;
840}
841
843 static ConstString g_sect_name_external_debug_info("external_debug_info");
844
845 for (const section_info &sect_info : m_sect_infos) {
846 if (g_sect_name_external_debug_info == sect_info.name) {
847 const uint32_t kBufferSize = 1024;
848 DataExtractor section_header_data =
849 ReadImageData(sect_info.offset, kBufferSize);
850
851 llvm::DataExtractor data = section_header_data.GetAsLLVM();
852 llvm::DataExtractor::Cursor c(0);
853 llvm::Expected<std::string> symbols_url = GetWasmString(data, c);
854 if (!symbols_url) {
855 llvm::consumeError(symbols_url.takeError());
856 return std::nullopt;
857 }
858 return FileSpec(*symbols_url);
859 }
860 }
861 return std::nullopt;
862}
863
865 ModuleSP module_sp(GetModule());
866 if (!module_sp)
867 return;
868
869 std::lock_guard<std::recursive_mutex> guard(module_sp->GetMutex());
870
871 llvm::raw_ostream &ostream = s->AsRawOstream();
872 ostream << static_cast<void *>(this) << ": ";
873 s->Indent();
874 ostream << "ObjectFileWasm, file = '";
875 m_file.Dump(ostream);
876 ostream << "', arch = ";
877 ostream << GetArchitecture().GetArchitectureName() << "\n";
878
879 SectionList *sections = GetSectionList();
880 if (sections) {
881 sections->Dump(s->AsRawOstream(), s->GetIndentLevel(), nullptr, true,
882 UINT32_MAX);
883 }
884 ostream << "\n";
885 DumpSectionHeaders(ostream);
886 ostream << "\n";
887}
888
889void ObjectFileWasm::DumpSectionHeader(llvm::raw_ostream &ostream,
890 const section_info &sh) {
891 ostream << llvm::left_justify(sh.name.GetStringRef(), 16) << " "
892 << llvm::format_hex(sh.offset, 10) << " "
893 << llvm::format_hex(sh.size, 10) << " " << llvm::format_hex(sh.id, 6)
894 << "\n";
895}
896
897void ObjectFileWasm::DumpSectionHeaders(llvm::raw_ostream &ostream) {
898 ostream << "Section Headers\n";
899 ostream << "IDX name addr size id\n";
900 ostream << "==== ---------------- ---------- ---------- ------\n";
901
902 uint32_t idx = 0;
903 for (auto pos = m_sect_infos.begin(); pos != m_sect_infos.end();
904 ++pos, ++idx) {
905 ostream << "[" << llvm::format_decimal(idx, 2) << "] ";
907 }
908}
#define LLDB_LOG(log,...)
The LLDB_LOG* macros defined below are the way to emit log messages.
Definition Log.h:364
#define LLDB_LOGF(log,...)
Definition Log.h:378
#define LLDB_LOG_ERROR(log, error,...)
Definition Log.h:394
static SectionType GetSectionTypeFromName(llvm::StringRef Name)
static lldb::offset_t GetWasmOffsetFromInitExpr(DataExtractor &data, lldb::offset_t &offset)
An "init expr" refers to a constant expression used to determine the initial value of certain element...
static llvm::Expected< std::string > GetWasmString(llvm::DataExtractor &data, llvm::DataExtractor::Cursor &c)
Helper to read a Wasm string, which is encoded as a vector of UTF-8 codes.
static llvm::Expected< uint32_t > GetULEB32(DataExtractor &data, lldb::offset_t &offset)
Helper to read a 32-bit ULEB using LLDB's DataExtractor.
static llvm::Expected< uint32_t > GetFunctionCodeOffset(DataExtractor &data, lldb::offset_t offset)
Get the offset in the function to the first instruction.
static llvm::Expected< uint32_t > ParseImports(DataExtractor &import_data)
static llvm::Expected< std::vector< WasmFunction > > ParseFunctions(DataExtractor &data)
static llvm::Expected< std::vector< Symbol > > ParseNames(SectionSP code_section_sp, DataExtractor &name_data, const std::vector< WasmFunction > &functions, std::vector< WasmSegment > &segments, uint32_t num_imported_functions)
static bool ValidateModuleHeader(llvm::ArrayRef< uint8_t > data)
Checks whether the data buffer starts with a valid Wasm module header.
static const uint32_t kWasmHeaderSize
static llvm::Expected< std::vector< WasmSegment > > ParseData(DataExtractor &data)
#define LLDB_PLUGIN_DEFINE(PluginName)
An architecture specification class.
Definition ArchSpec.h:32
const char * GetArchitectureName() const
Returns a static string representing the current architecture.
Definition ArchSpec.cpp:548
A uniqued constant string class.
Definition ConstString.h:40
llvm::StringRef GetStringRef() const
Get the string value as a llvm::StringRef.
A data extractor class.
uint64_t GetULEB128(lldb::offset_t *offset_ptr) const
Extract an unsigned LEB128 value from *offset_ptr.
float GetFloat(lldb::offset_t *offset_ptr) const
Extract a float from *offset_ptr.
llvm::DataExtractor GetAsLLVM() const
void SetByteOrder(lldb::ByteOrder byte_order)
Set the byte_order value.
virtual lldb::offset_t SetData(const void *bytes, lldb::offset_t length, lldb::ByteOrder byte_order)
Set data with a buffer that is caller owned.
int64_t GetSLEB128(lldb::offset_t *offset_ptr) const
Extract a signed LEB128 value from *offset_ptr.
uint8_t GetU8(lldb::offset_t *offset_ptr) const
Extract a uint8_t value from *offset_ptr.
A file utility class.
Definition FileSpec.h:57
size_t GetPath(char *path, size_t max_path_length, bool denormalize=true) const
Extract the full path to the file.
Definition FileSpec.cpp:374
lldb::ModuleSP GetModule() const
Get const accessor for the module pointer.
void Append(const ModuleSpec &spec)
Definition ModuleSpec.h:341
std::unique_ptr< lldb_private::SectionList > m_sections_up
Definition ObjectFile.h:774
static lldb::DataBufferSP MapFileData(const FileSpec &file, uint64_t Size, uint64_t Offset)
const lldb::addr_t m_memory_addr
Set if the object file only exists in memory.
Definition ObjectFile.h:773
static lldb::SectionType GetDWARFSectionTypeFromName(llvm::StringRef name)
Parses the section type from a section name for DWARF sections.
DataExtractorNSP m_data_nsp
The data for this object file so things can be parsed lazily.
Definition ObjectFile.h:767
virtual SectionList * GetSectionList(bool update_module_section_list=true)
Gets the section list for the currently selected architecture (and object for archives).
ObjectFile(const lldb::ModuleSP &module_sp, const FileSpec *file_spec_ptr, lldb::offset_t file_offset, lldb::offset_t length, lldb::DataExtractorSP extractor_sp, lldb::offset_t data_offset)
Construct with a parent module, offset, and header data.
bool IsInMemory() const
Returns true if the object file exists only in memory.
Definition ObjectFile.h:685
lldb::ProcessWP m_process_wp
Definition ObjectFile.h:771
virtual lldb::addr_t GetByteSize() const
Definition ObjectFile.h:273
static bool RegisterPlugin(llvm::StringRef name, llvm::StringRef description, ABICreateInstance create_callback)
static bool UnregisterPlugin(ABICreateInstance create_callback)
size_t GetSize() const
Definition Section.h:77
size_t AddSection(const lldb::SectionSP &section_sp)
Definition Section.cpp:480
void Dump(llvm::raw_ostream &s, unsigned indent, Target *target, bool show_header, uint32_t depth) const
Definition Section.cpp:642
lldb::SectionSP GetSectionAtIndex(size_t idx) const
Definition Section.cpp:549
An error handling class.
Definition Status.h:118
A stream class that can stream formatted output to a file.
Definition Stream.h:28
llvm::raw_ostream & AsRawOstream()
Returns a raw_ostream that forwards the data to this Stream object.
Definition Stream.h:402
size_t Indent(llvm::StringRef s="")
Indent the current line in the stream.
Definition Stream.cpp:155
unsigned GetIndentLevel() const
Get the current indentation level.
Definition Stream.cpp:191
uint32_t AddSymbol(const Symbol &symbol)
Definition Symtab.cpp:61
bool SetSectionLoadAddress(const lldb::SectionSP &section, lldb::addr_t load_addr, bool warn_multiple=false)
Definition Target.cpp:3330
Generic Wasm object file reader.
ArchSpec GetArchitecture() override
Get the ArchSpec for this object file.
std::optional< FileSpec > GetExternalDebugInfoFileSpec()
A Wasm module that has external DWARF debug information should contain a custom section named "extern...
bool SetLoadAddress(lldb_private::Target &target, lldb::addr_t value, bool value_is_offset) override
Sets the load address for an entire module, assuming a rigid slide of sections, if possible in the implementation.
bool DecodeNextSection(lldb::offset_t *offset_ptr)
Wasm section decoding routines.
lldb::ByteOrder GetByteOrder() const override
Gets whether endian swapping should occur when extracting data from this object file.
void CreateSections(SectionList &unified_section_list) override
ObjectFileWasm(const lldb::ModuleSP &module_sp, lldb::DataExtractorSP extractor_sp, lldb::offset_t data_offset, const FileSpec *file, lldb::offset_t offset, lldb::offset_t length)
std::optional< section_info > GetSectionInfo(uint32_t section_id)
void Dump(Stream *s) override
Dump a description of this object to a Stream.
static llvm::StringRef GetPluginNameStatic()
std::vector< section_info > m_sect_infos
void DumpSectionHeader(llvm::raw_ostream &ostream, const section_info &sh)
Wasm section header dump routines.
void DumpSectionHeaders(llvm::raw_ostream &ostream)
static ObjectFile * CreateInstance(const lldb::ModuleSP &module_sp, lldb::DataExtractorSP extractor_sp, lldb::offset_t data_offset, const FileSpec *file, lldb::offset_t file_offset, lldb::offset_t length)
static char ID
LLVM RTTI support.
void ParseSymtab(lldb_private::Symtab &symtab) override
Parse the symbol table into the provided symbol table object.
static ModuleSpecList GetModuleSpecifications(const FileSpec &file, lldb::DataExtractorSP &extractor_sp, lldb::offset_t file_offset, lldb::offset_t length)
uint32_t GetAddressByteSize() const override
Gets the address size in bytes for the current object file.
static ObjectFile * CreateMemoryInstance(const lldb::ModuleSP &module_sp, lldb::WritableDataBufferSP data_sp, const lldb::ProcessSP &process_sp, lldb::addr_t header_addr)
bool ParseHeader() override
ObjectFile Protocol.
static const char * GetPluginDescriptionStatic()
DataExtractor ReadImageData(lldb::offset_t offset, uint32_t size)
Read a range of bytes from the Wasm module.
#define LLDB_INVALID_ADDRESS
#define LLDB_INVALID_OFFSET
#define UINT32_MAX
A class that represents a running process on the host machine.
Log * GetLog(Cat mask)
Retrieve the Log object for the channel associated with the given log enum.
Definition Log.h:327
uint64_t offset_t
Definition lldb-types.h:85
std::shared_ptr< lldb_private::Process > ProcessSP
uint64_t user_id_t
Definition lldb-types.h:82
std::shared_ptr< lldb_private::DataBuffer > DataBufferSP
std::shared_ptr< lldb_private::Section > SectionSP
std::shared_ptr< lldb_private::WritableDataBuffer > WritableDataBufferSP
uint64_t addr_t
Definition lldb-types.h:80
@ eSectionTypeWasmName
std::shared_ptr< lldb_private::DataExtractor > DataExtractorSP
std::shared_ptr< lldb_private::Module > ModuleSP
lldb::offset_t section_offset
Offset from the section to the start of the function.
uint32_t code_offset
Offset from section_offset to the first instruction in the function, past the local variable declarations.
uint32_t size
Function size, which includes the function header, but not the size ULEB that precedes it.
SegmentType type
std::string name
uint32_t memory_index
lldb::offset_t section_offset
lldb::offset_t GetFileOffset() const
lldb::offset_t init_expr_offset
std::string name