LLDB mainline
ObjectFileWasm.cpp
Go to the documentation of this file.
1//===-- ObjectFileWasm.cpp ------------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "ObjectFileWasm.h"
10#include "lldb/Core/Module.h"
13#include "lldb/Core/Section.h"
14#include "lldb/Target/Process.h"
16#include "lldb/Target/Target.h"
19#include "lldb/Utility/Log.h"
20#include "llvm/ADT/ArrayRef.h"
21#include "llvm/ADT/SmallVector.h"
22#include "llvm/ADT/StringRef.h"
23#include "llvm/BinaryFormat/Magic.h"
24#include "llvm/BinaryFormat/Wasm.h"
25#include "llvm/Support/CheckedArithmetic.h"
26#include "llvm/Support/Endian.h"
27#include "llvm/Support/Format.h"
28#include <optional>
29
30using namespace lldb;
31using namespace lldb_private;
32using namespace lldb_private::wasm;
33
35
36static const uint32_t kWasmHeaderSize =
37 sizeof(llvm::wasm::WasmMagic) + sizeof(llvm::wasm::WasmVersion);
38
39/// Helper to read a 32-bit ULEB using LLDB's DataExtractor.
40static inline llvm::Expected<uint32_t> GetULEB32(DataExtractor &data,
41 lldb::offset_t &offset) {
42 const uint64_t value = data.GetULEB128(&offset);
43 if (value > std::numeric_limits<uint32_t>::max())
44 return llvm::createStringError("ULEB exceeds 32 bits");
45 return value;
46}
47
48/// Helper to read a 32-bit ULEB using LLVM's DataExtractor.
49static inline llvm::Expected<uint32_t>
50GetULEB32(llvm::DataExtractor &data, llvm::DataExtractor::Cursor &c) {
51 const uint64_t value = data.getULEB128(c);
52 if (!c)
53 return c.takeError();
54 if (value > std::numeric_limits<uint32_t>::max())
55 return llvm::createStringError("ULEB exceeds 32 bits");
56 return value;
57}
58
59/// Helper to read a Wasm string, whcih is encoded as a vector of UTF-8 codes.
60static inline llvm::Expected<std::string>
61GetWasmString(llvm::DataExtractor &data, llvm::DataExtractor::Cursor &c) {
62 llvm::Expected<uint32_t> len = GetULEB32(data, c);
63 if (!len)
64 return len.takeError();
65
66 llvm::SmallVector<uint8_t, 32> str_storage;
67 data.getU8(c, str_storage, *len);
68 if (!c)
69 return c.takeError();
70
71 return std::string(toStringRef(llvm::ArrayRef(str_storage)));
72}
73
74/// An "init expr" refers to a constant expression used to determine the initial
75/// value of certain elements within a module during instantiation. These
76/// expressions are restricted to operations that can be evaluated at module
77/// instantiation time. Currently we only support simple constant opcodes.
79 lldb::offset_t &offset) {
80 lldb::offset_t init_expr_offset = LLDB_INVALID_OFFSET;
81
82 uint8_t opcode = data.GetU8(&offset);
83 switch (opcode) {
84 case llvm::wasm::WASM_OPCODE_I32_CONST:
85 case llvm::wasm::WASM_OPCODE_I64_CONST:
86 init_expr_offset = data.GetSLEB128(&offset);
87 break;
88 case llvm::wasm::WASM_OPCODE_GLOBAL_GET:
89 init_expr_offset = data.GetULEB128(&offset);
90 break;
91 case llvm::wasm::WASM_OPCODE_F32_CONST:
92 case llvm::wasm::WASM_OPCODE_F64_CONST:
93 // Not a meaningful offset.
94 data.GetFloat(&offset);
95 break;
96 case llvm::wasm::WASM_OPCODE_REF_NULL:
97 // Not a meaningful offset.
98 data.GetULEB128(&offset);
99 break;
100 }
101
102 // Make sure the opcodes we read aren't part of an extended init expr.
103 opcode = data.GetU8(&offset);
104 if (opcode == llvm::wasm::WASM_OPCODE_END)
105 return init_expr_offset;
106
107 // Extended init expressions are not supported, but we still have to parse
108 // them to skip over them and read the next segment.
109 do {
110 opcode = data.GetU8(&offset);
111 } while (opcode != llvm::wasm::WASM_OPCODE_END);
112 return LLDB_INVALID_OFFSET;
113}
114
115/// Checks whether the data buffer starts with a valid Wasm module header.
116static bool ValidateModuleHeader(const DataBufferSP &data_sp) {
117 if (!data_sp || data_sp->GetByteSize() < kWasmHeaderSize)
118 return false;
119
120 if (llvm::identify_magic(toStringRef(data_sp->GetData())) !=
121 llvm::file_magic::wasm_object)
122 return false;
123
124 const uint8_t *Ptr = data_sp->GetBytes() + sizeof(llvm::wasm::WasmMagic);
125
126 uint32_t version = llvm::support::endian::read32le(Ptr);
127 return version == llvm::wasm::WasmVersion;
128}
129
131
137
141
143 DataExtractorSP extractor_sp,
144 offset_t data_offset,
145 const FileSpec *file,
146 offset_t file_offset,
147 offset_t length) {
148 Log *log = GetLog(LLDBLog::Object);
149
150 if (!extractor_sp || !extractor_sp->HasData()) {
151 DataBufferSP data_sp = MapFileData(*file, length, file_offset);
152 if (!data_sp) {
153 LLDB_LOGF(log, "Failed to create ObjectFileWasm instance for file %s",
154 file->GetPath().c_str());
155 return nullptr;
156 }
157 extractor_sp = std::make_shared<DataExtractor>(data_sp);
158 data_offset = 0;
159 }
160
161 assert(extractor_sp);
162 if (!ValidateModuleHeader(extractor_sp->GetSharedDataBuffer())) {
163 LLDB_LOGF(log,
164 "Failed to create ObjectFileWasm instance: invalid Wasm header");
165 return nullptr;
166 }
167
168 // Update the data to contain the entire file if it doesn't contain it
169 // already.
170 if (extractor_sp->GetByteSize() < length) {
171 DataBufferSP data_sp = MapFileData(*file, length, file_offset);
172 if (!data_sp) {
173 LLDB_LOGF(log,
174 "Failed to create ObjectFileWasm instance: cannot read file %s",
175 file->GetPath().c_str());
176 return nullptr;
177 }
178 extractor_sp = std::make_shared<DataExtractor>(data_sp);
179 data_offset = 0;
180 }
181
182 std::unique_ptr<ObjectFileWasm> objfile_up(new ObjectFileWasm(
183 module_sp, extractor_sp, data_offset, file, file_offset, length));
184 ArchSpec spec = objfile_up->GetArchitecture();
185 if (spec && objfile_up->SetModulesArchitecture(spec)) {
186 LLDB_LOGF(log,
187 "%p ObjectFileWasm::CreateInstance() module = %p (%s), file = %s",
188 static_cast<void *>(objfile_up.get()),
189 static_cast<void *>(objfile_up->GetModule().get()),
190 objfile_up->GetModule()->GetSpecificationDescription().c_str(),
191 file ? file->GetPath().c_str() : "<NULL>");
192 return objfile_up.release();
193 }
194
195 LLDB_LOGF(log, "Failed to create ObjectFileWasm instance");
196 return nullptr;
197}
198
200 WritableDataBufferSP data_sp,
201 const ProcessSP &process_sp,
202 addr_t header_addr) {
203 if (!ValidateModuleHeader(data_sp))
204 return nullptr;
205
206 std::unique_ptr<ObjectFileWasm> objfile_up(
207 new ObjectFileWasm(module_sp, data_sp, process_sp, header_addr));
208 ArchSpec spec = objfile_up->GetArchitecture();
209 if (spec && objfile_up->SetModulesArchitecture(spec))
210 return objfile_up.release();
211 return nullptr;
212}
213
215 // Buffer sufficient to read a section header and find the pointer to the next
216 // section.
217 const uint32_t kBufferSize = 1024;
218 DataExtractor section_header_data = ReadImageData(*offset_ptr, kBufferSize);
219
220 llvm::DataExtractor data = section_header_data.GetAsLLVM();
221 llvm::DataExtractor::Cursor c(0);
222
223 // Each section consists of:
224 // - a one-byte section id,
225 // - the u32 size of the contents, in bytes,
226 // - the actual contents.
227 uint8_t section_id = data.getU8(c);
228 uint64_t payload_len = data.getULEB128(c);
229 if (!c)
230 return !llvm::errorToBool(c.takeError());
231
232 if (payload_len > std::numeric_limits<uint32_t>::max())
233 return false;
234
235 if (section_id == llvm::wasm::WASM_SEC_CUSTOM) {
236 // Custom sections have the id 0. Their contents consist of a name
237 // identifying the custom section, followed by an uninterpreted sequence
238 // of bytes.
239 lldb::offset_t prev_offset = c.tell();
240 llvm::Expected<std::string> sect_name = GetWasmString(data, c);
241 if (!sect_name) {
242 LLDB_LOG_ERROR(GetLog(LLDBLog::Object), sect_name.takeError(),
243 "failed to parse section name: {0}");
244 return false;
245 }
246
247 if (payload_len < c.tell() - prev_offset)
248 return false;
249
250 uint32_t section_length = payload_len - (c.tell() - prev_offset);
251 m_sect_infos.push_back(section_info{*offset_ptr + c.tell(), section_length,
252 section_id, ConstString(*sect_name)});
253 *offset_ptr += (c.tell() + section_length);
254 } else if (section_id <= llvm::wasm::WASM_SEC_LAST_KNOWN) {
255 m_sect_infos.push_back(section_info{*offset_ptr + c.tell(),
256 static_cast<uint32_t>(payload_len),
257 section_id, ConstString()});
258 *offset_ptr += (c.tell() + payload_len);
259 } else {
260 // Invalid section id.
261 return false;
262 }
263 return true;
264}
265
268 if (IsInMemory()) {
269 offset += m_memory_addr;
270 }
271
272 while (DecodeNextSection(&offset))
273 ;
274 return true;
275}
276
278 const FileSpec &file, DataBufferSP &data_sp, offset_t data_offset,
279 offset_t file_offset, offset_t length, ModuleSpecList &specs) {
280 if (!ValidateModuleHeader(data_sp)) {
281 return 0;
282 }
283
284 ModuleSpec spec(file, ArchSpec("wasm32-unknown-unknown-wasm"));
285 specs.Append(spec);
286 return 1;
287}
288
290 DataExtractorSP extractor_sp,
291 offset_t data_offset, const FileSpec *file,
292 offset_t offset, offset_t length)
293 : ObjectFile(module_sp, file, offset, length, extractor_sp, data_offset),
294 m_arch("wasm32-unknown-unknown-wasm") {
295 m_data_nsp->SetAddressByteSize(4);
296}
297
299 lldb::WritableDataBufferSP header_data_sp,
300 const lldb::ProcessSP &process_sp,
301 lldb::addr_t header_addr)
302 : ObjectFile(module_sp, process_sp, header_addr,
303 std::make_shared<DataExtractor>(header_data_sp)),
304 m_arch("wasm32-unknown-unknown-wasm") {}
305
307 // We already parsed the header during initialization.
308 return true;
309}
310
315
316static llvm::Expected<uint32_t> ParseImports(DataExtractor &import_data) {
317 // Currently this function just returns the number of imported functions.
318 // If we want to do anything with global names in the future, we'll also
319 // need to know those.
320 llvm::DataExtractor data = import_data.GetAsLLVM();
321 llvm::DataExtractor::Cursor c(0);
322
323 llvm::Expected<uint32_t> count = GetULEB32(data, c);
324 if (!count)
325 return count.takeError();
326
327 uint32_t function_imports = 0;
328 for (uint32_t i = 0; c && i < *count; ++i) {
329 // We don't need module and field names, so we can just get them as raw
330 // strings and discard.
331 if (!GetWasmString(data, c))
332 return llvm::createStringError("failed to parse module name");
333 if (!GetWasmString(data, c))
334 return llvm::createStringError("failed to parse field name");
335
336 uint8_t kind = data.getU8(c);
337 if (kind == llvm::wasm::WASM_EXTERNAL_FUNCTION)
338 function_imports++;
339
340 // For function imports, this is a type index. For others it's different.
341 // We don't need it, just need to parse it to advance the cursor.
342 data.getULEB128(c);
343 }
344
345 if (!c)
346 return c.takeError();
347
348 return function_imports;
349}
350
351static llvm::Expected<std::vector<WasmFunction>>
353 lldb::offset_t offset = 0;
354
355 llvm::Expected<uint32_t> function_count = GetULEB32(data, offset);
356 if (!function_count)
357 return function_count.takeError();
358
359 std::vector<WasmFunction> functions;
360 functions.reserve(*function_count);
361
362 for (uint32_t i = 0; i < *function_count; ++i) {
363 llvm::Expected<uint32_t> function_size = GetULEB32(data, offset);
364 if (!function_size)
365 return function_size.takeError();
366 // llvm-objdump considers the ULEB with the function size to be part of the
367 // function. We can't do that here because that would break symbolic
368 // breakpoints, as that address is never executed.
369 functions.push_back({offset, *function_size});
370
371 std::optional<lldb::offset_t> next_offset =
372 llvm::checkedAddUnsigned<lldb::offset_t>(offset, *function_size);
373 if (!next_offset)
374 return llvm::createStringError("function offset overflows 64 bits");
375 offset = *next_offset;
376 }
377
378 return functions;
379}
380
396
397static llvm::Expected<std::vector<WasmSegment>> ParseData(DataExtractor &data) {
398 lldb::offset_t offset = 0;
399
400 llvm::Expected<uint32_t> segment_count = GetULEB32(data, offset);
401 if (!segment_count)
402 return segment_count.takeError();
403
404 std::vector<WasmSegment> segments;
405 segments.reserve(*segment_count);
406
407 for (uint32_t i = 0; i < *segment_count; ++i) {
408 llvm::Expected<uint32_t> flags = GetULEB32(data, offset);
409 if (!flags)
410 return flags.takeError();
411
412 WasmSegment segment;
413
414 // Data segments have a mode that identifies them as either passive or
415 // active. An active data segment copies its contents into a memory during
416 // instantiation, as specified by a memory index and a constant expression
417 // defining an offset into that memory.
418 segment.type = (*flags & llvm::wasm::WASM_DATA_SEGMENT_IS_PASSIVE)
421
422 if (*flags & llvm::wasm::WASM_DATA_SEGMENT_HAS_MEMINDEX) {
423 assert(segment.type == WasmSegment::Active);
424 llvm::Expected<uint32_t> memidx = GetULEB32(data, offset);
425 if (!memidx)
426 return memidx.takeError();
427 segment.memory_index = *memidx;
428 }
429
430 if (segment.type == WasmSegment::Active)
431 segment.init_expr_offset = GetWasmOffsetFromInitExpr(data, offset);
432
433 llvm::Expected<uint32_t> segment_size = GetULEB32(data, offset);
434 if (!segment_size)
435 return segment_size.takeError();
436
437 segment.section_offset = offset;
438 segment.size = *segment_size;
439 segments.push_back(segment);
440
441 std::optional<lldb::offset_t> next_offset =
442 llvm::checkedAddUnsigned<lldb::offset_t>(offset, *segment_size);
443 if (!next_offset)
444 return llvm::createStringError("segment offset overflows 64 bits");
445 offset = *next_offset;
446 }
447
448 return segments;
449}
450
451static llvm::Expected<std::vector<Symbol>>
452ParseNames(SectionSP code_section_sp, DataExtractor &name_data,
453 const std::vector<WasmFunction> &functions,
454 std::vector<WasmSegment> &segments,
455 uint32_t num_imported_functions) {
456
457 llvm::DataExtractor data = name_data.GetAsLLVM();
458 llvm::DataExtractor::Cursor c(0);
459 std::vector<Symbol> symbols;
460 while (c && c.tell() < data.size()) {
461 const uint8_t type = data.getU8(c);
462 llvm::Expected<uint32_t> size = GetULEB32(data, c);
463 if (!size)
464 return size.takeError();
465
466 switch (type) {
467 case llvm::wasm::WASM_NAMES_FUNCTION: {
468 const uint64_t count = data.getULEB128(c);
469 if (count > std::numeric_limits<uint32_t>::max())
470 return llvm::createStringError("function count overflows uint32_t");
471
472 for (uint64_t i = 0; c && i < count; ++i) {
473 llvm::Expected<uint32_t> idx = GetULEB32(data, c);
474 if (!idx)
475 return idx.takeError();
476 llvm::Expected<std::string> name = GetWasmString(data, c);
477 if (!name)
478 return name.takeError();
479 if (*idx >= num_imported_functions + functions.size())
480 continue;
481
482 if (*idx < num_imported_functions) {
483 symbols.emplace_back(symbols.size(), *name, lldb::eSymbolTypeCode,
484 /*external=*/true, /*is_debug=*/false,
485 /*is_trampoline=*/false,
486 /*is_artificial=*/false,
487 /*section_sp=*/lldb::SectionSP(),
488 /*value=*/0, /*size=*/0,
489 /*size_is_valid=*/false,
490 /*contains_linker_annotations=*/false,
491 /*flags=*/0);
492 } else {
493 const WasmFunction &func = functions[*idx - num_imported_functions];
494 symbols.emplace_back(symbols.size(), *name, lldb::eSymbolTypeCode,
495 /*external=*/false, /*is_debug=*/false,
496 /*is_trampoline=*/false, /*is_artificial=*/false,
497 code_section_sp, func.section_offset, func.size,
498 /*size_is_valid=*/true,
499 /*contains_linker_annotations=*/false,
500 /*flags=*/0);
501 }
502 }
503 } break;
504 case llvm::wasm::WASM_NAMES_DATA_SEGMENT: {
505 llvm::Expected<uint32_t> count = GetULEB32(data, c);
506 if (!count)
507 return count.takeError();
508 for (uint32_t i = 0; c && i < *count; ++i) {
509 llvm::Expected<uint32_t> idx = GetULEB32(data, c);
510 if (!idx)
511 return idx.takeError();
512 llvm::Expected<std::string> name = GetWasmString(data, c);
513 if (!name)
514 return name.takeError();
515 if (*idx >= segments.size())
516 continue;
517 // Update the segment name.
518 segments[i].name = *name;
519 }
520
521 } break;
522 case llvm::wasm::WASM_NAMES_GLOBAL:
523 case llvm::wasm::WASM_NAMES_LOCAL:
524 default:
525 std::optional<lldb::offset_t> offset =
526 llvm::checkedAddUnsigned<lldb::offset_t>(c.tell(), *size);
527 if (!offset)
528 return llvm::createStringError("offset overflows 64 bits");
529 c.seek(*offset);
530 }
531 }
532
533 if (!c)
534 return c.takeError();
535
536 return symbols;
537}
538
540 for (const Symbol &symbol : m_symbols)
541 symtab.AddSymbol(symbol);
542
543 symtab.Finalize();
544 m_symbols.clear();
545}
546
547static SectionType GetSectionTypeFromName(llvm::StringRef Name) {
548 if (Name == "name")
550 if (Name.consume_front(".debug_") || Name.consume_front(".zdebug_"))
552 return eSectionTypeOther;
553}
554
555std::optional<ObjectFileWasm::section_info>
556ObjectFileWasm::GetSectionInfo(uint32_t section_id) {
557 for (const section_info &sect_info : m_sect_infos) {
558 if (sect_info.id == section_id)
559 return sect_info;
560 }
561 return std::nullopt;
562}
563
564std::optional<ObjectFileWasm::section_info>
565ObjectFileWasm::GetSectionInfo(llvm::StringRef section_name) {
566 for (const section_info &sect_info : m_sect_infos) {
567 if (sect_info.name == section_name)
568 return sect_info;
569 }
570 return std::nullopt;
571}
572
573void ObjectFileWasm::CreateSections(SectionList &unified_section_list) {
574 Log *log = GetLog(LLDBLog::Object);
575
576 if (m_sections_up)
577 return;
578
579 m_sections_up = std::make_unique<SectionList>();
580
581 if (m_sect_infos.empty()) {
583 }
584
585 for (const section_info &sect_info : m_sect_infos) {
586 SectionType section_type = eSectionTypeOther;
587 ConstString section_name;
588 offset_t file_offset = sect_info.offset & 0xffffffff;
589 addr_t vm_addr = sect_info.offset;
590 size_t vm_size = sect_info.size;
591
592 if (llvm::wasm::WASM_SEC_CODE == sect_info.id) {
593 section_type = eSectionTypeCode;
594 section_name = ConstString("code");
595
596 // A code address in DWARF for WebAssembly is the offset of an
597 // instruction relative within the Code section of the WebAssembly file.
598 // For this reason Section::GetFileAddress() must return zero for the
599 // Code section.
600 vm_addr = 0;
601 } else {
602 section_type = GetSectionTypeFromName(sect_info.name.GetStringRef());
603 if (section_type == eSectionTypeOther)
604 continue;
605 section_name = sect_info.name;
606 if (!IsInMemory()) {
607 vm_size = 0;
608 vm_addr = 0;
609 }
610 }
611
612 SectionSP section_sp = std::make_shared<Section>(
613 GetModule(), // Module to which this section belongs.
614 this, // ObjectFile to which this section belongs and
615 // should read section data from.
616 section_type, // Section ID.
617 section_name, // Section name.
618 section_type, // Section type.
619 vm_addr, // VM address.
620 vm_size, // VM size in bytes of this section.
621 file_offset, // Offset of this section in the file.
622 sect_info.size, // Size of the section as found in the file.
623 0, // Alignment of the section
624 0, // Flags for this section.
625 1); // Number of host bytes per target byte
626 m_sections_up->AddSection(section_sp);
627 unified_section_list.AddSection(section_sp);
628 }
629
630 // The name section contains names and indexes. First parse the data from the
631 // relevant sections so we can access it by its index.
632 std::vector<WasmFunction> functions;
633 std::vector<WasmSegment> segments;
634
635 // Parse the code section.
636 if (std::optional<section_info> info =
637 GetSectionInfo(llvm::wasm::WASM_SEC_CODE)) {
638 DataExtractor code_data = ReadImageData(info->offset, info->size);
639 llvm::Expected<std::vector<WasmFunction>> maybe_functions =
640 ParseFunctions(code_data);
641 if (!maybe_functions) {
642 LLDB_LOG_ERROR(log, maybe_functions.takeError(),
643 "Failed to parse Wasm code section: {0}");
644 } else {
645 functions = *maybe_functions;
646 }
647 }
648
649 // Parse the import section. The number of functions is needed because the
650 // function index space used in the name section includes imports.
651 if (std::optional<section_info> info =
652 GetSectionInfo(llvm::wasm::WASM_SEC_IMPORT)) {
653 DataExtractor import_data = ReadImageData(info->offset, info->size);
654 llvm::Expected<uint32_t> num_imports = ParseImports(import_data);
655 if (!num_imports) {
656 LLDB_LOG_ERROR(log, num_imports.takeError(),
657 "Failed to parse Wasm import section: {0}");
658 } else {
659 m_num_imported_functions = *num_imports;
660 }
661 }
662
663 // Parse the data section.
664 std::optional<section_info> data_info =
665 GetSectionInfo(llvm::wasm::WASM_SEC_DATA);
666 if (data_info) {
667 DataExtractor data_data = ReadImageData(data_info->offset, data_info->size);
668 llvm::Expected<std::vector<WasmSegment>> maybe_segments =
669 ParseData(data_data);
670 if (!maybe_segments) {
671 LLDB_LOG_ERROR(log, maybe_segments.takeError(),
672 "Failed to parse Wasm data section: {0}");
673 } else {
674 segments = *maybe_segments;
675 }
676 }
677
678 if (std::optional<section_info> info = GetSectionInfo("name")) {
679 DataExtractor names_data = ReadImageData(info->offset, info->size);
680 llvm::Expected<std::vector<Symbol>> symbols = ParseNames(
681 m_sections_up->FindSectionByType(lldb::eSectionTypeCode, false),
682 names_data, functions, segments, m_num_imported_functions);
683 if (!symbols) {
684 LLDB_LOG_ERROR(log, symbols.takeError(),
685 "Failed to parse Wasm names: {0}");
686 } else {
687 m_symbols = *symbols;
688 }
689 }
690
691 lldb::user_id_t segment_id = 0;
692 for (const WasmSegment &segment : segments) {
693 if (segment.type == WasmSegment::Active) {
694 // FIXME: Support segments with a memory index.
695 if (segment.memory_index != 0) {
696 LLDB_LOG(log, "Skipping segment {0}: non-zero memory index is "
697 "currently unsupported");
698 continue;
699 }
700
701 if (segment.init_expr_offset == LLDB_INVALID_OFFSET) {
702 LLDB_LOG(log, "Skipping segment {0}: unsupported init expression");
703 continue;
704 }
705 }
706
707 const lldb::addr_t file_vm_addr =
708 segment.type == WasmSegment::Active
709 ? segment.init_expr_offset
710 : data_info->offset + segment.section_offset;
711 const lldb::offset_t file_offset =
712 data_info->GetFileOffset() + segment.GetFileOffset();
713 SectionSP segment_sp = std::make_shared<Section>(
714 GetModule(),
715 /*obj_file=*/this,
716 ++segment_id << 8, // 1-based segment index, shifted by 8 bits to avoid
717 // collision with section IDs.
718 ConstString(segment.name), eSectionTypeData,
719 /*file_vm_addr=*/file_vm_addr,
720 /*vm_size=*/segment.size,
721 /*file_offset=*/file_offset,
722 /*file_size=*/segment.size,
723 /*log2align=*/0, /*flags=*/0);
724 m_sections_up->AddSection(segment_sp);
725 GetModule()->GetSectionList()->AddSection(segment_sp);
726 }
727}
728
730 bool value_is_offset) {
731 /// In WebAssembly, linear memory is disjointed from code space. The VM can
732 /// load multiple instances of a module, which logically share the same code.
733 /// We represent a wasm32 code address with 64-bits, like:
734 /// 63 32 31 0
735 /// +---------------+---------------+
736 /// + module_id | offset |
737 /// +---------------+---------------+
738 /// where the lower 32 bits represent a module offset (relative to the module
739 /// start not to the beginning of the code section) and the higher 32 bits
740 /// uniquely identify the module in the WebAssembly VM.
741 /// In other words, we assume that each WebAssembly module is loaded by the
742 /// engine at a 64-bit address that starts at the boundary of 4GB pages, like
743 /// 0x0000000400000000 for module_id == 4.
744 /// These 64-bit addresses will be used to request code ranges for a specific
745 /// module from the WebAssembly engine.
746
748 m_memory_addr == load_address);
749
750 ModuleSP module_sp = GetModule();
751 if (!module_sp)
752 return false;
753
755
756 size_t num_loaded_sections = 0;
757 SectionList *section_list = GetSectionList();
758 if (!section_list)
759 return false;
760
761 const size_t num_sections = section_list->GetSize();
762 for (size_t sect_idx = 0; sect_idx < num_sections; ++sect_idx) {
763 SectionSP section_sp(section_list->GetSectionAtIndex(sect_idx));
764 if (target.SetSectionLoadAddress(
765 section_sp, load_address | section_sp->GetFileOffset())) {
766 ++num_loaded_sections;
767 }
768 }
769
770 return num_loaded_sections > 0;
771}
772
774 DataExtractor data;
775 if (m_file) {
776 if (offset < GetByteSize()) {
777 size = std::min(static_cast<uint64_t>(size), GetByteSize() - offset);
778 auto buffer_sp = MapFileData(m_file, size, offset);
779 return DataExtractor(buffer_sp, GetByteOrder(), GetAddressByteSize());
780 }
781 } else {
782 ProcessSP process_sp(m_process_wp.lock());
783 if (process_sp) {
784 auto data_up = std::make_unique<DataBufferHeap>(size, 0);
785 Status readmem_error;
786 size_t bytes_read = process_sp->ReadMemory(
787 offset, data_up->GetBytes(), data_up->GetByteSize(), readmem_error);
788 if (bytes_read > 0) {
789 DataBufferSP buffer_sp(data_up.release());
790 data.SetData(buffer_sp);
791 }
792 } else if (offset < m_data_nsp->GetByteSize()) {
793 size = std::min(static_cast<uint64_t>(size),
794 m_data_nsp->GetByteSize() - offset);
795 return DataExtractor(m_data_nsp->GetDataStart() + offset, size,
797 }
798 }
800 return data;
801}
802
804 static ConstString g_sect_name_external_debug_info("external_debug_info");
805
806 for (const section_info &sect_info : m_sect_infos) {
807 if (g_sect_name_external_debug_info == sect_info.name) {
808 const uint32_t kBufferSize = 1024;
809 DataExtractor section_header_data =
810 ReadImageData(sect_info.offset, kBufferSize);
811
812 llvm::DataExtractor data = section_header_data.GetAsLLVM();
813 llvm::DataExtractor::Cursor c(0);
814 llvm::Expected<std::string> symbols_url = GetWasmString(data, c);
815 if (!symbols_url) {
816 llvm::consumeError(symbols_url.takeError());
817 return std::nullopt;
818 }
819 return FileSpec(*symbols_url);
820 }
821 }
822 return std::nullopt;
823}
824
826 ModuleSP module_sp(GetModule());
827 if (!module_sp)
828 return;
829
830 std::lock_guard<std::recursive_mutex> guard(module_sp->GetMutex());
831
832 llvm::raw_ostream &ostream = s->AsRawOstream();
833 ostream << static_cast<void *>(this) << ": ";
834 s->Indent();
835 ostream << "ObjectFileWasm, file = '";
836 m_file.Dump(ostream);
837 ostream << "', arch = ";
838 ostream << GetArchitecture().GetArchitectureName() << "\n";
839
840 SectionList *sections = GetSectionList();
841 if (sections) {
842 sections->Dump(s->AsRawOstream(), s->GetIndentLevel(), nullptr, true,
843 UINT32_MAX);
844 }
845 ostream << "\n";
846 DumpSectionHeaders(ostream);
847 ostream << "\n";
848}
849
850void ObjectFileWasm::DumpSectionHeader(llvm::raw_ostream &ostream,
851 const section_info &sh) {
852 ostream << llvm::left_justify(sh.name.GetStringRef(), 16) << " "
853 << llvm::format_hex(sh.offset, 10) << " "
854 << llvm::format_hex(sh.size, 10) << " " << llvm::format_hex(sh.id, 6)
855 << "\n";
856}
857
858void ObjectFileWasm::DumpSectionHeaders(llvm::raw_ostream &ostream) {
859 ostream << "Section Headers\n";
860 ostream << "IDX name addr size id\n";
861 ostream << "==== ---------------- ---------- ---------- ------\n";
862
863 uint32_t idx = 0;
864 for (auto pos = m_sect_infos.begin(); pos != m_sect_infos.end();
865 ++pos, ++idx) {
866 ostream << "[" << llvm::format_decimal(idx, 2) << "] ";
868 }
869}
#define LLDB_LOG(log,...)
The LLDB_LOG* macros defined below are the way to emit log messages.
Definition Log.h:369
#define LLDB_LOGF(log,...)
Definition Log.h:376
#define LLDB_LOG_ERROR(log, error,...)
Definition Log.h:392
static SectionType GetSectionTypeFromName(llvm::StringRef Name)
static lldb::offset_t GetWasmOffsetFromInitExpr(DataExtractor &data, lldb::offset_t &offset)
An "init expr" refers to a constant expression used to determine the initial value of certain element...
static bool ValidateModuleHeader(const DataBufferSP &data_sp)
Checks whether the data buffer starts with a valid Wasm module header.
static llvm::Expected< std::string > GetWasmString(llvm::DataExtractor &data, llvm::DataExtractor::Cursor &c)
Helper to read a Wasm string, which is encoded as a vector of UTF-8 codes.
static llvm::Expected< uint32_t > GetULEB32(DataExtractor &data, lldb::offset_t &offset)
Helper to read a 32-bit ULEB using LLDB's DataExtractor.
static llvm::Expected< uint32_t > ParseImports(DataExtractor &import_data)
static llvm::Expected< std::vector< WasmFunction > > ParseFunctions(DataExtractor &data)
static llvm::Expected< std::vector< Symbol > > ParseNames(SectionSP code_section_sp, DataExtractor &name_data, const std::vector< WasmFunction > &functions, std::vector< WasmSegment > &segments, uint32_t num_imported_functions)
static const uint32_t kWasmHeaderSize
static llvm::Expected< std::vector< WasmSegment > > ParseData(DataExtractor &data)
#define LLDB_PLUGIN_DEFINE(PluginName)
An architecture specification class.
Definition ArchSpec.h:31
const char * GetArchitectureName() const
Returns a static string representing the current architecture.
Definition ArchSpec.cpp:548
A uniqued constant string class.
Definition ConstString.h:40
llvm::StringRef GetStringRef() const
Get the string value as a llvm::StringRef.
A data extractor class.
uint64_t GetULEB128(lldb::offset_t *offset_ptr) const
Extract an unsigned LEB128 value from *offset_ptr.
float GetFloat(lldb::offset_t *offset_ptr) const
Extract a float from *offset_ptr.
llvm::DataExtractor GetAsLLVM() const
void SetByteOrder(lldb::ByteOrder byte_order)
Set the byte_order value.
lldb::offset_t SetData(const void *bytes, lldb::offset_t length, lldb::ByteOrder byte_order)
Set data with a buffer that is caller owned.
int64_t GetSLEB128(lldb::offset_t *offset_ptr) const
Extract a signed LEB128 value from *offset_ptr.
uint8_t GetU8(lldb::offset_t *offset_ptr) const
Extract a uint8_t value from *offset_ptr.
A file utility class.
Definition FileSpec.h:57
size_t GetPath(char *path, size_t max_path_length, bool denormalize=true) const
Extract the full path to the file.
Definition FileSpec.cpp:374
lldb::ModuleSP GetModule() const
Get const accessor for the module pointer.
void Append(const ModuleSpec &spec)
Definition ModuleSpec.h:326
std::unique_ptr< lldb_private::SectionList > m_sections_up
Definition ObjectFile.h:799
static lldb::DataBufferSP MapFileData(const FileSpec &file, uint64_t Size, uint64_t Offset)
const lldb::addr_t m_memory_addr
Set if the object file only exists in memory.
Definition ObjectFile.h:798
static lldb::SectionType GetDWARFSectionTypeFromName(llvm::StringRef name)
Parses the section type from a section name for DWARF sections.
DataExtractorNSP m_data_nsp
The data for this object file so things can be parsed lazily.
Definition ObjectFile.h:792
virtual SectionList * GetSectionList(bool update_module_section_list=true)
Gets the section list for the currently selected architecture (and object for archives).
ObjectFile(const lldb::ModuleSP &module_sp, const FileSpec *file_spec_ptr, lldb::offset_t file_offset, lldb::offset_t length, lldb::DataExtractorSP extractor_sp, lldb::offset_t data_offset)
Construct with a parent module, offset, and header data.
bool IsInMemory() const
Returns true if the object file exists only in memory.
Definition ObjectFile.h:710
lldb::ProcessWP m_process_wp
Definition ObjectFile.h:796
virtual lldb::addr_t GetByteSize() const
Definition ObjectFile.h:275
static bool RegisterPlugin(llvm::StringRef name, llvm::StringRef description, ABICreateInstance create_callback)
static bool UnregisterPlugin(ABICreateInstance create_callback)
size_t GetSize() const
Definition Section.h:77
size_t AddSection(const lldb::SectionSP &section_sp)
Definition Section.cpp:488
void Dump(llvm::raw_ostream &s, unsigned indent, Target *target, bool show_header, uint32_t depth) const
Definition Section.cpp:650
lldb::SectionSP GetSectionAtIndex(size_t idx) const
Definition Section.cpp:557
An error handling class.
Definition Status.h:118
A stream class that can stream formatted output to a file.
Definition Stream.h:28
llvm::raw_ostream & AsRawOstream()
Returns a raw_ostream that forwards the data to this Stream object.
Definition Stream.h:406
size_t Indent(llvm::StringRef s="")
Indent the current line in the stream.
Definition Stream.cpp:157
unsigned GetIndentLevel() const
Get the current indentation level.
Definition Stream.cpp:187
uint32_t AddSymbol(const Symbol &symbol)
Definition Symtab.cpp:64
bool SetSectionLoadAddress(const lldb::SectionSP &section, lldb::addr_t load_addr, bool warn_multiple=false)
Definition Target.cpp:3334
Generic Wasm object file reader.
ArchSpec GetArchitecture() override
Get the ArchSpec for this object file.
std::optional< FileSpec > GetExternalDebugInfoFileSpec()
A Wasm module that has external DWARF debug information should contain a custom section named "external_debug_info".
bool SetLoadAddress(lldb_private::Target &target, lldb::addr_t value, bool value_is_offset) override
Sets the load address for an entire module, assuming a rigid slide of sections, if possible in the implementation.
bool DecodeNextSection(lldb::offset_t *offset_ptr)
Wasm section decoding routines.
lldb::ByteOrder GetByteOrder() const override
Gets whether endian swapping should occur when extracting data from this object file.
void CreateSections(SectionList &unified_section_list) override
ObjectFileWasm(const lldb::ModuleSP &module_sp, lldb::DataExtractorSP extractor_sp, lldb::offset_t data_offset, const FileSpec *file, lldb::offset_t offset, lldb::offset_t length)
std::optional< section_info > GetSectionInfo(uint32_t section_id)
void Dump(Stream *s) override
Dump a description of this object to a Stream.
static llvm::StringRef GetPluginNameStatic()
std::vector< section_info > m_sect_infos
static size_t GetModuleSpecifications(const FileSpec &file, lldb::DataBufferSP &data_sp, lldb::offset_t data_offset, lldb::offset_t file_offset, lldb::offset_t length, ModuleSpecList &specs)
void DumpSectionHeader(llvm::raw_ostream &ostream, const section_info &sh)
Wasm section header dump routines.
void DumpSectionHeaders(llvm::raw_ostream &ostream)
static ObjectFile * CreateInstance(const lldb::ModuleSP &module_sp, lldb::DataExtractorSP extractor_sp, lldb::offset_t data_offset, const FileSpec *file, lldb::offset_t file_offset, lldb::offset_t length)
static char ID
LLVM RTTI support.
void ParseSymtab(lldb_private::Symtab &symtab) override
Parse the symbol table into the provided symbol table object.
uint32_t GetAddressByteSize() const override
Gets the address size in bytes for the current object file.
static ObjectFile * CreateMemoryInstance(const lldb::ModuleSP &module_sp, lldb::WritableDataBufferSP data_sp, const lldb::ProcessSP &process_sp, lldb::addr_t header_addr)
bool ParseHeader() override
ObjectFile Protocol.
static const char * GetPluginDescriptionStatic()
DataExtractor ReadImageData(lldb::offset_t offset, uint32_t size)
Read a range of bytes from the Wasm module.
#define LLDB_INVALID_ADDRESS
#define LLDB_INVALID_OFFSET
#define UINT32_MAX
A class that represents a running process on the host machine.
Log * GetLog(Cat mask)
Retrieve the Log object for the channel associated with the given log enum.
Definition Log.h:332
uint64_t offset_t
Definition lldb-types.h:85
std::shared_ptr< lldb_private::Process > ProcessSP
uint64_t user_id_t
Definition lldb-types.h:82
std::shared_ptr< lldb_private::DataBuffer > DataBufferSP
std::shared_ptr< lldb_private::Section > SectionSP
std::shared_ptr< lldb_private::WritableDataBuffer > WritableDataBufferSP
uint64_t addr_t
Definition lldb-types.h:80
@ eSectionTypeWasmName
std::shared_ptr< lldb_private::DataExtractor > DataExtractorSP
std::shared_ptr< lldb_private::Module > ModuleSP
lldb::offset_t section_offset
SegmentType type
std::string name
uint32_t memory_index
lldb::offset_t section_offset
lldb::offset_t GetFileOffset() const
lldb::offset_t init_expr_offset