| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| // -*- mode: C++ -*- |
| // |
| // Copyright 2022 Google LLC |
| // |
| // Licensed under the Apache License v2.0 with LLVM Exceptions (the |
| // "License"); you may not use this file except in compliance with the |
| // License. You may obtain a copy of the License at |
| // |
| // https://llvm.org/LICENSE.txt |
| // |
| // Unless required by applicable law or agreed to in writing, software |
| // distributed under the License is distributed on an "AS IS" BASIS, |
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| // See the License for the specific language governing permissions and |
| // limitations under the License. |
| // |
| // Author: Aleksei Vetrov |
| |
| #include "dwarf_wrappers.h" |
| |
| #include <dwarf.h> |
| #include <elfutils/libdw.h> |
| |
| #include <cstddef> |
| #include <cstdint> |
| #include <optional> |
| #include <ostream> |
| #include <string> |
| #include <utility> |
| #include <vector> |
| |
| #include "error.h" |
| #include "hex.h" |
| |
| namespace stg { |
| namespace dwarf { |
| |
| std::ostream& operator<<(std::ostream& os, const Address& address) { |
| switch (address.kind) { |
| case Address::Kind::ADDRESS: |
| return os << Hex(address.value); |
| case Address::Kind::TLS: |
| return os << "TLS:" << Hex(address.value); |
| } |
| } |
| |
| namespace { |
| |
// Integer status codes compared against libdw return values in this file:
// callers treat kReturnOk as success and kReturnNoEntry as "nothing (more) to
// return" (e.g. no further unit, child or sibling).
constexpr int kReturnOk{0};
constexpr int kReturnNoEntry{1};
| |
| std::optional<Dwarf_Attribute> GetAttribute(Dwarf_Die* die, |
| uint32_t attribute) { |
| // Create an optional with default-initialized value already inside |
| std::optional<Dwarf_Attribute> result(std::in_place); |
| // "integrate" automatically resolves DW_AT_abstract_origin and |
| // DW_AT_specification references, fetching the attribute from the linked DIE. |
| // |
| // libdw has infinite loop protection, as it stops after 16 dereferences. |
| // TODO: don't use dwarf_attr_integrate by default |
| if (!dwarf_attr_integrate(die, attribute, &result.value())) { |
| result.reset(); |
| } |
| return result; |
| } |
| |
| // Get the attribute directly from DIE without following DW_AT_specification and |
| // DW_AT_abstract_origin references. |
| std::optional<Dwarf_Attribute> GetDirectAttribute(Dwarf_Die* die, |
| uint32_t attribute) { |
| // Create an optional with default-initialized value already inside |
| std::optional<Dwarf_Attribute> result(std::in_place); |
| if (!dwarf_attr(die, attribute, &result.value())) { |
| result.reset(); |
| } |
| return result; |
| } |
| |
| std::optional<uint64_t> MaybeGetUnsignedOperand(const Dwarf_Op& operand) { |
| switch (operand.atom) { |
| case DW_OP_addr: |
| case DW_OP_const1u: |
| case DW_OP_const2u: |
| case DW_OP_const4u: |
| case DW_OP_const8u: |
| case DW_OP_constu: |
| return operand.number; |
| case DW_OP_const1s: |
| case DW_OP_const2s: |
| case DW_OP_const4s: |
| case DW_OP_const8s: |
| case DW_OP_consts: |
| if (static_cast<int64_t>(operand.number) < 0) { |
| // Atom is not an unsigned constant |
| return std::nullopt; |
| } |
| return operand.number; |
| case DW_OP_lit0...DW_OP_lit31: |
| return operand.atom - DW_OP_lit0; |
| default: |
| return std::nullopt; |
| } |
| } |
| |
| struct Expression { |
| const Dwarf_Op& operator[](size_t i) const { |
| return atoms[i]; |
| } |
| |
| Dwarf_Op* atoms = nullptr; |
| size_t length = 0; |
| }; |
| |
| std::optional<Expression> MaybeGetExpression(Dwarf_Attribute& attribute) { |
| Expression result; |
| |
| Check(dwarf_getlocation(&attribute, &result.atoms, &result.length) == |
| kReturnOk) << "dwarf_getlocation returned error"; |
| // If no location attribute is present or has an empty location description, |
| // the variable is present in the source but not in the object code. |
| // So zero length expression is equivalent of no location attribute. |
| if (result.length == 0) { |
| return std::nullopt; |
| } |
| Check(result.atoms != nullptr) |
| << "dwarf_getlocation returned non-empty expression with NULL atoms"; |
| return result; |
| } |
| |
| } // namespace |
| |
| std::vector<CompilationUnit> GetCompilationUnits(Dwarf& dwarf) { |
| std::vector<CompilationUnit> result; |
| Dwarf_Off offset = 0; |
| while (true) { |
| Dwarf_Off next_offset; |
| size_t header_size = 0; |
| Dwarf_Half version = 0; |
| const int return_code = |
| dwarf_next_unit(&dwarf, offset, &next_offset, &header_size, &version, |
| nullptr, nullptr, nullptr, nullptr, nullptr); |
| Check(return_code == kReturnOk || return_code == kReturnNoEntry) |
| << "dwarf_next_unit returned error"; |
| if (return_code == kReturnNoEntry) { |
| break; |
| } |
| result.push_back({version, {}}); |
| Check(dwarf_offdie(&dwarf, offset + header_size, |
| &result.back().entry.die) != nullptr) |
| << "dwarf_offdie returned error"; |
| |
| offset = next_offset; |
| } |
| return result; |
| } |
| |
| std::vector<Entry> Entry::GetChildren() { |
| Entry child; |
| int return_code = dwarf_child(&die, &child.die); |
| Check(return_code == kReturnOk || return_code == kReturnNoEntry) |
| << "dwarf_child returned error"; |
| std::vector<Entry> result; |
| while (return_code == kReturnOk) { |
| result.push_back(child); |
| return_code = dwarf_siblingof(&child.die, &child.die); |
| Check(return_code == kReturnOk || return_code == kReturnNoEntry) |
| << "dwarf_siblingof returned error"; |
| } |
| return result; |
| } |
| |
| int Entry::GetTag() { |
| return dwarf_tag(&die); |
| } |
| |
| Dwarf_Off Entry::GetOffset() { |
| return dwarf_dieoffset(&die); |
| } |
| |
| std::optional<std::string> Entry::MaybeGetString(uint32_t attribute) { |
| std::optional<std::string> result; |
| auto dwarf_attribute = GetAttribute(&die, attribute); |
| if (!dwarf_attribute) { |
| return result; |
| } |
| |
| const char* value = dwarf_formstring(&dwarf_attribute.value()); |
| Check(value != nullptr) << "dwarf_formstring returned error"; |
| result.emplace(value); |
| return result; |
| } |
| |
| std::optional<std::string> Entry::MaybeGetDirectString(uint32_t attribute) { |
| std::optional<std::string> result; |
| auto dwarf_attribute = GetDirectAttribute(&die, attribute); |
| if (!dwarf_attribute) { |
| return result; |
| } |
| |
| const char* value = dwarf_formstring(&dwarf_attribute.value()); |
| Check(value != nullptr) << "dwarf_formstring returned error"; |
| result.emplace(value); |
| return result; |
| } |
| |
| std::optional<uint64_t> Entry::MaybeGetUnsignedConstant(uint32_t attribute) { |
| auto dwarf_attribute = GetAttribute(&die, attribute); |
| if (!dwarf_attribute) { |
| return {}; |
| } |
| |
| uint64_t value; |
| if (dwarf_formudata(&dwarf_attribute.value(), &value) != kReturnOk) { |
| Die() << "dwarf_formudata returned error"; |
| } |
| return value; |
| } |
| |
| uint64_t Entry::MustGetUnsignedConstant(uint32_t attribute) { |
| auto maybe_constant = MaybeGetUnsignedConstant(attribute); |
| if (!maybe_constant) { |
| Die() << "DWARF entry <" << Hex(GetOffset()) << "> with tag " << GetTag() |
| << " is missing attribute " << Hex(attribute); |
| } |
| return maybe_constant.value(); |
| } |
| |
| bool Entry::GetFlag(uint32_t attribute) { |
| bool result = false; |
| auto dwarf_attribute = (attribute == DW_AT_declaration) |
| ? GetDirectAttribute(&die, attribute) |
| : GetAttribute(&die, attribute); |
| if (!dwarf_attribute) { |
| return result; |
| } |
| |
| Check(dwarf_formflag(&dwarf_attribute.value(), &result) == kReturnOk) |
| << "dwarf_formflag returned error"; |
| return result; |
| } |
| |
| std::optional<Entry> Entry::MaybeGetReference(uint32_t attribute) { |
| std::optional<Entry> result; |
| auto dwarf_attribute = GetAttribute(&die, attribute); |
| if (!dwarf_attribute) { |
| return result; |
| } |
| |
| result.emplace(); |
| Check(dwarf_formref_die(&dwarf_attribute.value(), &result->die)) |
| << "dwarf_formref_die returned error"; |
| return result; |
| } |
| |
| namespace { |
| |
| std::optional<Address> GetAddressFromLocation(Dwarf_Attribute& attribute) { |
| const auto expression_opt = MaybeGetExpression(attribute); |
| if (!expression_opt) { |
| return {}; |
| } |
| const Expression& expression = *expression_opt; |
| |
| Dwarf_Attribute result_attribute; |
| if (dwarf_getlocation_attr(&attribute, expression.atoms, &result_attribute) == |
| kReturnOk) { |
| uint64_t address; |
| Check(dwarf_formaddr(&result_attribute, &address) == kReturnOk) |
| << "dwarf_formaddr returned error"; |
| return Address{Address::Kind::ADDRESS, address}; |
| } |
| |
| if (expression.length == 1 && expression[0].atom == DW_OP_addr) { |
| // DW_OP_addr is unsupported by dwarf_getlocation_attr, so we need to |
| // manually extract the address from expression. |
| return Address{Address::Kind::ADDRESS, expression[0].number}; |
| } |
| if (expression.length == 2 && expression[0].atom == DW_OP_addr && |
| expression[1].atom == DW_OP_plus_uconst) { |
| // A rather odd case seen from Clang. |
| return Address{Address::Kind::ADDRESS, |
| expression[0].number + expression[1].number}; |
| } |
| |
| // TLS operation has different encodings in Clang and GCC: |
| // * Clang 14 uses DW_OP_GNU_push_tls_address |
| // * GCC 12 uses DW_OP_form_tls_address |
| if (expression.length == 2 && |
| (expression[1].atom == DW_OP_GNU_push_tls_address || |
| expression[1].atom == DW_OP_form_tls_address)) { |
| // TLS symbols address may be incorrect because of unsupported |
| // relocations. Resetting it to zero the same way as it is done in |
| // elf::Reader::MaybeAddTypeInfo. |
| // TODO: match TLS variables by address |
| return Address{Address::Kind::TLS, 0}; |
| } |
| |
| Die() << "Unsupported data location expression"; |
| } |
| |
| } // namespace |
| |
| std::optional<Address> Entry::MaybeGetAddress(uint32_t attribute) { |
| auto dwarf_attribute = GetAttribute(&die, attribute); |
| if (!dwarf_attribute) { |
| return {}; |
| } |
| if (attribute == DW_AT_location) { |
| return GetAddressFromLocation(*dwarf_attribute); |
| } |
| |
| uint64_t address; |
| Check(dwarf_formaddr(&dwarf_attribute.value(), &address) == kReturnOk) |
| << "dwarf_formaddr returned error"; |
| return Address{Address::Kind::ADDRESS, address}; |
| } |
| |
| std::optional<uint64_t> Entry::MaybeGetMemberByteOffset() { |
| auto attribute = GetAttribute(&die, DW_AT_data_member_location); |
| if (!attribute) { |
| return {}; |
| } |
| |
| uint64_t offset; |
| // Try to interpret attribute as an unsigned integer constant |
| if (dwarf_formudata(&attribute.value(), &offset) == kReturnOk) { |
| return offset; |
| } |
| |
| // Parse location expression |
| const auto expression_opt = MaybeGetExpression(attribute.value()); |
| if (!expression_opt) { |
| return {}; |
| } |
| const Expression& expression = *expression_opt; |
| |
| // Parse virtual base classes offset, which looks like this: |
| // [0] = DW_OP_dup |
| // [1] = DW_OP_deref |
| // [2] = constant operand |
| // [3] = DW_OP_minus |
| // [4] = DW_OP_deref |
| // [5] = DW_OP_plus |
| // This form is not in the standard, but hardcoded in compilers: |
| // * https://github.com/llvm/llvm-project/blob/release/17.x/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp#L1611 |
| // * https://github.com/gcc-mirror/gcc/blob/releases/gcc-13/gcc/dwarf2out.cc#L20029 |
| if (expression.length == 6 && |
| expression[0].atom == DW_OP_dup && |
| expression[1].atom == DW_OP_deref && |
| expression[3].atom == DW_OP_minus && |
| expression[4].atom == DW_OP_deref && |
| expression[5].atom == DW_OP_plus) { |
| const auto byte_offset = MaybeGetUnsignedOperand(expression[2]); |
| if (byte_offset) { |
| return byte_offset; |
| } |
| } |
| |
| Die() << "Unsupported member offset expression, " << Hex(GetOffset()); |
| } |
| |
| std::optional<uint64_t> Entry::MaybeGetVtableOffset() { |
| auto attribute = GetAttribute(&die, DW_AT_vtable_elem_location); |
| if (!attribute) { |
| return {}; |
| } |
| |
| // Parse location expression |
| const auto expression_opt = MaybeGetExpression(attribute.value()); |
| if (!expression_opt) { |
| return {}; |
| } |
| const Expression& expression = *expression_opt; |
| |
| // We expect compilers to produce expression with one constant operand |
| if (expression.length == 1) { |
| const auto offset = MaybeGetUnsignedOperand(expression[0]); |
| if (offset) { |
| return offset; |
| } |
| } |
| |
| Die() << "Unsupported vtable offset expression, " << Hex(GetOffset()); |
| } |
| |
| std::optional<uint64_t> Entry::MaybeGetCount() { |
| auto lower_bound_attribute = MaybeGetUnsignedConstant(DW_AT_lower_bound); |
| if (lower_bound_attribute && *lower_bound_attribute != 0) { |
| Die() << "Non-zero DW_AT_lower_bound is not supported"; |
| } |
| auto upper_bound_attribute = GetAttribute(&die, DW_AT_upper_bound); |
| auto count_attribute = GetAttribute(&die, DW_AT_count); |
| if (!upper_bound_attribute && !count_attribute) { |
| return {}; |
| } |
| if (upper_bound_attribute && count_attribute) { |
| Die() << "Both DW_AT_upper_bound and DW_AT_count given"; |
| } |
| Dwarf_Attribute dwarf_attribute; |
| uint64_t addend; |
| if (upper_bound_attribute) { |
| dwarf_attribute = *upper_bound_attribute; |
| addend = 1; |
| } else { |
| dwarf_attribute = *count_attribute; |
| addend = 0; |
| } |
| |
| uint64_t value; |
| if (dwarf_formudata(&dwarf_attribute, &value) == kReturnOk) { |
| return value + addend; |
| } |
| |
| // Don't fail if attribute is not a constant and treat this as no count |
| // provided. This can happen if array has variable length. |
| // TODO: implement clean solution for separating "not a |
| // constant" errors from other errors. |
| return {}; |
| } |
| |
| Files::Files(Entry& compilation_unit) { |
| if (dwarf_getsrcfiles(&compilation_unit.die, &files_, &files_count_) != |
| kReturnOk) { |
| Die() << "No source file information in DWARF"; |
| } |
| } |
| |
| std::optional<std::string> Files::MaybeGetFile(Entry& entry, |
| uint32_t attribute) const { |
| auto file_index = entry.MaybeGetUnsignedConstant(attribute); |
| if (!file_index) { |
| return std::nullopt; |
| } |
| Check(files_ != nullptr) << "dwarf::Files was not initialised"; |
| if (*file_index >= files_count_) { |
| Die() << "File index is greater than or equal files count (" << *file_index |
| << " >= " << files_count_ << ")"; |
| } |
| const char* result = dwarf_filesrc(files_, *file_index, nullptr, nullptr); |
| Check(result != nullptr) << "dwarf_filesrc returned error"; |
| return result; |
| } |
| |
| } // namespace dwarf |
| } // namespace stg |