blob: 77497b3112a6091f7e30cd3905a21e5e490c2d40 [file] [log] [blame]
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
// -*- mode: C++ -*-
//
// Copyright 2022-2023 Google LLC
//
// Licensed under the Apache License v2.0 with LLVM Exceptions (the
// "License"); you may not use this file except in compliance with the
// License. You may obtain a copy of the License at
//
// https://llvm.org/LICENSE.txt
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Author: Aleksei Vetrov
#include "dwarf_processor.h"
#include <dwarf.h>
#include <ios>
#include <iostream>
#include <optional>
#include <sstream>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>
#include "dwarf_wrappers.h"
#include "error.h"
#include "graph.h"
namespace stg {
namespace dwarf {
namespace {
// Builds the label used in diagnostics to identify "entry": the fixed text
// "DWARF entry <0x...>" with the value of entry.GetOffset() written in
// hexadecimal between the angle brackets.
std::string EntryToString(Entry& entry) {
  std::ostringstream label;
  label << "DWARF entry <0x";
  label << std::hex << entry.GetOffset();
  label << ">";
  return label.str();
}
// Performs the DW_AT_name string lookup on "entry" and hands back its result
// unchanged; the optional is empty whenever the lookup produced no string.
std::optional<std::string> MaybeGetName(Entry& entry) {
  std::optional<std::string> name = entry.MaybeGetString(DW_AT_name);
  return name;
}
// Returns the DW_AT_name of "entry". A name is mandatory here: when
// MaybeGetName() yields nothing, the function reports the offending entry
// through Die() instead of returning.
std::string GetName(Entry& entry) {
  std::optional<std::string> name = MaybeGetName(entry);
  if (name) {
    // Move the string out of the optional rather than copying it.
    return std::move(*name);
  }
  Die() << "Name was not found for " << EntryToString(entry);
}
// Returns the DW_AT_name of "entry", or an empty string when MaybeGetName()
// yields nothing (e.g. for anonymous entities).
std::string GetNameOrEmpty(Entry& entry) {
  // value_or() on the temporary optional moves the contained string out.
  return MaybeGetName(entry).value_or(std::string());
}
// Returns the size of "entry" in bits.
//
// DW_AT_byte_size is consulted first and, when it yields a value, is
// converted to bits. Only otherwise is DW_AT_bit_size consulted and returned
// as is. If neither attribute yields a value the function reports the entry
// through Die() instead of returning.
size_t GetBitSize(Entry& entry) {
  const auto size_in_bytes = entry.MaybeGetUnsignedConstant(DW_AT_byte_size);
  if (size_in_bytes) {
    return *size_in_bytes * 8;
  }
  const auto size_in_bits = entry.MaybeGetUnsignedConstant(DW_AT_bit_size);
  if (size_in_bits) {
    return *size_in_bits;
  }
  Die() << "Bit size was not found for " << EntryToString(entry);
}
// Returns the size of "entry" in bytes.
//
// DW_AT_byte_size is consulted first and returned as is when it yields a
// value. Only otherwise is DW_AT_bit_size consulted; it is rounded up to the
// smallest whole number of bytes able to hold that many bits. If neither
// attribute yields a value the function reports the entry through Die()
// instead of returning.
size_t GetByteSize(Entry& entry) {
  const auto size_in_bytes = entry.MaybeGetUnsignedConstant(DW_AT_byte_size);
  if (size_in_bytes) {
    return *size_in_bytes;
  }
  const auto size_in_bits = entry.MaybeGetUnsignedConstant(DW_AT_bit_size);
  if (size_in_bits) {
    // Ceiling division: 1..8 bits -> 1 byte, 9..16 bits -> 2 bytes, ...
    return (*size_in_bits + 7) / 8;
  }
  Die() << "Byte size was not found for " << EntryToString(entry);
}
// Translates the DW_AT_encoding attribute of "entry" into the matching
// Primitive::Encoding value.
//
// Reports the entry through Die() (and does not return) when the attribute
// yields no value or when its value is not one of the DW_ATE_* constants
// handled below.
Primitive::Encoding GetEncoding(Entry& entry) {
  const auto encoding = entry.MaybeGetUnsignedConstant(DW_AT_encoding);
  if (!encoding) {
    Die() << "Encoding was not found for " << EntryToString(entry);
  }
  const auto code = *encoding;
  switch (code) {
    // Integers.
    case DW_ATE_signed:
      return Primitive::Encoding::SIGNED_INTEGER;
    case DW_ATE_unsigned:
      return Primitive::Encoding::UNSIGNED_INTEGER;
    // Characters.
    case DW_ATE_signed_char:
      return Primitive::Encoding::SIGNED_CHARACTER;
    case DW_ATE_unsigned_char:
      return Primitive::Encoding::UNSIGNED_CHARACTER;
    case DW_ATE_UTF:
      return Primitive::Encoding::UTF;
    // Floating point.
    case DW_ATE_float:
      return Primitive::Encoding::REAL_NUMBER;
    case DW_ATE_complex_float:
      return Primitive::Encoding::COMPLEX_NUMBER;
    // Everything else.
    case DW_ATE_boolean:
      return Primitive::Encoding::BOOLEAN;
    default:
      break;
  }
  // Only reached for a DW_ATE_* value without a case above.
  Die() << "Unknown encoding 0x" << std::hex << code << " for "
        << EntryToString(entry);
}
// Performs the DW_AT_type reference lookup on "entry" and hands back its
// result unchanged; the optional is empty whenever the lookup produced no
// referenced entry.
std::optional<Entry> MaybeGetReferredType(Entry& entry) {
  std::optional<Entry> referred = entry.MaybeGetReference(DW_AT_type);
  return referred;
}
// Returns the entry referenced by the DW_AT_type attribute of "entry". The
// reference is mandatory here: when MaybeGetReferredType() yields nothing,
// the function reports the offending entry through Die() instead of
// returning.
Entry GetReferredType(Entry& entry) {
  std::optional<Entry> referred = MaybeGetReferredType(entry);
  if (referred) {
    return std::move(*referred);
  }
  Die() << "Type reference was not found in " << EntryToString(entry);
}
// Returns the number of elements described by one array dimension entry.
//
// The DWARF standard allows a dimension to be described by either a
// DW_TAG_subrange_type or a DW_TAG_enumeration_type entry; only
// DW_TAG_subrange_type is supported here and anything else fails the check.
// A DW_AT_lower_bound, when it yields a value, must be zero.
//
// The count is taken from DW_AT_upper_bound (plus one) or from DW_AT_count.
// Having both is rejected through Die(). Having neither means the size of
// the dimension is unknown, which is reported as 0.
size_t GetNumberOfElements(Entry& entry) {
  Check(entry.GetTag() == DW_TAG_subrange_type)
      << "Array's dimensions should be an entry of DW_TAG_subrange_type";

  const std::optional<size_t> lower_bound =
      entry.MaybeGetUnsignedConstant(DW_AT_lower_bound);
  // A missing lower bound is treated the same as an explicit zero.
  Check(lower_bound.value_or(0) == 0)
      << "Non-zero DW_AT_lower_bound is not supported";

  const std::optional<size_t> upper_bound =
      entry.MaybeGetUnsignedConstant(DW_AT_upper_bound);
  const std::optional<size_t> count =
      entry.MaybeGetUnsignedConstant(DW_AT_count);

  if (upper_bound && count) {
    Die() << "Both DW_AT_upper_bound and DW_AT_count given";
  }
  if (upper_bound) {
    // The upper bound is the index of the last element.
    return *upper_bound + 1;
  }
  // Without DW_AT_count and DW_AT_upper_bound attributes the size of the
  // subrange is unknown.
  return count.value_or(0);
}
} // namespace
// Transforms DWARF entries to STG.
class Processor {
public:
Processor(Graph& graph, Id void_id, Types& result)
: graph_(graph), void_id_(void_id), result_(result) {}
void Process(Entry& entry) {
++result_.processed_entries;
auto tag = entry.GetTag();
switch (tag) {
case DW_TAG_array_type:
ProcessArray(entry);
break;
case DW_TAG_class_type:
ProcessStructUnion(entry, StructUnion::Kind::CLASS);
break;
case DW_TAG_structure_type:
ProcessStructUnion(entry, StructUnion::Kind::STRUCT);
break;
case DW_TAG_union_type:
ProcessStructUnion(entry, StructUnion::Kind::UNION);
break;
case DW_TAG_member:
ProcessMember(entry);
break;
case DW_TAG_pointer_type:
ProcessReference<PointerReference>(
entry, PointerReference::Kind::POINTER);
break;
case DW_TAG_reference_type:
ProcessReference<PointerReference>(
entry, PointerReference::Kind::LVALUE_REFERENCE);
break;
case DW_TAG_rvalue_reference_type:
ProcessReference<PointerReference>(
entry, PointerReference::Kind::RVALUE_REFERENCE);
break;
case DW_TAG_compile_unit:
ProcessCompileUnit(entry);
break;
case DW_TAG_typedef:
ProcessTypedef(entry);
break;
case DW_TAG_base_type:
ProcessBaseType(entry);
break;
case DW_TAG_const_type:
ProcessReference<Qualified>(entry, Qualifier::CONST);
break;
case DW_TAG_volatile_type:
ProcessReference<Qualified>(entry, Qualifier::VOLATILE);
break;
case DW_TAG_restrict_type:
ProcessReference<Qualified>(entry, Qualifier::RESTRICT);
break;
default:
// TODO: die on unexpected tag, when this switch contains
// all expected tags
break;
}
}
std::vector<Id> GetUnresolvedIds() {
std::vector<Id> result;
for (const auto& [offset, id] : id_map_) {
if (!graph_.Is(id)) {
result.push_back(id);
}
}
return result;
}
private:
void ProcessAllChildren(Entry& entry) {
for (auto& child : entry.GetChildren()) {
Process(child);
}
}
void CheckNoChildren(Entry& entry) {
if (!entry.GetChildren().empty()) {
Die() << "Entry expected to have no children";
}
}
void ProcessCompileUnit(Entry& entry) {
ProcessAllChildren(entry);
}
void ProcessBaseType(Entry& entry) {
CheckNoChildren(entry);
auto type_name = GetName(entry);
size_t bit_size = GetBitSize(entry);
// Round up bit_size / 8 to get minimal needed storage size in bytes.
size_t byte_size = (bit_size + 7) / 8;
AddProcessedNode<Primitive>(entry, type_name, GetEncoding(entry), bit_size,
byte_size);
}
void ProcessTypedef(Entry& entry) {
std::string type_name = GetName(entry);
auto referred_type_id = GetIdForReferredType(MaybeGetReferredType(entry));
AddProcessedNode<Typedef>(entry, std::move(type_name), referred_type_id);
}
template<typename Node, typename KindType>
void ProcessReference(Entry& entry, KindType kind) {
auto referred_type_id = GetIdForReferredType(MaybeGetReferredType(entry));
AddProcessedNode<Node>(entry, kind, referred_type_id);
}
void ProcessStructUnion(Entry& entry, StructUnion::Kind kind) {
// TODO: add scoping
std::string name = GetNameOrEmpty(entry);
if (entry.GetFlag(DW_AT_declaration)) {
// It is expected to have only name and no children in declaration.
// However, it is not guaranteed and we should do something if we find an
// example.
CheckNoChildren(entry);
AddProcessedNode<StructUnion>(entry, kind, std::move(name));
return;
}
auto byte_size = GetByteSize(entry);
std::vector<Id> members;
for (auto& child : entry.GetChildren()) {
auto child_tag = child.GetTag();
// All possible children of struct/class/union
switch (child_tag) {
case DW_TAG_member:
members.push_back(GetIdForEntry(child));
break;
case DW_TAG_subprogram:
// TODO: process methods
break;
case DW_TAG_inheritance:
// TODO: process base classes
break;
case DW_TAG_structure_type:
case DW_TAG_class_type:
case DW_TAG_union_type:
case DW_TAG_enumeration_type:
case DW_TAG_typedef:
case DW_TAG_variable:
break;
default:
Die() << "Unexpected tag for child of struct/class/union: 0x"
<< std::hex << child_tag;
}
Process(child);
}
// TODO: support base classes
// TODO: support methods
AddProcessedNode<StructUnion>(entry, kind, std::move(name), byte_size,
/* base_classes = */ std::vector<Id>{},
/* methods = */ std::vector<Id>{},
std::move(members));
}
void ProcessMember(Entry& entry) {
std::string name = GetNameOrEmpty(entry);
auto referred_type = GetReferredType(entry);
auto referred_type_id = GetIdForEntry(referred_type);
// TODO: support offset and bitsize from DWARF
AddProcessedNode<Member>(entry, std::move(name), referred_type_id,
/* offset = */ 0,
/* bitsize = */ 0);
}
void ProcessArray(Entry& entry) {
auto referred_type = GetReferredType(entry);
auto referred_type_id = GetIdForEntry(referred_type);
auto children = entry.GetChildren();
// Multiple children in array describe multiple dimensions of this array.
// For example, int[M][N] contains two children, M located in the first
// child, N located in the second child. But in STG multidimensional arrays
// are represented as chain of arrays: int[M][N] is array[M] of array[N] of
// int.
//
// We need to chain children as types together in reversed order.
// "referred_type_id" is updated every time to contain the top element in
// the chain. Rightmost chldren refers to the original "referred_type_id".
for (auto it = children.rbegin(); it != children.rend(); ++it) {
auto& child = *it;
// All subarrays except the first (last in the reversed order) are
// attached to the corresponding child. First subarray (last in the
// reversed order) is attached to the original entry itself.
auto& entry_to_attach = (it + 1 == children.rend()) ? entry : child;
// Update referred_type_id so next array in chain points there.
referred_type_id = AddProcessedNode<Array>(
entry_to_attach, GetNumberOfElements(child), referred_type_id);
}
}
// Allocate or get already allocated STG Id for Entry.
Id GetIdForEntry(Entry& entry) {
const auto offset = entry.GetOffset();
const auto [it, emplaced] = id_map_.emplace(offset, Id(-1));
if (emplaced) {
it->second = graph_.Allocate();
}
return it->second;
}
// Same as GetIdForEntry, but returns "void_id_" for "unspecified" references,
// because it is normal for DWARF (5.2 Unspecified Type Entries).
Id GetIdForReferredType(std::optional<Entry> referred_type) {
return referred_type ? GetIdForEntry(*referred_type) : void_id_;
}
// Populate Id from method above with processed Node.
template <typename Node, typename... Args>
Id AddProcessedNode(Entry& entry, Args&&... args) {
auto id = GetIdForEntry(entry);
graph_.Set<Node>(id, std::forward<Args>(args)...);
result_.all_ids.push_back(id);
return id;
}
Graph& graph_;
Id void_id_;
Types& result_;
std::unordered_map<Dwarf_Off, Id> id_map_;
};
// Converts the given DWARF entries into nodes of "graph" and returns the
// collected Types.
//
// A Void node is added to the graph first; its id is handed to the Processor
// for references without a type. The entries are then processed in order.
// Finally, every id the processor reports as unresolved is set to a Variadic
// node as a stop-gap.
Types ProcessEntries(std::vector<Entry> entries, Graph& graph) {
  Types types;
  const Id void_id = graph.Add<Void>();
  Processor processor(graph, void_id, types);

  for (Entry& entry : entries) {
    processor.Process(entry);
  }

  // TODO: replace with "Die"
  const std::vector<Id> unresolved = processor.GetUnresolvedIds();
  for (const Id& id : unresolved) {
    graph.Set<Variadic>(id);
  }
  return types;
}
// Entry point: fetches the compilation units from "dwarf" and converts them
// into nodes of "graph" via ProcessEntries().
Types Process(Handler& dwarf, Graph& graph) {
  std::vector<Entry> compilation_units = dwarf.GetCompilationUnits();
  return ProcessEntries(std::move(compilation_units), graph);
}
} // namespace dwarf
} // namespace stg