blob: 1a3b89cbc78f4b0f39d048e57a787af78cc5b9b4 [file] [log] [blame]
/*
* Copyright (C) 2018 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "dexanalyze_experiments.h"
#include <stdint.h>
#include <inttypes.h>
#include <iostream>
#include <map>
#include <vector>
#include "android-base/stringprintf.h"
#include "dex/class_accessor-inl.h"
#include "dex/class_iterator.h"
#include "dex/code_item_accessors-inl.h"
#include "dex/dex_instruction-inl.h"
#include "dex/standard_dex_file.h"
#include "dex/utf-inl.h"
namespace art {
static inline bool IsRange(Instruction::Code code) {
return code == Instruction::INVOKE_VIRTUAL_RANGE ||
code == Instruction::INVOKE_DIRECT_RANGE ||
code == Instruction::INVOKE_SUPER_RANGE ||
code == Instruction::INVOKE_STATIC_RANGE ||
code == Instruction::INVOKE_INTERFACE_RANGE;
}
static inline uint16_t NumberOfArgs(const Instruction& inst) {
return IsRange(inst.Opcode()) ? inst.VRegA_3rc() : inst.VRegA_35c();
}
static inline uint16_t DexMethodIndex(const Instruction& inst) {
return IsRange(inst.Opcode()) ? inst.VRegB_3rc() : inst.VRegB_35c();
}
std::string Percent(uint64_t value, uint64_t max) {
if (max == 0) {
return "0";
}
return android::base::StringPrintf(
"%" PRId64 "(%.2f%%)",
value,
static_cast<double>(value * 100) / static_cast<double>(max));
}
std::string PercentDivide(uint64_t value, uint64_t max) {
if (max == 0) {
return "0";
}
return android::base::StringPrintf(
"%" PRId64 "/%" PRId64 "(%.2f%%)",
value,
max,
static_cast<double>(value * 100) / static_cast<double>(max));
}
static size_t PrefixLen(const std::string& a, const std::string& b) {
size_t len = 0;
for (; len < a.length() && len < b.length() && a[len] == b[len]; ++len) {}
return len;
}
void AnalyzeDebugInfo::ProcessDexFile(const DexFile& dex_file) {
std::set<const uint8_t*> seen;
std::vector<size_t> counts(256, 0u);
std::vector<size_t> opcode_counts(256, 0u);
std::set<std::vector<uint8_t>> unique_non_header;
for (ClassAccessor accessor : dex_file.GetClasses()) {
for (const ClassAccessor::Method& method : accessor.GetMethods()) {
CodeItemDebugInfoAccessor code_item(dex_file, method.GetCodeItem(), method.GetIndex());
const uint8_t* debug_info = dex_file.GetDebugInfoStream(code_item.DebugInfoOffset());
if (debug_info != nullptr && seen.insert(debug_info).second) {
const uint8_t* stream = debug_info;
DecodeUnsignedLeb128(&stream); // line_start
uint32_t parameters_size = DecodeUnsignedLeb128(&stream);
for (uint32_t i = 0; i < parameters_size; ++i) {
DecodeUnsignedLeb128P1(&stream); // Parameter name.
}
bool done = false;
const uint8_t* after_header_start = stream;
while (!done) {
const uint8_t* const op_start = stream;
uint8_t opcode = *stream++;
++opcode_counts[opcode];
++total_opcode_bytes_;
switch (opcode) {
case DexFile::DBG_END_SEQUENCE:
++total_end_seq_bytes_;
done = true;
break;
case DexFile::DBG_ADVANCE_PC:
DecodeUnsignedLeb128(&stream); // addr_diff
total_advance_pc_bytes_ += stream - op_start;
break;
case DexFile::DBG_ADVANCE_LINE:
DecodeSignedLeb128(&stream); // line_diff
total_advance_line_bytes_ += stream - op_start;
break;
case DexFile::DBG_START_LOCAL:
DecodeUnsignedLeb128(&stream); // register_num
DecodeUnsignedLeb128P1(&stream); // name_idx
DecodeUnsignedLeb128P1(&stream); // type_idx
total_start_local_bytes_ += stream - op_start;
break;
case DexFile::DBG_START_LOCAL_EXTENDED:
DecodeUnsignedLeb128(&stream); // register_num
DecodeUnsignedLeb128P1(&stream); // name_idx
DecodeUnsignedLeb128P1(&stream); // type_idx
DecodeUnsignedLeb128P1(&stream); // sig_idx
total_start_local_extended_bytes_ += stream - op_start;
break;
case DexFile::DBG_END_LOCAL:
DecodeUnsignedLeb128(&stream); // register_num
total_end_local_bytes_ += stream - op_start;
break;
case DexFile::DBG_RESTART_LOCAL:
DecodeUnsignedLeb128(&stream); // register_num
total_restart_local_bytes_ += stream - op_start;
break;
case DexFile::DBG_SET_PROLOGUE_END:
case DexFile::DBG_SET_EPILOGUE_BEGIN:
total_epilogue_bytes_ += stream - op_start;
break;
case DexFile::DBG_SET_FILE: {
DecodeUnsignedLeb128P1(&stream); // name_idx
total_set_file_bytes_ += stream - op_start;
break;
}
default: {
total_other_bytes_ += stream - op_start;
break;
}
}
}
const size_t bytes = stream - debug_info;
total_bytes_ += bytes;
total_non_header_bytes_ += stream - after_header_start;
if (unique_non_header.insert(std::vector<uint8_t>(after_header_start, stream)).second) {
total_unique_non_header_bytes_ += stream - after_header_start;
}
for (size_t i = 0; i < bytes; ++i) {
++counts[debug_info[i]];
}
}
}
}
auto calc_entropy = [](std::vector<size_t> data) {
size_t total = std::accumulate(data.begin(), data.end(), 0u);
double avg_entropy = 0.0;
for (size_t c : data) {
if (c > 0) {
double ratio = static_cast<double>(c) / static_cast<double>(total);
avg_entropy -= ratio * log(ratio) / log(256.0);
}
}
return avg_entropy * total;
};
total_entropy_ += calc_entropy(counts);
total_opcode_entropy_ += calc_entropy(opcode_counts);
}
void AnalyzeDebugInfo::Dump(std::ostream& os, uint64_t total_size) const {
os << "Debug info bytes " << Percent(total_bytes_, total_size) << "\n";
os << " DBG_END_SEQUENCE: " << Percent(total_end_seq_bytes_, total_size) << "\n";
os << " DBG_ADVANCE_PC: " << Percent(total_advance_pc_bytes_, total_size) << "\n";
os << " DBG_ADVANCE_LINE: " << Percent(total_advance_line_bytes_, total_size) << "\n";
os << " DBG_START_LOCAL: " << Percent(total_start_local_bytes_, total_size) << "\n";
os << " DBG_START_LOCAL_EXTENDED: "
<< Percent(total_start_local_extended_bytes_, total_size) << "\n";
os << " DBG_END_LOCAL: " << Percent(total_end_local_bytes_, total_size) << "\n";
os << " DBG_RESTART_LOCAL: " << Percent(total_restart_local_bytes_, total_size) << "\n";
os << " DBG_SET_PROLOGUE bytes " << Percent(total_epilogue_bytes_, total_size) << "\n";
os << " DBG_SET_FILE bytes " << Percent(total_set_file_bytes_, total_size) << "\n";
os << " special: "
<< Percent(total_other_bytes_, total_size) << "\n";
os << "Debug info entropy " << Percent(total_entropy_, total_size) << "\n";
os << "Debug info opcode bytes " << Percent(total_opcode_bytes_, total_size) << "\n";
os << "Debug info opcode entropy " << Percent(total_opcode_entropy_, total_size) << "\n";
os << "Debug info non header bytes " << Percent(total_non_header_bytes_, total_size) << "\n";
os << "Debug info deduped non header bytes "
<< Percent(total_unique_non_header_bytes_, total_size) << "\n";
}
void AnalyzeStrings::ProcessDexFile(const DexFile& dex_file) {
std::vector<std::string> strings;
for (size_t i = 0; i < dex_file.NumStringIds(); ++i) {
uint32_t length = 0;
const char* data = dex_file.StringDataAndUtf16LengthByIdx(dex::StringIndex(i), &length);
// Analyze if the string has any UTF16 chars.
bool have_wide_char = false;
const char* ptr = data;
for (size_t j = 0; j < length; ++j) {
have_wide_char = have_wide_char || GetUtf16FromUtf8(&ptr) >= 0x100;
}
if (have_wide_char) {
wide_string_bytes_ += 2 * length;
} else {
ascii_string_bytes_ += length;
}
string_data_bytes_ += ptr - data;
strings.push_back(data);
}
// Note that the strings are probably already sorted.
std::sort(strings.begin(), strings.end());
// Tunable parameters.
static const size_t kMinPrefixLen = 3;
static const size_t kPrefixConstantCost = 5;
static const size_t kPrefixIndexCost = 2;
// Calculate total shared prefix.
std::vector<size_t> shared_len;
std::set<std::string> prefixes;
for (size_t i = 0; i < strings.size(); ++i) {
size_t best_len = 0;
if (i > 0) {
best_len = std::max(best_len, PrefixLen(strings[i], strings[i - 1]));
}
if (i < strings.size() - 1) {
best_len = std::max(best_len, PrefixLen(strings[i], strings[i + 1]));
}
std::string prefix;
if (best_len >= kMinPrefixLen) {
prefix = strings[i].substr(0, best_len);
prefixes.insert(prefix);
total_prefix_savings_ += prefix.length();
}
total_prefix_index_cost_ += kPrefixIndexCost;
}
total_num_prefixes_ += prefixes.size();
for (const std::string& s : prefixes) {
// 4 bytes for an offset, one for length.
total_prefix_dict_ += s.length();
total_prefix_table_ += kPrefixConstantCost;
}
}
void AnalyzeStrings::Dump(std::ostream& os, uint64_t total_size) const {
os << "Total string data bytes " << Percent(string_data_bytes_, total_size) << "\n";
os << "UTF-16 string data bytes " << Percent(wide_string_bytes_, total_size) << "\n";
os << "ASCII string data bytes " << Percent(ascii_string_bytes_, total_size) << "\n";
// Prefix based strings.
os << "Total shared prefix bytes " << Percent(total_prefix_savings_, total_size) << "\n";
os << "Prefix dictionary cost " << Percent(total_prefix_dict_, total_size) << "\n";
os << "Prefix table cost " << Percent(total_prefix_table_, total_size) << "\n";
os << "Prefix index cost " << Percent(total_prefix_index_cost_, total_size) << "\n";
int64_t net_savings = total_prefix_savings_;
net_savings -= total_prefix_dict_;
net_savings -= total_prefix_table_;
net_savings -= total_prefix_index_cost_;
os << "Prefix net savings " << Percent(net_savings, total_size) << "\n";
os << "Prefix dictionary elements " << total_num_prefixes_ << "\n";
}
void CountDexIndices::ProcessDexFile(const DexFile& dex_file) {
num_string_ids_ += dex_file.NumStringIds();
num_method_ids_ += dex_file.NumMethodIds();
num_field_ids_ += dex_file.NumFieldIds();
num_type_ids_ += dex_file.NumTypeIds();
num_class_defs_ += dex_file.NumClassDefs();
std::set<size_t> unique_code_items;
for (ClassAccessor accessor : dex_file.GetClasses()) {
std::set<size_t> unique_method_ids;
std::set<size_t> unique_string_ids;
for (const ClassAccessor::Method& method : accessor.GetMethods()) {
dex_code_bytes_ += method.GetInstructions().InsnsSizeInBytes();
unique_code_items.insert(method.GetCodeItemOffset());
for (const DexInstructionPcPair& inst : method.GetInstructions()) {
switch (inst->Opcode()) {
case Instruction::CONST_STRING: {
const dex::StringIndex string_index(inst->VRegB_21c());
unique_string_ids.insert(string_index.index_);
++num_string_ids_from_code_;
break;
}
case Instruction::CONST_STRING_JUMBO: {
const dex::StringIndex string_index(inst->VRegB_31c());
unique_string_ids.insert(string_index.index_);
++num_string_ids_from_code_;
break;
}
// Invoke cases.
case Instruction::INVOKE_VIRTUAL:
case Instruction::INVOKE_VIRTUAL_RANGE: {
uint32_t method_idx = DexMethodIndex(inst.Inst());
if (dex_file.GetMethodId(method_idx).class_idx_ == accessor.GetClassIdx()) {
++same_class_virtual_;
}
++total_virtual_;
unique_method_ids.insert(method_idx);
break;
}
case Instruction::INVOKE_DIRECT:
case Instruction::INVOKE_DIRECT_RANGE: {
uint32_t method_idx = DexMethodIndex(inst.Inst());
if (dex_file.GetMethodId(method_idx).class_idx_ == accessor.GetClassIdx()) {
++same_class_direct_;
}
++total_direct_;
unique_method_ids.insert(method_idx);
break;
}
case Instruction::INVOKE_STATIC:
case Instruction::INVOKE_STATIC_RANGE: {
uint32_t method_idx = DexMethodIndex(inst.Inst());
if (dex_file.GetMethodId(method_idx).class_idx_ == accessor.GetClassIdx()) {
++same_class_static_;
}
++total_static_;
unique_method_ids.insert(method_idx);
break;
}
case Instruction::INVOKE_INTERFACE:
case Instruction::INVOKE_INTERFACE_RANGE: {
uint32_t method_idx = DexMethodIndex(inst.Inst());
if (dex_file.GetMethodId(method_idx).class_idx_ == accessor.GetClassIdx()) {
++same_class_interface_;
}
++total_interface_;
unique_method_ids.insert(method_idx);
break;
}
case Instruction::INVOKE_SUPER:
case Instruction::INVOKE_SUPER_RANGE: {
uint32_t method_idx = DexMethodIndex(inst.Inst());
if (dex_file.GetMethodId(method_idx).class_idx_ == accessor.GetClassIdx()) {
++same_class_super_;
}
++total_super_;
unique_method_ids.insert(method_idx);
break;
}
default:
break;
}
}
}
total_unique_method_idx_ += unique_method_ids.size();
total_unique_string_ids_ += unique_string_ids.size();
}
total_unique_code_items_ += unique_code_items.size();
}
void CountDexIndices::Dump(std::ostream& os, uint64_t total_size) const {
os << "Num string ids: " << num_string_ids_ << "\n";
os << "Num method ids: " << num_method_ids_ << "\n";
os << "Num field ids: " << num_field_ids_ << "\n";
os << "Num type ids: " << num_type_ids_ << "\n";
os << "Num class defs: " << num_class_defs_ << "\n";
os << "Direct same class: " << PercentDivide(same_class_direct_, total_direct_) << "\n";
os << "Virtual same class: " << PercentDivide(same_class_virtual_, total_virtual_) << "\n";
os << "Static same class: " << PercentDivide(same_class_static_, total_static_) << "\n";
os << "Interface same class: " << PercentDivide(same_class_interface_, total_interface_) << "\n";
os << "Super same class: " << PercentDivide(same_class_super_, total_super_) << "\n";
os << "Num strings accessed from code: " << num_string_ids_from_code_ << "\n";
os << "Unique(per class) method ids accessed from code: " << total_unique_method_idx_ << "\n";
os << "Unique(per class) string ids accessed from code: " << total_unique_string_ids_ << "\n";
const size_t same_class_total =
same_class_direct_ +
same_class_virtual_ +
same_class_static_ +
same_class_interface_ +
same_class_super_;
const size_t other_class_total =
total_direct_ +
total_virtual_ +
total_static_ +
total_interface_ +
total_super_;
os << "Same class invokes: " << PercentDivide(same_class_total, other_class_total) << "\n";
os << "Invokes from code: " << (same_class_total + other_class_total) << "\n";
os << "Total Dex code bytes: " << Percent(dex_code_bytes_, total_size) << "\n";
os << "Total unique code items: " << total_unique_code_items_ << "\n";
os << "Total Dex size: " << total_size << "\n";
}
void CodeMetrics::ProcessDexFile(const DexFile& dex_file) {
for (ClassAccessor accessor : dex_file.GetClasses()) {
for (const ClassAccessor::Method& method : accessor.GetMethods()) {
bool space_for_out_arg = false;
for (const DexInstructionPcPair& inst : method.GetInstructions()) {
switch (inst->Opcode()) {
case Instruction::INVOKE_VIRTUAL:
case Instruction::INVOKE_DIRECT:
case Instruction::INVOKE_SUPER:
case Instruction::INVOKE_INTERFACE:
case Instruction::INVOKE_STATIC: {
const uint32_t args = NumberOfArgs(inst.Inst());
CHECK_LT(args, kMaxArgCount);
++arg_counts_[args];
space_for_out_arg = args < kMaxArgCount - 1;
break;
}
case Instruction::MOVE_RESULT:
case Instruction::MOVE_RESULT_OBJECT: {
if (space_for_out_arg) {
move_result_savings_ += inst->SizeInCodeUnits() * 2;
}
break;
}
default:
space_for_out_arg = false;
break;
}
}
}
}
}
void CodeMetrics::Dump(std::ostream& os, uint64_t total_size) const {
const uint64_t total = std::accumulate(arg_counts_, arg_counts_ + kMaxArgCount, 0u);
for (size_t i = 0; i < kMaxArgCount; ++i) {
os << "args=" << i << ": " << Percent(arg_counts_[i], total) << "\n";
}
os << "Move result savings: " << Percent(move_result_savings_, total_size) << "\n";
os << "One byte invoke savings: " << Percent(total, total_size) << "\n";
const uint64_t low_arg_total = std::accumulate(arg_counts_, arg_counts_ + 3, 0u);
os << "Low arg savings: " << Percent(low_arg_total * 2, total_size) << "\n";
}
} // namespace art