/*
* Copyright (C) 2018 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "dexanalyze_strings.h"
#include <algorithm>
#include <iomanip>
#include <iostream>
#include <queue>
#include "dex/class_accessor-inl.h"
#include "dex/code_item_accessors-inl.h"
#include "dex/dex_instruction-inl.h"
namespace art {
namespace dexanalyze {
// Tunable parameters.
static const size_t kMinPrefixLen = 1;
static const size_t kMaxPrefixLen = 255;
static const size_t kPrefixConstantCost = 4;
static const size_t kPrefixIndexCost = 2;
// Trie of string prefixes. A node's rough value is (distance from root) * (occurrences - 1);
// Savings() below refines this with table and dictionary costs.
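// Worked example (illustrative numbers): a node at depth 10 whose prefix occurs in 100 strings,
// with no chosen ancestor and no chosen extension, has Savings() = 100 * (10 - 0) - (4 + 10) =
// 986 bytes: each occurrence drops 10 prefix bytes, minus one table entry (kPrefixConstantCost)
// and the 10 dictionary bytes for the prefix characters themselves.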
class MatchTrie {
public:
void Add(const std::string& str) {
MatchTrie* node = this;
size_t depth = 0u;
for (uint8_t c : str) {
++depth;
if (node->nodes_[c] == nullptr) {
MatchTrie* new_node = new MatchTrie();
node->nodes_[c].reset(new_node);
new_node->parent_ = node;
new_node->depth_ = depth;
new_node->incoming_ = c;
node = new_node;
} else {
node = node->nodes_[c].get();
}
++node->count_;
}
node->is_end_ = true;
}
// Returns the length of the longest matching prefix and whether that prefix's node is a leaf.
std::pair<size_t, bool> LongestPrefix(const std::string& str) const {
const MatchTrie* node = this;
const MatchTrie* best_node = this;
size_t depth = 0u;
size_t best_depth = 0u;
for (uint8_t c : str) {
if (node->nodes_[c] == nullptr) {
break;
}
node = node->nodes_[c].get();
++depth;
if (node->is_end_) {
best_depth = depth;
best_node = node;
}
}
bool is_leaf = true;
for (const std::unique_ptr<MatchTrie>& cur_node : best_node->nodes_) {
if (cur_node != nullptr) {
is_leaf = false;
}
}
return {best_depth, is_leaf};
}
int32_t Savings() const {
// Cost of the prefix table entry.
int32_t cost = kPrefixConstantCost;
int32_t first_used = 0;
if (chosen_suffix_count_ == 0u) {
// No chosen prefix extends this one, so its characters need their own dictionary storage.
cost += depth_;
}
uint32_t extra_savings = 0u;
for (MatchTrie* cur = parent_; cur != nullptr; cur = cur->parent_) {
if (cur->chosen_) {
first_used = cur->depth_;
if (cur->chosen_suffix_count_ == 0u) {
// First suffix for the chosen parent, remove the cost of the dictionary entry.
extra_savings += first_used;
}
break;
}
}
return count_ * (depth_ - first_used) - cost + extra_savings;
}
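// The queues below hold (cached savings, node) pairs, but picking one node can lower the savings
// of others still in the queue, so cached values go stale. PopRealTop() is the usual lazy-update
// idiom: re-evaluate the top entry and re-push it until the cached value matches the current
// Savings(). Since savings only ever decrease, the first value that sticks is the true maximum.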
template <typename T, typename... Args, template <typename...> class Queue>
T PopRealTop(Queue<T, Args...>& queue) {
auto pair = queue.top();
queue.pop();
// Keep updating values until one sticks.
while (pair.second->Savings() != pair.first) {
pair.first = pair.second->Savings();
queue.push(pair);
pair = queue.top();
queue.pop();
}
return pair;
}
std::vector<std::string> ExtractPrefixes(size_t max) {
std::vector<std::string> ret;
// Make a priority queue and adaptively update it. Each node's value is the savings from picking
// it. Insert all of the interesting nodes into the queue: those with more than one child, plus
// those that end a possible prefix.
std::priority_queue<std::pair<int32_t, MatchTrie*>> queue;
// Add all of the nodes to the queue.
std::vector<MatchTrie*> work(1, this);
while (!work.empty()) {
MatchTrie* elem = work.back();
work.pop_back();
size_t num_children = 0u;
for (const std::unique_ptr<MatchTrie>& child : elem->nodes_) {
if (child != nullptr) {
work.push_back(child.get());
++num_children;
}
}
if (num_children > 1u || elem->is_end_) {
queue.emplace(elem->Savings(), elem);
}
}
std::priority_queue<std::pair<int32_t, MatchTrie*>> prefixes;
// The savings can only ever go down for a given node, never up.
while (max != 0u && !queue.empty()) {
std::pair<int32_t, MatchTrie*> pair = PopRealTop(queue);
if (pair.second != this && pair.first > 0) {
// Pick this node.
uint32_t count = pair.second->count_;
pair.second->chosen_ = true;
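// The chosen prefix now covers this node's occurrences, so subtract them from every ancestor
// up to (but not including) the nearest already-chosen one; the ancestors' future savings
// should only count strings not already covered by a longer chosen prefix.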
for (MatchTrie* cur = pair.second->parent_; cur != this; cur = cur->parent_) {
if (cur->chosen_) {
break;
}
cur->count_ -= count;
}
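// Mark all proper ancestors as extended by a chosen prefix so that Savings() stops charging
// them for dictionary bytes; their characters are a prefix of this node's dictionary entry.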
for (MatchTrie* cur = pair.second->parent_; cur != this; cur = cur->parent_) {
++cur->chosen_suffix_count_;
}
prefixes.emplace(pair.first, pair.second);
--max;
} else {
// Zero or negative expected savings; discard the entry (it was already popped from the queue).
}
}
while (!prefixes.empty()) {
std::pair<int32_t, MatchTrie*> pair = PopRealTop(prefixes);
if (pair.first <= 0) {
continue;
}
std::vector<uint8_t> chars;
for (MatchTrie* cur = pair.second; cur != this; cur = cur->parent_) {
chars.push_back(cur->incoming_);
}
ret.push_back(std::string(chars.rbegin(), chars.rend()));
// LOG(INFO) << pair.second->Savings() << " : " << ret.back();
}
return ret;
}
std::unique_ptr<MatchTrie> nodes_[256];
MatchTrie* parent_ = nullptr;
uint32_t count_ = 0u;
int32_t depth_ = 0;
uint8_t incoming_ = 0u;
// If the current node is the end of a possible prefix.
bool is_end_ = false;
// If the current node is chosen to be a used prefix.
bool chosen_ = false;
// Number of chosen prefixes that this node is a proper prefix of.
uint32_t chosen_suffix_count_ = 0u;
};
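// Illustrative use of MatchTrie, mirroring ProcessStrings() below (the prefixes and strings here
// are only examples): add candidate prefixes, greedily extract the most profitable ones, then
// build a trie of the winners to find each string's longest chosen prefix.
//
// MatchTrie candidates;
// candidates.Add("Ljava/lang/");
// candidates.Add("Ljava/util/");
// std::vector<std::string> picked = candidates.ExtractPrefixes(/*max=*/ 1u << 15);
// MatchTrie chosen;
// for (const std::string& p : picked) {
//   chosen.Add(p);
// }
// std::pair<size_t, bool> best = chosen.LongestPrefix("Ljava/lang/String;");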
void AnalyzeStrings::ProcessDexFiles(const std::vector<std::unique_ptr<const DexFile>>& dex_files) {
std::set<std::string> unique_strings;
// Accumulate the strings.
for (const std::unique_ptr<const DexFile>& dex_file : dex_files) {
for (size_t i = 0; i < dex_file->NumStringIds(); ++i) {
uint32_t length = 0;
const char* data = dex_file->StringDataAndUtf16LengthByIdx(dex::StringIndex(i), &length);
// Check whether the string has any code points above 0xFF, which require a wide encoding.
bool have_wide_char = false;
const char* ptr = data;
for (size_t j = 0; j < length; ++j) {
// Decode unconditionally so that ptr always advances to the end of the string data.
const uint32_t c = GetUtf16FromUtf8(&ptr);
have_wide_char = have_wide_char || c >= 0x100;
}
if (have_wide_char) {
wide_string_bytes_ += 2 * length;
} else {
ascii_string_bytes_ += length;
}
string_data_bytes_ += ptr - data;
unique_strings.insert(data);
}
}
// Unique strings only since we want to exclude savings from multidex duplication.
ProcessStrings(std::vector<std::string>(unique_strings.begin(), unique_strings.end()), 1);
}
void AnalyzeStrings::ProcessStrings(const std::vector<std::string>& strings, size_t iterations) {
if (iterations == 0u) {
return;
}
// Calculate the total shared prefix. The strings are sorted (a std::set on the first pass and
// explicitly sorted suffixes on later passes), so each string's longest shared prefix is with
// one of its immediate neighbors.
prefixes_.clear();
std::unique_ptr<MatchTrie> prefix_construct(new MatchTrie());
for (size_t i = 0; i < strings.size(); ++i) {
size_t best_len = 0;
if (i > 0) {
best_len = std::max(best_len, PrefixLen(strings[i], strings[i - 1]));
}
if (i < strings.size() - 1) {
best_len = std::max(best_len, PrefixLen(strings[i], strings[i + 1]));
}
best_len = std::min(best_len, kMaxPrefixLen);
std::string prefix;
if (best_len >= kMinPrefixLen) {
prefix = strings[i].substr(0, best_len);
prefix_construct->Add(prefix);
++prefixes_[prefix];
total_shared_prefix_bytes_ += best_len;
}
total_prefix_index_cost_ += kPrefixIndexCost;
}
static constexpr size_t kPrefixBits = 15;
// Assumes a 16-bit string reference: kPrefixBits bits index the prefix and the remaining bits
// hold the suffix length inline, so a string is "short" when its suffix fits in those bits.
static constexpr size_t kShortLen = (1u << (16 - kPrefixBits)) - 1;
std::unique_ptr<MatchTrie> prefix_trie(new MatchTrie());
static constexpr bool kUseGreedyTrie = true;
if (kUseGreedyTrie) {
std::vector<std::string> prefixes(prefix_construct->ExtractPrefixes(1 << kPrefixBits));
for (auto&& str : prefixes) {
prefix_trie->Add(str);
}
} else {
// Optimize the result by moving long prefixes to shorter ones if it causes additional savings.
while (true) {
bool have_savings = false;
std::vector<std::string> longest;
for (const auto& pair : prefixes_) {
longest.push_back(pair.first);
}
std::sort(longest.begin(), longest.end(), [](const std::string& a, const std::string& b) {
return a.length() > b.length();
});
// Do longest first since this provides the best results.
for (const std::string& s : longest) {
auto it = prefixes_.find(s);
CHECK(it != prefixes_.end());
const std::string& prefix = it->first;
int64_t best_savings = 0;
int64_t best_len = -1;
for (int64_t len = prefix.length() - 1; len >= 0; --len) {
auto found = prefixes_.find(prefix.substr(0, len));
if (len != 0 && found == prefixes_.end()) {
continue;
}
// Calculate savings from downgrading the prefix.
int64_t savings = kPrefixConstantCost + prefix.length() -
(prefix.length() - len) * it->second;
if (savings > best_savings) {
best_savings = savings;
best_len = len;
break;
}
}
if (best_len != -1) {
// Copy the key and count before erasing, since the map node owns the prefix string.
const std::string new_prefix = prefix.substr(0, best_len);
const size_t count = it->second;
prefixes_.erase(it);
prefixes_[new_prefix] += count;
optimization_savings_ += best_savings;
have_savings = true;
}
}
if (!have_savings) {
break;
}
}
for (auto&& pair : prefixes_) {
prefix_trie->Add(pair.first);
}
}
// Count longest prefixes.
std::set<std::string> used_prefixes;
std::vector<std::string> suffix;
for (const std::string& str : strings) {
auto pair = prefix_trie->LongestPrefix(str);
const size_t len = pair.first;
if (len >= kMinPrefixLen) {
++strings_used_prefixed_;
total_prefix_savings_ += len;
used_prefixes.insert(str.substr(0, len));
}
suffix.push_back(str.substr(len));
if (suffix.back().size() < kShortLen) {
++short_strings_;
} else {
++long_strings_;
}
}
std::sort(suffix.begin(), suffix.end());
for (const std::string& prefix : used_prefixes) {
auto pair = prefix_trie->LongestPrefix(prefix);
CHECK_EQ(pair.first, prefix.length());
if (pair.second) {
// Only need to add to the dictionary if it's a leaf; otherwise we can reuse the string data
// of the longer prefix that extends this one.
total_prefix_dict_ += prefix.size();
}
// Each prefix costs one table entry (kPrefixConstantCost covers the offset and length).
total_prefix_table_ += kPrefixConstantCost;
}
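// Recurse on the remaining suffixes so later iterations can measure second-level prefix sharing
// among what is left after stripping the first-level prefixes.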
ProcessStrings(suffix, iterations - 1);
}
void AnalyzeStrings::Dump(std::ostream& os, uint64_t total_size) const {
os << "Total string data bytes " << Percent(string_data_bytes_, total_size) << "\n";
os << "UTF-16 string data bytes " << Percent(wide_string_bytes_, total_size) << "\n";
os << "ASCII string data bytes " << Percent(ascii_string_bytes_, total_size) << "\n";
// Prefix based strings.
os << "Total shared prefix bytes " << Percent(total_shared_prefix_bytes_, total_size) << "\n";
os << "Prefix dictionary cost " << Percent(total_prefix_dict_, total_size) << "\n";
os << "Prefix table cost " << Percent(total_prefix_table_, total_size) << "\n";
os << "Prefix index cost " << Percent(total_prefix_index_cost_, total_size) << "\n";
int64_t net_savings = total_prefix_savings_ + short_strings_;
net_savings -= total_prefix_dict_;
net_savings -= total_prefix_table_;
net_savings -= total_prefix_index_cost_;
os << "Prefix dictionary elements " << total_num_prefixes_ << "\n";
os << "Optimization savings " << Percent(optimization_savings_, total_size) << "\n";
os << "Prefix net savings " << Percent(net_savings, total_size) << "\n";
os << "Strings using prefix "
<< Percent(strings_used_prefixed_, total_prefix_index_cost_ / kPrefixIndexCost) << "\n";
os << "Short strings " << Percent(short_strings_, short_strings_ + long_strings_) << "\n";
if (verbose_level_ >= VerboseLevel::kEverything) {
std::vector<std::pair<std::string, size_t>> pairs(prefixes_.begin(), prefixes_.end());
// Sort lexicographically.
std::sort(pairs.begin(), pairs.end());
for (const auto& pair : pairs) {
os << pair.first << " : " << pair.second << "\n";
}
}
}
} // namespace dexanalyze
} // namespace art