blob: f3a54f8cf9bfcda5190c18ee7bf98e23b60bc007 [file] [log] [blame]
// Copyright 2017 The Chromium OS Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "puffin/src/include/puffin/puffer.h"
#include <algorithm>
#include <memory>
#include <string>
#include <utility>
#include <vector>
#include "puffin/src/bit_reader.h"
#include "puffin/src/huffman_table.h"
#include "puffin/src/include/puffin/common.h"
#include "puffin/src/include/puffin/stream.h"
#include "puffin/src/logging.h"
#include "puffin/src/puff_data.h"
#include "puffin/src/puff_writer.h"
using std::string;
using std::vector;
namespace puffin {
Puffer::Puffer(bool exclude_bad_distance_caches)
: dyn_ht_(new HuffmanTable()),
fix_ht_(new HuffmanTable()),
exclude_bad_distance_caches_(exclude_bad_distance_caches) {}
Puffer::Puffer() : Puffer(false) {}
Puffer::~Puffer() {}
bool Puffer::PuffDeflate(BitReaderInterface* br,
PuffWriterInterface* pw,
vector<BitExtent>* deflates) const {
PuffData pd;
HuffmanTable* cur_ht;
bool end_loop = false;
// No bits left to read, return. We try to cache at least eight bits because
// the minimum length of a deflate bit stream is 8: (fixed huffman table) 3
// bits header + 5 bits just one len/dist symbol.
while (!end_loop && br->CacheBits(8)) {
auto start_bit_offset = br->OffsetInBits();
TEST_AND_RETURN_FALSE(br->CacheBits(3));
uint8_t final_bit = br->ReadBits(1); // BFINAL
br->DropBits(1);
uint8_t type = br->ReadBits(2); // BTYPE
br->DropBits(2);
DVLOG(2) << "Read block type: "
<< BlockTypeToString(static_cast<BlockType>(type));
// If it is the final block and we are just looking for deflate locations,
// we consider this the end of the search.
if (deflates != nullptr && final_bit) {
end_loop = true;
}
// Header structure
// +-+-+-+-+-+-+-+-+
// |F| TP| SKIP |
// +-+-+-+-+-+-+-+-+
// F -> final_bit
// TP -> type
// SKIP -> skipped_bits (only in kUncompressed type)
auto block_header = (final_bit << 7) | (type << 5);
switch (static_cast<BlockType>(type)) {
case BlockType::kUncompressed: {
auto skipped_bits = br->ReadBoundaryBits();
br->SkipBoundaryBits();
TEST_AND_RETURN_FALSE(br->CacheBits(32));
auto len = br->ReadBits(16); // LEN
br->DropBits(16);
auto nlen = br->ReadBits(16); // NLEN
br->DropBits(16);
if ((len ^ nlen) != 0xFFFF) {
LOG(ERROR) << "Length of uncompressed data is invalid;"
<< " LEN(" << len << ") NLEN(" << nlen << ")";
return false;
}
// Put skipped bits into header.
block_header |= skipped_bits;
// Insert block header.
pd.type = PuffData::Type::kBlockMetadata;
pd.block_metadata[0] = block_header;
pd.length = 1;
TEST_AND_RETURN_FALSE(pw->Insert(pd));
// Insert all the raw literals.
pd.type = PuffData::Type::kLiterals;
pd.length = len;
TEST_AND_RETURN_FALSE(br->GetByteReaderFn(pd.length, &pd.read_fn));
TEST_AND_RETURN_FALSE(pw->Insert(pd));
pd.type = PuffData::Type::kEndOfBlock;
TEST_AND_RETURN_FALSE(pw->Insert(pd));
// There is no need to insert the location of uncompressed deflates
// because we do not want the uncompressed blocks when trying to find
// the bit-addressed location of deflates. They better be ignored.
// continue the loop. Do not read any literal/length/distance.
continue;
}
case BlockType::kFixed:
fix_ht_->BuildFixedHuffmanTable();
cur_ht = fix_ht_.get();
pd.type = PuffData::Type::kBlockMetadata;
pd.block_metadata[0] = block_header;
pd.length = 1;
TEST_AND_RETURN_FALSE(pw->Insert(pd));
break;
case BlockType::kDynamic:
pd.type = PuffData::Type::kBlockMetadata;
pd.block_metadata[0] = block_header;
pd.length = sizeof(pd.block_metadata) - 1;
TEST_AND_RETURN_FALSE(dyn_ht_->BuildDynamicHuffmanTable(
br, &pd.block_metadata[1], &pd.length));
pd.length += 1; // For the header.
TEST_AND_RETURN_FALSE(pw->Insert(pd));
cur_ht = dyn_ht_.get();
break;
default:
LOG(ERROR) << "Invalid block compression type: "
<< static_cast<int>(type);
return false;
}
// If true and the list of output |deflates| is non-null, the current
// deflate location will be added to that list.
bool include_deflate = true;
while (true) { // Breaks when the end of block is reached.
auto max_bits = cur_ht->LitLenMaxBits();
if (!br->CacheBits(max_bits)) {
// It could be the end of buffer and the bit length of the end_of_block
// symbol has less than maximum bit length of current Huffman table. So
// only asking for the size of end of block symbol (256).
TEST_AND_RETURN_FALSE(cur_ht->EndOfBlockBitLength(&max_bits));
}
TEST_AND_RETURN_FALSE(br->CacheBits(max_bits));
auto bits = br->ReadBits(max_bits);
uint16_t lit_len_alphabet;
size_t nbits;
TEST_AND_RETURN_FALSE(
cur_ht->LitLenAlphabet(bits, &lit_len_alphabet, &nbits));
br->DropBits(nbits);
if (lit_len_alphabet < 256) {
pd.type = PuffData::Type::kLiteral;
pd.byte = lit_len_alphabet;
TEST_AND_RETURN_FALSE(pw->Insert(pd));
} else if (256 == lit_len_alphabet) {
pd.type = PuffData::Type::kEndOfBlock;
TEST_AND_RETURN_FALSE(pw->Insert(pd));
if (deflates != nullptr && include_deflate) {
deflates->emplace_back(start_bit_offset,
br->OffsetInBits() - start_bit_offset);
}
break; // Breaks the loop.
} else {
TEST_AND_RETURN_FALSE(lit_len_alphabet <= 285);
// Reading length.
auto len_code_start = lit_len_alphabet - 257;
auto extra_bits_len = kLengthExtraBits[len_code_start];
uint16_t extra_bits_value = 0;
if (extra_bits_len) {
TEST_AND_RETURN_FALSE(br->CacheBits(extra_bits_len));
extra_bits_value = br->ReadBits(extra_bits_len);
br->DropBits(extra_bits_len);
}
auto length = kLengthBases[len_code_start] + extra_bits_value;
auto bits_to_cache = cur_ht->DistanceMaxBits();
if (!br->CacheBits(bits_to_cache)) {
// This is a corner case that is present in the older versions of the
// puffin. So we need to catch it and correctly discard this kind of
// deflate when we encounter it. See crbug.com/915559 for more info.
bits_to_cache = br->BitsRemaining();
TEST_AND_RETURN_FALSE(br->CacheBits(bits_to_cache));
if (exclude_bad_distance_caches_) {
include_deflate = false;
}
LOG(WARNING) << "A rare condition that older puffin clients fail to"
<< " recognize happened. Nothing to worry about."
<< " See crbug.com/915559";
}
auto bits = br->ReadBits(bits_to_cache);
uint16_t distance_alphabet;
size_t nbits;
TEST_AND_RETURN_FALSE(
cur_ht->DistanceAlphabet(bits, &distance_alphabet, &nbits));
br->DropBits(nbits);
// Reading distance.
extra_bits_len = kDistanceExtraBits[distance_alphabet];
extra_bits_value = 0;
if (extra_bits_len) {
TEST_AND_RETURN_FALSE(br->CacheBits(extra_bits_len));
extra_bits_value = br->ReadBits(extra_bits_len);
br->DropBits(extra_bits_len);
}
pd.type = PuffData::Type::kLenDist;
pd.length = length;
pd.distance = kDistanceBases[distance_alphabet] + extra_bits_value;
TEST_AND_RETURN_FALSE(pw->Insert(pd));
}
}
}
TEST_AND_RETURN_FALSE(pw->Flush());
return true;
}
} // namespace puffin