// blob: 7e6646fad63e51da4c0b5c537bacec84cec4729e [file] [log] [blame]
/*
* Copyright (C) 2017 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "zlib-utils.h"
#include <memory>
#include "util/base/logging.h"
#include "util/flatbuffers.h"
namespace libtextclassifier2 {
std::unique_ptr<ZlibDecompressor> ZlibDecompressor::Instance() {
std::unique_ptr<ZlibDecompressor> result(new ZlibDecompressor());
if (!result->initialized_) {
result.reset();
}
return result;
}
// Zeroes the zlib stream, selects the default allocator hooks (Z_NULL) and
// starts an inflate session. `initialized_` records whether inflateInit()
// reported Z_OK.
ZlibDecompressor::ZlibDecompressor() {
  memset(&stream_, 0, sizeof(stream_));
  stream_.zalloc = Z_NULL;
  stream_.zfree = Z_NULL;
  const int init_status = inflateInit(&stream_);
  initialized_ = (init_status == Z_OK);
}
// Tears down the inflate session, but only if it was successfully started.
ZlibDecompressor::~ZlibDecompressor() {
  if (!initialized_) {
    return;
  }
  inflateEnd(&stream_);
}
// Inflates the payload of `compressed_buffer` into `*out`.
//
// `*out` is resized to the uncompressed size recorded in the buffer and the
// payload is inflated directly into it with a single inflate() call.
//
// Returns true iff inflate() reports Z_OK. Returns false, without touching
// the stream, when `compressed_buffer`, its payload, or `out` is null.
//
// NOTE: the inflate stream is not reset here, so its state carries over from
// one call to the next.
bool ZlibDecompressor::Decompress(const CompressedBuffer* compressed_buffer,
                                  std::string* out) {
  // Reject missing inputs instead of dereferencing a null pointer.
  if (compressed_buffer == nullptr || compressed_buffer->buffer() == nullptr ||
      out == nullptr) {
    return false;
  }
  out->resize(compressed_buffer->uncompressed_size());
  stream_.next_in =
      reinterpret_cast<const Bytef*>(compressed_buffer->buffer()->Data());
  stream_.avail_in = compressed_buffer->buffer()->Length();
  // Use the mutable element accessor rather than casting away the constness
  // of c_str(); operator[] is valid even for an empty string.
  stream_.next_out = reinterpret_cast<Bytef*>(&(*out)[0]);
  stream_.avail_out = compressed_buffer->uncompressed_size();
  return inflate(&stream_, Z_SYNC_FLUSH) == Z_OK;
}
std::unique_ptr<ZlibCompressor> ZlibCompressor::Instance() {
std::unique_ptr<ZlibCompressor> result(new ZlibCompressor());
if (!result->initialized_) {
result.reset();
}
return result;
}
// Sets up a deflate session.
//
// `level` is forwarded to deflateInit(). `tmp_buffer_size` is the size, in
// bytes, of the scratch buffer that Compress() deflates into chunk by chunk.
//
// `initialized_` is true only if the scratch buffer size is positive and
// deflateInit() reported Z_OK. A non-positive size is rejected up front: a
// zero-sized buffer would make Compress() silently emit nothing, and a
// negative size must never reach the array allocation.
ZlibCompressor::ZlibCompressor(int level, int tmp_buffer_size) {
  memset(&stream_, 0, sizeof(stream_));
  stream_.zalloc = Z_NULL;
  stream_.zfree = Z_NULL;
  initialized_ = false;
  buffer_size_ = 0;
  if (tmp_buffer_size <= 0) {
    TC_LOG(ERROR) << "Invalid temporary buffer size: " << tmp_buffer_size;
    return;
  }
  buffer_size_ = tmp_buffer_size;
  buffer_.reset(new Bytef[buffer_size_]);
  initialized_ = (deflateInit(&stream_, level) == Z_OK);
}
// Tears down the deflate session, but only if it was successfully started.
ZlibCompressor::~ZlibCompressor() {
  if (initialized_) {
    deflateEnd(&stream_);
  }
}
// Deflates `uncompressed_content` and stores the result in `out`.
//
// `out->uncompressed_size` is set to the input size and `out->buffer` is
// replaced by the compressed bytes.
void ZlibCompressor::Compress(const std::string& uncompressed_content,
                              CompressedBufferT* out) {
  out->uncompressed_size = uncompressed_content.size();
  out->buffer.clear();

  unsigned char* const chunk_begin =
      reinterpret_cast<unsigned char*>(buffer_.get());

  stream_.next_in =
      reinterpret_cast<const Bytef*>(uncompressed_content.c_str());
  stream_.avail_in = uncompressed_content.size();
  stream_.next_out = buffer_.get();
  stream_.avail_out = buffer_size_;

  // The compressed size is not known in advance, so the input is deflated
  // into the scratch buffer one chunk at a time and every chunk is appended
  // to the output. Z_SYNC_FLUSH pushes out all pending output without
  // resetting the compression state. Since the uncompressed size is stored
  // alongside the data, decompression does not need to work chunk-wise.
  for (;;) {
    const int status = deflate(&stream_, Z_SYNC_FLUSH);
    unsigned char* const chunk_end =
        reinterpret_cast<unsigned char*>(stream_.next_out);
    if (chunk_end == chunk_begin) {
      // Nothing was produced by this round: we are done.
      break;
    }
    out->buffer.insert(out->buffer.end(), chunk_begin, chunk_end);
    // Rewind the scratch buffer for the next round.
    stream_.next_out = chunk_begin;
    stream_.avail_out = buffer_size_;
    if (status != Z_OK) {
      break;
    }
  }
}
// Compresses the rule fields of `model` in place.
//
// For every regex model pattern, every datetime pattern regex and every
// datetime extractor, the textual `pattern` is deflated into a freshly
// allocated `compressed_pattern` and the textual form is then cleared.
// Returns false if no compressor could be created, true otherwise.
bool CompressModel(ModelT* model) {
  std::unique_ptr<ZlibCompressor> compressor = ZlibCompressor::Instance();
  if (compressor == nullptr) {
    TC_LOG(ERROR) << "Cannot compress model.";
    return false;
  }

  // Regex rules.
  if (model->regex_model != nullptr) {
    for (const auto& pattern : model->regex_model->patterns) {
      pattern->compressed_pattern.reset(new CompressedBufferT);
      compressor->Compress(pattern->pattern,
                           pattern->compressed_pattern.get());
      pattern->pattern.clear();
    }
  }

  // Date-time rules.
  if (model->datetime_model != nullptr) {
    for (const auto& pattern : model->datetime_model->patterns) {
      for (const auto& regex : pattern->regexes) {
        regex->compressed_pattern.reset(new CompressedBufferT);
        compressor->Compress(regex->pattern, regex->compressed_pattern.get());
        regex->pattern.clear();
      }
    }
    for (const auto& extractor : model->datetime_model->extractors) {
      extractor->compressed_pattern.reset(new CompressedBufferT);
      compressor->Compress(extractor->pattern,
                           extractor->compressed_pattern.get());
      extractor->pattern.clear();
    }
  }
  return true;
}
namespace {
bool DecompressBuffer(const CompressedBufferT* compressed_pattern,
ZlibDecompressor* zlib_decompressor,
std::string* uncompressed_pattern) {
std::string packed_pattern =
PackFlatbuffer<CompressedBuffer>(compressed_pattern);
if (!zlib_decompressor->Decompress(
LoadAndVerifyFlatbuffer<CompressedBuffer>(packed_pattern),
uncompressed_pattern)) {
return false;
}
return true;
}
} // namespace
// Decompresses the rule fields of `model` in place.
//
// For every regex model pattern, every datetime pattern regex and every
// datetime extractor that carries a `compressed_pattern`, the compressed
// data is inflated into the textual `pattern` and the compressed buffer is
// released. Entries without a `compressed_pattern` are left untouched, so
// models that were never (or only partially) compressed do not crash.
//
// Returns false if `model` is null, if no decompressor could be created, or
// if any entry fails to decompress; true otherwise.
bool DecompressModel(ModelT* model) {
  if (model == nullptr) {
    TC_LOG(ERROR) << "Cannot decompress null model.";
    return false;
  }
  std::unique_ptr<ZlibDecompressor> zlib_decompressor =
      ZlibDecompressor::Instance();
  if (!zlib_decompressor) {
    TC_LOG(ERROR) << "Cannot initialize decompressor.";
    return false;
  }

  // Decompress regex rules.
  if (model->regex_model != nullptr) {
    const int num_patterns =
        static_cast<int>(model->regex_model->patterns.size());
    for (int i = 0; i < num_patterns; i++) {
      RegexModel_::PatternT* pattern = model->regex_model->patterns[i].get();
      if (pattern->compressed_pattern == nullptr) {
        continue;  // Nothing to decompress for this entry.
      }
      if (!DecompressBuffer(pattern->compressed_pattern.get(),
                            zlib_decompressor.get(), &pattern->pattern)) {
        TC_LOG(ERROR) << "Cannot decompress pattern: " << i;
        return false;
      }
      pattern->compressed_pattern.reset(nullptr);
    }
  }

  // Decompress date-time rules.
  if (model->datetime_model != nullptr) {
    const int num_patterns =
        static_cast<int>(model->datetime_model->patterns.size());
    for (int i = 0; i < num_patterns; i++) {
      DatetimeModelPatternT* pattern =
          model->datetime_model->patterns[i].get();
      const int num_regexes = static_cast<int>(pattern->regexes.size());
      for (int j = 0; j < num_regexes; j++) {
        DatetimeModelPattern_::RegexT* regex = pattern->regexes[j].get();
        if (regex->compressed_pattern == nullptr) {
          continue;  // Nothing to decompress for this entry.
        }
        if (!DecompressBuffer(regex->compressed_pattern.get(),
                              zlib_decompressor.get(), &regex->pattern)) {
          TC_LOG(ERROR) << "Cannot decompress pattern: " << i << " " << j;
          return false;
        }
        regex->compressed_pattern.reset(nullptr);
      }
    }
    const int num_extractors =
        static_cast<int>(model->datetime_model->extractors.size());
    for (int i = 0; i < num_extractors; i++) {
      DatetimeModelExtractorT* extractor =
          model->datetime_model->extractors[i].get();
      if (extractor->compressed_pattern == nullptr) {
        continue;  // Nothing to decompress for this entry.
      }
      if (!DecompressBuffer(extractor->compressed_pattern.get(),
                            zlib_decompressor.get(), &extractor->pattern)) {
        TC_LOG(ERROR) << "Cannot decompress pattern: " << i;
        return false;
      }
      extractor->compressed_pattern.reset(nullptr);
    }
  }
  return true;
}
// Takes a serialized model, compresses its rule fields and returns the
// re-serialized result. Unpacking and compression failures are fatal
// (TC_CHECK).
std::string CompressSerializedModel(const std::string& model) {
  std::unique_ptr<ModelT> unpacked_model = UnPackModel(model.c_str());
  TC_CHECK(unpacked_model != nullptr);
  TC_CHECK(CompressModel(unpacked_model.get()));

  // Re-serialize the now-compressed object-API model.
  flatbuffers::FlatBufferBuilder builder;
  const auto packed_root = Model::Pack(builder, unpacked_model.get());
  FinishModelBuffer(builder, packed_root);

  const char* const serialized_begin =
      reinterpret_cast<const char*>(builder.GetBufferPointer());
  return std::string(serialized_begin, builder.GetSize());
}
// Builds a regex pattern from either the compressed or the plain-text form.
//
// If `compressed_pattern` is present and has a payload, it is inflated with
// `decompressor`; otherwise `uncompressed_pattern` is used. When
// `result_pattern_text` is non-null it receives the UTF-8 pattern text.
//
// Returns the pattern created by `unilib`, or nullptr when decompression
// fails, when no pattern text is available, or when `unilib` cannot create
// the pattern.
std::unique_ptr<UniLib::RegexPattern> UncompressMakeRegexPattern(
    const UniLib& unilib, const flatbuffers::String* uncompressed_pattern,
    const CompressedBuffer* compressed_pattern, ZlibDecompressor* decompressor,
    std::string* result_pattern_text) {
  UnicodeText pattern_text;
  // Backing storage for the inflated text. It is passed with do_copy=false
  // below, so it is kept alive for the rest of the function.
  std::string inflated_text;

  const bool has_compressed_payload =
      compressed_pattern != nullptr && compressed_pattern->buffer() != nullptr;

  if (!has_compressed_payload) {
    // Fall back to the plain-text pattern.
    if (uncompressed_pattern == nullptr) {
      TC_LOG(ERROR) << "Cannot load uncompressed pattern.";
      return nullptr;
    }
    pattern_text =
        UTF8ToUnicodeText(uncompressed_pattern->c_str(),
                          uncompressed_pattern->Length(), /*do_copy=*/false);
  } else {
    const bool inflated_ok =
        decompressor != nullptr &&
        decompressor->Decompress(compressed_pattern, &inflated_text);
    if (!inflated_ok) {
      TC_LOG(ERROR) << "Cannot decompress pattern.";
      return nullptr;
    }
    pattern_text = UTF8ToUnicodeText(inflated_text.data(), inflated_text.size(),
                                     /*do_copy=*/false);
  }

  if (result_pattern_text != nullptr) {
    *result_pattern_text = pattern_text.ToUTF8String();
  }

  std::unique_ptr<UniLib::RegexPattern> compiled_pattern =
      unilib.CreateRegexPattern(pattern_text);
  if (compiled_pattern == nullptr) {
    TC_LOG(ERROR) << "Could not create pattern: "
                  << pattern_text.ToUTF8String();
  }
  return compiled_pattern;
}
} // namespace libtextclassifier2