annotator/model-executor.cc - platform/external/libtextclassifier - Git at Google

 /*
  * Copyright (C) 2018 The Android Open Source Project
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
  * You may obtain a copy of the License at
  *
  *      http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */

 #include "annotator/model-executor.h"

 #include "annotator/quantization.h"
 #include "utils/base/logging.h"

 namespace libtextclassifier3 {

 TensorView<float> ModelExecutor::ComputeLogits(
     const TensorView<float>& features, tflite::Interpreter* interpreter) const {
   if (!interpreter) {
     return TensorView<float>::Invalid();
   }
   interpreter->ResizeInputTensor(kInputIndexFeatures, features.shape());
   if (interpreter->AllocateTensors() != kTfLiteOk) {
     TC3_VLOG(1) << "Allocation failed.";
     return TensorView<float>::Invalid();
   }

   SetInput<float>(kInputIndexFeatures, features, interpreter);

   if (interpreter->Invoke() != kTfLiteOk) {
     TC3_VLOG(1) << "Interpreter failed.";
     return TensorView<float>::Invalid();
   }

   return OutputView<float>(kOutputIndexLogits, interpreter);
 }

 std::unique_ptr<TFLiteEmbeddingExecutor> TFLiteEmbeddingExecutor::FromBuffer(
     const flatbuffers::Vector<uint8_t>* model_spec_buffer, int embedding_size,
     int quantization_bits,
     const Model_::EmbeddingPruningMask* embedding_pruning_mask) {
   std::unique_ptr<TfLiteModelExecutor> executor =
       TfLiteModelExecutor::FromBuffer(model_spec_buffer);
   if (!executor) {
     TC3_LOG(ERROR) << "Could not load TFLite model for embeddings.";
     return nullptr;
   }

   std::unique_ptr<tflite::Interpreter> interpreter =
       executor->CreateInterpreter();
   if (!interpreter) {
     TC3_LOG(ERROR) << "Could not build TFLite interpreter for embeddings.";
     return nullptr;
   }

   if (interpreter->tensors_size() != 2) {
     return nullptr;
   }
   const TfLiteTensor* embeddings = interpreter->tensor(0);
   if (embeddings->dims->size != 2) {
     return nullptr;
   }
   int num_buckets = embeddings->dims->data[0];
   const TfLiteTensor* scales = interpreter->tensor(1);
   if (scales->dims->size != 2 || scales->dims->data[0] != num_buckets ||
       scales->dims->data[1] != 1) {
     return nullptr;
   }
   int bytes_per_embedding = embeddings->dims->data[1];
   if (!CheckQuantizationParams(bytes_per_embedding, quantization_bits,
                                embedding_size)) {
     TC3_LOG(ERROR) << "Mismatch in quantization parameters.";
     return nullptr;
   }

   return std::unique_ptr<TFLiteEmbeddingExecutor>(new TFLiteEmbeddingExecutor(
       std::move(executor), quantization_bits, num_buckets, bytes_per_embedding,
       embedding_size, scales, embeddings, std::move(interpreter),
       embedding_pruning_mask));
 }

 TFLiteEmbeddingExecutor::TFLiteEmbeddingExecutor(
     std::unique_ptr<TfLiteModelExecutor> executor, int quantization_bits,
     int num_buckets, int bytes_per_embedding, int output_embedding_size,
     const TfLiteTensor* scales, const TfLiteTensor* embeddings,
     std::unique_ptr<tflite::Interpreter> interpreter,
     const Model_::EmbeddingPruningMask* embedding_pruning_mask)
     : executor_(std::move(executor)),
       quantization_bits_(quantization_bits),
       num_buckets_(num_buckets),
       bytes_per_embedding_(bytes_per_embedding),
       output_embedding_size_(output_embedding_size),
       scales_(scales),
       embeddings_(embeddings),
       interpreter_(std::move(interpreter)) {
   if ((embedding_pruning_mask != nullptr) &&
       (embedding_pruning_mask->enabled())) {
     for (int i = 0; i < embedding_pruning_mask->pruning_mask()->size(); i++) {
       pruning_mask_.push_back((*(embedding_pruning_mask->pruning_mask()))[i]);
     }
     ComputePrefixCounts();
     full_num_buckets_ = embedding_pruning_mask->full_num_buckets();
     pruned_row_bucket_id_ = embedding_pruning_mask->pruned_row_bucket_id();
   } else {
     full_num_buckets_ = num_buckets;
   }
 }

 void TFLiteEmbeddingExecutor::ComputePrefixCounts() {
   // Pre-compute the prefix sums.
   // For each i in {0, 1,...,pruning_mask_.size()-1}, we compute number of 1s
   // in binary representations of the uint64 values in pruning_mask_ before
   // index i. We set pruned_row_bucket_id_ to the total number of 1s
   // in binary representations of all values in pruning_mask_.
   int count = 0;
   for (const uint64 mask : pruning_mask_) {
     prefix_counts_.push_back(count);
     count += __builtin_popcountll(mask);
   }
 }

 int TFLiteEmbeddingExecutor::PruneBucketId(int bucket_id) const {
   // Implements auxiliary data structure for computing the pruned index of a
   // given bucket_id.
   // If bucket_id is present in pruning_mask_, we compute floor(bucket_id/64),
   // look it up in the auxiliary array prefix_counts_, and add to it the number
   // of 1s before before bucket_id % 64 in the 64-bit sequence
   // pruning_mask_[floor(bucket_id/64)].
   // If bucket_id is absent from pruning_mask_, we return pruned_row_bucket_id_.
   const int bucket_id_major = bucket_id >> 6;
   const int bucket_id_minor = bucket_id & 63;
   uint64_t one = 1;
   if (!(pruning_mask_[bucket_id_major] & (one << bucket_id_minor)))
     return pruned_row_bucket_id_;
   const uint64 zero = 0;
   uint64 minor_mask;
   if (bucket_id_minor == 0)
     minor_mask = zero;
   else
     minor_mask = ((~zero) >> (64 - bucket_id_minor));
   return prefix_counts_[bucket_id_major] +
          __builtin_popcountll(pruning_mask_[bucket_id_major] & minor_mask);
 }

 bool TFLiteEmbeddingExecutor::AddEmbedding(
     const TensorView<int>& sparse_features, float* dest, int dest_size) const {
   if (dest_size != output_embedding_size_) {
     TC3_LOG(ERROR) << "Mismatching dest_size and output_embedding_size: "
                    << dest_size << " " << output_embedding_size_;
     return false;
   }
   const int num_sparse_features = sparse_features.size();
   for (int i = 0; i < num_sparse_features; ++i) {
     const int bucket_id = sparse_features.data()[i];
     int full_num_buckets;
     if (!pruning_mask_.empty()) {
       full_num_buckets = full_num_buckets_;
     } else {
       full_num_buckets = num_buckets_;
     }
     if (bucket_id >= full_num_buckets) {
       return false;
     }
     int final_bucket_id;
     if (!pruning_mask_.empty()) {
       final_bucket_id = PruneBucketId(bucket_id);
     } else {
       final_bucket_id = bucket_id;
     }
     if (!DequantizeAdd(scales_->data.f, embeddings_->data.uint8,
                        bytes_per_embedding_, num_sparse_features,
                        quantization_bits_, final_bucket_id, dest, dest_size)) {
       return false;
     }
   }
   return true;
 }

 }  // namespace libtextclassifier3
	/*
	* Copyright (C) 2018 The Android Open Source Project
	*
	* Licensed under the Apache License, Version 2.0 (the "License");
	* you may not use this file except in compliance with the License.
	* You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/

	#include "annotator/model-executor.h"

	#include "annotator/quantization.h"
	#include "utils/base/logging.h"

	namespace libtextclassifier3 {

	TensorView<float> ModelExecutor::ComputeLogits(
	const TensorView<float>& features, tflite::Interpreter* interpreter) const {
	if (!interpreter) {
	return TensorView<float>::Invalid();
	}
	interpreter->ResizeInputTensor(kInputIndexFeatures, features.shape());
	if (interpreter->AllocateTensors() != kTfLiteOk) {
	TC3_VLOG(1) << "Allocation failed.";
	return TensorView<float>::Invalid();
	}

	SetInput<float>(kInputIndexFeatures, features, interpreter);

	if (interpreter->Invoke() != kTfLiteOk) {
	TC3_VLOG(1) << "Interpreter failed.";
	return TensorView<float>::Invalid();
	}

	return OutputView<float>(kOutputIndexLogits, interpreter);
	}

	std::unique_ptr<TFLiteEmbeddingExecutor> TFLiteEmbeddingExecutor::FromBuffer(
	const flatbuffers::Vector<uint8_t>* model_spec_buffer, int embedding_size,
	int quantization_bits,
	const Model_::EmbeddingPruningMask* embedding_pruning_mask) {
	std::unique_ptr<TfLiteModelExecutor> executor =
	TfLiteModelExecutor::FromBuffer(model_spec_buffer);
	if (!executor) {
	TC3_LOG(ERROR) << "Could not load TFLite model for embeddings.";
	return nullptr;
	}

	std::unique_ptr<tflite::Interpreter> interpreter =
	executor->CreateInterpreter();
	if (!interpreter) {
	TC3_LOG(ERROR) << "Could not build TFLite interpreter for embeddings.";
	return nullptr;
	}

	if (interpreter->tensors_size() != 2) {
	return nullptr;
	}
	const TfLiteTensor* embeddings = interpreter->tensor(0);
	if (embeddings->dims->size != 2) {
	return nullptr;
	}
	int num_buckets = embeddings->dims->data[0];
	const TfLiteTensor* scales = interpreter->tensor(1);
	if (scales->dims->size != 2 \|\| scales->dims->data[0] != num_buckets \|\|
	scales->dims->data[1] != 1) {
	return nullptr;
	}
	int bytes_per_embedding = embeddings->dims->data[1];
	if (!CheckQuantizationParams(bytes_per_embedding, quantization_bits,
	embedding_size)) {
	TC3_LOG(ERROR) << "Mismatch in quantization parameters.";
	return nullptr;
	}

	return std::unique_ptr<TFLiteEmbeddingExecutor>(new TFLiteEmbeddingExecutor(
	std::move(executor), quantization_bits, num_buckets, bytes_per_embedding,
	embedding_size, scales, embeddings, std::move(interpreter),
	embedding_pruning_mask));
	}

	TFLiteEmbeddingExecutor::TFLiteEmbeddingExecutor(
	std::unique_ptr<TfLiteModelExecutor> executor, int quantization_bits,
	int num_buckets, int bytes_per_embedding, int output_embedding_size,
	const TfLiteTensor* scales, const TfLiteTensor* embeddings,
	std::unique_ptr<tflite::Interpreter> interpreter,
	const Model_::EmbeddingPruningMask* embedding_pruning_mask)
	: executor_(std::move(executor)),
	quantization_bits_(quantization_bits),
	num_buckets_(num_buckets),
	bytes_per_embedding_(bytes_per_embedding),
	output_embedding_size_(output_embedding_size),
	scales_(scales),
	embeddings_(embeddings),
	interpreter_(std::move(interpreter)) {
	if ((embedding_pruning_mask != nullptr) &&
	(embedding_pruning_mask->enabled())) {
	for (int i = 0; i < embedding_pruning_mask->pruning_mask()->size(); i++) {
	pruning_mask_.push_back((*(embedding_pruning_mask->pruning_mask()))[i]);
	}
	ComputePrefixCounts();
	full_num_buckets_ = embedding_pruning_mask->full_num_buckets();
	pruned_row_bucket_id_ = embedding_pruning_mask->pruned_row_bucket_id();
	} else {
	full_num_buckets_ = num_buckets;
	}
	}

	void TFLiteEmbeddingExecutor::ComputePrefixCounts() {
	// Pre-compute the prefix sums.
	// For each i in {0, 1,...,pruning_mask_.size()-1}, we compute number of 1s
	// in binary representations of the uint64 values in pruning_mask_ before
	// index i. We set pruned_row_bucket_id_ to the total number of 1s
	// in binary representations of all values in pruning_mask_.
	int count = 0;
	for (const uint64 mask : pruning_mask_) {
	prefix_counts_.push_back(count);
	count += __builtin_popcountll(mask);
	}
	}

	int TFLiteEmbeddingExecutor::PruneBucketId(int bucket_id) const {
	// Implements auxiliary data structure for computing the pruned index of a
	// given bucket_id.
	// If bucket_id is present in pruning_mask_, we compute floor(bucket_id/64),
	// look it up in the auxiliary array prefix_counts_, and add to it the number
	// of 1s before before bucket_id % 64 in the 64-bit sequence
	// pruning_mask_[floor(bucket_id/64)].
	// If bucket_id is absent from pruning_mask_, we return pruned_row_bucket_id_.
	const int bucket_id_major = bucket_id >> 6;
	const int bucket_id_minor = bucket_id & 63;
	uint64_t one = 1;
	if (!(pruning_mask_[bucket_id_major] & (one << bucket_id_minor)))
	return pruned_row_bucket_id_;
	const uint64 zero = 0;
	uint64 minor_mask;
	if (bucket_id_minor == 0)
	minor_mask = zero;
	else
	minor_mask = ((~zero) >> (64 - bucket_id_minor));
	return prefix_counts_[bucket_id_major] +
	__builtin_popcountll(pruning_mask_[bucket_id_major] & minor_mask);
	}

	bool TFLiteEmbeddingExecutor::AddEmbedding(
	const TensorView<int>& sparse_features, float* dest, int dest_size) const {
	if (dest_size != output_embedding_size_) {
	TC3_LOG(ERROR) << "Mismatching dest_size and output_embedding_size: "
	<< dest_size << " " << output_embedding_size_;
	return false;
	}
	const int num_sparse_features = sparse_features.size();
	for (int i = 0; i < num_sparse_features; ++i) {
	const int bucket_id = sparse_features.data()[i];
	int full_num_buckets;
	if (!pruning_mask_.empty()) {
	full_num_buckets = full_num_buckets_;
	} else {
	full_num_buckets = num_buckets_;
	}
	if (bucket_id >= full_num_buckets) {
	return false;
	}
	int final_bucket_id;
	if (!pruning_mask_.empty()) {
	final_bucket_id = PruneBucketId(bucket_id);
	} else {
	final_bucket_id = bucket_id;
	}
	if (!DequantizeAdd(scales_->data.f, embeddings_->data.uint8,
	bytes_per_embedding_, num_sparse_features,
	quantization_bits_, final_bucket_id, dest, dest_size)) {
	return false;
	}
	}
	return true;
	}

	} // namespace libtextclassifier3