// blob: 0f1d767c816b90cdc1d86700d6d5d69cabf5beec [file] [log] [blame]
// Protocol Buffers - Google's data interchange format
// Copyright 2008 Google Inc. All rights reserved.
// https://developers.google.com/protocol-buffers/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include <google/protobuf/compiler/cpp/parse_function_generator.h>
#include <algorithm>
#include <limits>
#include <string>
#include <utility>
#include <google/protobuf/wire_format.h>
#include <google/protobuf/compiler/cpp/helpers.h>
namespace google {
namespace protobuf {
namespace compiler {
namespace cpp {
namespace {
using google::protobuf::internal::WireFormat;
using google::protobuf::internal::WireFormatLite;
std::vector<const FieldDescriptor*> GetOrderedFields(
const Descriptor* descriptor, const Options& options) {
std::vector<const FieldDescriptor*> ordered_fields;
for (auto field : FieldRange(descriptor)) {
if (!IsFieldStripped(field, options)) {
ordered_fields.push_back(field);
}
}
std::sort(ordered_fields.begin(), ordered_fields.end(),
[](const FieldDescriptor* a, const FieldDescriptor* b) {
return a->number() < b->number();
});
return ordered_fields;
}
// Returns true when `ctype` is FieldOptions::STRING or FieldOptions::CORD.
bool HasInternalAccessors(const FieldOptions::CType ctype) {
  switch (ctype) {
    case FieldOptions::STRING:
    case FieldOptions::CORD:
      return true;
    default:
      return false;
  }
}
// Returns the number of bytes (1 or 2) occupied by the varint-coded tag of a
// field with the given number. A tag is `(field_number << 3) | wire_type`.
//
// Check-fails if the coded tag would need more than two bytes.
int TagSize(uint32_t field_number) {
  // One varint byte carries 7 payload bits; 3 are the wire type, which leaves
  // 4 bits for the field number.
  if (field_number < 16) return 1;
  // Two varint bytes carry 14 payload bits; 3 are the wire type, which leaves
  // 11 bits for the field number. Anything larger needs a third byte and no
  // longer fits in a uint16_t coded tag.
  GOOGLE_CHECK_LT(field_number, (1u << 11))
      << "coded tag for " << field_number << " too big for uint16_t";
  return 2;
}
std::string FieldParseFunctionName(
const TailCallTableInfo::FieldEntryInfo& entry, const Options& options);
// Decides whether the field described by `entry` may be placed in the
// fast-path table. Returns false for any field that must instead be handled
// by mini parsing or the fallback.
bool IsFieldEligibleForFastParsing(
    const TailCallTableInfo::FieldEntryInfo& entry, const Options& options,
    MessageSCCAnalyzer* scc_analyzer) {
  const auto* const fd = entry.field;

  // The fast path does not handle map, oneof, weak, or lazy fields.
  const bool off_fast_path =
      fd->is_map() || fd->real_containing_oneof() || fd->options().weak() ||
      IsImplicitWeakField(fd, options, scc_analyzer) ||
      IsLazy(fd, options, scc_analyzer);
  if (off_fast_path) return false;

  // The index whose range is validated further down. It is normally the
  // auxiliary index, but inlined strings substitute their donation state
  // index (see below).
  int slot_idx = entry.aux_idx;

  const FieldDescriptor::Type kind = fd->type();
  if (kind == FieldDescriptor::TYPE_ENUM) {
    // Enums whose values are not validated at parse time behave like int32
    // and are always acceptable here. Validated enums are rejected when they
    // are packed repeated.
    if (!HasPreservingUnknownEnumSemantics(fd) && fd->is_repeated() &&
        fd->is_packed()) {
      return false;
    }
  } else if (kind == FieldDescriptor::TYPE_STRING ||
             kind == FieldDescriptor::TYPE_BYTES) {
    // Only the plain STRING ctype is supported on the fast path.
    if (fd->options().ctype() != FieldOptions::STRING) return false;
    if (IsStringInlined(fd, options)) {
      GOOGLE_CHECK(!fd->is_repeated());
      // The fast parsing info stores the donation state index in its
      // `aux_idx` slot for inlined strings, so that is the value whose range
      // matters.
      slot_idx = entry.inlined_string_idx;
    }
  }

  if (HasHasbit(fd)) {
    // Only the first 32 hasbits can be updated by the tailcall parser; later
    // ones are left to mini parsing/fallback.
    GOOGLE_CHECK_GE(entry.hasbit_idx, 0) << fd->DebugString();
    if (entry.hasbit_idx >= 32) return false;
  }

  // The index has to be representable as a uint8_t.
  if (slot_idx > std::numeric_limits<uint8_t>::max()) return false;

  // The tailcall parser reads at most a two-byte varint tag:
  //   byte 0   byte 1
  //   1nnnnttt 0nnnnnnn
  // which leaves 11 bits for the field number.
  return fd->number() < (1 << 11);
}
// Lays out a fast-path table with `1 << table_size_log2` slots. Each eligible
// entry of `field_entries` is assigned to the slot selected by the low bits
// of its coded tag; when two fields map to the same slot, the one that comes
// first in `field_entries` keeps it. Unused slots are left default-constructed.
std::vector<TailCallTableInfo::FastFieldInfo> SplitFastFieldsForSize(
    const std::vector<TailCallTableInfo::FieldEntryInfo>& field_entries,
    int table_size_log2, const Options& options,
    MessageSCCAnalyzer* scc_analyzer) {
  std::vector<TailCallTableInfo::FastFieldInfo> table(1 << table_size_log2);
  const uint32_t slot_mask = table.size() - 1;

  for (const TailCallTableInfo::FieldEntryInfo& entry : field_entries) {
    if (!IsFieldEligibleForFastParsing(entry, options, scc_analyzer)) continue;

    const auto* field = entry.field;

    // Build the varint-coded form of the tag. Anything above the low 7 bits
    // is shifted up by one bit and a continuation bit is set on the first
    // byte: coded = lobits + 2*hibits + 128.
    uint32_t coded_tag = WireFormat::MakeTag(field);
    const uint32_t upper_bits = coded_tag & 0xFFFFFF80;
    if (upper_bits != 0) {
      coded_tag += upper_bits + 128;
    }

    // The slot is taken straight from the coded tag so that dispatch needs no
    // arithmetic (the largest supported table has 32 slots):
    //        byte 0   byte 1
    //   tag: 1nnnnttt 0nnnnnnn
    //        ^^^^^
    //         idx (table_size_log2=5)
    // Consequently every field number that does not fit in 4 bits has the top
    // bit of its slot index set.
    TailCallTableInfo::FastFieldInfo& slot =
        table[(coded_tag >> 3) & slot_mask];
    if (slot.field != nullptr) continue;  // Slot already claimed.

    GOOGLE_CHECK(slot.func_name.empty()) << slot.func_name;
    slot.func_name = FieldParseFunctionName(entry, options);
    slot.field = field;
    slot.coded_tag = coded_tag;
    // Fields without presence point at an out-of-range bit: the tailcall
    // parser carries hasbits in a uint64_t but only stores 32 of them.
    slot.hasbit_idx = HasHasbit(field) ? entry.hasbit_idx : 63;
    if (IsStringInlined(field, options)) {
      GOOGLE_CHECK(!field->is_repeated());
      slot.aux_idx = static_cast<uint8_t>(entry.inlined_string_idx);
    } else {
      slot.aux_idx = static_cast<uint8_t>(entry.aux_idx);
    }
  }
  return table;
}
// Returns the members of `fields` that mini parsing does not handle, i.e. the
// fields for which generated fallback code is still required. Relative order
// is preserved.
std::vector<const FieldDescriptor*> FilterMiniParsedFields(
    const std::vector<const FieldDescriptor*>& fields, const Options& options,
    MessageSCCAnalyzer* scc_analyzer) {
  std::vector<const FieldDescriptor*> needs_fallback;
  for (const FieldDescriptor* field : fields) {
    bool mini_parsed = false;
    switch (field->type()) {
      case FieldDescriptor::TYPE_DOUBLE:
      case FieldDescriptor::TYPE_FLOAT:
      case FieldDescriptor::TYPE_FIXED32:
      case FieldDescriptor::TYPE_SFIXED32:
      case FieldDescriptor::TYPE_FIXED64:
      case FieldDescriptor::TYPE_SFIXED64:
      case FieldDescriptor::TYPE_BOOL:
      case FieldDescriptor::TYPE_UINT32:
      case FieldDescriptor::TYPE_SINT32:
      case FieldDescriptor::TYPE_INT32:
      case FieldDescriptor::TYPE_UINT64:
      case FieldDescriptor::TYPE_SINT64:
      case FieldDescriptor::TYPE_INT64:
        // MiniParse covers every numeric scalar, so no fallback code is
        // generated for them.
        mini_parsed = true;
        break;

      case FieldDescriptor::TYPE_ENUM:
        // TODO(b/206890171): handle packed repeated closed enums
        // Non-packed repeated enums could use the tables, but fallback code
        // is still generated for every repeated closed enum so that packed
        // encoding works. The cause is the lite/full split in how invalid
        // enum values inside a packed field are treated.
        mini_parsed = !field->is_repeated() ||
                      HasPreservingUnknownEnumSemantics(field);
        break;

      case FieldDescriptor::TYPE_BYTES:
      case FieldDescriptor::TYPE_STRING:
        // TODO(b/198211897): support InlinedStringField.
        mini_parsed = !IsStringInlined(field, options);
        break;

      case FieldDescriptor::TYPE_MESSAGE:
      case FieldDescriptor::TYPE_GROUP:
        // TODO(b/210762816): support remaining field types.
        mini_parsed = !(field->is_map() || IsWeak(field, options) ||
                        IsImplicitWeakField(field, options, scc_analyzer) ||
                        IsLazy(field, options, scc_analyzer));
        break;

      default:
        mini_parsed = false;
        break;
    }
    if (mini_parsed) continue;
    needs_fallback.push_back(field);
  }
  return needs_fallback;
}
} // namespace
// Gathers everything needed to emit the tail-call parse table for
// `descriptor`:
//   * `aux_entries`: auxiliary entries (oneof case offset, inlined string
//     donation offset, sub-message default instances, enum validation data);
//   * `field_entries`: one mini-table entry per element of `ordered_fields`;
//   * `fast_path_fields` / `table_size_log2`: the chosen fast table layout;
//   * `fallback_fields` / `use_generated_fallback`: what still needs a
//     message-specific fallback routine.
//
// `has_bit_indices` and `inlined_string_indices` are indexed by
// `FieldDescriptor::index()`.
TailCallTableInfo::TailCallTableInfo(
    const Descriptor* descriptor, const Options& options,
    const std::vector<const FieldDescriptor*>& ordered_fields,
    const std::vector<int>& has_bit_indices,
    const std::vector<int>& inlined_string_indices,
    MessageSCCAnalyzer* scc_analyzer) {
  int oneof_count = descriptor->real_oneof_decl_count();
  // If this message has any oneof fields, store the case offset in the first
  // auxiliary entry.
  if (oneof_count > 0) {
    GOOGLE_LOG_IF(DFATAL, ordered_fields.empty())
        << "Invalid message: " << descriptor->full_name() << " has "
        << oneof_count << " oneof declarations, but no fields";
    aux_entries.push_back(StrCat("_fl::Offset{offsetof(",
                                 ClassName(descriptor),
                                 ", _impl_._oneof_case_)}"));
  }
  // If this message has any inlined string fields, store the donation state
  // offset in the second auxiliary entry.
  if (!inlined_string_indices.empty()) {
    aux_entries.resize(2);  // pad if necessary
    aux_entries[1] =
        StrCat("_fl::Offset{offsetof(", ClassName(descriptor),
               ", _impl_._inlined_string_donated_)}");
  }
  // Fill in mini table entries.
  for (const FieldDescriptor* field : ordered_fields) {
    field_entries.push_back(
        {field, (HasHasbit(field) ? has_bit_indices[field->index()] : -1)});
    auto& entry = field_entries.back();
    if (field->type() == FieldDescriptor::TYPE_MESSAGE ||
        field->type() == FieldDescriptor::TYPE_GROUP) {
      // Message-typed fields have a FieldAux with the default instance pointer.
      if (field->is_map()) {
        // TODO(b/205904770): generate aux entries for maps
      } else if (IsWeak(field, options)) {
        // Don't generate anything for weak fields. They are handled by the
        // generated fallback.
      } else if (IsImplicitWeakField(field, options, scc_analyzer)) {
        // Implicit weak fields don't need to store a default instance pointer.
      } else if (IsLazy(field, options, scc_analyzer)) {
        // Lazy fields are handled by the generated fallback function.
      } else {
        entry.aux_idx = aux_entries.size();
        const Descriptor* field_type = field->message_type();
        aux_entries.push_back(StrCat(
            "reinterpret_cast<const ", QualifiedClassName(field_type, options),
            "*>(&", QualifiedDefaultInstanceName(field_type, options), ")"));
      }
    } else if (field->type() == FieldDescriptor::TYPE_ENUM &&
               !HasPreservingUnknownEnumSemantics(field)) {
      // Enum fields which preserve unknown values (proto3 behavior) are
      // effectively int32 fields with respect to parsing -- i.e., the value
      // does not need to be validated at parse time.
      //
      // Enum fields which do not preserve unknown values (proto2 behavior) use
      // a FieldAux to store validation information. If the enum values are
      // sequential (and within a range we can represent), then the FieldAux
      // entry represents the range using the minimum value (which must fit in
      // an int16_t) and count (a uint16_t). Otherwise, the entry holds a
      // pointer to the generated Name_IsValid function.
      entry.aux_idx = aux_entries.size();
      const EnumDescriptor* enum_type = field->enum_type();
      GOOGLE_CHECK_GT(enum_type->value_count(), 0) << enum_type->DebugString();
      // Check if the enum values are a single, contiguous range.
      std::vector<int> enum_values;
      for (int i = 0, N = enum_type->value_count(); i < N; ++i) {
        enum_values.push_back(enum_type->value(i)->number());
      }
      std::sort(enum_values.begin(), enum_values.end());
      enum_values.erase(std::unique(enum_values.begin(), enum_values.end()),
                        enum_values.end());
      // Do the span arithmetic in 64 bits: `back() - front()` overflows `int`
      // when the enum covers more than INT_MAX (e.g. both INT32_MIN and
      // INT32_MAX are declared), and comparing the int result against a
      // size_t would additionally mix signedness.
      const int64_t first_value = enum_values.front();
      const int64_t value_span =
          static_cast<int64_t>(enum_values.back()) - first_value;
      const int64_t value_count = static_cast<int64_t>(enum_values.size());
      if (value_span == value_count - 1 &&
          first_value >= std::numeric_limits<int16_t>::min() &&
          first_value <= std::numeric_limits<int16_t>::max() &&
          enum_values.size() <= std::numeric_limits<uint16_t>::max()) {
        entry.is_enum_range = true;
        aux_entries.push_back(
            StrCat(enum_values[0], ", ", enum_values.size()));
      } else {
        entry.is_enum_range = false;
        aux_entries.push_back(
            StrCat(QualifiedClassName(enum_type, options), "_IsValid"));
      }
    } else if ((field->type() == FieldDescriptor::TYPE_STRING ||
                field->type() == FieldDescriptor::TYPE_BYTES) &&
               IsStringInlined(field, options)) {
      GOOGLE_CHECK(!field->is_repeated());
      // Inlined strings have an extra marker to represent their donation state.
      int idx = inlined_string_indices[field->index()];
      // For mini parsing, the donation state index is stored as an `offset`
      // auxiliary entry.
      entry.aux_idx = aux_entries.size();
      aux_entries.push_back(StrCat("_fl::Offset{", idx, "}"));
      // For fast table parsing, the donation state index is stored instead of
      // the aux_idx (this will limit the range to 8 bits).
      entry.inlined_string_idx = idx;
    }
  }
  // Choose the smallest fast table that covers the maximum number of fields.
  table_size_log2 = 0;  // fallback value
  int num_fast_fields = -1;
  for (int try_size_log2 : {0, 1, 2, 3, 4, 5}) {
    size_t try_size = 1 << try_size_log2;
    auto split_fields = SplitFastFieldsForSize(field_entries, try_size_log2,
                                               options, scc_analyzer);
    GOOGLE_CHECK_EQ(split_fields.size(), try_size);
    int try_num_fast_fields = 0;
    for (const auto& info : split_fields) {
      if (info.field != nullptr) ++try_num_fast_fields;
    }
    // Use this size if (and only if) it covers more fields.
    if (try_num_fast_fields > num_fast_fields) {
      fast_path_fields = std::move(split_fields);
      table_size_log2 = try_size_log2;
      num_fast_fields = try_num_fast_fields;
    }
    // The largest table we allow has the same number of entries as the message
    // has fields, rounded up to the next power of 2 (e.g., a message with 5
    // fields can have a fast table of size 8). A larger table *might* cover
    // more fields in certain cases, but a larger table in that case would have
    // mostly empty entries; so, we cap the size to avoid pathologically sparse
    // tables.
    if (try_size > ordered_fields.size()) {
      break;
    }
  }
  // Filter out fields that are handled by MiniParse. We don't need to generate
  // a fallback for these, which saves code size.
  fallback_fields = FilterMiniParsedFields(ordered_fields, options,
                                           scc_analyzer);
  // If there are no fallback fields, and at most one extension range, the
  // parser can use a generic fallback function. Otherwise, a message-specific
  // fallback routine is needed.
  use_generated_fallback =
      !fallback_fields.empty() || descriptor->extension_range_count() > 1;
}
// Captures the per-message generation inputs, fills in the template variables
// used by the emitters, and, when table-driven parsing is enabled, builds the
// TailCallTableInfo for the message.
ParseFunctionGenerator::ParseFunctionGenerator(
    const Descriptor* descriptor, int max_has_bit_index,
    const std::vector<int>& has_bit_indices,
    const std::vector<int>& inlined_string_indices, const Options& options,
    MessageSCCAnalyzer* scc_analyzer,
    const std::map<std::string, std::string>& vars)
    : descriptor_(descriptor),
      scc_analyzer_(scc_analyzer),
      options_(options),
      variables_(vars),
      inlined_string_indices_(inlined_string_indices),
      ordered_fields_(GetOrderedFields(descriptor_, options_)),
      num_hasbits_(max_has_bit_index) {
  // Template variables shared by every emitter of this generator.
  SetCommonVars(options_, &variables_);
  SetCommonMessageDataVariables(descriptor_, &variables_);
  SetUnknownFieldsVariable(descriptor_, options_, &variables_);
  variables_["classname"] = ClassName(descriptor_, false);

  // The table description is only needed for table-driven parsing.
  if (!should_generate_tctable()) return;
  tc_table_info_.reset(new TailCallTableInfo(
      descriptor_, options_, ordered_fields_, has_bit_indices,
      inlined_string_indices, scc_analyzer_));
}
// Emits the in-class declarations of the parse entry points: the private
// `Tct_ParseFallback` (table-driven parsing only, optionally wrapped in a
// preprocessor guard) followed by `_InternalParse`.
void ParseFunctionGenerator::GenerateMethodDecls(io::Printer* printer) {
  Formatter format(printer, variables_);

  // Emits `line` only when the table parser is preprocessor-guarded.
  const auto emit_if_guarded = [&](const char* line) {
    if (should_generate_guarded_tctable()) {
      format(line);
    }
  };

  if (should_generate_tctable()) {
    // Access specifiers and preprocessor lines sit one level further out.
    format.Outdent();
    emit_if_guarded("#ifdef PROTOBUF_TAIL_CALL_TABLE_PARSER_ENABLED\n");
    format(
        " private:\n"
        " static const char* Tct_ParseFallback(PROTOBUF_TC_PARAM_DECL);\n"
        " public:\n");
    emit_if_guarded("#endif\n");
    format.Indent();
  }

  format(
      "const char* _InternalParse(const char* ptr, "
      "::$proto_ns$::internal::ParseContext* ctx) final;\n");
}
// Emits the out-of-line parse function definitions. MessageSet messages get a
// dedicated `_InternalParse`; every other message gets the tail-call version,
// the looping version, or (when guarded) both under an #ifdef/#else pair.
void ParseFunctionGenerator::GenerateMethodImpls(io::Printer* printer) {
  Formatter format(printer, variables_);

  const bool is_message_set =
      descriptor_->options().message_set_wire_format();
  if (is_message_set) {
    // Special-case MessageSet: its `_InternalParse` is emitted right here.
    format(
        "const char* $classname$::_InternalParse(const char* ptr,\n"
        " ::_pbi::ParseContext* ctx) {\n"
        "$annotate_deserialize$");
    const bool wants_verify =
        !options_.unverified_lazy_message_sets &&
        ShouldVerify(descriptor_, options_, scc_analyzer_);
    if (wants_verify) {
      format(
          " ctx->set_lazy_eager_verify_func(&$classname$::InternalVerify);\n");
    }
    format(
        " return $extensions$.ParseMessageSet(ptr, \n"
        " internal_default_instance(), &_internal_metadata_, ctx);\n"
        "}\n");
  }
  // A MessageSet already has its parse function.
  const bool emit_parse_function = !is_message_set;

  if (!should_generate_tctable()) {
    if (emit_parse_function) {
      GenerateLoopingParseFunction(format);
    }
    return;
  }

  if (should_generate_guarded_tctable()) {
    format("#ifdef PROTOBUF_TAIL_CALL_TABLE_PARSER_ENABLED\n\n");
  }
  if (emit_parse_function) {
    GenerateTailcallParseFunction(format);
  }
  if (tc_table_info_->use_generated_fallback) {
    GenerateTailcallFallbackFunction(format);
  }

  if (!should_generate_guarded_tctable()) return;

  // Guarded mode: provide the looping parser for builds where the macro is
  // not defined.
  if (emit_parse_function) {
    format("\n#else // PROTOBUF_TAIL_CALL_TABLE_PARSER_ENABLED\n\n");
    GenerateLoopingParseFunction(format);
  }
  format("\n#endif // PROTOBUF_TAIL_CALL_TABLE_PARSER_ENABLED\n");
}
// Table-driven parsing is generated unless the options select
// Options::kTCTableNever.
bool ParseFunctionGenerator::should_generate_tctable() const {
  return options_.tctable_mode != Options::kTCTableNever;
}
// Emits an `_InternalParse` definition whose body hands control to
// `TcParser::ParseLoop` with this message's `_table_.header`.
void ParseFunctionGenerator::GenerateTailcallParseFunction(Formatter& format) {
  GOOGLE_CHECK(should_generate_tctable());

  // Signature and the call that enters the tail-calling loop.
  format(
      "const char* $classname$::_InternalParse(\n"
      " const char* ptr, ::_pbi::ParseContext* ctx) {\n"
      "$annotate_deserialize$"
      " ptr = ::_pbi::TcParser::ParseLoop(this, ptr, ctx, &_table_.header);\n");
  // Epilogue.
  format(
      " return ptr;\n"
      "}\n"
      "\n");
}
// Emits `$classname$::Tct_ParseFallback`, the message-specific fallback. Its
// body is the regular parse-iteration body restricted to
// `tc_table_info_->fallback_fields`, with template variables rebound so the
// emitted code operates on `typed_msg` rather than `this`.
void ParseFunctionGenerator::GenerateTailcallFallbackFunction(
    Formatter& format) {
  GOOGLE_CHECK(should_generate_tctable());

  format(
      "const char* $classname$::Tct_ParseFallback(PROTOBUF_TC_PARAM_DECL) {\n"
      "#define CHK_(x) if (PROTOBUF_PREDICT_FALSE(!(x))) return nullptr\n");
  format.Indent();

  format("auto* typed_msg = static_cast<$classname$*>(msg);\n");
  const bool has_hasbits = num_hasbits_ > 0;
  if (has_hasbits) {
    // Sync hasbits: write the `hasbits` value back into the message.
    format("typed_msg->_impl_._has_bits_[0] = hasbits;\n");
  }
  format("uint32_t tag = data.tag();\n");

  // Rebind the variables that the iteration body substitutes.
  format.Set("msg", "typed_msg->");
  format.Set("this", "typed_msg");
  format.Set("has_bits", "typed_msg->_impl_._has_bits_");
  format.Set("next_tag", "goto next_tag");
  GenerateParseIterationBody(format, descriptor_,
                             tc_table_info_->fallback_fields);

  format.Outdent();
  format(
      "next_tag:\n"
      "message_done:\n"
      " return ptr;\n"
      "#undef CHK_\n"
      "}\n");
}
// One 16-field window of the field-number lookup table.
struct SkipEntry16 {
  // Bitmap over the 16 field numbers of this window. MakeNumToEntryTable
  // starts it at 0xFFFF and clears the bit of every field that is present.
  uint16_t skipmap;
  // Index of a field entry recorded when this window was created.
  uint16_t field_entry_offset;
};

// A run of consecutive SkipEntry16 windows beginning at `first_fnum`.
struct SkipEntryBlock {
  // Field number represented by bit 0 of the first window.
  uint32_t first_fnum;
  // The windows, in increasing field-number order.
  std::vector<SkipEntry16> entries;
};

// Field-number lookup table: a 32-bit skipmap for field numbers 1-32 plus
// skip-entry blocks for larger field numbers.
struct NumToEntryTable {
  uint32_t skipmap32;  // for fields #1 - #32
  std::vector<SkipEntryBlock> blocks;

  // Compute the number of uint16_t required to represent this table.
  int size16() const {
    // The terminating field number takes two uint16_t.
    int total = 2;
    for (auto it = blocks.begin(); it != blocks.end(); ++it) {
      // Per block: 2 for the field#, 1 for the count of skip entries, and 2
      // for each entry.
      total += 3 + it->entries.size() * 2;
    }
    return total;
  }
};
static NumToEntryTable MakeNumToEntryTable(
const std::vector<const FieldDescriptor*>& field_descriptors);
// Emits the in-class declaration of the static `_table_` member (a
// TcParseTable instantiation), wrapped in a preprocessor guard when the table
// parser is guarded. Emits nothing when table-driven parsing is disabled.
void ParseFunctionGenerator::GenerateDataDecls(io::Printer* printer) {
  if (!should_generate_tctable()) return;

  Formatter format(printer, variables_);

  // In guarded mode, prints `directive` one indentation level further out.
  const auto emit_guard_line = [&](const char* directive) {
    if (!should_generate_guarded_tctable()) return;
    format.Outdent();
    format(directive);
    format.Indent();
  };

  emit_guard_line("#ifdef PROTOBUF_TAIL_CALL_TABLE_PARSER_ENABLED\n");

  // The lookup table's length is one of the template arguments.
  const NumToEntryTable lookup_table = MakeNumToEntryTable(ordered_fields_);
  format(
      "static const ::$proto_ns$::internal::"
      "TcParseTable<$1$, $2$, $3$, $4$, $5$> _table_;\n",
      tc_table_info_->table_size_log2, ordered_fields_.size(),
      tc_table_info_->aux_entries.size(), CalculateFieldNamesSize(),
      lookup_table.size16());

  emit_guard_line("#endif // PROTOBUF_TAIL_CALL_TABLE_PARSER_ENABLED\n");
}
// Emits the out-of-line definition of `_table_`, wrapped in a preprocessor
// guard when the table parser is guarded. Emits nothing when table-driven
// parsing is disabled.
void ParseFunctionGenerator::GenerateDataDefinitions(io::Printer* printer) {
  if (!should_generate_tctable()) return;

  Formatter format(printer, variables_);

  // Prints `directive` only in guarded mode.
  const auto emit_guard_line = [&](const char* directive) {
    if (should_generate_guarded_tctable()) {
      format(directive);
    }
  };

  emit_guard_line("#ifdef PROTOBUF_TAIL_CALL_TABLE_PARSER_ENABLED\n");
  GenerateTailCallTable(format);
  emit_guard_line("#endif // PROTOBUF_TAIL_CALL_TABLE_PARSER_ENABLED\n");
}
// Emits the loop-based `_InternalParse`: a `while (!ctx->Done(&ptr))` loop
// that reads a tag and runs the parse-iteration body over all ordered fields.
void ParseFunctionGenerator::GenerateLoopingParseFunction(Formatter& format) {
  format(
      "const char* $classname$::_InternalParse(const char* ptr, "
      "::_pbi::ParseContext* ctx) {\n"
      "$annotate_deserialize$"
      "#define CHK_(x) if (PROTOBUF_PREDICT_FALSE(!(x))) goto failure\n");
  format.Indent();
  format.Set("msg", "");
  format.Set("this", "this");

  // For now only optimize small hasbits: a local accumulator is used only
  // when the has-bits occupy exactly one 32-bit word.
  const bool use_local_hasbits =
      num_hasbits_ > 0 && (num_hasbits_ + 31) / 32 == 1;
  if (use_local_hasbits) {
    format("_Internal::HasBits has_bits{};\n");
    format.Set("has_bits", "has_bits");
  } else {
    format.Set("has_bits", "_impl_._has_bits_");
  }
  format.Set("next_tag", "continue");

  // The parse loop itself.
  format("while (!ctx->Done(&ptr)) {\n");
  format.Indent();
  format(
      "uint32_t tag;\n"
      "ptr = ::_pbi::ReadTag(ptr, &tag);\n");
  GenerateParseIterationBody(format, descriptor_, ordered_fields_);
  format.Outdent();
  format("} // while\n");

  // Epilogue: merge the local has-bits (if any) and return.
  format.Outdent();
  format("message_done:\n");
  if (use_local_hasbits) {
    format(" _impl_._has_bits_.Or(has_bits);\n");
  }
  format(
      " return ptr;\n"
      "failure:\n"
      " ptr = nullptr;\n"
      " goto message_done;\n"
      "#undef CHK_\n"
      "}\n");
}
// Builds the field-number lookup table for `field_descriptors`.
//
// Field numbers 1-32 only clear bits in `skipmap32`. Larger field numbers are
// recorded in SkipEntryBlocks, each made of 16-field-wide SkipEntry16 windows
// whose skipmap bit is cleared for every field that is present.
//
// The input is expected to be sorted by ascending field number: the first
// loop stops at the first number above 32, and the second loop check-fails if
// a field number is not greater than the start of the most recent window.
static NumToEntryTable MakeNumToEntryTable(
    const std::vector<const FieldDescriptor*>& field_descriptors) {
  NumToEntryTable num_to_entry_table;
  // Start with every bit set; bits are cleared below for fields that exist.
  num_to_entry_table.skipmap32 = static_cast<uint32_t>(-1);
  // field_entry_index walks `field_descriptors`; its value is also what gets
  // stored as `field_entry_offset` when a new skip entry is created.
  uint16_t field_entry_index = 0;
  uint16_t N = field_descriptors.size();
  // First, handle field numbers 1-32, which affect only the initial
  // skipmap32 and don't generate additional skip-entry blocks.
  for (; field_entry_index != N; ++field_entry_index) {
    auto* field_descriptor = field_descriptors[field_entry_index];
    if (field_descriptor->number() > 32) break;
    auto skipmap32_index = field_descriptor->number() - 1;
    // Shift an unsigned one: for field number 32 the index is 31, and
    // `1 << 31` would shift a signed int into its sign bit.
    num_to_entry_table.skipmap32 -= uint32_t{1} << skipmap32_index;
  }
  // If all the field numbers were less than or equal to 32, we will have
  // no further entries to process, and we are already done.
  if (field_entry_index == N) return num_to_entry_table;
  // `block` is the SkipEntryBlock currently being appended to. It is
  // re-pointed every time a block is pushed, so it never dangles across a
  // reallocation of `blocks`.
  SkipEntryBlock* block = nullptr;
  bool start_new_block = true;
  // To determine sparseness, track the field number corresponding to
  // the start of the most recent skip entry.
  uint32_t last_skip_entry_start = 0;
  for (; field_entry_index != N; ++field_entry_index) {
    auto* field_descriptor = field_descriptors[field_entry_index];
    uint32_t fnum = field_descriptor->number();
    GOOGLE_CHECK_GT(fnum, last_skip_entry_start);
    if (start_new_block == false) {
      // If the next field number is within 15 of the last_skip_entry_start, we
      // continue writing just to that entry. If it's between 16 and 31 more,
      // then we just extend the current block by one. If it's more than 31
      // more, we have to add empty skip entries in order to continue using the
      // existing block. Obviously, if it's just 32 more, it doesn't make sense
      // to start a whole new block, since new blocks mean having to write out
      // their starting field number, which is 32 bits, as well as the size of
      // the additional block, which is 16... while an empty SkipEntry16 only
      // costs 32 bits. So if it was 48 more, it's a slight space win; we save
      // 16 bits, but probably at the cost of slower run time. We're choosing
      // 96 for now.
      if (fnum - last_skip_entry_start > 96) start_new_block = true;
    }
    if (start_new_block) {
      num_to_entry_table.blocks.push_back(SkipEntryBlock{fnum});
      block = &num_to_entry_table.blocks.back();
      start_new_block = false;
    }
    // Locate the 16-wide window (and the bit inside it) for this field.
    auto skip_entry_num = (fnum - block->first_fnum) / 16;
    auto skip_entry_index = (fnum - block->first_fnum) % 16;
    // Append windows (all bits set) until the one we need exists.
    while (skip_entry_num >= block->entries.size())
      block->entries.push_back({0xFFFF, field_entry_index});
    block->entries[skip_entry_num].skipmap -= 1 << (skip_entry_index);
    last_skip_entry_start = fnum - skip_entry_index;
  }
  return num_to_entry_table;
}
// Emits the definition of `$classname$::_table_`, the TcParseTable for this
// message. The initializer is written in this order: the header, then
// fast_entries, field_lookup_table, field_entries, aux_entries, and finally
// field_names. Sections that would be empty are replaced by a comment in the
// generated code.
void ParseFunctionGenerator::GenerateTailCallTable(Formatter& format) {
GOOGLE_CHECK(should_generate_tctable());
// All entries without a fast-path parsing function need a fallback.
std::string fallback;
if (tc_table_info_->use_generated_fallback) {
fallback = ClassName(descriptor_) + "::Tct_ParseFallback";
} else {
fallback = "::_pbi::TcParser::GenericFallback";
if (GetOptimizeFor(descriptor_->file(), options_) ==
FileOptions::LITE_RUNTIME) {
fallback += "Lite";
}
}
// For simplicity and speed, the table is not covering all proto
// configurations. This model uses a fallback to cover all situations that
// the table can't accommodate, together with unknown fields or extensions.
// These are number of fields over 32, fields with 3 or more tag bytes,
// maps, weak fields, lazy, more than 1 extension range. In the cases
// the table is sufficient we can use a generic routine, that just handles
// unknown fields and potentially an extension range.
auto field_num_to_entry_table = MakeNumToEntryTable(ordered_fields_);
// The five template arguments are: fast table size (log2), number of field
// entries, number of aux entries, field-names size, and lookup table size.
format(
"PROTOBUF_ATTRIBUTE_INIT_PRIORITY1\n"
"const ::_pbi::TcParseTable<$1$, $2$, $3$, $4$, $5$> "
"$classname$::_table_ = "
"{\n",
tc_table_info_->table_size_log2, ordered_fields_.size(),
tc_table_info_->aux_entries.size(), CalculateFieldNamesSize(),
field_num_to_entry_table.size16());
{
auto table_scope = format.ScopedIndent();
format("{\n");
{
// header
auto header_scope = format.ScopedIndent();
if (num_hasbits_ > 0 || IsMapEntryMessage(descriptor_)) {
format("PROTOBUF_FIELD_OFFSET($classname$, _impl_._has_bits_),\n");
} else {
format("0, // no _has_bits_\n");
}
// Extension information is only recorded when there is exactly one
// extension range.
if (descriptor_->extension_range_count() == 1) {
format(
"PROTOBUF_FIELD_OFFSET($classname$, $extensions$),\n"
"$1$, $2$, // extension_range_{low,high}\n",
descriptor_->extension_range(0)->start,
descriptor_->extension_range(0)->end);
} else {
format("0, 0, 0, // no _extensions_\n");
}
// fast_idx_mask selects `table_size_log2` bits starting at bit 3.
format("$1$, $2$, // max_field_number, fast_idx_mask\n",
(ordered_fields_.empty() ? 0 : ordered_fields_.back()->number()),
(((1 << tc_table_info_->table_size_log2) - 1) << 3));
format(
"offsetof(decltype(_table_), field_lookup_table),\n"
"$1$, // skipmap\n",
field_num_to_entry_table.skipmap32);
// When a section is absent, the offset of field_names is emitted in its
// place.
if (ordered_fields_.empty()) {
format(
"offsetof(decltype(_table_), field_names), // no field_entries\n");
} else {
format("offsetof(decltype(_table_), field_entries),\n");
}
format(
"$1$, // num_field_entries\n"
"$2$, // num_aux_entries\n",
ordered_fields_.size(), tc_table_info_->aux_entries.size());
if (tc_table_info_->aux_entries.empty()) {
format(
"offsetof(decltype(_table_), field_names), // no aux_entries\n");
} else {
format("offsetof(decltype(_table_), aux_entries),\n");
}
format(
"&$1$._instance,\n"
"$2$, // fallback\n"
"",
DefaultInstanceName(descriptor_, options_), fallback);
}
format("}, {{\n");
{
// fast_entries[]
auto fast_scope = format.ScopedIndent();
GenerateFastFieldEntries(format);
}
format("}}, {{\n");
{
// field_lookup_table[]
auto field_lookup_scope = format.ScopedIndent();
// Counts the skip entries printed on the current output line; a newline is
// emitted together with the sixth entry.
int line_entries = 0;
for (int i = 0, N = field_num_to_entry_table.blocks.size(); i < N; ++i) {
SkipEntryBlock& entry_block = field_num_to_entry_table.blocks[i];
// Block header: first field number as low and high 16-bit halves, then the
// number of skip entries in the block.
format("$1$, $2$, $3$,\n", entry_block.first_fnum & 65535,
entry_block.first_fnum / 65536, entry_block.entries.size());
for (auto se16 : entry_block.entries) {
if (line_entries == 0) {
format("$1$, $2$,", se16.skipmap, se16.field_entry_offset);
++line_entries;
} else if (line_entries < 5) {
format(" $1$, $2$,", se16.skipmap, se16.field_entry_offset);
++line_entries;
} else {
format(" $1$, $2$,\n", se16.skipmap, se16.field_entry_offset);
line_entries = 0;
}
}
}
if (line_entries) format("\n");
// Terminating entry of the lookup table.
format("65535, 65535\n");
}
if (ordered_fields_.empty()) {
GOOGLE_LOG_IF(DFATAL, !tc_table_info_->aux_entries.empty())
<< "Invalid message: " << descriptor_->full_name() << " has "
<< tc_table_info_->aux_entries.size()
<< " auxiliary field entries, but no fields";
format(
"}},\n"
"// no field_entries, or aux_entries\n"
"{{\n");
} else {
format("}}, {{\n");
{
// field_entries[]
auto field_scope = format.ScopedIndent();
GenerateFieldEntries(format);
}
if (tc_table_info_->aux_entries.empty()) {
format(
"}},\n"
"// no aux_entries\n"
"{{\n");
} else {
format("}}, {{\n");
{
// aux_entries[]
auto aux_scope = format.ScopedIndent();
for (const std::string& aux_entry : tc_table_info_->aux_entries) {
format("{$1$},\n", aux_entry);
}
}
format("}}, {{\n");
}
} // ordered_fields_.empty()
{
// field_names[]
auto field_name_scope = format.ScopedIndent();
GenerateFieldNames(format);
}
format("}},\n");
}
format("};\n\n"); // _table_
}
// Emits one initializer per slot of the fast-path table. A slot with no
// parse function name dispatches to `TcParser::MiniParse`; any other slot
// lists its parse function, coded tag, hasbit index, aux index and the offset
// of the member it parses into.
void ParseFunctionGenerator::GenerateFastFieldEntries(Formatter& format) {
  for (const auto& slot : tc_table_info_->fast_path_fields) {
    if (slot.field != nullptr) {
      PrintFieldComment(format, slot.field);
    }
    if (slot.func_name.empty()) {
      format("{::_pbi::TcParser::MiniParse, {}},\n");
      continue;
    }

    // Fields for which ShouldSplit() holds are addressed through
    // `Impl_::Split` using `<field name>_` as the member name.
    const bool is_split = ShouldSplit(slot.field, options_);
    const char* const owner_suffix = is_split ? "::Impl_::Split" : "";
    const std::string member_name =
        is_split ? FieldName(slot.field) + "_"
                 : FieldMemberName(slot.field, /*cold=*/false);
    format(
        "{$1$,\n"
        " {$2$, $3$, $4$, PROTOBUF_FIELD_OFFSET($classname$$5$, $6$)}},\n",
        slot.func_name, slot.coded_tag, slot.hasbit_idx, slot.aux_idx,
        owner_suffix, member_name);
  }
}
// Prints the parenthesized `::_fl::` flag expression describing the field of
// `entry`: cardinality first, then the (optionally Packed-prefixed) type
// alias, and finally the string representation for string/bytes fields.
static void FormatFieldKind(Formatter& format,
                            const TailCallTableInfo::FieldEntryInfo& entry,
                            const Options& options,
                            MessageSCCAnalyzer* scc_analyzer) {
  const FieldDescriptor* const fd = entry.field;

  // Cardinality comes first, following proto declaration order.
  format("(::_fl::kFc");
  const char* cardinality = "Singular";
  if (HasHasbit(fd)) {
    cardinality = "Optional";
  } else if (fd->is_repeated()) {
    cardinality = "Repeated";
  } else if (fd->real_containing_oneof()) {
    cardinality = "Oneof";
  }
  format(cardinality);

  // The remainder of the kind is spelled with convenience aliases.
  format(" | ::_fl::k");
  if (fd->is_repeated() && fd->is_packed()) {
    format("Packed");
  }

  // Types whose alias is a single fixed string record it here; it is printed
  // right after the switch. Non-map messages print their pieces directly.
  const char* type_alias = nullptr;
  switch (fd->type()) {
    case FieldDescriptor::TYPE_DOUBLE:
      type_alias = "Double";
      break;
    case FieldDescriptor::TYPE_FLOAT:
      type_alias = "Float";
      break;
    case FieldDescriptor::TYPE_FIXED32:
      type_alias = "Fixed32";
      break;
    case FieldDescriptor::TYPE_SFIXED32:
      type_alias = "SFixed32";
      break;
    case FieldDescriptor::TYPE_FIXED64:
      type_alias = "Fixed64";
      break;
    case FieldDescriptor::TYPE_SFIXED64:
      type_alias = "SFixed64";
      break;
    case FieldDescriptor::TYPE_BOOL:
      type_alias = "Bool";
      break;
    case FieldDescriptor::TYPE_ENUM:
      if (HasPreservingUnknownEnumSemantics(fd)) {
        // Open enum: nothing to validate.
        type_alias = "OpenEnum";
      } else if (entry.is_enum_range) {
        // Validated by a range check (start/length stored in FieldAux).
        type_alias = "EnumRange";
      } else {
        // Validated through the generated _IsValid function.
        type_alias = "Enum";
      }
      break;
    case FieldDescriptor::TYPE_UINT32:
      type_alias = "UInt32";
      break;
    case FieldDescriptor::TYPE_SINT32:
      type_alias = "SInt32";
      break;
    case FieldDescriptor::TYPE_INT32:
      type_alias = "Int32";
      break;
    case FieldDescriptor::TYPE_UINT64:
      type_alias = "UInt64";
      break;
    case FieldDescriptor::TYPE_SINT64:
      type_alias = "SInt64";
      break;
    case FieldDescriptor::TYPE_INT64:
      type_alias = "Int64";
      break;
    case FieldDescriptor::TYPE_BYTES:
      type_alias = "Bytes";
      break;
    case FieldDescriptor::TYPE_STRING: {
      const auto utf8_mode = GetUtf8CheckMode(fd, options);
      switch (utf8_mode) {
        case Utf8CheckMode::kStrict:
          type_alias = "Utf8String";
          break;
        case Utf8CheckMode::kVerify:
          type_alias = "RawString";
          break;
        case Utf8CheckMode::kNone:
          // Treat LITE_RUNTIME strings as bytes.
          type_alias = "Bytes";
          break;
        default:
          GOOGLE_LOG(FATAL) << "Invalid Utf8CheckMode (" << static_cast<int>(utf8_mode)
                            << ") for " << fd->DebugString();
      }
      break;
    }
    case FieldDescriptor::TYPE_GROUP:
      type_alias = "Message | ::_fl::kRepGroup";
      break;
    case FieldDescriptor::TYPE_MESSAGE:
      if (fd->is_map()) {
        type_alias = "Map";
      } else {
        format("Message");
        if (IsLazy(fd, options, scc_analyzer)) {
          format(" | ::_fl::kRepLazy");
        } else if (IsImplicitWeakField(fd, options, scc_analyzer)) {
          format(" | ::_fl::kRepIWeak");
        }
      }
      break;
  }
  if (type_alias != nullptr) {
    format(type_alias);
  }

  // String and bytes fields additionally state their representation.
  const bool is_string_like = fd->type() == FieldDescriptor::TYPE_BYTES ||
                              fd->type() == FieldDescriptor::TYPE_STRING;
  if (is_string_like) {
    format(fd->is_repeated() ? " | ::_fl::kRepSString"
                             : " | ::_fl::kRepAString");
  }
  format(")");
}
// Emits one brace-enclosed initializer per entry of
// `tc_table_info_->field_entries`, each preceded by the field's comment.
//
// Weak fields get an all-zero placeholder entry. Every other field lists the
// offset of its storage member, its oneof index (or hasbit index when it is
// not in a real oneof), its aux index and its field-kind flags.
void ParseFunctionGenerator::GenerateFieldEntries(Formatter& format) {
  for (const auto& entry : tc_table_info_->field_entries) {
    const FieldDescriptor* field = entry.field;
    PrintFieldComment(format, field);

    if (IsWeak(field, options_)) {
      // Weak fields are handled by the generated fallback function.
      // (These are handled by legacy Google-internal logic.)
      format("{/* weak */ 0, 0, 0, 0},\n");
      continue;
    }

    const OneofDescriptor* oneof = field->real_containing_oneof();
    const bool is_split = ShouldSplit(field, options_);
    const char* const owner_suffix = is_split ? "::Impl_::Split" : "";
    const std::string member_name =
        is_split ? FieldName(field) + "_"
                 : FieldMemberName(field, /*cold=*/false);
    const auto presence_index = oneof ? oneof->index() : entry.hasbit_idx;

    format("{PROTOBUF_FIELD_OFFSET($classname$$1$, $2$), $3$, $4$,\n ",
           owner_suffix, member_name, presence_index, entry.aux_idx);
    FormatFieldKind(format, entry, options_, scc_analyzer_);
    format("},\n");
  }
}
// Upper bound on the length of any single name stored in the field-name
// data. Each length is emitted as one octal-escaped byte (see FormatOctal).
static constexpr int kMaxNameLength = 255;

// Returns the size of the field-name data: the bytes of all names, plus the
// block of per-name length bytes padded to a multiple of 8, plus one extra
// byte (presumably the string literal's terminating NUL -- TODO confirm).
//
// The message's full name counts first and is capped at kMaxNameLength. Field
// names are CHECKed to be within that limit.
int ParseFunctionGenerator::CalculateFieldNamesSize() const {
  const int message_name_size = std::min(
      static_cast<int>(descriptor_->full_name().size()), kMaxNameLength);

  int names_size = message_name_size;
  int lengths_size = 1;  // The length byte for the message name.
  for (const auto& entry : tc_table_info_->field_entries) {
    const std::string& field_name = entry.field->name();
    GOOGLE_CHECK_LE(field_name.size(), kMaxNameLength);
    names_size += field_name.size();
    ++lengths_size;
  }

  // Round the length block up to an 8-byte boundary.
  lengths_size = (lengths_size + 7) & -8;
  return names_size + lengths_size + 1;
}
// Emits a backslash followed by the octal digits of the low 8 bits of `size`
// (no leading zeros), i.e. an octal escape sequence for use inside a
// generated string literal.
static void FormatOctal(Formatter& format, int size) {
  // Build a decimal number whose decimal digits are the octal digits.
  const int high_digit = (size >> 6) & 3;
  const int middle_digit = (size >> 3) & 7;
  const int low_digit = size & 7;
  format("\\$1$", high_digit * 100 + middle_digit * 10 + low_digit);
}
// Emits the field-name data as a sequence of adjacent string literals:
//
//   1. One literal holding the length of every name as an octal-escaped
//      byte (message name first, then each field entry), padded with `\0`
//      up to a multiple of 8 bytes.
//   2. The message's full name. Names longer than kMaxNameLength keep their
//      first and last halves, joined by "...".
//   3. One literal per field entry holding the field's name.
//
// The layout must stay in sync with CalculateFieldNamesSize(), so the same
// kMaxNameLength constant is used for the length cap and for truncation.
void ParseFunctionGenerator::GenerateFieldNames(Formatter& format) {
  const std::string& full_name = descriptor_->full_name();

  // First, we output the size of each string, as an unsigned byte. The first
  // string is the message name.
  int count = 1;
  format("\"");
  FormatOctal(format,
              std::min(static_cast<int>(full_name.size()), kMaxNameLength));
  for (const auto& entry : tc_table_info_->field_entries) {
    FormatOctal(format, static_cast<int>(entry.field->name().size()));
    ++count;
  }
  while (count & 7) {  // align to an 8-byte boundary
    format("\\0");
    ++count;
  }
  format("\"\n");

  // The message name is stored at the beginning of the string.
  std::string message_name = full_name;
  if (message_name.size() > static_cast<size_t>(kMaxNameLength)) {
    // Keep both ends of an over-long name; the result is exactly
    // kMaxNameLength characters (2 * half + 3 for the ellipsis).
    static constexpr int kNameHalfLength = (kMaxNameLength - 3) / 2;
    message_name = StrCat(
        message_name.substr(0, kNameHalfLength), "...",
        message_name.substr(message_name.size() - kNameHalfLength));
  }
  format("\"$1$\"\n", message_name);

  // Then we output the actual field names.
  for (const auto& entry : tc_table_info_->field_entries) {
    format("\"$1$\"\n", entry.field->name());
  }
}
// Emits parsing code for a string field that chooses between two paths on
// `arena`: `ctx->ReadArenaString(...)` when an arena is present, and
// `InlineGreedyStringParser` on the field's `MutableNoCopy(nullptr)` string
// otherwise. The field's hasbit is set first when the field has one, and a
// local `str` pointing at the parsed value is declared at the end.
//
// For inlined strings, ReadArenaString also receives the donated-array
// pointer, the field's inlined-string index and `$this$`.
void ParseFunctionGenerator::GenerateArenaString(Formatter& format,
                                                 const FieldDescriptor* field) {
  if (HasHasbit(field)) {
    format("_Internal::set_has_$1$(&$has_bits$);\n", FieldName(field));
  }

  // Open the arena branch; the argument list is closed further below so the
  // inlined-string arguments can be appended to it.
  format("if (arena != nullptr) {\n");
  format(" ptr = ctx->ReadArenaString(ptr, &$msg$$field$, arena");

  if (!IsStringInlined(field, options_)) {
    GOOGLE_DCHECK(field->default_value_string().empty());
  } else {
    GOOGLE_DCHECK(!inlined_string_indices_.empty());
    const int donated_index = inlined_string_indices_[field->index()];
    GOOGLE_DCHECK_GT(donated_index, 0);
    format(", &$msg$$inlined_string_donated_array$[0], $1$, $this$",
           donated_index);
  }

  format(
      ");\n"
      "} else {\n");
  format(
      " ptr = ::_pbi::InlineGreedyStringParser("
      "$msg$$field$.MutableNoCopy(nullptr), ptr, ctx);\n"
      "}\n");
  format("const std::string* str = &$msg$$field$.Get(); (void)str;\n");
}
// Emits parsing code for a string or bytes field, followed by `CHK_(ptr)`
// and, when `check_utf8` is true, by UTF-8 verification whose strength
// depends on GetUtf8CheckMode():
//   kNone:   nothing is emitted.
//   kVerify: `VerifyUTF8` wrapped in `#ifndef NDEBUG`.
//   kStrict: `VerifyUTF8` wrapped in `CHK_(...)`.
//
// `check_utf8` is false for bytes fields.
void ParseFunctionGenerator::GenerateStrings(Formatter& format,
                                             const FieldDescriptor* field,
                                             bool check_utf8) {
  // Open source doesn't support other ctypes, so the option is only
  // consulted for the non-opensource runtime.
  const FieldOptions::CType ctype = options_.opensource_runtime
                                        ? FieldOptions::STRING
                                        : field->options().ctype();

  const bool use_arena_string =
      !field->is_repeated() && !options_.opensource_runtime &&
      GetOptimizeFor(field->file(), options_) != FileOptions::LITE_RUNTIME &&
      // For now only use arena string for strings with empty defaults.
      field->default_value_string().empty() &&
      !field->real_containing_oneof() && ctype == FieldOptions::STRING;

  if (use_arena_string) {
    GenerateArenaString(format, field);
  } else {
    const char* parser_name = "";
    switch (ctype) {
      case FieldOptions::STRING:
        parser_name = "GreedyStringParser";
        break;
      case FieldOptions::CORD:
        parser_name = "CordParser";
        break;
      case FieldOptions::STRING_PIECE:
        parser_name = "StringPieceParser";
        break;
    }
    const char* const accessor_prefix =
        HasInternalAccessors(ctype) ? "_internal_" : "";
    const char* const accessor_verb =
        field->is_repeated() && !field->is_packable() ? "add" : "mutable";
    format(
        "auto str = $msg$$1$$2$_$name$();\n"
        "ptr = ::_pbi::Inline$3$(str, ptr, ctx);\n",
        accessor_prefix, accessor_verb, parser_name);
  }

  // It is intentionally placed before VerifyUTF8 because it doesn't make sense
  // to verify UTF8 when we already know parsing failed.
  format("CHK_(ptr);\n");

  if (!check_utf8) return;  // Bytes fields are never verified.
  const auto level = GetUtf8CheckMode(field, options_);
  if (level == Utf8CheckMode::kNone) return;

  const bool debug_only = level == Utf8CheckMode::kVerify;
  const bool strict = level == Utf8CheckMode::kStrict;

  // Opening wrapper around the verification call.
  if (debug_only) {
    format("#ifndef NDEBUG\n");
  } else if (strict) {
    format("CHK_(");
  }

  // The field's full name is passed to VerifyUTF8 only when descriptor
  // methods are available; otherwise `nullptr` is passed.
  const std::string field_name =
      HasDescriptorMethods(field->file(), options_)
          ? StrCat("\"", field->full_name(), "\"")
          : "nullptr";
  format("::_pbi::VerifyUTF8(str, $1$)", field_name);

  // Closing wrapper matching the opening one above.
  if (debug_only) {
    format(
        ";\n"
        "#endif // !NDEBUG\n");
  } else if (strict) {
    format(");\n");
  }
}
// Emits the parsing code for a field that arrived with the length-delimited
// wire type. Every path ends by emitting `CHK_(ptr)` (string/bytes emit it
// inside GenerateStrings).
//
//   - Packable fields: a `Packed<Type>Parser` call; enums without
//     unknown-preserving semantics use the validating overload.
//   - string / bytes: delegated to GenerateStrings (UTF-8 checks only for
//     strings).
//   - message: map, lazy, implicit-weak, weak and regular sub-messages each
//     get their own `ctx->ParseMessage(...)` form.
//
// Any other field type is a generator bug and is reported with a FATAL log.
void ParseFunctionGenerator::GenerateLengthDelim(Formatter& format,
                                                 const FieldDescriptor* field) {
  if (field->is_packable()) {
    if (field->type() == FieldDescriptor::TYPE_ENUM &&
        !HasPreservingUnknownEnumSemantics(field)) {
      // The validating parser is given the _IsValid function, the
      // metadata and the field number.
      std::string enum_type = QualifiedClassName(field->enum_type(), options_);
      format(
          "ptr = "
          "::$proto_ns$::internal::Packed$1$Parser<$unknown_fields_type$>("
          "$msg$_internal_mutable_$name$(), ptr, ctx, $2$_IsValid, "
          "&$msg$_internal_metadata_, $3$);\n",
          DeclaredTypeMethodName(field->type()), enum_type, field->number());
    } else {
      format(
          "ptr = ::$proto_ns$::internal::Packed$1$Parser("
          "$msg$_internal_mutable_$name$(), ptr, ctx);\n",
          DeclaredTypeMethodName(field->type()));
    }
    format("CHK_(ptr);\n");
    return;
  }

  auto field_type = field->type();
  switch (field_type) {
    case FieldDescriptor::TYPE_STRING:
      GenerateStrings(format, field, true /* utf8 */);
      break;
    case FieldDescriptor::TYPE_BYTES:
      GenerateStrings(format, field, false /* utf8 */);
      break;
    case FieldDescriptor::TYPE_MESSAGE: {
      if (field->is_map()) {
        const FieldDescriptor* val = field->message_type()->map_value();
        GOOGLE_CHECK(val);
        if (val->type() == FieldDescriptor::TYPE_ENUM &&
            !HasPreservingUnknownEnumSemantics(field)) {
          // Enum-valued maps are parsed through a wrapper that is given the
          // value enum's _IsValid function.
          format(
              "auto object = "
              "::$proto_ns$::internal::InitEnumParseWrapper<"
              "$unknown_fields_type$>(&$msg$$field$, $1$_IsValid, "
              "$2$, &$msg$_internal_metadata_);\n"
              "ptr = ctx->ParseMessage(&object, ptr);\n",
              QualifiedClassName(val->enum_type(), options_),
              field->number());
        } else {
          format("ptr = ctx->ParseMessage(&$msg$$field$, ptr);\n");
        }
      } else if (IsLazy(field, options_, scc_analyzer_)) {
        const bool eager_verify =
            IsEagerlyVerifiedLazy(field, options_, scc_analyzer_);
        const bool should_verify =
            ShouldVerify(descriptor_, options_, scc_analyzer_);
        if (should_verify) {
          // Install the verify function (or nullptr) for the nested parse.
          format(
              "ctx->set_lazy_eager_verify_func($1$);\n",
              eager_verify
                  ? StrCat("&", ClassName(field->message_type(), true),
                           "::InternalVerify")
                  : "nullptr");
        }
        // Obtain `lazy_field`: oneof members are allocated on demand, other
        // fields are embedded (with a hasbit update when they have one).
        if (field->real_containing_oneof()) {
          format(
              "if (!$msg$_internal_has_$name$()) {\n"
              " $msg$clear_$1$();\n"
              " $msg$$field$ = ::$proto_ns$::Arena::CreateMessage<\n"
              " ::$proto_ns$::internal::LazyField>("
              "$msg$GetArenaForAllocation());\n"
              " $msg$set_has_$name$();\n"
              "}\n"
              "auto* lazy_field = $msg$$field$;\n",
              field->containing_oneof()->name());
        } else if (HasHasbit(field)) {
          format(
              "_Internal::set_has_$name$(&$has_bits$);\n"
              "auto* lazy_field = &$msg$$field$;\n");
        } else {
          format("auto* lazy_field = &$msg$$field$;\n");
        }
        format(
            "::$proto_ns$::internal::LazyFieldParseHelper<\n"
            " ::$proto_ns$::internal::LazyField> parse_helper(\n"
            " $1$::default_instance(),\n"
            " $msg$GetArenaForAllocation(),\n"
            " ::google::protobuf::internal::LazyVerifyOption::$2$,\n"
            " lazy_field);\n"
            "ptr = ctx->ParseMessage(&parse_helper, ptr);\n",
            FieldMessageTypeName(field, options_),
            eager_verify ? "kEager" : "kLazy");
        if (should_verify && eager_verify) {
          // Reset the verify function installed above.
          format("ctx->set_lazy_eager_verify_func(nullptr);\n");
        }
      } else if (IsImplicitWeakField(field, options_, scc_analyzer_)) {
        if (!field->is_repeated()) {
          format(
              "ptr = ctx->ParseMessage(_Internal::mutable_$name$($this$), "
              "ptr);\n");
        } else {
          format(
              "ptr = ctx->ParseMessage($msg$$field$.AddWeak("
              "reinterpret_cast<const ::$proto_ns$::MessageLite*>($1$ptr_)"
              "), ptr);\n",
              QualifiedDefaultInstanceName(field->message_type(), options_));
        }
      } else if (IsWeak(field, options_)) {
        format(
            "{\n"
            " auto* default_ = &reinterpret_cast<const Message&>($1$);\n"
            " ptr = ctx->ParseMessage($msg$$weak_field_map$.MutableMessage("
            "$2$, default_), ptr);\n"
            "}\n",
            QualifiedDefaultInstanceName(field->message_type(), options_),
            field->number());
      } else {
        format(
            "ptr = ctx->ParseMessage($msg$_internal_$mutable_field$(), "
            "ptr);\n");
      }
      format("CHK_(ptr);\n");
      break;
    }
    default:
      GOOGLE_LOG(FATAL) << "Illegal combination for length delimited wiretype, "
                 << "field type is " << field->type();
  }
}
// Returns true when the generated code should loop over consecutive
// occurrences of the same tag for `descriptor` parsed with `wiretype`.
//
// This requires a repeated field whose number is below 16 * 128, and excludes
// the case of a packable field being read with the length-delimited wire
// type.
static bool ShouldRepeat(const FieldDescriptor* descriptor,
                         WireFormatLite::WireType wiretype) {
  constexpr int kMaxTwoByteFieldNumber = 16 * 128;
  if (descriptor->number() >= kMaxTwoByteFieldNumber) return false;
  if (!descriptor->is_repeated()) return false;

  const bool packable_as_length_delimited =
      descriptor->is_packable() &&
      wiretype == WireFormatLite::WIRETYPE_LENGTH_DELIMITED;
  return !packable_as_length_delimited;
}
// Emits the code that parses one occurrence of `field` whose tag carried
// `wiretype`, and stores the value into the message.
//
// The formatter variables `name`, `primitive_type`, `put_field` and
// `mutable_field` are defined for the duration of this call only; the saved
// formatter state is restored on return.
//
//   - VARINT: enums are read as 64-bit varints and, unless the enum preserves
//     unknown values, validated with `_IsValid` (invalid values go to the
//     unknown fields). Other types use the matching `ReadVarint*` helper.
//   - FIXED32 / FIXED64: an unaligned load followed by advancing `ptr`.
//   - LENGTH_DELIMITED: delegated to GenerateLengthDelim().
//   - START_GROUP: `ctx->ParseGroup(...)` with the field's tag.
//   - END_GROUP: never valid for a field; reported with a FATAL log.
void ParseFunctionGenerator::GenerateFieldBody(
    Formatter& format, WireFormatLite::WireType wiretype,
    const FieldDescriptor* field) {
  Formatter::SaveState formatter_state(&format);
  format.AddMap(
      {{"name", FieldName(field)},
       {"primitive_type", PrimitiveTypeName(options_, field->cpp_type())}});
  if (field->is_repeated()) {
    // Repeated fields append, so both accessors map to `add_<name>`.
    format.AddMap({{"put_field", StrCat("add_", FieldName(field))},
                   {"mutable_field", StrCat("add_", FieldName(field))}});
  } else {
    format.AddMap(
        {{"put_field", StrCat("set_", FieldName(field))},
         {"mutable_field", StrCat("mutable_", FieldName(field))}});
  }
  uint32_t tag = WireFormatLite::MakeTag(field->number(), wiretype);
  switch (wiretype) {
    case WireFormatLite::WIRETYPE_VARINT: {
      if (field->type() == FieldDescriptor::TYPE_ENUM) {
        const bool validate = !HasPreservingUnknownEnumSemantics(field);
        format.Set("enum_type",
                   QualifiedClassName(field->enum_type(), options_));
        format(
            "$uint64$ val = ::$proto_ns$::internal::ReadVarint64(&ptr);\n"
            "CHK_(ptr);\n");
        if (validate) {
          format("if (PROTOBUF_PREDICT_TRUE($enum_type$_IsValid(val))) {\n");
          format.Indent();
        }
        format("$msg$_internal_$put_field$(static_cast<$enum_type$>(val));\n");
        if (validate) {
          // Values rejected by _IsValid are written to the unknown fields.
          format.Outdent();
          format(
              "} else {\n"
              " ::$proto_ns$::internal::WriteVarint("
              "$1$, val, $msg$mutable_unknown_fields());\n"
              "}\n",
              field->number());
        }
      } else {
        // Only the three 32-bit integer types use the 32-bit reader; every
        // other varint type is read as 64 bits.
        std::string size = (field->type() == FieldDescriptor::TYPE_INT32 ||
                            field->type() == FieldDescriptor::TYPE_SINT32 ||
                            field->type() == FieldDescriptor::TYPE_UINT32)
                               ? "32"
                               : "64";
        std::string zigzag;
        if ((field->type() == FieldDescriptor::TYPE_SINT32 ||
             field->type() == FieldDescriptor::TYPE_SINT64)) {
          zigzag = "ZigZag";
        }
        if (field->is_repeated() || field->real_containing_oneof()) {
          // Repeated and oneof fields go through the internal accessor.
          format(
              "$msg$_internal_$put_field$("
              "::$proto_ns$::internal::ReadVarint$1$$2$(&ptr));\n"
              "CHK_(ptr);\n",
              zigzag, size);
        } else {
          if (HasHasbit(field)) {
            format("_Internal::set_has_$name$(&$has_bits$);\n");
          }
          format(
              "$msg$$field$ = ::$proto_ns$::internal::ReadVarint$1$$2$(&ptr);\n"
              "CHK_(ptr);\n",
              zigzag, size);
        }
      }
      break;
    }
    case WireFormatLite::WIRETYPE_FIXED32:
    case WireFormatLite::WIRETYPE_FIXED64: {
      if (field->is_repeated() || field->real_containing_oneof()) {
        format(
            "$msg$_internal_$put_field$("
            "::$proto_ns$::internal::UnalignedLoad<$primitive_type$>(ptr));\n"
            "ptr += sizeof($primitive_type$);\n");
      } else {
        if (HasHasbit(field)) {
          format("_Internal::set_has_$name$(&$has_bits$);\n");
        }
        format(
            "$msg$$field$ = "
            "::$proto_ns$::internal::UnalignedLoad<$primitive_type$>(ptr);\n"
            "ptr += sizeof($primitive_type$);\n");
      }
      break;
    }
    case WireFormatLite::WIRETYPE_LENGTH_DELIMITED: {
      GenerateLengthDelim(format, field);
      break;
    }
    case WireFormatLite::WIRETYPE_START_GROUP: {
      format(
          "ptr = ctx->ParseGroup($msg$_internal_$mutable_field$(), ptr, $1$);\n"
          "CHK_(ptr);\n",
          tag);
      break;
    }
    case WireFormatLite::WIRETYPE_END_GROUP: {
      GOOGLE_LOG(FATAL) << "Can't have end group field\n";
      break;
    }
  }  // switch (wire_type)
}
// Returns the tag the parser should expect for `field`.
//
// For packable fields there are two possible tags: one with the element
// type's own wire type and one with the length-delimited wire type. The one
// matching the field's declared packedness is returned and the other is
// stored in `*fallback_tag_ptr`. For all other fields `*fallback_tag_ptr` is
// left untouched.
static uint32_t ExpectedTag(const FieldDescriptor* field,
                            uint32_t* fallback_tag_ptr) {
  if (!field->is_packable()) {
    return WireFormatLite::MakeTag(field->number(),
                                   WireFormat::WireTypeForField(field));
  }

  const auto element_wiretype = WireFormat::WireTypeForFieldType(field->type());
  GOOGLE_CHECK(element_wiretype != WireFormatLite::WIRETYPE_LENGTH_DELIMITED);

  const uint32_t unpacked_tag =
      WireFormatLite::MakeTag(field->number(), element_wiretype);
  const uint32_t packed_tag = WireFormatLite::MakeTag(
      field->number(), WireFormatLite::WIRETYPE_LENGTH_DELIMITED);

  const bool declared_packed = field->is_packed();
  *fallback_tag_ptr = declared_packed ? unpacked_tag : packed_tag;
  return declared_packed ? packed_tag : unpacked_tag;
}
// Emits the body of one parse-loop iteration: the per-field switch (when the
// message has fields) followed by the handling of "unusual" tags.
//
// The generated code relies on the following being defined around it:
//  - `const char* ptr`: the input buffer.
//  - `ParseContext* ctx`: the associated context for `ptr`.
//  - implicit `this`: i.e., it must sit in a non-static member function.
//  - the macro `CHK_(x)`, which should return an error condition when its
//    argument is false.
//  - the label `message_done`, which is jumped to whenever an END_GROUP tag or
//    tag 0 was read.
//
// Formatter variables used:
//  - `next_tag`: a single statement to begin parsing the next tag.
//
// After the generated code, the enclosing function should proceed to parse
// the next tag in the stream.
void ParseFunctionGenerator::GenerateParseIterationBody(
    Formatter& format, const Descriptor* descriptor,
    const std::vector<const FieldDescriptor*>& fields) {
  const bool has_fields = !fields.empty();
  if (has_fields) {
    GenerateFieldSwitch(format, fields);
    // Each field `case` only considers field number. Field numbers that are
    // not defined in the message, or tags with an incompatible wire type, are
    // considered "unusual" cases. They will be handled by the logic below.
    // The label is emitted one indentation level further out.
    format.Outdent();
    format("handle_unusual:\n");
    format.Indent();
  }

  // Tag 0 and END_GROUP tags (wire type 4) terminate the message.
  format(
      "if ((tag == 0) || ((tag & 7) == 4)) {\n"
      " CHK_(ptr);\n"
      " ctx->SetLastTag(tag);\n"
      " goto message_done;\n"
      "}\n");

  // For map entry messages only `$next_tag$` is emitted for unusual tags.
  if (IsMapEntryMessage(descriptor)) {
    format("$next_tag$;\n");
    return;
  }

  const int range_count = descriptor->extension_range_count();
  if (range_count > 0) {
    // Build one condition that matches a tag inside any extension range.
    format("if (");
    for (int index = 0; index < range_count; ++index) {
      const Descriptor::ExtensionRange* range =
          descriptor->extension_range(index);
      if (index != 0) format(" ||\n ");
      const uint32_t first_tag = WireFormatLite::MakeTag(
          range->start, static_cast<WireFormatLite::WireType>(0));
      if (range->end > FieldDescriptor::kMaxNumber) {
        // A range ending past the largest field number has no upper bound.
        format("($1$u <= tag)", first_tag);
      } else {
        const uint32_t limit_tag = WireFormatLite::MakeTag(
            range->end, static_cast<WireFormatLite::WireType>(0));
        format("($1$u <= tag && tag < $2$u)", first_tag, limit_tag);
      }
    }
    format(
        ") {\n"
        " ptr = $msg$$extensions$.ParseField(tag, ptr, "
        "internal_default_instance(), &$msg$_internal_metadata_, ctx);\n"
        " CHK_(ptr != nullptr);\n"
        " $next_tag$;\n"
        "}\n");
  }

  // Everything else is stored as an unknown field.
  format(
      "ptr = UnknownFieldParse(\n"
      " tag,\n"
      " $msg$_internal_metadata_.mutable_unknown_fields<"
      "$unknown_fields_type$>(),\n"
      " ptr, ctx);\n"
      "CHK_(ptr != nullptr);\n");
}
// Emits `switch (tag >> 3)` with one `case` per field in `fields`.
//
// Each case compares the low byte of the tag against the field's expected
// tag and parses the field on a match. Packable fields get a second branch
// for their fallback tag. Anything else jumps to `handle_unusual`, as does
// the `default` case. Fields for which ShouldRepeat() holds are parsed in a
// `do { ... } while (ExpectTag<tag>(ptr))` loop.
void ParseFunctionGenerator::GenerateFieldSwitch(
    Formatter& format, const std::vector<const FieldDescriptor*>& fields) {
  format("switch (tag >> 3) {\n");
  format.Indent();

  for (const FieldDescriptor* field : fields) {
    const bool is_split = ShouldSplit(field, options_);
    format.Set("field", FieldMemberName(field, is_split));
    PrintFieldComment(format, field);
    format("case $1$:\n", field->number());
    format.Indent();

    uint32_t secondary_tag = 0;
    const uint32_t primary_tag = ExpectedTag(field, &secondary_tag);
    const auto primary_wiretype = WireFormatLite::GetTagWireType(primary_tag);
    const uint32_t loop_tag =
        WireFormatLite::MakeTag(field->number(), primary_wiretype);
    const int loop_tag_size = io::CodedOutputStream::VarintSize32(loop_tag);
    const bool parse_run = ShouldRepeat(field, primary_wiretype);

    // Only the first tag byte is compared here.
    format("if (PROTOBUF_PREDICT_TRUE(static_cast<$uint8$>(tag) == $1$)) {\n",
           primary_tag & 0xFF);
    format.Indent();
    if (is_split) {
      format("$msg$PrepareSplitMessageForWrite();\n");
    }

    if (parse_run) {
      // The loop body begins by skipping a tag, so step back by one tag
      // before entering it for the first time.
      format(
          "ptr -= $1$;\n"
          "do {\n"
          " ptr += $1$;\n",
          loop_tag_size);
      format.Indent();
    }
    GenerateFieldBody(format, primary_wiretype, field);
    if (parse_run) {
      format.Outdent();
      format(
          " if (!ctx->DataAvailable(ptr)) break;\n"
          "} while (::$proto_ns$::internal::ExpectTag<$1$>(ptr));\n",
          loop_tag);
    }
    format.Outdent();

    if (secondary_tag != 0) {
      // Packable field: also accept the other encoding.
      format("} else if (static_cast<$uint8$>(tag) == $1$) {\n",
             secondary_tag & 0xFF);
      format.Indent();
      GenerateFieldBody(format, WireFormatLite::GetTagWireType(secondary_tag),
                        field);
      format.Outdent();
    }

    format(
        "} else\n"
        " goto handle_unusual;\n"
        "$next_tag$;\n");
    format.Outdent();
  }  // for loop over ordered fields

  format(
      "default:\n"
      " goto handle_unusual;\n");
  format.Outdent();
  format("} // switch\n");
}
namespace {

// Builds the name of the `::_pbi::TcParser::Fast*` function for `entry`.
//
// The name is composed of three parts appended to the `Fast` prefix:
//   1. A type code (e.g. `F32`, `V64`, `Z32`, `Ev`, `B`, `S`, `U`, `M`, `G`),
//      with a trailing `i` for inlined string/bytes fields.
//   2. A cardinality letter:
//        `S` for optional or implicit fields,
//        `R` for non-packed repeated,
//        `P` for packed repeated,
//        `O` for members of a real oneof.
//   3. The tag length, `1` or `2`.
//
// Returns an empty string (after a DFATAL log) for combinations that are not
// handled: packed enums that need validation, an unexpected UTF-8 check
// mode, or an unexpected field type.
std::string FieldParseFunctionName(
    const TailCallTableInfo::FieldEntryInfo& entry, const Options& options) {
  const FieldDescriptor* field = entry.field;
  std::string name = "::_pbi::TcParser::Fast";

  // String and bytes fields share the marker for inlined storage.
  const auto append_inlined_marker = [&]() {
    if (IsStringInlined(field, options)) {
      name += "i";
    }
  };

  switch (field->type()) {
    case FieldDescriptor::TYPE_FIXED32:
    case FieldDescriptor::TYPE_SFIXED32:
    case FieldDescriptor::TYPE_FLOAT:
      name += "F32";
      break;

    case FieldDescriptor::TYPE_FIXED64:
    case FieldDescriptor::TYPE_SFIXED64:
    case FieldDescriptor::TYPE_DOUBLE:
      name += "F64";
      break;

    case FieldDescriptor::TYPE_BOOL:
      name += "V8";
      break;

    case FieldDescriptor::TYPE_INT32:
    case FieldDescriptor::TYPE_UINT32:
      name += "V32";
      break;

    case FieldDescriptor::TYPE_INT64:
    case FieldDescriptor::TYPE_UINT64:
      name += "V64";
      break;

    case FieldDescriptor::TYPE_ENUM:
      if (HasPreservingUnknownEnumSemantics(field)) {
        // No validation needed: parsed like a plain 32-bit varint.
        name += "V32";
        break;
      }
      if (field->is_repeated() && field->is_packed()) {
        GOOGLE_LOG(DFATAL) << "Enum validation not handled: " << field->DebugString();
        return "";
      }
      name += entry.is_enum_range ? "Er" : "Ev";
      break;

    case FieldDescriptor::TYPE_SINT32:
      name += "Z32";
      break;

    case FieldDescriptor::TYPE_SINT64:
      name += "Z64";
      break;

    case FieldDescriptor::TYPE_BYTES:
      name += "B";
      append_inlined_marker();
      break;

    case FieldDescriptor::TYPE_STRING: {
      const auto mode = GetUtf8CheckMode(field, options);
      switch (mode) {
        case Utf8CheckMode::kNone:
          name += "B";
          break;
        case Utf8CheckMode::kVerify:
          name += "S";
          break;
        case Utf8CheckMode::kStrict:
          name += "U";
          break;
        default:
          GOOGLE_LOG(DFATAL) << "Mode not handled: "
                      << static_cast<int>(mode);
          return "";
      }
      append_inlined_marker();
      break;
    }

    case FieldDescriptor::TYPE_MESSAGE:
      name += "M";
      break;

    case FieldDescriptor::TYPE_GROUP:
      name += "G";
      break;

    default:
      GOOGLE_LOG(DFATAL) << "Type not handled: " << field->DebugString();
      return "";
  }

  // Cardinality letter; packed is checked first, then repeated, then oneof.
  if (field->is_packed()) {
    name += "P";
  } else if (field->is_repeated()) {
    name += "R";
  } else if (field->real_containing_oneof()) {
    name += "O";
  } else {
    name += "S";
  }

  // Append the tag length. Fast parsing only handles 1- or 2-byte tags.
  name += TagSize(field->number()) == 1 ? "1" : "2";
  return name;
}

}  // namespace
} // namespace cpp
} // namespace compiler
} // namespace protobuf
} // namespace google