| // Protocol Buffers - Google's data interchange format |
| // Copyright 2008 Google Inc. All rights reserved. |
| // https://developers.google.com/protocol-buffers/ |
| // |
| // Redistribution and use in source and binary forms, with or without |
| // modification, are permitted provided that the following conditions are |
| // met: |
| // |
| // * Redistributions of source code must retain the above copyright |
| // notice, this list of conditions and the following disclaimer. |
| // * Redistributions in binary form must reproduce the above |
| // copyright notice, this list of conditions and the following disclaimer |
| // in the documentation and/or other materials provided with the |
| // distribution. |
| // * Neither the name of Google Inc. nor the names of its |
| // contributors may be used to endorse or promote products derived from |
| // this software without specific prior written permission. |
| // |
| // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
| // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
| // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
| // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
| // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
| // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
| // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
| // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
| // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
| // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| |
| #include <google/protobuf/compiler/cpp/parse_function_generator.h> |
| |
| #include <algorithm> |
| #include <limits> |
| #include <string> |
| #include <utility> |
| |
| #include <google/protobuf/wire_format.h> |
| #include <google/protobuf/compiler/cpp/helpers.h> |
| |
| namespace google { |
| namespace protobuf { |
| namespace compiler { |
| namespace cpp { |
| |
| namespace { |
| using google::protobuf::internal::WireFormat; |
| using google::protobuf::internal::WireFormatLite; |
| |
| std::vector<const FieldDescriptor*> GetOrderedFields( |
| const Descriptor* descriptor, const Options& options) { |
| std::vector<const FieldDescriptor*> ordered_fields; |
| for (auto field : FieldRange(descriptor)) { |
| if (!IsFieldStripped(field, options)) { |
| ordered_fields.push_back(field); |
| } |
| } |
| std::sort(ordered_fields.begin(), ordered_fields.end(), |
| [](const FieldDescriptor* a, const FieldDescriptor* b) { |
| return a->number() < b->number(); |
| }); |
| return ordered_fields; |
| } |
| |
| bool HasInternalAccessors(const FieldOptions::CType ctype) { |
| return ctype == FieldOptions::STRING || ctype == FieldOptions::CORD; |
| } |
| |
| int TagSize(uint32_t field_number) { |
| if (field_number < 16) return 1; |
| GOOGLE_CHECK_LT(field_number, (1 << 14)) |
| << "coded tag for " << field_number << " too big for uint16_t"; |
| return 2; |
| } |
| |
| std::string FieldParseFunctionName( |
| const TailCallTableInfo::FieldEntryInfo& entry, const Options& options); |
| |
| bool IsFieldEligibleForFastParsing( |
| const TailCallTableInfo::FieldEntryInfo& entry, const Options& options, |
| MessageSCCAnalyzer* scc_analyzer) { |
| const auto* field = entry.field; |
| // Map, oneof, weak, and lazy fields are not handled on the fast path. |
| if (field->is_map() || field->real_containing_oneof() || |
| field->options().weak() || |
| IsImplicitWeakField(field, options, scc_analyzer) || |
| IsLazy(field, options, scc_analyzer)) { |
| return false; |
| } |
| |
| // We will check for a valid auxiliary index range later. However, we might |
| // want to change the value we check for inlined string fields. |
| int aux_idx = entry.aux_idx; |
| |
| switch (field->type()) { |
| case FieldDescriptor::TYPE_ENUM: |
| // If enum values are not validated at parse time, then this field can be |
| // handled on the fast path like an int32. |
| if (HasPreservingUnknownEnumSemantics(field)) { |
| break; |
| } |
| if (field->is_repeated() && field->is_packed()) { |
| return false; |
| } |
| break; |
| |
| // Some bytes fields can be handled on fast path. |
| case FieldDescriptor::TYPE_STRING: |
| case FieldDescriptor::TYPE_BYTES: |
| if (field->options().ctype() != FieldOptions::STRING) { |
| return false; |
| } |
| if (IsStringInlined(field, options)) { |
| GOOGLE_CHECK(!field->is_repeated()); |
| // For inlined strings, the donation state index is stored in the |
| // `aux_idx` field of the fast parsing info. We need to check the range |
| // of that value instead of the auxiliary index. |
| aux_idx = entry.inlined_string_idx; |
| } |
| break; |
| |
| default: |
| break; |
| } |
| |
| if (HasHasbit(field)) { |
| // The tailcall parser can only update the first 32 hasbits. Fields with |
| // has-bits beyond the first 32 are handled by mini parsing/fallback. |
| GOOGLE_CHECK_GE(entry.hasbit_idx, 0) << field->DebugString(); |
| if (entry.hasbit_idx >= 32) return false; |
| } |
| |
| // If the field needs auxiliary data, then the aux index is needed. This |
| // must fit in a uint8_t. |
| if (aux_idx > std::numeric_limits<uint8_t>::max()) { |
| return false; |
| } |
| |
| // The largest tag that can be read by the tailcall parser is two bytes |
| // when varint-coded. This allows 14 bits for the numeric tag value: |
| // byte 0 byte 1 |
| // 1nnnnttt 0nnnnnnn |
| // ^^^^^^^ ^^^^^^^ |
| if (field->number() >= 1 << 11) return false; |
| |
| return true; |
| } |
| |
| std::vector<TailCallTableInfo::FastFieldInfo> SplitFastFieldsForSize( |
| const std::vector<TailCallTableInfo::FieldEntryInfo>& field_entries, |
| int table_size_log2, const Options& options, |
| MessageSCCAnalyzer* scc_analyzer) { |
| std::vector<TailCallTableInfo::FastFieldInfo> result(1 << table_size_log2); |
| const uint32_t idx_mask = result.size() - 1; |
| |
| for (const auto& entry : field_entries) { |
| if (!IsFieldEligibleForFastParsing(entry, options, scc_analyzer)) { |
| continue; |
| } |
| |
| const auto* field = entry.field; |
| uint32_t tag = WireFormat::MakeTag(field); |
| |
| // Construct the varint-coded tag. If it is more than 7 bits, we need to |
| // shift the high bits and add a continue bit. |
| if (uint32_t hibits = tag & 0xFFFFFF80) { |
| tag = tag + hibits + 128; // tag = lobits + 2*hibits + 128 |
| } |
| |
| // The field index is determined by the low bits of the field number, where |
| // the table size determines the width of the mask. The largest table |
| // supported is 32 entries. The parse loop uses these bits directly, so that |
| // the dispatch does not require arithmetic: |
| // byte 0 byte 1 |
| // tag: 1nnnnttt 0nnnnnnn |
| // ^^^^^ |
| // idx (table_size_log2=5) |
| // This means that any field number that does not fit in the lower 4 bits |
| // will always have the top bit of its table index asserted. |
| const uint32_t fast_idx = (tag >> 3) & idx_mask; |
| |
| TailCallTableInfo::FastFieldInfo& info = result[fast_idx]; |
| if (info.field != nullptr) { |
| // This field entry is already filled. |
| continue; |
| } |
| |
| // Fill in this field's entry: |
| GOOGLE_CHECK(info.func_name.empty()) << info.func_name; |
| info.func_name = FieldParseFunctionName(entry, options); |
| info.field = field; |
| info.coded_tag = tag; |
| // If this field does not have presence, then it can set an out-of-bounds |
| // bit (tailcall parsing uses a uint64_t for hasbits, but only stores 32). |
| info.hasbit_idx = HasHasbit(field) ? entry.hasbit_idx : 63; |
| if (IsStringInlined(field, options)) { |
| GOOGLE_CHECK(!field->is_repeated()); |
| info.aux_idx = static_cast<uint8_t>(entry.inlined_string_idx); |
| } else { |
| info.aux_idx = static_cast<uint8_t>(entry.aux_idx); |
| } |
| } |
| return result; |
| } |
| |
| // Filter out fields that will be handled by mini parsing. |
| std::vector<const FieldDescriptor*> FilterMiniParsedFields( |
| const std::vector<const FieldDescriptor*>& fields, const Options& options, |
| MessageSCCAnalyzer* scc_analyzer) { |
| std::vector<const FieldDescriptor*> generated_fallback_fields; |
| |
| for (const auto* field : fields) { |
| bool handled = false; |
| switch (field->type()) { |
| case FieldDescriptor::TYPE_DOUBLE: |
| case FieldDescriptor::TYPE_FLOAT: |
| case FieldDescriptor::TYPE_FIXED32: |
| case FieldDescriptor::TYPE_SFIXED32: |
| case FieldDescriptor::TYPE_FIXED64: |
| case FieldDescriptor::TYPE_SFIXED64: |
| case FieldDescriptor::TYPE_BOOL: |
| case FieldDescriptor::TYPE_UINT32: |
| case FieldDescriptor::TYPE_SINT32: |
| case FieldDescriptor::TYPE_INT32: |
| case FieldDescriptor::TYPE_UINT64: |
| case FieldDescriptor::TYPE_SINT64: |
| case FieldDescriptor::TYPE_INT64: |
| // These are handled by MiniParse, so we don't need any generated |
| // fallback code. |
| handled = true; |
| break; |
| |
| case FieldDescriptor::TYPE_ENUM: |
| if (field->is_repeated() && !HasPreservingUnknownEnumSemantics(field)) { |
| // TODO(b/206890171): handle packed repeated closed enums |
| // Non-packed repeated can be handled using tables, but we still |
| // need to generate fallback code for all repeated enums in order to |
| // handle packed encoding. This is because of the lite/full split |
| // when handling invalid enum values in a packed field. |
| handled = false; |
| } else { |
| handled = true; |
| } |
| break; |
| |
| case FieldDescriptor::TYPE_BYTES: |
| case FieldDescriptor::TYPE_STRING: |
| if (IsStringInlined(field, options)) { |
| // TODO(b/198211897): support InilnedStringField. |
| handled = false; |
| } else { |
| handled = true; |
| } |
| break; |
| |
| case FieldDescriptor::TYPE_MESSAGE: |
| case FieldDescriptor::TYPE_GROUP: |
| // TODO(b/210762816): support remaining field types. |
| if (field->is_map() || IsWeak(field, options) || |
| IsImplicitWeakField(field, options, scc_analyzer) || |
| IsLazy(field, options, scc_analyzer)) { |
| handled = false; |
| } else { |
| handled = true; |
| } |
| break; |
| |
| default: |
| handled = false; |
| break; |
| } |
| if (!handled) generated_fallback_fields.push_back(field); |
| } |
| |
| return generated_fallback_fields; |
| } |
| |
| } // namespace |
| |
| TailCallTableInfo::TailCallTableInfo( |
| const Descriptor* descriptor, const Options& options, |
| const std::vector<const FieldDescriptor*>& ordered_fields, |
| const std::vector<int>& has_bit_indices, |
| const std::vector<int>& inlined_string_indices, |
| MessageSCCAnalyzer* scc_analyzer) { |
| int oneof_count = descriptor->real_oneof_decl_count(); |
| // If this message has any oneof fields, store the case offset in the first |
| // auxiliary entry. |
| if (oneof_count > 0) { |
| GOOGLE_LOG_IF(DFATAL, ordered_fields.empty()) |
| << "Invalid message: " << descriptor->full_name() << " has " |
| << oneof_count << " oneof declarations, but no fields"; |
| aux_entries.push_back(StrCat("_fl::Offset{offsetof(", |
| ClassName(descriptor), |
| ", _impl_._oneof_case_)}")); |
| } |
| |
| // If this message has any inlined string fields, store the donation state |
| // offset in the second auxiliary entry. |
| if (!inlined_string_indices.empty()) { |
| aux_entries.resize(2); // pad if necessary |
| aux_entries[1] = |
| StrCat("_fl::Offset{offsetof(", ClassName(descriptor), |
| ", _impl_._inlined_string_donated_)}"); |
| } |
| |
| // Fill in mini table entries. |
| for (const FieldDescriptor* field : ordered_fields) { |
| field_entries.push_back( |
| {field, (HasHasbit(field) ? has_bit_indices[field->index()] : -1)}); |
| auto& entry = field_entries.back(); |
| |
| if (field->type() == FieldDescriptor::TYPE_MESSAGE || |
| field->type() == FieldDescriptor::TYPE_GROUP) { |
| // Message-typed fields have a FieldAux with the default instance pointer. |
| if (field->is_map()) { |
| // TODO(b/205904770): generate aux entries for maps |
| } else if (IsWeak(field, options)) { |
| // Don't generate anything for weak fields. They are handled by the |
| // generated fallback. |
| } else if (IsImplicitWeakField(field, options, scc_analyzer)) { |
| // Implicit weak fields don't need to store a default instance pointer. |
| } else if (IsLazy(field, options, scc_analyzer)) { |
| // Lazy fields are handled by the generated fallback function. |
| } else { |
| field_entries.back().aux_idx = aux_entries.size(); |
| const Descriptor* field_type = field->message_type(); |
| aux_entries.push_back(StrCat( |
| "reinterpret_cast<const ", QualifiedClassName(field_type, options), |
| "*>(&", QualifiedDefaultInstanceName(field_type, options), ")")); |
| } |
| } else if (field->type() == FieldDescriptor::TYPE_ENUM && |
| !HasPreservingUnknownEnumSemantics(field)) { |
| // Enum fields which preserve unknown values (proto3 behavior) are |
| // effectively int32 fields with respect to parsing -- i.e., the value |
| // does not need to be validated at parse time. |
| // |
| // Enum fields which do not preserve unknown values (proto2 behavior) use |
| // a FieldAux to store validation information. If the enum values are |
| // sequential (and within a range we can represent), then the FieldAux |
| // entry represents the range using the minimum value (which must fit in |
| // an int16_t) and count (a uint16_t). Otherwise, the entry holds a |
| // pointer to the generated Name_IsValid function. |
| |
| entry.aux_idx = aux_entries.size(); |
| const EnumDescriptor* enum_type = field->enum_type(); |
| GOOGLE_CHECK_GT(enum_type->value_count(), 0) << enum_type->DebugString(); |
| |
| // Check if the enum values are a single, contiguous range. |
| std::vector<int> enum_values; |
| for (int i = 0, N = enum_type->value_count(); i < N; ++i) { |
| enum_values.push_back(enum_type->value(i)->number()); |
| } |
| auto values_begin = enum_values.begin(); |
| auto values_end = enum_values.end(); |
| std::sort(values_begin, values_end); |
| enum_values.erase(std::unique(values_begin, values_end), values_end); |
| |
| if (enum_values.back() - enum_values[0] == enum_values.size() - 1 && |
| enum_values[0] >= std::numeric_limits<int16_t>::min() && |
| enum_values[0] <= std::numeric_limits<int16_t>::max() && |
| enum_values.size() <= std::numeric_limits<uint16_t>::max()) { |
| entry.is_enum_range = true; |
| aux_entries.push_back( |
| StrCat(enum_values[0], ", ", enum_values.size())); |
| } else { |
| entry.is_enum_range = false; |
| aux_entries.push_back( |
| StrCat(QualifiedClassName(enum_type, options), "_IsValid")); |
| } |
| } else if ((field->type() == FieldDescriptor::TYPE_STRING || |
| field->type() == FieldDescriptor::TYPE_BYTES) && |
| IsStringInlined(field, options)) { |
| GOOGLE_CHECK(!field->is_repeated()); |
| // Inlined strings have an extra marker to represent their donation state. |
| int idx = inlined_string_indices[field->index()]; |
| // For mini parsing, the donation state index is stored as an `offset` |
| // auxiliary entry. |
| entry.aux_idx = aux_entries.size(); |
| aux_entries.push_back(StrCat("_fl::Offset{", idx, "}")); |
| // For fast table parsing, the donation state index is stored instead of |
| // the aux_idx (this will limit the range to 8 bits). |
| entry.inlined_string_idx = idx; |
| } |
| } |
| |
| // Choose the smallest fast table that covers the maximum number of fields. |
| table_size_log2 = 0; // fallback value |
| int num_fast_fields = -1; |
| for (int try_size_log2 : {0, 1, 2, 3, 4, 5}) { |
| size_t try_size = 1 << try_size_log2; |
| auto split_fields = SplitFastFieldsForSize(field_entries, try_size_log2, |
| options, scc_analyzer); |
| GOOGLE_CHECK_EQ(split_fields.size(), try_size); |
| int try_num_fast_fields = 0; |
| for (const auto& info : split_fields) { |
| if (info.field != nullptr) ++try_num_fast_fields; |
| } |
| // Use this size if (and only if) it covers more fields. |
| if (try_num_fast_fields > num_fast_fields) { |
| fast_path_fields = std::move(split_fields); |
| table_size_log2 = try_size_log2; |
| num_fast_fields = try_num_fast_fields; |
| } |
| // The largest table we allow has the same number of entries as the message |
| // has fields, rounded up to the next power of 2 (e.g., a message with 5 |
| // fields can have a fast table of size 8). A larger table *might* cover |
| // more fields in certain cases, but a larger table in that case would have |
| // mostly empty entries; so, we cap the size to avoid pathologically sparse |
| // tables. |
| if (try_size > ordered_fields.size()) { |
| break; |
| } |
| } |
| |
| // Filter out fields that are handled by MiniParse. We don't need to generate |
| // a fallback for these, which saves code size. |
| fallback_fields = FilterMiniParsedFields(ordered_fields, options, |
| scc_analyzer); |
| |
| // If there are no fallback fields, and at most one extension range, the |
| // parser can use a generic fallback function. Otherwise, a message-specific |
| // fallback routine is needed. |
| use_generated_fallback = |
| !fallback_fields.empty() || descriptor->extension_range_count() > 1; |
| } |
| |
| ParseFunctionGenerator::ParseFunctionGenerator( |
| const Descriptor* descriptor, int max_has_bit_index, |
| const std::vector<int>& has_bit_indices, |
| const std::vector<int>& inlined_string_indices, const Options& options, |
| MessageSCCAnalyzer* scc_analyzer, |
| const std::map<std::string, std::string>& vars) |
| : descriptor_(descriptor), |
| scc_analyzer_(scc_analyzer), |
| options_(options), |
| variables_(vars), |
| inlined_string_indices_(inlined_string_indices), |
| ordered_fields_(GetOrderedFields(descriptor_, options_)), |
| num_hasbits_(max_has_bit_index) { |
| if (should_generate_tctable()) { |
| tc_table_info_.reset(new TailCallTableInfo( |
| descriptor_, options_, ordered_fields_, has_bit_indices, |
| inlined_string_indices, scc_analyzer)); |
| } |
| SetCommonVars(options_, &variables_); |
| SetCommonMessageDataVariables(descriptor_, &variables_); |
| SetUnknownFieldsVariable(descriptor_, options_, &variables_); |
| variables_["classname"] = ClassName(descriptor, false); |
| } |
| |
| void ParseFunctionGenerator::GenerateMethodDecls(io::Printer* printer) { |
| Formatter format(printer, variables_); |
| if (should_generate_tctable()) { |
| format.Outdent(); |
| if (should_generate_guarded_tctable()) { |
| format("#ifdef PROTOBUF_TAIL_CALL_TABLE_PARSER_ENABLED\n"); |
| } |
| format( |
| " private:\n" |
| " static const char* Tct_ParseFallback(PROTOBUF_TC_PARAM_DECL);\n" |
| " public:\n"); |
| if (should_generate_guarded_tctable()) { |
| format("#endif\n"); |
| } |
| format.Indent(); |
| } |
| format( |
| "const char* _InternalParse(const char* ptr, " |
| "::$proto_ns$::internal::ParseContext* ctx) final;\n"); |
| } |
| |
| void ParseFunctionGenerator::GenerateMethodImpls(io::Printer* printer) { |
| Formatter format(printer, variables_); |
| bool need_parse_function = true; |
| if (descriptor_->options().message_set_wire_format()) { |
| // Special-case MessageSet. |
| need_parse_function = false; |
| format( |
| "const char* $classname$::_InternalParse(const char* ptr,\n" |
| " ::_pbi::ParseContext* ctx) {\n" |
| "$annotate_deserialize$"); |
| if (!options_.unverified_lazy_message_sets && |
| ShouldVerify(descriptor_, options_, scc_analyzer_)) { |
| format( |
| " ctx->set_lazy_eager_verify_func(&$classname$::InternalVerify);\n"); |
| } |
| format( |
| " return $extensions$.ParseMessageSet(ptr, \n" |
| " internal_default_instance(), &_internal_metadata_, ctx);\n" |
| "}\n"); |
| } |
| if (!should_generate_tctable()) { |
| if (need_parse_function) { |
| GenerateLoopingParseFunction(format); |
| } |
| return; |
| } |
| if (should_generate_guarded_tctable()) { |
| format("#ifdef PROTOBUF_TAIL_CALL_TABLE_PARSER_ENABLED\n\n"); |
| } |
| if (need_parse_function) { |
| GenerateTailcallParseFunction(format); |
| } |
| if (tc_table_info_->use_generated_fallback) { |
| GenerateTailcallFallbackFunction(format); |
| } |
| if (should_generate_guarded_tctable()) { |
| if (need_parse_function) { |
| format("\n#else // PROTOBUF_TAIL_CALL_TABLE_PARSER_ENABLED\n\n"); |
| GenerateLoopingParseFunction(format); |
| } |
| format("\n#endif // PROTOBUF_TAIL_CALL_TABLE_PARSER_ENABLED\n"); |
| } |
| } |
| |
| bool ParseFunctionGenerator::should_generate_tctable() const { |
| if (options_.tctable_mode == Options::kTCTableNever) { |
| return false; |
| } |
| return true; |
| } |
| |
| void ParseFunctionGenerator::GenerateTailcallParseFunction(Formatter& format) { |
| GOOGLE_CHECK(should_generate_tctable()); |
| |
| // Generate an `_InternalParse` that starts the tail-calling loop. |
| format( |
| "const char* $classname$::_InternalParse(\n" |
| " const char* ptr, ::_pbi::ParseContext* ctx) {\n" |
| "$annotate_deserialize$" |
| " ptr = ::_pbi::TcParser::ParseLoop(this, ptr, ctx, " |
| "&_table_.header);\n"); |
| format( |
| " return ptr;\n" |
| "}\n\n"); |
| } |
| |
| void ParseFunctionGenerator::GenerateTailcallFallbackFunction( |
| Formatter& format) { |
| GOOGLE_CHECK(should_generate_tctable()); |
| format( |
| "const char* $classname$::Tct_ParseFallback(PROTOBUF_TC_PARAM_DECL) {\n" |
| "#define CHK_(x) if (PROTOBUF_PREDICT_FALSE(!(x))) return nullptr\n"); |
| format.Indent(); |
| format("auto* typed_msg = static_cast<$classname$*>(msg);\n"); |
| |
| if (num_hasbits_ > 0) { |
| // Sync hasbits |
| format("typed_msg->_impl_._has_bits_[0] = hasbits;\n"); |
| } |
| format("uint32_t tag = data.tag();\n"); |
| |
| format.Set("msg", "typed_msg->"); |
| format.Set("this", "typed_msg"); |
| format.Set("has_bits", "typed_msg->_impl_._has_bits_"); |
| format.Set("next_tag", "goto next_tag"); |
| GenerateParseIterationBody(format, descriptor_, |
| tc_table_info_->fallback_fields); |
| |
| format.Outdent(); |
| format( |
| "next_tag:\n" |
| "message_done:\n" |
| " return ptr;\n" |
| "#undef CHK_\n" |
| "}\n"); |
| } |
| |
| struct SkipEntry16 { |
| uint16_t skipmap; |
| uint16_t field_entry_offset; |
| }; |
| struct SkipEntryBlock { |
| uint32_t first_fnum; |
| std::vector<SkipEntry16> entries; |
| }; |
| struct NumToEntryTable { |
| uint32_t skipmap32; // for fields #1 - #32 |
| std::vector<SkipEntryBlock> blocks; |
| // Compute the number of uint16_t required to represent this table. |
| int size16() const { |
| int size = 2; // for the termination field# |
| for (const auto& block : blocks) { |
| // 2 for the field#, 1 for a count of skip entries, 2 for each entry. |
| size += 3 + block.entries.size() * 2; |
| } |
| return size; |
| } |
| }; |
| |
| static NumToEntryTable MakeNumToEntryTable( |
| const std::vector<const FieldDescriptor*>& field_descriptors); |
| |
| void ParseFunctionGenerator::GenerateDataDecls(io::Printer* printer) { |
| if (!should_generate_tctable()) { |
| return; |
| } |
| Formatter format(printer, variables_); |
| if (should_generate_guarded_tctable()) { |
| format.Outdent(); |
| format("#ifdef PROTOBUF_TAIL_CALL_TABLE_PARSER_ENABLED\n"); |
| format.Indent(); |
| } |
| auto field_num_to_entry_table = MakeNumToEntryTable(ordered_fields_); |
| format( |
| "static const ::$proto_ns$::internal::" |
| "TcParseTable<$1$, $2$, $3$, $4$, $5$> _table_;\n", |
| tc_table_info_->table_size_log2, ordered_fields_.size(), |
| tc_table_info_->aux_entries.size(), CalculateFieldNamesSize(), |
| field_num_to_entry_table.size16()); |
| if (should_generate_guarded_tctable()) { |
| format.Outdent(); |
| format("#endif // PROTOBUF_TAIL_CALL_TABLE_PARSER_ENABLED\n"); |
| format.Indent(); |
| } |
| } |
| |
| void ParseFunctionGenerator::GenerateDataDefinitions(io::Printer* printer) { |
| if (!should_generate_tctable()) { |
| return; |
| } |
| Formatter format(printer, variables_); |
| if (should_generate_guarded_tctable()) { |
| format("#ifdef PROTOBUF_TAIL_CALL_TABLE_PARSER_ENABLED\n"); |
| } |
| GenerateTailCallTable(format); |
| if (should_generate_guarded_tctable()) { |
| format("#endif // PROTOBUF_TAIL_CALL_TABLE_PARSER_ENABLED\n"); |
| } |
| } |
| |
| void ParseFunctionGenerator::GenerateLoopingParseFunction(Formatter& format) { |
| format( |
| "const char* $classname$::_InternalParse(const char* ptr, " |
| "::_pbi::ParseContext* ctx) {\n" |
| "$annotate_deserialize$" |
| "#define CHK_(x) if (PROTOBUF_PREDICT_FALSE(!(x))) goto failure\n"); |
| format.Indent(); |
| format.Set("msg", ""); |
| format.Set("this", "this"); |
| int hasbits_size = 0; |
| if (num_hasbits_ > 0) { |
| hasbits_size = (num_hasbits_ + 31) / 32; |
| } |
| // For now only optimize small hasbits. |
| if (hasbits_size != 1) hasbits_size = 0; |
| if (hasbits_size) { |
| format("_Internal::HasBits has_bits{};\n"); |
| format.Set("has_bits", "has_bits"); |
| } else { |
| format.Set("has_bits", "_impl_._has_bits_"); |
| } |
| format.Set("next_tag", "continue"); |
| format("while (!ctx->Done(&ptr)) {\n"); |
| format.Indent(); |
| |
| format( |
| "uint32_t tag;\n" |
| "ptr = ::_pbi::ReadTag(ptr, &tag);\n"); |
| GenerateParseIterationBody(format, descriptor_, ordered_fields_); |
| |
| format.Outdent(); |
| format("} // while\n"); |
| |
| format.Outdent(); |
| format("message_done:\n"); |
| if (hasbits_size) format(" _impl_._has_bits_.Or(has_bits);\n"); |
| |
| format( |
| " return ptr;\n" |
| "failure:\n" |
| " ptr = nullptr;\n" |
| " goto message_done;\n" |
| "#undef CHK_\n" |
| "}\n"); |
| } |
| |
| static NumToEntryTable MakeNumToEntryTable( |
| const std::vector<const FieldDescriptor*>& field_descriptors) { |
| NumToEntryTable num_to_entry_table; |
| num_to_entry_table.skipmap32 = static_cast<uint32_t>(-1); |
| |
| // skip_entry_block is the current block of SkipEntries that we're |
| // appending to. cur_block_first_fnum is the number of the first |
| // field represented by the block. |
| uint16_t field_entry_index = 0; |
| uint16_t N = field_descriptors.size(); |
| // First, handle field numbers 1-32, which affect only the initial |
| // skipmap32 and don't generate additional skip-entry blocks. |
| for (; field_entry_index != N; ++field_entry_index) { |
| auto* field_descriptor = field_descriptors[field_entry_index]; |
| if (field_descriptor->number() > 32) break; |
| auto skipmap32_index = field_descriptor->number() - 1; |
| num_to_entry_table.skipmap32 -= 1 << skipmap32_index; |
| } |
| // If all the field numbers were less than or equal to 32, we will have |
| // no further entries to process, and we are already done. |
| if (field_entry_index == N) return num_to_entry_table; |
| |
| SkipEntryBlock* block = nullptr; |
| bool start_new_block = true; |
| // To determine sparseness, track the field number corresponding to |
| // the start of the most recent skip entry. |
| uint32_t last_skip_entry_start = 0; |
| for (; field_entry_index != N; ++field_entry_index) { |
| auto* field_descriptor = field_descriptors[field_entry_index]; |
| uint32_t fnum = field_descriptor->number(); |
| GOOGLE_CHECK_GT(fnum, last_skip_entry_start); |
| if (start_new_block == false) { |
| // If the next field number is within 15 of the last_skip_entry_start, we |
| // continue writing just to that entry. If it's between 16 and 31 more, |
| // then we just extend the current block by one. If it's more than 31 |
| // more, we have to add empty skip entries in order to continue using the |
| // existing block. Obviously it's just 32 more, it doesn't make sense to |
| // start a whole new block, since new blocks mean having to write out |
| // their starting field number, which is 32 bits, as well as the size of |
| // the additional block, which is 16... while an empty SkipEntry16 only |
| // costs 32 bits. So if it was 48 more, it's a slight space win; we save |
| // 16 bits, but probably at the cost of slower run time. We're choosing |
| // 96 for now. |
| if (fnum - last_skip_entry_start > 96) start_new_block = true; |
| } |
| if (start_new_block) { |
| num_to_entry_table.blocks.push_back(SkipEntryBlock{fnum}); |
| block = &num_to_entry_table.blocks.back(); |
| start_new_block = false; |
| } |
| |
| auto skip_entry_num = (fnum - block->first_fnum) / 16; |
| auto skip_entry_index = (fnum - block->first_fnum) % 16; |
| while (skip_entry_num >= block->entries.size()) |
| block->entries.push_back({0xFFFF, field_entry_index}); |
| block->entries[skip_entry_num].skipmap -= 1 << (skip_entry_index); |
| |
| last_skip_entry_start = fnum - skip_entry_index; |
| } |
| return num_to_entry_table; |
| } |
| |
| void ParseFunctionGenerator::GenerateTailCallTable(Formatter& format) { |
| GOOGLE_CHECK(should_generate_tctable()); |
| // All entries without a fast-path parsing function need a fallback. |
| std::string fallback; |
| if (tc_table_info_->use_generated_fallback) { |
| fallback = ClassName(descriptor_) + "::Tct_ParseFallback"; |
| } else { |
| fallback = "::_pbi::TcParser::GenericFallback"; |
| if (GetOptimizeFor(descriptor_->file(), options_) == |
| FileOptions::LITE_RUNTIME) { |
| fallback += "Lite"; |
| } |
| } |
| |
| // For simplicity and speed, the table is not covering all proto |
| // configurations. This model uses a fallback to cover all situations that |
| // the table can't accommodate, together with unknown fields or extensions. |
| // These are number of fields over 32, fields with 3 or more tag bytes, |
| // maps, weak fields, lazy, more than 1 extension range. In the cases |
| // the table is sufficient we can use a generic routine, that just handles |
| // unknown fields and potentially an extension range. |
| auto field_num_to_entry_table = MakeNumToEntryTable(ordered_fields_); |
| format( |
| "PROTOBUF_ATTRIBUTE_INIT_PRIORITY1\n" |
| "const ::_pbi::TcParseTable<$1$, $2$, $3$, $4$, $5$> " |
| "$classname$::_table_ = " |
| "{\n", |
| tc_table_info_->table_size_log2, ordered_fields_.size(), |
| tc_table_info_->aux_entries.size(), CalculateFieldNamesSize(), |
| field_num_to_entry_table.size16()); |
| { |
| auto table_scope = format.ScopedIndent(); |
| format("{\n"); |
| { |
| auto header_scope = format.ScopedIndent(); |
| if (num_hasbits_ > 0 || IsMapEntryMessage(descriptor_)) { |
| format("PROTOBUF_FIELD_OFFSET($classname$, _impl_._has_bits_),\n"); |
| } else { |
| format("0, // no _has_bits_\n"); |
| } |
| if (descriptor_->extension_range_count() == 1) { |
| format( |
| "PROTOBUF_FIELD_OFFSET($classname$, $extensions$),\n" |
| "$1$, $2$, // extension_range_{low,high}\n", |
| descriptor_->extension_range(0)->start, |
| descriptor_->extension_range(0)->end); |
| } else { |
| format("0, 0, 0, // no _extensions_\n"); |
| } |
| format("$1$, $2$, // max_field_number, fast_idx_mask\n", |
| (ordered_fields_.empty() ? 0 : ordered_fields_.back()->number()), |
| (((1 << tc_table_info_->table_size_log2) - 1) << 3)); |
| format( |
| "offsetof(decltype(_table_), field_lookup_table),\n" |
| "$1$, // skipmap\n", |
| field_num_to_entry_table.skipmap32); |
| if (ordered_fields_.empty()) { |
| format( |
| "offsetof(decltype(_table_), field_names), // no field_entries\n"); |
| } else { |
| format("offsetof(decltype(_table_), field_entries),\n"); |
| } |
| |
| format( |
| "$1$, // num_field_entries\n" |
| "$2$, // num_aux_entries\n", |
| ordered_fields_.size(), tc_table_info_->aux_entries.size()); |
| if (tc_table_info_->aux_entries.empty()) { |
| format( |
| "offsetof(decltype(_table_), field_names), // no aux_entries\n"); |
| } else { |
| format("offsetof(decltype(_table_), aux_entries),\n"); |
| } |
| format( |
| "&$1$._instance,\n" |
| "$2$, // fallback\n" |
| "", |
| DefaultInstanceName(descriptor_, options_), fallback); |
| } |
| format("}, {{\n"); |
| { |
| // fast_entries[] |
| auto fast_scope = format.ScopedIndent(); |
| GenerateFastFieldEntries(format); |
| } |
| format("}}, {{\n"); |
| { |
| // field_lookup_table[] |
| auto field_lookup_scope = format.ScopedIndent(); |
| int line_entries = 0; |
| for (int i = 0, N = field_num_to_entry_table.blocks.size(); i < N; ++i) { |
| SkipEntryBlock& entry_block = field_num_to_entry_table.blocks[i]; |
| format("$1$, $2$, $3$,\n", entry_block.first_fnum & 65535, |
| entry_block.first_fnum / 65536, entry_block.entries.size()); |
| for (auto se16 : entry_block.entries) { |
| if (line_entries == 0) { |
| format("$1$, $2$,", se16.skipmap, se16.field_entry_offset); |
| ++line_entries; |
| } else if (line_entries < 5) { |
| format(" $1$, $2$,", se16.skipmap, se16.field_entry_offset); |
| ++line_entries; |
| } else { |
| format(" $1$, $2$,\n", se16.skipmap, se16.field_entry_offset); |
| line_entries = 0; |
| } |
| } |
| } |
| if (line_entries) format("\n"); |
| format("65535, 65535\n"); |
| } |
| if (ordered_fields_.empty()) { |
| GOOGLE_LOG_IF(DFATAL, !tc_table_info_->aux_entries.empty()) |
| << "Invalid message: " << descriptor_->full_name() << " has " |
| << tc_table_info_->aux_entries.size() |
| << " auxiliary field entries, but no fields"; |
| format( |
| "}},\n" |
| "// no field_entries, or aux_entries\n" |
| "{{\n"); |
| } else { |
| format("}}, {{\n"); |
| { |
| // field_entries[] |
| auto field_scope = format.ScopedIndent(); |
| GenerateFieldEntries(format); |
| } |
| if (tc_table_info_->aux_entries.empty()) { |
| format( |
| "}},\n" |
| "// no aux_entries\n" |
| "{{\n"); |
| } else { |
| format("}}, {{\n"); |
| { |
| // aux_entries[] |
| auto aux_scope = format.ScopedIndent(); |
| for (const std::string& aux_entry : tc_table_info_->aux_entries) { |
| format("{$1$},\n", aux_entry); |
| } |
| } |
| format("}}, {{\n"); |
| } |
| } // ordered_fields_.empty() |
| { |
| // field_names[] |
| auto field_name_scope = format.ScopedIndent(); |
| GenerateFieldNames(format); |
| } |
| format("}},\n"); |
| } |
| format("};\n\n"); // _table_ |
| } |
| |
| void ParseFunctionGenerator::GenerateFastFieldEntries(Formatter& format) { |
| for (const auto& info : tc_table_info_->fast_path_fields) { |
| if (info.field != nullptr) { |
| PrintFieldComment(format, info.field); |
| } |
| if (info.func_name.empty()) { |
| format("{::_pbi::TcParser::MiniParse, {}},\n"); |
| } else { |
| bool cold = ShouldSplit(info.field, options_); |
| format( |
| "{$1$,\n" |
| " {$2$, $3$, $4$, PROTOBUF_FIELD_OFFSET($classname$$5$, $6$)}},\n", |
| info.func_name, info.coded_tag, info.hasbit_idx, info.aux_idx, |
| cold ? "::Impl_::Split" : "", |
| cold ? FieldName(info.field) + "_" |
| : FieldMemberName(info.field, /*cold=*/false)); |
| } |
| } |
| } |
| |
| static void FormatFieldKind(Formatter& format, |
| const TailCallTableInfo::FieldEntryInfo& entry, |
| const Options& options, |
| MessageSCCAnalyzer* scc_analyzer) { |
| const FieldDescriptor* field = entry.field; |
| // Spell the field kind in proto language declaration order, starting with |
| // cardinality: |
| format("(::_fl::kFc"); |
| if (HasHasbit(field)) { |
| format("Optional"); |
| } else if (field->is_repeated()) { |
| format("Repeated"); |
| } else if (field->real_containing_oneof()) { |
| format("Oneof"); |
| } else { |
| format("Singular"); |
| } |
| |
| // The rest of the type uses convenience aliases: |
| format(" | ::_fl::k"); |
| if (field->is_repeated() && field->is_packed()) { |
| format("Packed"); |
| } |
| switch (field->type()) { |
| case FieldDescriptor::TYPE_DOUBLE: |
| format("Double"); |
| break; |
| case FieldDescriptor::TYPE_FLOAT: |
| format("Float"); |
| break; |
| case FieldDescriptor::TYPE_FIXED32: |
| format("Fixed32"); |
| break; |
| case FieldDescriptor::TYPE_SFIXED32: |
| format("SFixed32"); |
| break; |
| case FieldDescriptor::TYPE_FIXED64: |
| format("Fixed64"); |
| break; |
| case FieldDescriptor::TYPE_SFIXED64: |
| format("SFixed64"); |
| break; |
| case FieldDescriptor::TYPE_BOOL: |
| format("Bool"); |
| break; |
| case FieldDescriptor::TYPE_ENUM: |
| if (HasPreservingUnknownEnumSemantics(field)) { |
| // No validation is required. |
| format("OpenEnum"); |
| } else if (entry.is_enum_range) { |
| // Validation is done by range check (start/length in FieldAux). |
| format("EnumRange"); |
| } else { |
| // Validation uses the generated _IsValid function. |
| format("Enum"); |
| } |
| break; |
| case FieldDescriptor::TYPE_UINT32: |
| format("UInt32"); |
| break; |
| case FieldDescriptor::TYPE_SINT32: |
| format("SInt32"); |
| break; |
| case FieldDescriptor::TYPE_INT32: |
| format("Int32"); |
| break; |
| case FieldDescriptor::TYPE_UINT64: |
| format("UInt64"); |
| break; |
| case FieldDescriptor::TYPE_SINT64: |
| format("SInt64"); |
| break; |
| case FieldDescriptor::TYPE_INT64: |
| format("Int64"); |
| break; |
| |
| case FieldDescriptor::TYPE_BYTES: |
| format("Bytes"); |
| break; |
| case FieldDescriptor::TYPE_STRING: { |
| auto mode = GetUtf8CheckMode(field, options); |
| switch (mode) { |
| case Utf8CheckMode::kStrict: |
| format("Utf8String"); |
| break; |
| case Utf8CheckMode::kVerify: |
| format("RawString"); |
| break; |
| case Utf8CheckMode::kNone: |
| // Treat LITE_RUNTIME strings as bytes. |
| format("Bytes"); |
| break; |
| default: |
| GOOGLE_LOG(FATAL) << "Invalid Utf8CheckMode (" << static_cast<int>(mode) |
| << ") for " << field->DebugString(); |
| } |
| break; |
| } |
| |
| case FieldDescriptor::TYPE_GROUP: |
| format("Message | ::_fl::kRepGroup"); |
| break; |
| case FieldDescriptor::TYPE_MESSAGE: |
| if (field->is_map()) { |
| format("Map"); |
| } else { |
| format("Message"); |
| if (IsLazy(field, options, scc_analyzer)) { |
| format(" | ::_fl::kRepLazy"); |
| } else if (IsImplicitWeakField(field, options, scc_analyzer)) { |
| format(" | ::_fl::kRepIWeak"); |
| } |
| } |
| break; |
| } |
| |
| // Fill in extra information about string and bytes field representations. |
| if (field->type() == FieldDescriptor::TYPE_BYTES || |
| field->type() == FieldDescriptor::TYPE_STRING) { |
| if (field->is_repeated()) { |
| format(" | ::_fl::kRepSString"); |
| } else { |
| format(" | ::_fl::kRepAString"); |
| } |
| } |
| |
| format(")"); |
| } |
| |
| void ParseFunctionGenerator::GenerateFieldEntries(Formatter& format) { |
| for (const auto& entry : tc_table_info_->field_entries) { |
| const FieldDescriptor* field = entry.field; |
| PrintFieldComment(format, field); |
| format("{"); |
| if (IsWeak(field, options_)) { |
| // Weak fields are handled by the generated fallback function. |
| // (These are handled by legacy Google-internal logic.) |
| format("/* weak */ 0, 0, 0, 0"); |
| } else { |
| const OneofDescriptor* oneof = field->real_containing_oneof(); |
| bool cold = ShouldSplit(field, options_); |
| format("PROTOBUF_FIELD_OFFSET($classname$$1$, $2$), $3$, $4$,\n ", |
| cold ? "::Impl_::Split" : "", |
| cold ? FieldName(field) + "_" |
| : FieldMemberName(field, /*cold=*/false), |
| (oneof ? oneof->index() : entry.hasbit_idx), entry.aux_idx); |
| FormatFieldKind(format, entry, options_, scc_analyzer_); |
| } |
| format("},\n"); |
| } |
| } |
| |
| static constexpr int kMaxNameLength = 255; |
| |
| int ParseFunctionGenerator::CalculateFieldNamesSize() const { |
| // The full name of the message appears first. |
| int size = std::min(static_cast<int>(descriptor_->full_name().size()), |
| kMaxNameLength); |
| int lengths_size = 1; |
| for (const auto& entry : tc_table_info_->field_entries) { |
| const FieldDescriptor* field = entry.field; |
| GOOGLE_CHECK_LE(field->name().size(), kMaxNameLength); |
| size += field->name().size(); |
| lengths_size += 1; |
| } |
| // align to an 8-byte boundary |
| lengths_size = (lengths_size + 7) & -8; |
| return size + lengths_size + 1; |
| } |
| |
| static void FormatOctal(Formatter& format, int size) { |
| int octal_size = ((size >> 6) & 3) * 100 + // |
| ((size >> 3) & 7) * 10 + // |
| ((size >> 0) & 7); |
| format("\\$1$", octal_size); |
| } |
| |
| void ParseFunctionGenerator::GenerateFieldNames(Formatter& format) { |
| // First, we output the size of each string, as an unsigned byte. The first |
| // string is the message name. |
| int count = 1; |
| format("\""); |
| FormatOctal(format, |
| std::min(static_cast<int>(descriptor_->full_name().size()), 255)); |
| for (const auto& entry : tc_table_info_->field_entries) { |
| FormatOctal(format, entry.field->name().size()); |
| ++count; |
| } |
| while (count & 7) { // align to an 8-byte boundary |
| format("\\0"); |
| ++count; |
| } |
| format("\"\n"); |
| // The message name is stored at the beginning of the string |
| std::string message_name = descriptor_->full_name(); |
| if (message_name.size() > kMaxNameLength) { |
| static constexpr int kNameHalfLength = (kMaxNameLength - 3) / 2; |
| message_name = StrCat( |
| message_name.substr(0, kNameHalfLength), "...", |
| message_name.substr(message_name.size() - kNameHalfLength)); |
| } |
| format("\"$1$\"\n", message_name); |
| // Then we output the actual field names |
| for (const auto& entry : tc_table_info_->field_entries) { |
| const FieldDescriptor* field = entry.field; |
| format("\"$1$\"\n", field->name()); |
| } |
| } |
| |
| void ParseFunctionGenerator::GenerateArenaString(Formatter& format, |
| const FieldDescriptor* field) { |
| if (HasHasbit(field)) { |
| format("_Internal::set_has_$1$(&$has_bits$);\n", FieldName(field)); |
| } |
| format( |
| "if (arena != nullptr) {\n" |
| " ptr = ctx->ReadArenaString(ptr, &$msg$$field$, arena"); |
| if (IsStringInlined(field, options_)) { |
| GOOGLE_DCHECK(!inlined_string_indices_.empty()); |
| int inlined_string_index = inlined_string_indices_[field->index()]; |
| GOOGLE_DCHECK_GT(inlined_string_index, 0); |
| format(", &$msg$$inlined_string_donated_array$[0], $1$, $this$", |
| inlined_string_index); |
| } else { |
| GOOGLE_DCHECK(field->default_value_string().empty()); |
| } |
| format( |
| ");\n" |
| "} else {\n" |
| " ptr = ::_pbi::InlineGreedyStringParser(" |
| "$msg$$field$.MutableNoCopy(nullptr), ptr, ctx);\n" |
| "}\n" |
| "const std::string* str = &$msg$$field$.Get(); (void)str;\n"); |
| } |
| |
| void ParseFunctionGenerator::GenerateStrings(Formatter& format, |
| const FieldDescriptor* field, |
| bool check_utf8) { |
| FieldOptions::CType ctype = FieldOptions::STRING; |
| if (!options_.opensource_runtime) { |
| // Open source doesn't support other ctypes; |
| ctype = field->options().ctype(); |
| } |
| if (!field->is_repeated() && !options_.opensource_runtime && |
| GetOptimizeFor(field->file(), options_) != FileOptions::LITE_RUNTIME && |
| // For now only use arena string for strings with empty defaults. |
| field->default_value_string().empty() && |
| !field->real_containing_oneof() && ctype == FieldOptions::STRING) { |
| GenerateArenaString(format, field); |
| } else { |
| std::string parser_name; |
| switch (ctype) { |
| case FieldOptions::STRING: |
| parser_name = "GreedyStringParser"; |
| break; |
| case FieldOptions::CORD: |
| parser_name = "CordParser"; |
| break; |
| case FieldOptions::STRING_PIECE: |
| parser_name = "StringPieceParser"; |
| break; |
| } |
| format( |
| "auto str = $msg$$1$$2$_$name$();\n" |
| "ptr = ::_pbi::Inline$3$(str, ptr, ctx);\n", |
| HasInternalAccessors(ctype) ? "_internal_" : "", |
| field->is_repeated() && !field->is_packable() ? "add" : "mutable", |
| parser_name); |
| } |
| // It is intentionally placed before VerifyUTF8 because it doesn't make sense |
| // to verify UTF8 when we already know parsing failed. |
| format("CHK_(ptr);\n"); |
| if (!check_utf8) return; // return if this is a bytes field |
| auto level = GetUtf8CheckMode(field, options_); |
| switch (level) { |
| case Utf8CheckMode::kNone: |
| return; |
| case Utf8CheckMode::kVerify: |
| format("#ifndef NDEBUG\n"); |
| break; |
| case Utf8CheckMode::kStrict: |
| format("CHK_("); |
| break; |
| } |
| std::string field_name; |
| field_name = "nullptr"; |
| if (HasDescriptorMethods(field->file(), options_)) { |
| field_name = StrCat("\"", field->full_name(), "\""); |
| } |
| format("::_pbi::VerifyUTF8(str, $1$)", field_name); |
| switch (level) { |
| case Utf8CheckMode::kNone: |
| return; |
| case Utf8CheckMode::kVerify: |
| format( |
| ";\n" |
| "#endif // !NDEBUG\n"); |
| break; |
| case Utf8CheckMode::kStrict: |
| format(");\n"); |
| break; |
| } |
| } |
| |
| void ParseFunctionGenerator::GenerateLengthDelim(Formatter& format, |
| const FieldDescriptor* field) { |
| if (field->is_packable()) { |
| if (field->type() == FieldDescriptor::TYPE_ENUM && |
| !HasPreservingUnknownEnumSemantics(field)) { |
| std::string enum_type = QualifiedClassName(field->enum_type(), options_); |
| format( |
| "ptr = " |
| "::$proto_ns$::internal::Packed$1$Parser<$unknown_fields_type$>(" |
| "$msg$_internal_mutable_$name$(), ptr, ctx, $2$_IsValid, " |
| "&$msg$_internal_metadata_, $3$);\n", |
| DeclaredTypeMethodName(field->type()), enum_type, field->number()); |
| } else { |
| format( |
| "ptr = ::$proto_ns$::internal::Packed$1$Parser(" |
| "$msg$_internal_mutable_$name$(), ptr, ctx);\n", |
| DeclaredTypeMethodName(field->type())); |
| } |
| format("CHK_(ptr);\n"); |
| } else { |
| auto field_type = field->type(); |
| switch (field_type) { |
| case FieldDescriptor::TYPE_STRING: |
| GenerateStrings(format, field, true /* utf8 */); |
| break; |
| case FieldDescriptor::TYPE_BYTES: |
| GenerateStrings(format, field, false /* utf8 */); |
| break; |
| case FieldDescriptor::TYPE_MESSAGE: { |
| if (field->is_map()) { |
| const FieldDescriptor* val = field->message_type()->map_value(); |
| GOOGLE_CHECK(val); |
| if (val->type() == FieldDescriptor::TYPE_ENUM && |
| !HasPreservingUnknownEnumSemantics(field)) { |
| format( |
| "auto object = " |
| "::$proto_ns$::internal::InitEnumParseWrapper<" |
| "$unknown_fields_type$>(&$msg$$field$, $1$_IsValid, " |
| "$2$, &$msg$_internal_metadata_);\n" |
| "ptr = ctx->ParseMessage(&object, ptr);\n", |
| QualifiedClassName(val->enum_type(), options_), |
| field->number()); |
| } else { |
| format("ptr = ctx->ParseMessage(&$msg$$field$, ptr);\n"); |
| } |
| } else if (IsLazy(field, options_, scc_analyzer_)) { |
| bool eager_verify = |
| IsEagerlyVerifiedLazy(field, options_, scc_analyzer_); |
| if (ShouldVerify(descriptor_, options_, scc_analyzer_)) { |
| format( |
| "ctx->set_lazy_eager_verify_func($1$);\n", |
| eager_verify |
| ? StrCat("&", ClassName(field->message_type(), true), |
| "::InternalVerify") |
| : "nullptr"); |
| } |
| if (field->real_containing_oneof()) { |
| format( |
| "if (!$msg$_internal_has_$name$()) {\n" |
| " $msg$clear_$1$();\n" |
| " $msg$$field$ = ::$proto_ns$::Arena::CreateMessage<\n" |
| " ::$proto_ns$::internal::LazyField>(" |
| "$msg$GetArenaForAllocation());\n" |
| " $msg$set_has_$name$();\n" |
| "}\n" |
| "auto* lazy_field = $msg$$field$;\n", |
| field->containing_oneof()->name()); |
| } else if (HasHasbit(field)) { |
| format( |
| "_Internal::set_has_$name$(&$has_bits$);\n" |
| "auto* lazy_field = &$msg$$field$;\n"); |
| } else { |
| format("auto* lazy_field = &$msg$$field$;\n"); |
| } |
| format( |
| "::$proto_ns$::internal::LazyFieldParseHelper<\n" |
| " ::$proto_ns$::internal::LazyField> parse_helper(\n" |
| " $1$::default_instance(),\n" |
| " $msg$GetArenaForAllocation(),\n" |
| " ::google::protobuf::internal::LazyVerifyOption::$2$,\n" |
| " lazy_field);\n" |
| "ptr = ctx->ParseMessage(&parse_helper, ptr);\n", |
| FieldMessageTypeName(field, options_), |
| eager_verify ? "kEager" : "kLazy"); |
| if (ShouldVerify(descriptor_, options_, scc_analyzer_) && |
| eager_verify) { |
| format("ctx->set_lazy_eager_verify_func(nullptr);\n"); |
| } |
| } else if (IsImplicitWeakField(field, options_, scc_analyzer_)) { |
| if (!field->is_repeated()) { |
| format( |
| "ptr = ctx->ParseMessage(_Internal::mutable_$name$($this$), " |
| "ptr);\n"); |
| } else { |
| format( |
| "ptr = ctx->ParseMessage($msg$$field$.AddWeak(" |
| "reinterpret_cast<const ::$proto_ns$::MessageLite*>($1$ptr_)" |
| "), ptr);\n", |
| QualifiedDefaultInstanceName(field->message_type(), options_)); |
| } |
| } else if (IsWeak(field, options_)) { |
| format( |
| "{\n" |
| " auto* default_ = &reinterpret_cast<const Message&>($1$);\n" |
| " ptr = ctx->ParseMessage($msg$$weak_field_map$.MutableMessage(" |
| "$2$, default_), ptr);\n" |
| "}\n", |
| QualifiedDefaultInstanceName(field->message_type(), options_), |
| field->number()); |
| } else { |
| format( |
| "ptr = ctx->ParseMessage($msg$_internal_$mutable_field$(), " |
| "ptr);\n"); |
| } |
| format("CHK_(ptr);\n"); |
| break; |
| } |
| default: |
| GOOGLE_LOG(FATAL) << "Illegal combination for length delimited wiretype " |
| << " filed type is " << field->type(); |
| } |
| } |
| } |
| |
| static bool ShouldRepeat(const FieldDescriptor* descriptor, |
| WireFormatLite::WireType wiretype) { |
| constexpr int kMaxTwoByteFieldNumber = 16 * 128; |
| return descriptor->number() < kMaxTwoByteFieldNumber && |
| descriptor->is_repeated() && |
| (!descriptor->is_packable() || |
| wiretype != WireFormatLite::WIRETYPE_LENGTH_DELIMITED); |
| } |
| |
| void ParseFunctionGenerator::GenerateFieldBody( |
| Formatter& format, WireFormatLite::WireType wiretype, |
| const FieldDescriptor* field) { |
| Formatter::SaveState formatter_state(&format); |
| format.AddMap( |
| {{"name", FieldName(field)}, |
| {"primitive_type", PrimitiveTypeName(options_, field->cpp_type())}}); |
| if (field->is_repeated()) { |
| format.AddMap({{"put_field", StrCat("add_", FieldName(field))}, |
| {"mutable_field", StrCat("add_", FieldName(field))}}); |
| } else { |
| format.AddMap( |
| {{"put_field", StrCat("set_", FieldName(field))}, |
| {"mutable_field", StrCat("mutable_", FieldName(field))}}); |
| } |
| uint32_t tag = WireFormatLite::MakeTag(field->number(), wiretype); |
| switch (wiretype) { |
| case WireFormatLite::WIRETYPE_VARINT: { |
| std::string type = PrimitiveTypeName(options_, field->cpp_type()); |
| if (field->type() == FieldDescriptor::TYPE_ENUM) { |
| format.Set("enum_type", |
| QualifiedClassName(field->enum_type(), options_)); |
| format( |
| "$uint64$ val = ::$proto_ns$::internal::ReadVarint64(&ptr);\n" |
| "CHK_(ptr);\n"); |
| if (!HasPreservingUnknownEnumSemantics(field)) { |
| format("if (PROTOBUF_PREDICT_TRUE($enum_type$_IsValid(val))) {\n"); |
| format.Indent(); |
| } |
| format("$msg$_internal_$put_field$(static_cast<$enum_type$>(val));\n"); |
| if (!HasPreservingUnknownEnumSemantics(field)) { |
| format.Outdent(); |
| format( |
| "} else {\n" |
| " ::$proto_ns$::internal::WriteVarint(" |
| "$1$, val, $msg$mutable_unknown_fields());\n" |
| "}\n", |
| field->number()); |
| } |
| } else { |
| std::string size = (field->type() == FieldDescriptor::TYPE_INT32 || |
| field->type() == FieldDescriptor::TYPE_SINT32 || |
| field->type() == FieldDescriptor::TYPE_UINT32) |
| ? "32" |
| : "64"; |
| std::string zigzag; |
| if ((field->type() == FieldDescriptor::TYPE_SINT32 || |
| field->type() == FieldDescriptor::TYPE_SINT64)) { |
| zigzag = "ZigZag"; |
| } |
| if (field->is_repeated() || field->real_containing_oneof()) { |
| format( |
| "$msg$_internal_$put_field$(" |
| "::$proto_ns$::internal::ReadVarint$1$$2$(&ptr));\n" |
| "CHK_(ptr);\n", |
| zigzag, size); |
| } else { |
| if (HasHasbit(field)) { |
| format("_Internal::set_has_$name$(&$has_bits$);\n"); |
| } |
| format( |
| "$msg$$field$ = ::$proto_ns$::internal::ReadVarint$1$$2$(&ptr);\n" |
| "CHK_(ptr);\n", |
| zigzag, size); |
| } |
| } |
| break; |
| } |
| case WireFormatLite::WIRETYPE_FIXED32: |
| case WireFormatLite::WIRETYPE_FIXED64: { |
| if (field->is_repeated() || field->real_containing_oneof()) { |
| format( |
| "$msg$_internal_$put_field$(" |
| "::$proto_ns$::internal::UnalignedLoad<$primitive_type$>(ptr));\n" |
| "ptr += sizeof($primitive_type$);\n"); |
| } else { |
| if (HasHasbit(field)) { |
| format("_Internal::set_has_$name$(&$has_bits$);\n"); |
| } |
| format( |
| "$msg$$field$ = " |
| "::$proto_ns$::internal::UnalignedLoad<$primitive_type$>(ptr);\n" |
| "ptr += sizeof($primitive_type$);\n"); |
| } |
| break; |
| } |
| case WireFormatLite::WIRETYPE_LENGTH_DELIMITED: { |
| GenerateLengthDelim(format, field); |
| break; |
| } |
| case WireFormatLite::WIRETYPE_START_GROUP: { |
| format( |
| "ptr = ctx->ParseGroup($msg$_internal_$mutable_field$(), ptr, $1$);\n" |
| "CHK_(ptr);\n", |
| tag); |
| break; |
| } |
| case WireFormatLite::WIRETYPE_END_GROUP: { |
| GOOGLE_LOG(FATAL) << "Can't have end group field\n"; |
| break; |
| } |
| } // switch (wire_type) |
| } |
| |
| // Returns the tag for this field and in case of repeated packable fields, |
| // sets a fallback tag in fallback_tag_ptr. |
| static uint32_t ExpectedTag(const FieldDescriptor* field, |
| uint32_t* fallback_tag_ptr) { |
| uint32_t expected_tag; |
| if (field->is_packable()) { |
| auto expected_wiretype = WireFormat::WireTypeForFieldType(field->type()); |
| expected_tag = WireFormatLite::MakeTag(field->number(), expected_wiretype); |
| GOOGLE_CHECK(expected_wiretype != WireFormatLite::WIRETYPE_LENGTH_DELIMITED); |
| auto fallback_wiretype = WireFormatLite::WIRETYPE_LENGTH_DELIMITED; |
| uint32_t fallback_tag = |
| WireFormatLite::MakeTag(field->number(), fallback_wiretype); |
| |
| if (field->is_packed()) std::swap(expected_tag, fallback_tag); |
| *fallback_tag_ptr = fallback_tag; |
| } else { |
| auto expected_wiretype = WireFormat::WireTypeForField(field); |
| expected_tag = WireFormatLite::MakeTag(field->number(), expected_wiretype); |
| } |
| return expected_tag; |
| } |
| |
| // These variables are used by the generated parse iteration, and must already |
| // be defined in the generated code: |
| // - `const char* ptr`: the input buffer. |
| // - `ParseContext* ctx`: the associated context for `ptr`. |
| // - implicit `this`: i.e., we must be in a non-static member function. |
| // |
| // The macro `CHK_(x)` must be defined. It should return an error condition if |
| // the macro parameter is false. |
| // |
| // Whenever an END_GROUP tag was read, or tag 0 was read, the generated code |
| // branches to the label `message_done`. |
| // |
| // These formatter variables are used: |
| // - `next_tag`: a single statement to begin parsing the next tag. |
| // |
| // At the end of the generated code, the enclosing function should proceed to |
| // parse the next tag in the stream. |
| void ParseFunctionGenerator::GenerateParseIterationBody( |
| Formatter& format, const Descriptor* descriptor, |
| const std::vector<const FieldDescriptor*>& fields) { |
| if (!fields.empty()) { |
| GenerateFieldSwitch(format, fields); |
| // Each field `case` only considers field number. Field numbers that are |
| // not defined in the message, or tags with an incompatible wire type, are |
| // considered "unusual" cases. They will be handled by the logic below. |
| format.Outdent(); |
| format("handle_unusual:\n"); |
| format.Indent(); |
| } |
| |
| // Unusual/extension/unknown case: |
| format( |
| "if ((tag == 0) || ((tag & 7) == 4)) {\n" |
| " CHK_(ptr);\n" |
| " ctx->SetLastTag(tag);\n" |
| " goto message_done;\n" |
| "}\n"); |
| if (IsMapEntryMessage(descriptor)) { |
| format("$next_tag$;\n"); |
| } else { |
| if (descriptor->extension_range_count() > 0) { |
| format("if ("); |
| for (int i = 0; i < descriptor->extension_range_count(); i++) { |
| const Descriptor::ExtensionRange* range = |
| descriptor->extension_range(i); |
| if (i > 0) format(" ||\n "); |
| |
| uint32_t start_tag = WireFormatLite::MakeTag( |
| range->start, static_cast<WireFormatLite::WireType>(0)); |
| uint32_t end_tag = WireFormatLite::MakeTag( |
| range->end, static_cast<WireFormatLite::WireType>(0)); |
| |
| if (range->end > FieldDescriptor::kMaxNumber) { |
| format("($1$u <= tag)", start_tag); |
| } else { |
| format("($1$u <= tag && tag < $2$u)", start_tag, end_tag); |
| } |
| } |
| format( |
| ") {\n" |
| " ptr = $msg$$extensions$.ParseField(tag, ptr, " |
| "internal_default_instance(), &$msg$_internal_metadata_, ctx);\n" |
| " CHK_(ptr != nullptr);\n" |
| " $next_tag$;\n" |
| "}\n"); |
| } |
| format( |
| "ptr = UnknownFieldParse(\n" |
| " tag,\n" |
| " $msg$_internal_metadata_.mutable_unknown_fields<" |
| "$unknown_fields_type$>(),\n" |
| " ptr, ctx);\n" |
| "CHK_(ptr != nullptr);\n"); |
| } |
| } |
| |
| void ParseFunctionGenerator::GenerateFieldSwitch( |
| Formatter& format, const std::vector<const FieldDescriptor*>& fields) { |
| format("switch (tag >> 3) {\n"); |
| format.Indent(); |
| |
| for (const auto* field : fields) { |
| bool cold = ShouldSplit(field, options_); |
| format.Set("field", FieldMemberName(field, cold)); |
| PrintFieldComment(format, field); |
| format("case $1$:\n", field->number()); |
| format.Indent(); |
| uint32_t fallback_tag = 0; |
| uint32_t expected_tag = ExpectedTag(field, &fallback_tag); |
| format("if (PROTOBUF_PREDICT_TRUE(static_cast<$uint8$>(tag) == $1$)) {\n", |
| expected_tag & 0xFF); |
| format.Indent(); |
| if (cold) { |
| format("$msg$PrepareSplitMessageForWrite();\n"); |
| } |
| auto wiretype = WireFormatLite::GetTagWireType(expected_tag); |
| uint32_t tag = WireFormatLite::MakeTag(field->number(), wiretype); |
| int tag_size = io::CodedOutputStream::VarintSize32(tag); |
| bool is_repeat = ShouldRepeat(field, wiretype); |
| if (is_repeat) { |
| format( |
| "ptr -= $1$;\n" |
| "do {\n" |
| " ptr += $1$;\n", |
| tag_size); |
| format.Indent(); |
| } |
| GenerateFieldBody(format, wiretype, field); |
| if (is_repeat) { |
| format.Outdent(); |
| format( |
| " if (!ctx->DataAvailable(ptr)) break;\n" |
| "} while (::$proto_ns$::internal::ExpectTag<$1$>(ptr));\n", |
| tag); |
| } |
| format.Outdent(); |
| if (fallback_tag) { |
| format("} else if (static_cast<$uint8$>(tag) == $1$) {\n", |
| fallback_tag & 0xFF); |
| format.Indent(); |
| GenerateFieldBody(format, WireFormatLite::GetTagWireType(fallback_tag), |
| field); |
| format.Outdent(); |
| } |
| format( |
| "} else\n" |
| " goto handle_unusual;\n" |
| "$next_tag$;\n"); |
| format.Outdent(); |
| } // for loop over ordered fields |
| |
| format( |
| "default:\n" |
| " goto handle_unusual;\n"); |
| format.Outdent(); |
| format("} // switch\n"); |
| } |
| |
| namespace { |
| |
| std::string FieldParseFunctionName( |
| const TailCallTableInfo::FieldEntryInfo& entry, const Options& options) { |
| const FieldDescriptor* field = entry.field; |
| std::string name = "::_pbi::TcParser::Fast"; |
| |
| switch (field->type()) { |
| case FieldDescriptor::TYPE_FIXED32: |
| case FieldDescriptor::TYPE_SFIXED32: |
| case FieldDescriptor::TYPE_FLOAT: |
| name.append("F32"); |
| break; |
| |
| case FieldDescriptor::TYPE_FIXED64: |
| case FieldDescriptor::TYPE_SFIXED64: |
| case FieldDescriptor::TYPE_DOUBLE: |
| name.append("F64"); |
| break; |
| |
| case FieldDescriptor::TYPE_BOOL: |
| name.append("V8"); |
| break; |
| case FieldDescriptor::TYPE_INT32: |
| case FieldDescriptor::TYPE_UINT32: |
| name.append("V32"); |
| break; |
| case FieldDescriptor::TYPE_INT64: |
| case FieldDescriptor::TYPE_UINT64: |
| name.append("V64"); |
| break; |
| |
| case FieldDescriptor::TYPE_ENUM: |
| if (HasPreservingUnknownEnumSemantics(field)) { |
| name.append("V32"); |
| break; |
| } |
| if (field->is_repeated() && field->is_packed()) { |
| GOOGLE_LOG(DFATAL) << "Enum validation not handled: " << field->DebugString(); |
| return ""; |
| } |
| name.append(entry.is_enum_range ? "Er" : "Ev"); |
| break; |
| |
| case FieldDescriptor::TYPE_SINT32: |
| name.append("Z32"); |
| break; |
| case FieldDescriptor::TYPE_SINT64: |
| name.append("Z64"); |
| break; |
| |
| case FieldDescriptor::TYPE_BYTES: |
| name.append("B"); |
| if (IsStringInlined(field, options)) { |
| name.append("i"); |
| } |
| break; |
| case FieldDescriptor::TYPE_STRING: |
| switch (GetUtf8CheckMode(field, options)) { |
| case Utf8CheckMode::kNone: |
| name.append("B"); |
| break; |
| case Utf8CheckMode::kVerify: |
| name.append("S"); |
| break; |
| case Utf8CheckMode::kStrict: |
| name.append("U"); |
| break; |
| default: |
| GOOGLE_LOG(DFATAL) << "Mode not handled: " |
| << static_cast<int>(GetUtf8CheckMode(field, options)); |
| return ""; |
| } |
| if (IsStringInlined(field, options)) { |
| name.append("i"); |
| } |
| break; |
| |
| case FieldDescriptor::TYPE_MESSAGE: |
| name.append("M"); |
| break; |
| case FieldDescriptor::TYPE_GROUP: |
| name.append("G"); |
| break; |
| |
| default: |
| GOOGLE_LOG(DFATAL) << "Type not handled: " << field->DebugString(); |
| return ""; |
| } |
| |
| // The field implementation functions are prefixed by cardinality: |
| // `S` for optional or implicit fields. |
| // `R` for non-packed repeated. |
| // `P` for packed repeated. |
| name.append(field->is_packed() ? "P" |
| : field->is_repeated() ? "R" |
| : field->real_containing_oneof() ? "O" |
| : "S"); |
| |
| // Append the tag length. Fast parsing only handles 1- or 2-byte tags. |
| name.append(TagSize(field->number()) == 1 ? "1" : "2"); |
| |
| return name; |
| } |
| |
| } // namespace |
| |
| } // namespace cpp |
| } // namespace compiler |
| } // namespace protobuf |
| } // namespace google |