blob: 78e15883813eb0ca13a0dfdf9395f05b7a3f276a [file] [log] [blame]
// Copyright 2019 Google LLC
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
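// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and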
// limitations under the License.
// proto2 syntax: singular fields in this file carry an explicit `optional`
// label.
syntax = "proto2";
package icing.lib;
// status.proto is expected to provide StatusProto and term.proto to provide
// TermMatchType, both of which are referenced below - TODO confirm.
import "icing/proto/status.proto";
import "icing/proto/term.proto";
// NOTE(review): java_package is an empty string here; confirm the intended
// Java package name.
option java_package = "";
option java_multiple_files = true;
option objc_class_prefix = "ICNG";
// Defines the schema that every Document of a specific "type" should adhere
// to. These can be considered as definitions of rich structured types for
// Documents accepted by IcingSearchEngine.
// NOTE: Instances of SchemaTypeConfigProto are strongly recommended to be
// based on types defined in a shared vocabulary (presumably schema.org; the
// reference is missing from this comment - TODO confirm). This makes the
// data/config/code more shareable and easier to extend in the future.
// TODO(cassiewang) Define a sample proto file that can be used by tests and for
// documentation.
// Next tag: 8
message SchemaTypeConfigProto {
// REQUIRED: Named type that uniquely identifies the structured, logical
// schema being defined.
// Recommended format: Human readable string that's one of the types defined
// in that shared vocabulary. Eg: DigitalDocument, Message, Person, etc.
optional string schema_type = 1;
// OPTIONAL: A natural language description of the SchemaTypeConfigProto.
// This string is not used by Icing in any way. It simply exists to allow
// users to store semantic information about the SchemaTypeConfigProto for
// future retrieval.
optional string description = 7;
// List of all properties that are supported by Documents of this type.
// A Document should never have properties that are not listed here.
// TODO(cassiewang) Figure out if we should disallow, ignore or accept
// unknown properties. Accepting them could make switching between versions
// easier.
repeated PropertyConfigProto properties = 4;
// Version is an arbitrary number that the client may use to keep track of
// different incarnations of the schema. Icing library imposes no requirements
// on this field and will not validate it in any way. If a client calls
// SetSchema with a schema that contains one or more new version numbers, then
// those version numbers will be updated so long as the SetSchema call
// succeeds. Clients are free to leave the version number unset, in which case
// it will default to value == 0.
optional int32 version = 5;
// An experimental field to mark the type as a subtype of parent_types, which
// enables parent_types to be interpreted as its subtypes in the context of
// the Search APIs, including schema type filters and projections specified in
// TypePropertyMask.
repeated string parent_types = 6;
// Tags 2 and 3 are reserved and must not be reused by new fields.
reserved 2, 3;
// Describes how a string property should be indexed.
// Next tag: 3
message StringIndexingConfig {
// Indicates how the content of this property should be matched in the index.
// TermMatchType.Code=UNKNOWN
// Content in this property will not be tokenized or indexed. Useful if the
// data type is not indexable. See schema-util for details.
// TermMatchType.Code=EXACT_ONLY
// Content in this property should only be returned for queries matching the
// exact tokens appearing in this property.
// Ex. A property with "fool" should NOT match a query for "foo".
// TermMatchType.Code=PREFIX
// Content in this property should be returned for queries that are either
// exact matches or prefix matches of the tokens appearing in this property.
// Ex. A property with "fool" *should* match a query for "foo".
optional TermMatchType.Code term_match_type = 1;
// Selects how the string content of a property is split into tokens.
message TokenizerType {
enum Code {
// It is only valid for tokenizer_type to be 'NONE' if the data type is
// not indexed.
NONE = 0;
// Tokenization for plain text.
PLAIN = 1;
// Tokenizes text verbatim. This means no normalization or segmentation
// is applied to string values that are tokenized using this type.
// Therefore, the output token is equivalent to the raw string text. For
// example, "Hello, world!" would be tokenized as "Hello, world!",
// preserving punctuation and capitalization, and not creating separate
// tokens at the space.
// NOTE(review): the enum value this comment describes is not visible here
// (no value is declared for tag 2) - TODO confirm.
//
// Tokenizes text as an email address. This means it will tokenize a
// string into multiple emails, and further tokenize those into parts of
// an email address. These parts include the local address, host
// components, local components, as well as the name and comments. For
// example, "User (comment) <user@domain.com>" would be tokenized into a
// "User" name token, a "comment" comment token, a "user" local address, a
// "user" local component token, a "domain" host component token, a "com"
// host component token, a "user@domain.com" address token, and the entire
// original string as an rfc822 token.
// NOTE(review): the address in this example was reconstructed from the
// tokens listed above - TODO confirm.
// See the RFC 822 specification for more details.
RFC822 = 3;
// Tokenizes text as a url address. This tokenizes a url string into a
// token for each component in the url, as well as any significant
// url suffixes. For example, a url such as
// "https://www.google.com/path/subpath?query#ref" (reconstructed from the
// components listed below - TODO confirm) would be tokenized into a
// scheme token "https"; 3 host tokens "www", "google", "com"; 2 path
// tokens "path", "subpath"; a query token "query"; a reference token
// "ref"; and 3 suffix tokens (their literal values are missing from this
// comment).
// Currently only supports tokenization of one url string at a time
// i.e. the input string cannot have spaces in the middle, but can have
// leading or trailing spaces.
URL = 4;
// Indicates which tokenizer should be applied to the content of this
// property.
optional TokenizerType.Code tokenizer_type = 2;
// Describes how a document property should be indexed.
// Next tag: 3
message DocumentIndexingConfig {
// OPTIONAL: Whether nested properties within the document property should be
// indexed. If true, then all nested properties will be indexed according to
// their own indexing configurations. If false, nested documents'
// properties will not be indexed even if they have an indexing configuration.
// The default value is false.
optional bool index_nested_properties = 1;
// List of nested properties within the document to index. Only the
// properties listed here will be indexed, each according to its own
// indexing configuration.
// index_nested_properties must be false in order to use this feature.
repeated string indexable_nested_properties_list = 2;
// Describes how an int64 property should be indexed.
// Next tag: 3
// NOTE(review): only tag 1 is visible in this message; confirm whether the
// next free tag is really 3.
message IntegerIndexingConfig {
// OPTIONAL: Indicates how the int64 contents of this property should be
// matched.
// The default value is UNKNOWN.
message NumericMatchType {
enum Code {
// Contents in this property will not be indexed. Useful if the int64
// property type is not indexable.
// NOTE(review): the enum value this comment describes (presumably the
// UNKNOWN default mentioned above) is not visible here - TODO confirm.
//
// Contents in this property should only be returned for queries matching
// the range.
RANGE = 1;
// Selects the numeric matching mode for this property.
optional NumericMatchType.Code numeric_match_type = 1;
// Describes how a property can be used to join this document with another
// document. See JoinSpecProto (in search.proto) for more details.
// Next tag: 3
message JoinableConfig {
// OPTIONAL: Indicates what joinable type the content value of this property
// is.
// The default value is NONE.
message ValueType {
enum Code {
// Value in this property is not joinable.
NONE = 0;
// Value in this property is a joinable (string) qualified id, which is
// composed of namespace and uri.
// See JoinSpecProto (in search.proto) and DocumentProto (in
// document.proto) for more details about qualified id, namespace and uri.
// NOTE(review): the enum value this comment describes (presumably the
// QUALIFIED_ID mentioned below) is not visible here - TODO confirm.
optional ValueType.Code value_type = 1;
// OPTIONAL: If the parent document that a child document is joined to is
// deleted, delete the child document as well. This will only apply to
// children joined through QUALIFIED_ID; other (future) joinable value types
// won't use it.
// The default value is false.
optional bool propagate_delete = 2 [default = false];
// Describes the schema of a single property of Documents that belong to a
// specific SchemaTypeConfigProto. These can be considered as a rich, structured
// type for each property of Documents accepted by IcingSearchEngine.
// Next tag: 10
message PropertyConfigProto {
// REQUIRED: Name that uniquely identifies a property within a Document of
// a specific SchemaTypeConfigProto.
// Recommended format: Human readable string that's one of the properties
// defined for the parent SchemaTypeConfigProto's type.
// Eg: 'author', 'address'.
// NOTE(review): the vocabulary reference and the types these examples belong
// to are missing from this comment - TODO restore.
optional string property_name = 1;
// OPTIONAL: A natural language description of the property.
// This string is not used by Icing in any way. It simply exists to allow
// users to store semantic information about the PropertyConfigProto for
// future retrieval.
optional string description = 9;
// REQUIRED: Physical data-types of the contents of the property.
message DataType {
enum Code {
// This value should never purposely be used. This is used for backwards
// compatibility reasons.
// NOTE(review): only INT64 and BYTES are declared in this view; the value
// described above, the document-typed value described below, and the STRING
// value referenced by the joinable_config comment are not visible - TODO
// confirm.
INT64 = 2;
// Unstructured BLOB.
BYTES = 5;
// Indicates that the property itself is a Document, making it part of
// a hierarchical Document schema. Any property using this data_type
// MUST have a valid 'schema_type'.
optional DataType.Code data_type = 2;
// REQUIRED if (data_type == DOCUMENT). OPTIONAL otherwise.
// Indicates the logical schema-type of the contents of this property.
// TODO(cassiewang): This could be useful for non-document properties, e.g.
// to tag some string property with a logical type (the original example is
// missing from this comment).
// Re-evaluate what recommendation we should give clients if we want to start
// using this for non-document properties as well.
// Recommended format: Human readable string that is one of the defined
// types, matching the SchemaTypeConfigProto.schema_type of another
// type.
optional string schema_type = 3;
// REQUIRED: The cardinality of the property.
message Cardinality {
// NOTE: The order of the cardinality is purposefully set to be from least
// restrictive (REPEATED) to most restrictive (REQUIRED). This makes it
// easier to check if a field is backwards compatible by doing a simple
// greater-than/less-than check on the enum ints. Changing/adding new
// cardinalities should be done cautiously.
enum Code {
// NOTE(review): none of the enum values described by the comments below are
// visible here - TODO confirm.
// This should never purposely be set. This is used for backwards
// compatibility reasons.
// Any number of items (including zero) [0...*].
// Zero or one value [0,1].
// Exactly one value [1].
optional Cardinality.Code cardinality = 4;
// OPTIONAL: Describes how string properties should be indexed. String
// properties that do not set the indexing config will not be indexed.
optional StringIndexingConfig string_indexing_config = 5;
// OPTIONAL: Describes how document properties should be indexed.
optional DocumentIndexingConfig document_indexing_config = 6;
// OPTIONAL: Describes how int64 properties should be indexed. Int64
// properties that do not set the indexing config will not be indexed.
optional IntegerIndexingConfig integer_indexing_config = 7;
// OPTIONAL: Describes how string properties can be used as a document joining
// matcher.
// Note: currently we only support STRING single joining, so if a property is
// set as joinable (i.e. joinable_config.value_type is not NONE), then:
// - DataType should be STRING. Otherwise joinable_config will be ignored.
// - The property itself and any upper-level (nested doc) property should
// contain at most one element (i.e. Cardinality is OPTIONAL or REQUIRED).
optional JoinableConfig joinable_config = 8;
// The list of all supported types, which together constitute the schema used
// by Icing.
// Next tag: 2
message SchemaProto {
// One entry per schema type in the schema.
repeated SchemaTypeConfigProto types = 1;
// Result of a call to IcingSearchEngine.SetSchema.
// Next tag: 9
message SetSchemaResultProto {
// Status code can be one of:
// OK
// See status.proto for more details.
// TODO(b/147699081): Fix error codes: +ABORTED, +WARNING_DATA_LOSS,
// -INTERNAL. go/icing-library-apis.
optional StatusProto status = 1;
// Schema types that existed in the previous schema, but were deleted from the
// new schema. If ignore_errors_and_delete_documents=true, then all documents
// of these types were also deleted.
repeated string deleted_schema_types = 2;
// Schema types that existed in the previous schema and were incompatible with
// the new schema type. If ignore_errors_and_delete_documents=true, then any
// documents that fail validation against the new schema types would also be
// deleted.
repeated string incompatible_schema_types = 3;
// Schema types that did not exist in the previous schema and were added with
// the new schema type.
repeated string new_schema_types = 4;
// Schema types that were changed in a way that was backwards compatible and
// didn't invalidate the index.
repeated string fully_compatible_changed_schema_types = 5;
// Schema types that were changed in a way that was backwards compatible, but
// invalidated the index.
repeated string index_incompatible_changed_schema_types = 6;
// Overall time used for the function call, in milliseconds.
optional int32 latency_ms = 7;
// Schema types that were changed in a way that was backwards compatible, but
// invalidated the joinable cache.
// For example, a property was set as non-joinable in the old schema
// definition, but changed to joinable in the new definition. In this case,
// this property will be considered join incompatible when setting the new
// schema.
repeated string join_incompatible_changed_schema_types = 8;
// Result of a call to IcingSearchEngine.GetSchema.
// Next tag: 3
message GetSchemaResultProto {
// Status code can be one of:
// OK
// See status.proto for more details.
// TODO(b/147699081): Fix error codes: +ABORTED, -INTERNAL
// go/icing-library-apis.
optional StatusProto status = 1;
// Copy of the Schema proto. Modifying this copy does not affect the Schema
// that IcingSearchEngine holds.
optional SchemaProto schema = 2;
// Result of a call to IcingSearchEngine.GetSchemaType.
// Next tag: 3
message GetSchemaTypeResultProto {
// Status code can be one of:
// OK
// See status.proto for more details.
// TODO(b/147699081): Fix error codes: +ABORTED, -INTERNAL
// go/icing-library-apis.
optional StatusProto status = 1;
// Copy of the SchemaTypeConfig proto with the specified schema_type.
// Modifying this copy does not affect the SchemaTypeConfig that
// IcingSearchEngine holds.
optional SchemaTypeConfigProto schema_type_config = 2;