| // Copyright 2019 Google LLC |
| // |
| // Licensed under the Apache License, Version 2.0 (the "License"); |
| // you may not use this file except in compliance with the License. |
| // You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, software |
| // distributed under the License is distributed on an "AS IS" BASIS, |
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| // See the License for the specific language governing permissions and |
| // limitations under the License. |
| |
| syntax = "proto2"; |
| |
| package icing.lib; |
| |
| import "icing/proto/status.proto"; |
| import "icing/proto/term.proto"; |
| |
| option java_package = "com.google.android.icing.proto"; |
| option java_multiple_files = true; |
| option objc_class_prefix = "ICNG"; |
| |
| // Defines the schema that every Document of a specific "type" should adhere |
| // to. These can be considered as definitions of rich structured types for |
| // Documents accepted by IcingSearchEngine. |
| // |
| // NOTE: Instances of SchemaTypeConfigProto are strongly recommended to be |
| // based on types defined in schema.org. This makes the data/config/code more |
| // shareable and easier to extend in the future. |
| // |
| // TODO(cassiewang) Define a sample proto file that can be used by tests and for |
| // documentation. |
| // |
| // Next tag: 8 |
| message SchemaTypeConfigProto { |
| // REQUIRED: Named type that uniquely identifies the structured, logical |
| // schema being defined. |
| // |
| // Recommended format: Human readable string that's one of the types defined |
| // in http://schema.org. Eg: DigitalDocument, Message, Person, etc. |
| optional string schema_type = 1; |
| |
| // OPTIONAL: A natural language description of the SchemaTypeConfigProto. |
| // |
| // This string is not used by Icing in any way. It simply exists to allow |
| // users to store semantic information about the SchemaTypeConfigProto for |
| // future retrieval. |
| optional string description = 7; |
| |
| // List of all properties that are supported by Documents of this type. |
| // A Document should never have properties that are not listed here. |
| // |
| // TODO(cassiewang) Figure out if we should disallow, ignore or accept |
| // unknown properties. Accepting them could make switching between versions |
| // easier. |
| repeated PropertyConfigProto properties = 4; |
| |
| // Version is an arbitrary number that the client may use to keep track of |
| // different incarnations of the schema. Icing library imposes no requirements |
| // on this field and will not validate it in any way. If a client calls |
| // SetSchema with a schema that contains one or more new version numbers, then |
| // those version numbers will be updated so long as the SetSchema call |
| // succeeds. Clients are free to leave the version number unset, in which case |
| // it will default to value == 0. |
| optional int32 version = 5; |
| |
| // An experimental field to mark the type as a subtype of parent_types, which |
| // enables parent_types to be interpreted as its subtypes in the context of |
| // the Search APIs, including schema type filters and projections specified in |
| // TypePropertyMask. |
| repeated string parent_types = 6; |
| |
| // Tags 2 and 3 are reserved and must not be assigned to new fields. |
| reserved 2, 3; |
| } |
| |
| // Describes how a string property should be indexed. |
| // Next tag: 3 |
| message StringIndexingConfig { |
| // Indicates how the content of this property should be matched in the index. |
| // |
| // TermMatchType.Code=UNKNOWN |
| // Content in this property will not be tokenized or indexed. Useful if the |
| // data type is not indexable. See schema-util for details. |
| // |
| // TermMatchType.Code=EXACT_ONLY |
| // Content in this property should only be returned for queries matching the |
| // exact tokens appearing in this property. |
| // Ex. A property with "fool" should NOT match a query for "foo". |
| // |
| // TermMatchType.Code=PREFIX |
| // Content in this property should be returned for queries that are either |
| // exact matches or prefix matches of the tokens appearing in this property. |
| // Ex. A property with "fool" *should* match a query for "foo". |
| optional TermMatchType.Code term_match_type = 1; |
| |
| // Wrapper message that scopes the Code enum listing the available |
| // tokenizers. |
| message TokenizerType { |
| enum Code { |
| // It is only valid for tokenizer_type to be 'NONE' if the data type is |
| // not indexed. |
| NONE = 0; |
| |
| // Tokenization for plain text. |
| PLAIN = 1; |
| |
| // Tokenizes text verbatim. This means no normalization or segmentation |
| // is applied to string values that are tokenized using this type. |
| // Therefore, the output token is equivalent to the raw string text. For |
| // example, "Hello, world!" would be tokenized as "Hello, world!" |
| // preserving punctuation and capitalization, and not creating separate |
| // tokens between the space. |
| VERBATIM = 2; |
| |
| // Tokenizes text as an email address. This means it will tokenize a |
| // string into multiple emails, and further tokenize those into parts of |
| // an email address. These parts include the local address, host |
| // components, local components, as well as the name and comments. For |
| // example, "User (comment) <user@domain.com>" would be tokenized into a |
| // "User" name token, a "comment" comment token, a "user" local address, a |
| // "user" local component token, a "domain" host component token, a "com" |
| // host component token, a "user@domain.com" address token, and the entire |
| // original string as an rfc822 token. |
| // See more here: https://datatracker.ietf.org/doc/html/rfc822 |
| RFC822 = 3; |
| |
| // Tokenizes text as a url. This tokenizes a url string into a |
| // token for each component in the url, as well as any significant |
| // url suffixes. For example, |
| // https://www.google.com/path/subpath?query#ref would be tokenized into a |
| // scheme token "https"; 3 host tokens "www", "google", "com"; 2 path |
| // tokens "path", "subpath"; a query token "query"; a reference token |
| // "ref"; and 3 suffix tokens |
| // "https://www.google.com/path/subpath?query#ref", |
| // "www.google.com/path/subpath?query#ref", |
| // "google.com/path/subpath?query#ref". |
| // Currently only supports tokenization of one url string at a time |
| // i.e. the input string cannot have spaces in the middle, but can have |
| // leading or trailing spaces. |
| URL = 4; |
| } |
| } |
| // Indicates how the content of this property should be tokenized. See |
| // TokenizerType.Code for the available tokenizers. |
| optional TokenizerType.Code tokenizer_type = 2; |
| } |
| |
| // Describes how a document property (i.e. a property whose value is itself a |
| // nested Document) should be indexed. |
| // Next tag: 3 |
| message DocumentIndexingConfig { |
| // OPTIONAL: Whether nested properties within the document property should be |
| // indexed. If true, then all nested properties will be indexed according to |
| // the property's own indexing configurations. If false, nested documents' |
| // properties will not be indexed even if they have an indexing configuration. |
| // |
| // The default value is false. |
| optional bool index_nested_properties = 1; |
| |
| // OPTIONAL: List of nested properties within the document to index. Only the |
| // provided list of properties will be indexed according to the property's |
| // indexing configurations. |
| // |
| // index_nested_properties must be false in order to use this feature. |
| repeated string indexable_nested_properties_list = 2; |
| } |
| |
| // Describes how an int64 property should be indexed. |
| // Next tag: 3 |
| // NOTE(review): only tag 1 is used in this message, so the next free tag |
| // appears to be 2. Confirm that tag 2 was never used before assigning it. |
| message IntegerIndexingConfig { |
| // OPTIONAL: Indicates how the int64 contents of this property should be |
| // matched. |
| // |
| // The default value is UNKNOWN. |
| message NumericMatchType { |
| enum Code { |
| // Contents in this property will not be indexed. Useful if the int64 |
| // property type is not indexable. |
| UNKNOWN = 0; |
| |
| // Contents in this property should only be returned for queries matching |
| // the range. |
| RANGE = 1; |
| } |
| } |
| // The match type applied to this property; see NumericMatchType.Code for the |
| // meaning of each value. |
| optional NumericMatchType.Code numeric_match_type = 1; |
| } |
| |
| // Describes how a property can be used to join this document with another |
| // document. See JoinSpecProto (in search.proto) for more details. |
| // Next tag: 3 |
| message JoinableConfig { |
| // OPTIONAL: Indicates what joinable type the content value of this property |
| // is. |
| // |
| // The default value is NONE. |
| message ValueType { |
| enum Code { |
| // Value in this property is not joinable. |
| NONE = 0; |
| |
| // Value in this property is a joinable (string) qualified id, which is |
| // composed of namespace and uri. |
| // See JoinSpecProto (in search.proto) and DocumentProto (in |
| // document.proto) for more details about qualified id, namespace and uri. |
| QUALIFIED_ID = 1; |
| } |
| } |
| // The joinable value type of this property; see ValueType.Code for the |
| // meaning of each value. |
| optional ValueType.Code value_type = 1; |
| |
| // If the parent document a child document is joined to is deleted, delete the |
| // child document as well. This will only apply to children joined through |
| // QUALIFIED_ID, other (future) joinable value types won't use it. |
| // |
| // The default value is false. |
| optional bool propagate_delete = 2 [default = false]; |
| } |
| |
| // Describes the schema of a single property of Documents that belong to a |
| // specific SchemaTypeConfigProto. These can be considered as a rich, structured |
| // type for each property of Documents accepted by IcingSearchEngine. |
| // Next tag: 10 |
| message PropertyConfigProto { |
| // REQUIRED: Name that uniquely identifies a property within a Document of |
| // a specific SchemaTypeConfigProto. |
| // |
| // Recommended format: Human readable string that's one of the properties |
| // defined in schema.org for the parent SchemaTypeConfigProto. |
| // Eg: 'author' for http://schema.org/DigitalDocument. |
| // Eg: 'address' for http://schema.org/Place. |
| optional string property_name = 1; |
| |
| // OPTIONAL: A natural language description of the property. |
| // |
| // This string is not used by Icing in any way. It simply exists to allow |
| // users to store semantic information about the PropertyConfigProto for |
| // future retrieval. |
| optional string description = 9; |
| |
| // REQUIRED: Physical data-types of the contents of the property. |
| message DataType { |
| enum Code { |
| // This value should never purposely be used. This is used for backwards |
| // compatibility reasons. |
| UNKNOWN = 0; |
| |
| STRING = 1; |
| INT64 = 2; |
| DOUBLE = 3; |
| BOOLEAN = 4; |
| |
| // Unstructured BLOB. |
| BYTES = 5; |
| |
| // Indicates that the property itself is a Document, making it part |
| // of a hierarchical Document schema. Any property using this data_type |
| // MUST have a valid 'schema_type'. |
| DOCUMENT = 6; |
| } |
| } |
| optional DataType.Code data_type = 2; |
| |
| // REQUIRED if (data_type == DOCUMENT). OPTIONAL otherwise. |
| // Indicates the logical schema-type of the contents of this property. |
| // |
| // TODO(cassiewang): This could be useful for non-document properties, e.g. |
| // to set this field as a schema.org/address for some string property. |
| // Re-evaluate what recommendation we should give clients if we want to start |
| // using this for non-document properties as well. |
| // |
| // Recommended format: Human readable string that is one of the types defined |
| // in schema.org, matching the SchemaTypeConfigProto.schema_type of another |
| // type. |
| optional string schema_type = 3; |
| |
| // REQUIRED: The cardinality of the property. |
| message Cardinality { |
| // NOTE: The order of the cardinality is purposefully set to be from least |
| // restrictive (REPEATED) to most restrictive (REQUIRED). This makes it |
| // easier to check if a field is backwards compatible by doing a simple |
| // greater-than/less-than check on the enum ints. Changing/adding new |
| // cardinalities should be done cautiously. |
| enum Code { |
| // This should never purposely be set. This is used for backwards |
| // compatibility reasons. |
| UNKNOWN = 0; |
| |
| // Any number of items (including zero) [0...*]. |
| REPEATED = 1; |
| |
| // Zero or one value [0,1]. |
| OPTIONAL = 2; |
| |
| // Exactly one value [1]. |
| REQUIRED = 3; |
| } |
| } |
| optional Cardinality.Code cardinality = 4; |
| |
| // OPTIONAL: Describes how string properties should be indexed. String |
| // properties that do not set the indexing config will not be indexed. |
| optional StringIndexingConfig string_indexing_config = 5; |
| |
| // OPTIONAL: Describes how document properties should be indexed. |
| optional DocumentIndexingConfig document_indexing_config = 6; |
| |
| // OPTIONAL: Describes how int64 properties should be indexed. Int64 |
| // properties that do not set the indexing config will not be indexed. |
| optional IntegerIndexingConfig integer_indexing_config = 7; |
| |
| // OPTIONAL: Describes how string properties can be used as a document joining |
| // matcher. |
| // |
| // Note: currently we only support STRING single joining, so if a property is |
| // set as joinable (i.e. joinable_config.value_type is not NONE), then: |
| // - DataType should be STRING. Otherwise joinable_config will be ignored. |
| // - The property itself and any upper-level (nested doc) property should |
| //   contain at most one element (i.e. Cardinality is OPTIONAL or REQUIRED). |
| optional JoinableConfig joinable_config = 8; |
| } |
| |
| // The list of all supported types, which constitutes the schema used by Icing. |
| // Next tag: 2 |
| message SchemaProto { |
| // The type configs that make up the schema, one SchemaTypeConfigProto per |
| // supported type. |
| repeated SchemaTypeConfigProto types = 1; |
| } |
| |
| // Result of a call to IcingSearchEngine.SetSchema. |
| // Next tag: 9 |
| message SetSchemaResultProto { |
| // Status code can be one of: |
| // OK |
| // INVALID_ARGUMENT |
| // FAILED_PRECONDITION |
| // INTERNAL |
| // |
| // See status.proto for more details. |
| // |
| // TODO(b/147699081): Fix error codes: +ABORTED, +WARNING_DATA_LOSS, |
| // -INTERNAL. go/icing-library-apis. |
| optional StatusProto status = 1; |
| |
| // Schema types that existed in the previous schema, but were deleted from the |
| // new schema. If ignore_errors_and_delete_documents=true, then all documents |
| // of these types were also deleted. |
| repeated string deleted_schema_types = 2; |
| |
| // Schema types that existed in the previous schema and were incompatible with |
| // the new schema type. If ignore_errors_and_delete_documents=true, then any |
| // documents that fail validation against the new schema types would also be |
| // deleted. |
| repeated string incompatible_schema_types = 3; |
| |
| // Schema types that did not exist in the previous schema and were added with |
| // the new schema type. |
| repeated string new_schema_types = 4; |
| |
| // Schema types that were changed in a way that was backwards compatible and |
| // didn't invalidate the index. |
| repeated string fully_compatible_changed_schema_types = 5; |
| |
| // Schema types that were changed in a way that was backwards compatible, but |
| // invalidated the index. |
| repeated string index_incompatible_changed_schema_types = 6; |
| |
| // Overall time used for the function call, in milliseconds. |
| optional int32 latency_ms = 7; |
| |
| // Schema types that were changed in a way that was backwards compatible, but |
| // invalidated the joinable cache. |
| // |
| // For example, a property was set non joinable in the old schema definition, |
| // but changed to joinable in the new definition. In this case, this property |
| // will be considered join incompatible when setting new schema. |
| repeated string join_incompatible_changed_schema_types = 8; |
| } |
| |
| // Result of a call to IcingSearchEngine.GetSchema. |
| // Next tag: 3 |
| message GetSchemaResultProto { |
| // Status code can be one of: |
| // OK |
| // FAILED_PRECONDITION |
| // NOT_FOUND |
| // INTERNAL |
| // |
| // See status.proto for more details. |
| // |
| // TODO(b/147699081): Fix error codes: +ABORTED, -INTERNAL |
| // go/icing-library-apis. |
| optional StatusProto status = 1; |
| |
| // Copy of the SchemaProto. Modifying this copy does not affect the schema |
| // that IcingSearchEngine holds. |
| optional SchemaProto schema = 2; |
| } |
| |
| // Result of a call to IcingSearchEngine.GetSchemaType. |
| // Next tag: 3 |
| message GetSchemaTypeResultProto { |
| // Status code can be one of: |
| // OK |
| // FAILED_PRECONDITION |
| // NOT_FOUND |
| // INTERNAL |
| // |
| // See status.proto for more details. |
| // |
| // TODO(b/147699081): Fix error codes: +ABORTED, -INTERNAL |
| // go/icing-library-apis. |
| optional StatusProto status = 1; |
| |
| // Copy of the SchemaTypeConfigProto with the specified schema_type. |
| // Modifying this copy does not affect the SchemaTypeConfigProto that |
| // IcingSearchEngine holds. |
| optional SchemaTypeConfigProto schema_type_config = 2; |
| } |