Update Cobalt proto definitions
Cobalt proto definitions hadn't been updated since June 06, 2023. Update
the protos to get the `string_hashes_ff64` field in Observation, which
allows AdServices to send strings. The update also brings in other new
fields that are not used by AdServices.
Bug: b/321745113
Bug: b/303091672
Test: atest AdServicesCobaltUnitTests
Change-Id: I460e26ff4d978153a37a15dba2202faa513ea9aa
diff --git a/adservices/libraries/cobalt/proto/cobalt_registry.proto b/adservices/libraries/cobalt/proto/cobalt_registry.proto
index 670cc33..8fcadb7 100644
--- a/adservices/libraries/cobalt/proto/cobalt_registry.proto
+++ b/adservices/libraries/cobalt/proto/cobalt_registry.proto
@@ -63,6 +63,11 @@
repeated MetricDefinition metrics = 3;
string project_contact = 4;
+ // Identifier of the app that is expected to log the metrics for this project.
+ // Depending on the platform, this may represent an "app" or "component", and
+ // the format of package identifiers may be different.
+ string app_package_identifier = 7;
+
// Experiment namespaces supported for experiment ids in this project.
repeated string experiments_namespaces = 5;
@@ -75,15 +80,17 @@
//
// A CobaltRegistry can be in one of two states:
//
-// (1) It can contain data for a single Cobalt project. In this case, there is a
-// single
+// (1) It can contain data for a single Cobalt project. In this case, there is a single
// CustomerConfig which contains a single ProjectConfig.
//
-// (2) It can contain data for multiple Cobalt projects. In this case, there may
-// be any number of
+// (2) It can contain data for multiple Cobalt projects. In this case, there may be any number of
// |customers|, which in turn may contain any number of ProjectConfigs.
message CobaltRegistry {
reserved 1, 2, 3, 4;
// Cobalt customer registration.
repeated CustomerConfig customers = 5;
+
+ // Customer IDs that have been previously used and deleted from the registry.
+ // These IDs must not be reused for new customers.
+ repeated uint32 deleted_customer_ids = 6;
}
diff --git a/adservices/libraries/cobalt/proto/common.proto b/adservices/libraries/cobalt/proto/common.proto
index 93018ea..7de6aa4 100644
--- a/adservices/libraries/cobalt/proto/common.proto
+++ b/adservices/libraries/cobalt/proto/common.proto
@@ -60,25 +60,25 @@
// cannot be determined, then this field will be 'unknown:<cpu signature>'.
string board_name = 4;
- // This is a string representing the type of Fuchsia product from which
- // an observation is collected.
+ // This is a string representing the type of product running on the device.
//
- // During development, this is going to refer to layers of the Fuchsia cake
- // such as "garnet", "zircon", "topaz", etc... In the future, we will use
- // something related to what sort of device we are running on, such as
- // "Acme Lightbulb X" or "Machine Corp. Laptop III".
+ // The use of this field is system-specific. For example on Fuchsia it is the
+ // experience running on the device, e.g., "smart display" or "workstation".
string product_name = 5;
// This is a string representing the version of the currently running system.
- // The use of this field is system-specific. For example on Fuchsia it is the
- // build version (aka OS version) with a value that looks like
- // "0.20200114.1.1".
+ //
+ // The use of this field is system-specific. For example, on Fuchsia, it is
+ // the build version from fuchsia.buildinfo/Provider, e.g., "0.20200114.1.1".
+ // On Android, it is from android.os.Build.VERSION.RELEASE.
string system_version = 8;
// This is a string representing the version of the app sending information.
- // The use and format is application-specific. The main anticipated use of
- // this field is for experiments and debugging. I.e. to figure out if an
- // issue is version-specific or not.
+ // There can be multiple apps running on the system and updated separately
+ // from the system so sometimes it's more relevant to slice by the app version
+ // than the system version above.
+ //
+ // The use and format of this field is application-specific.
//
// The value '<unset>' means the system did not notify Cobalt of the current
// app_version.
@@ -87,9 +87,10 @@
// not know the app_version.
string app_version = 14;
- // This is a string representation of the current channel. It is an arbitrary
- // string that depends on the system. For example on Fuchsia some possible
- // values are "qa-daily" and "fishfood".
+ // This is a string representation of the current channel the device belongs
+ // to. It is common to segment a fleet of devices into disjoint populations.
+ //
+ // The use of this field is system-specific.
//
// The value '<unset>' means the system did not notify Cobalt of the current
// channel.
diff --git a/adservices/libraries/cobalt/proto/encrypted_message.proto b/adservices/libraries/cobalt/proto/encrypted_message.proto
index f9b0167..0c5808e 100644
--- a/adservices/libraries/cobalt/proto/encrypted_message.proto
+++ b/adservices/libraries/cobalt/proto/encrypted_message.proto
@@ -35,6 +35,7 @@
// the EncryptedMessage proto to carry the ciphertext in both cases.
//
message EncryptedMessage {
+
// The different schemes used in Cobalt to encrypt a message.
enum EncryptionScheme {
// The message is not encrypted. |ciphertext| contains plaintext bytes of a
diff --git a/adservices/libraries/cobalt/proto/metric_definition.proto b/adservices/libraries/cobalt/proto/metric_definition.proto
index 5a28da0..d99fe27 100644
--- a/adservices/libraries/cobalt/proto/metric_definition.proto
+++ b/adservices/libraries/cobalt/proto/metric_definition.proto
@@ -50,21 +50,16 @@
//
// Next ID: 27
message MetricDefinition {
- reserved 6, 7, 9, 13, 17, 21, 23, 24;
- reserved "event_codes", "event_code_buffer_max", "max_event_code", "parts",
- "proto_name", "string_buffer_max", "replacement_metric_id",
- "no_replacement_metric";
+ reserved 6, 7, 9, 13, 14, 15, 17, 21, 23, 24;
+ reserved "event_codes", "event_code_buffer_max", "max_event_code", "parts", "proto_name",
+ "string_buffer_max", "replacement_metric_id", "no_replacement_metric", "customer_name",
+ "project_name";
// Unique name for this Metric within its owning project.
// The name must obey the syntax of a C variable name and must have length
// at most 64.
string metric_name = 1;
- // The Cobalt registry YAML parser will automatically set the values of
- // customer_name and project_name based on the context of the YAML file.
- string customer_name = 14;
- string project_name = 15;
-
// These three numbers form this Metric's unique numerical ID in Cobalt. The
// Cobalt registry YAML parser will automatically set the value of
// customer_id and project_id based on the context of the YAML file.
@@ -78,8 +73,8 @@
// Next ID: 12
enum MetricType {
reserved 1, 2, 3, 4, 5, 6, 7, 9999;
- reserved "CUSTOM", "ELAPSED_TIME", "EVENT_COUNT", "EVENT_OCCURRED",
- "FRAME_RATE", "INT_HISTOGRAM", "MEMORY_USAGE", "STRING_USED";
+ reserved "CUSTOM", "ELAPSED_TIME", "EVENT_COUNT", "EVENT_OCCURRED", "FRAME_RATE",
+ "INT_HISTOGRAM", "MEMORY_USAGE", "STRING_USED";
UNSET = 0;
@@ -230,11 +225,14 @@
// The TimeZonePolicy for this Metric (Optional. Defaults to UTC)
TimeZonePolicy time_zone_policy = 10;
- // An IANA time zone identifier (https://iana.org/time-zones). Should be set
- // if and only if the metric's `time_zone_policy` is OTHER_TIME_ZONE.
+ // An IANA time zone identifier (https://iana.org/time-zones). Should be set if
+ // and only if the metric's `time_zone_policy` is OTHER_TIME_ZONE.
string other_time_zone = 25;
message Metadata {
+ reserved 2;
+ reserved "owner";
+
// The date after which this metric is no longer valid. If this field is not
// supplied, the metric is considered currently expired, and is not
// guaranteed to be reported by cobalt.
@@ -243,11 +241,6 @@
// It may be at most one year in the future.
string expiration_date = 1;
- // Primary contacts for questions/bugs regarding this metric (may be a
- // group). This should be a fully qualified email address (e.g.
- // my-group@test.com)
- repeated string owner = 2;
-
// Maximum ReleaseStage for which this Metric is allowed to be collected.
ReleaseStage max_release_stage = 4;
diff --git a/adservices/libraries/cobalt/proto/observation.proto b/adservices/libraries/cobalt/proto/observation.proto
index 208dc9a..fa57dc4 100644
--- a/adservices/libraries/cobalt/proto/observation.proto
+++ b/adservices/libraries/cobalt/proto/observation.proto
@@ -157,11 +157,17 @@
// STRING_COUNTS
// UNIQUE_DEVICE_STRING_COUNTS
message StringHistogramObservation {
- // List of hashes of strings (hashed using Farmhash Fingerprint128).
+ // TODO(b/322409910): Delete string_hashes after clients stop sending
+ // the field.
+ repeated bytes string_hashes = 1 [deprecated = true];
+
+ // List of hashes of strings (hashed using Farmhash Fingerprint64).
// The string that hashes to the bytes value in the ith position in
// |string_hashes| corresponds to the bucket with index i in each of the
// |bucket_indices| values in |string_histograms|.
- repeated bytes string_hashes = 1;
+ //
+ // Only one of `string_hashes` or `string_hashes_ff64` should be used.
+ repeated bytes string_hashes_ff64 = 3;
repeated IndexHistogram string_histograms = 2;
}
diff --git a/adservices/libraries/cobalt/proto/report_definition.proto b/adservices/libraries/cobalt/proto/report_definition.proto
index 20b9d4a..c45b711 100644
--- a/adservices/libraries/cobalt/proto/report_definition.proto
+++ b/adservices/libraries/cobalt/proto/report_definition.proto
@@ -33,8 +33,7 @@
//
// A Report is associated with a Metric and this means that the Report analyzes
// the Events that were logged to that Metric. The first step occurs on a
-// Fuchsia device where Cobalt analyzes the logged Events in order to form
-// Observations.
+// device where Cobalt analyzes the logged Events in order to form Observations.
//
// An Observation is built for a particular Report. The type of observation,
// including which of several privacy-oriented Encodings is used or not, depends
@@ -43,8 +42,7 @@
// The Observations are sent to the Cobalt Shuffler which shuffles them in order
// to break linkability between Observations and linkability with the
// originating device. Next the shuffled Observations are sent to the Analyzer
-// which aggregates Observations from all Fuchsia devices in order to generate
-// a report.
+// which aggregates Observations from all devices in order to generate a report.
//
// There are multiple types of Metrics and multiple types of Reports. Each
// Report type is compatible with only some of the Metric types.
@@ -54,13 +52,11 @@
// MetricDefinition called the owning MetricDefinition.
// Next ID: 33
message ReportDefinition {
- reserved 4, 5, 6, 7, 8, 11, 14, 15, 16, 12, 101, 102, 31, 21;
- reserved "aggregation_type", "aggregation_window", "candidate_lis",
- "dp_release_config", "expected_population_size",
- "expected_string_set_size", "export_location_override",
- "local_privacy_noise_level", "output_location", "percentiles",
- "threshold", "window_size", "use_poisson_mechanism_for_privacy",
- "prob_bit_flip";
+ reserved 4, 5, 6, 7, 8, 9, 11, 14, 15, 16, 12, 101, 102, 31, 21;
+ reserved "aggregation_type", "aggregation_window", "candidate_lis", "dp_release_config",
+ "expected_population_size", "expected_string_set_size", "export_location_override",
+ "local_privacy_noise_level", "output_location", "percentiles", "threshold", "window_size",
+ "use_poisson_mechanism_for_privacy", "prob_bit_flip", "candidate_file";
// Unique name for this Report within its owning MetricDefinition.
// The name must obey the syntax of a C variable name and must have length
@@ -79,12 +75,10 @@
// Next standard report type ID: 22
enum ReportType {
reserved 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 19, 9999;
- reserved "CUSTOM_RAW_DUMP", "EVENT_COMPONENT_OCCURRENCE_COUNT",
- "HIGH_FREQUENCY_STRING_COUNTS", "INT_RANGE_HISTOGRAM",
- "NUMERIC_AGGREGATION", "NUMERIC_PERF_RAW_DUMP", "PER_DEVICE_HISTOGRAM",
- "PER_DEVICE_NUMERIC_STATS", "SIMPLE_OCCURRENCE_COUNT",
- "STRING_COUNTS_WITH_THRESHOLD", "STRING_HISTOGRAMS",
- "UNIQUE_N_DAY_ACTIVES";
+ reserved "CUSTOM_RAW_DUMP", "EVENT_COMPONENT_OCCURRENCE_COUNT", "HIGH_FREQUENCY_STRING_COUNTS",
+ "INT_RANGE_HISTOGRAM", "NUMERIC_AGGREGATION", "NUMERIC_PERF_RAW_DUMP",
+ "PER_DEVICE_HISTOGRAM", "PER_DEVICE_NUMERIC_STATS", "SIMPLE_OCCURRENCE_COUNT",
+ "STRING_COUNTS_WITH_THRESHOLD", "STRING_HISTOGRAMS", "UNIQUE_N_DAY_ACTIVES";
REPORT_TYPE_UNSET = 0;
@@ -377,7 +371,6 @@
// (See report_row.proto)
//
// ReportDefinition fields particular to this type:
- // - candidate_file
// - string_buffer_max
STRING_COUNTS = 20;
@@ -402,7 +395,6 @@
// (See report_row.proto)
//
// ReportDefinition fields particular to this type:
- // - candidate_file
// - local_aggregation_period
// - string_buffer_max
// - system_profile_selection (SELECT_FIRST and SELECT LAST will maintain
@@ -444,7 +436,7 @@
// user population size. For now, it should be set manually in the Cobalt
// registry in consultation with the Cobalt team.
//
- // TODO(b/278932979): update this comment once the field is populated by
+ // TODO(b/295053509): update this comment once the field is populated by
// the registry parser.
double poisson_mean = 30;
@@ -514,11 +506,6 @@
//////////////// Fields specific to some report types /////////////////
- // Simple name or full path to file containing known string values.
- //
- // This field is used only for reports of type STRING.
- string candidate_file = 9;
-
// A specification of integer-range buckets for a histogram.
//
// This field is for reports of type UNIQUE_DEVICE_HISTOGRAMS,
@@ -589,44 +576,39 @@
// period is specified for UNIQUE_DEVICE_* report types.
WindowSize local_aggregation_period = 19;
- // The maximum number of distinct event vectors for which an instance of the
- // Cobalt client should produce an observation, for a given local aggregation
- // period. Event vectors are prioritized in order of first arrival during the
- // aggregation period.
+ // The maximum number of distinct event vectors for which an instance of the Cobalt
+ // client should produce an observation, for a given local aggregation period. Event
+ // vectors are prioritized in order of first arrival during the aggregation period.
//
- // For example, if a report has an event_vector_buffer_max of 10, and 12
- // distinct event vectors are logged for this metric over an aggregation
- // period, then Cobalt will send observations of the first 10 event vectors
- // for that aggregation period and drop the last 2.
+ // For example, if a report has an event_vector_buffer_max of 10, and 12 distinct event
+ // vectors are logged for this metric over an aggregation period, then Cobalt will send
+ // observations of the first 10 event vectors for that aggregation period and drop the
+ // last 2.
//
- // If this field is unset, the registry parser assigns to it the total number
- // of event vectors for the report's parent metric (i.e., the product over all
- // metric dimensions of the number of event codes per dimension).
+ // If this field is unset, the registry parser assigns to it the total number of event
+ // vectors for the report's parent metric (i.e., the product over all metric dimensions
+ // of the number of event codes per dimension).
//
- // The report's project will be charged against a resource budget for this
- // value so project owners are encouraged to set this as small as possible.
- // For example, the report's parent metric may include a dimension with
- // thousands of event codes, but it is expected that any one device will log
- // only a few distinct event vectors per day. In that case we may set
- // event_vector_buffer_max to a relatively small number, say 20. For reports
- // which use differential privacy, setting event_vector_buffer_max to a
- // smaller number will improve the signal for event vectors which are included
- // in observations.
+ // The report's project will be charged against a resource budget for this value
+ // so project owners are encouraged to set this as small as possible. For example,
+ // the report's parent metric may include a dimension with thousands of event codes,
+ // but it is expected that any one device will log only a few distinct event vectors
+ // per day. In that case we may set event_vector_buffer_max to a relatively small number,
+ // say 20. For reports which use differential privacy, setting event_vector_buffer_max
+ // to a smaller number will improve the signal for event vectors which are included in
+ // observations.
uint64 event_vector_buffer_max = 26;
- // The maximum number of distinct strings that Cobalt must keep in its
- // in-memory buffer on any single device. During local aggregation for reports
- // of type STRING_COUNTS and UNIQUE_DEVICE_STRING_COUNTS, Cobalt will keep
- // track of this many distinct strings per aggregation period. The report's
- // project will be charged against a resource budget for this value so project
- // owners are encouraged to set this as small as possible. A STRING metric
- // includes a file of candidate strings that may contain many thousands of
- // strings. But it is expected that any one device will log only a few of
- // these strings per day. We may set string_buffer_max to a relatively small
- // number, say 20.
+ // The maximum number of distinct strings that Cobalt must keep in its in-memory buffer
+ // on any single device. During local aggregation for reports of type STRING_COUNTS and
+ // UNIQUE_DEVICE_STRING_COUNTS, Cobalt will keep track of this many distinct strings per
+ // aggregation period. The report's project will be charged against a resource budget for this
+ // value so project owners are encouraged to set this as small as possible. A STRING metric
+ // includes a file of candidate strings that may contain many thousands of strings. But it is
+ // expected that any one device will log only a few of these strings per day. We may set
+ // string_buffer_max to a relatively small number, say 20.
//
- // This is a required field for reports of type STRING_COUNTS and
- // UNIQUE_DEVICE_STRING_COUNTS.
+ // This is a required field for reports of type STRING_COUNTS and UNIQUE_DEVICE_STRING_COUNTS.
uint32 string_buffer_max = 28;
// For reports of type UNIQUE_DEVICE_COUNTS, send observations as soon as the
@@ -645,7 +627,7 @@
bool expedited_sending = 29;
/////////////////// Fields used by all report types ///////////////////
- // Next id: 106
+ // Next id: 109
// The list of SystemProfileFields to include in each row of the report.
// Optional.
@@ -653,11 +635,10 @@
// The list of Experiments to include in each row of the report.
//
- // Each report row lists the intersection of the experiment ids active on the
- // device and experiment ids specified in this field.
+ // Each report row lists the intersection of the experiment ids active on the device and
+ // experiment ids specified in this field.
//
- // The specified experiment ids must be found in one of the project's
- // experiments_namespaces.
+ // The specified experiment ids must be found in one of the project's experiments_namespaces.
repeated int64 experiment_id = 104;
// This field is required for reports of type UNIQUE_DEVICE_COUNTS,
@@ -672,6 +653,68 @@
// Maximum ReleaseStage for which this Report is allowed to be collected.
ReleaseStage max_release_stage = 105;
+
+ // Report can be collected even if the user/device has not consented.
+ // This field can only be set to true on reports that use privacy mechanisms
+ // that include differential privacy (i.e. not DE_IDENTIFICATION). The use of
+ // this field is for collecting anonymized data that is allowed even when
+ // the consent is not given. These use cases need to be specially approved
+ // by privacy reviewers.
+ bool exempt_from_consent = 108;
+
+ // New Privacy API
+
+ // This enum identifies what privacy protection is applied to the report.
+ enum PrivacyMechanism {
+ PRIVACY_MECHANISM_UNSPECIFIED = 0;
+ // If you specify this value the data will be de-identified without
+ // additional privacy protections.
+ DE_IDENTIFICATION = 1;
+ // If you specify this value the data will be protected with Shuffled
+ // Differential Privacy guarantees (e.g., the noise will be added on the
+ // devices)
+ SHUFFLED_DIFFERENTIAL_PRIVACY = 2;
+ }
+
+ // This field identifies what privacy protection is applied to the report.
+ // It will eventually be required once migration from privacy_level
+ // is complete.
+ PrivacyMechanism privacy_mechanism = 106;
+
+ // The object for grouping all parameters needed for SHUFFLED DP mode.
+ message ShuffledDifferentialPrivacyConfig {
+ // This field represents an upper bound on the amount of information which
+ // can be learned about a device from a report including that device.
+ // Lower values correspond to higher privacy.
+ // Epsilon must be > 0.
+ double epsilon = 1;
+ // This field represents the risk of the epsilon guarantee not holding. This
+ // is usually set as 1 over the expected number of participating devices.
+ // Delta must be > 0 and < 1.
+ double delta = 2;
+ // The generated report will exclude an Observation if there are not at
+ // least |reporting_threshold| number of distinct devices reporting
+ // Observations with the same ObservationMetadata.
+ uint32 reporting_threshold = 3;
+
+ // The mean number of observations added per index point when performing the
+ // Poisson mechanism encoding for Cobalt reports. Required.
+ //
+ // In the future, the value of this field will be computed by the registry
+ // parser as a function of other fields in this
+ // ShuffledDifferentialPrivacyConfig. For now, it should be set manually in
+ // the Cobalt registry in consultation with the Cobalt team.
+ //
+ // TODO(b/295053509): update this comment once the field is auto populated by
+ // the registry parser.
+ double poisson_mean = 4;
+ }
+
+ // If privacy_mechanism is SHUFFLED_DIFFERENTIAL_PRIVACY, privacy_config must
+ // hold a valid ShuffledDifferentialPrivacyConfig; otherwise it must be empty.
+ oneof privacy_config {
+ ShuffledDifferentialPrivacyConfig shuffled_dp = 107;
+ }
}
// A specification for SystemProfile selection policy.