| // Copyright 2022 Google LLC |
| // |
| // Licensed under the Apache License, Version 2.0 (the "License"); |
| // you may not use this file except in compliance with the License. |
| // You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, software |
| // distributed under the License is distributed on an "AS IS" BASIS, |
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| // See the License for the specific language governing permissions and |
| // limitations under the License. |
| |
| syntax = "proto3"; |
| |
| package google.cloud.dataproc.v1; |
| |
| import "google/api/annotations.proto"; |
| import "google/api/client.proto"; |
| import "google/api/field_behavior.proto"; |
| import "google/longrunning/operations.proto"; |
| import "google/protobuf/empty.proto"; |
| import "google/protobuf/field_mask.proto"; |
| import "google/protobuf/timestamp.proto"; |
| |
| option go_package = "google.golang.org/genproto/googleapis/cloud/dataproc/v1;dataproc"; |
| option java_multiple_files = true; |
| option java_outer_classname = "JobsProto"; |
| option java_package = "com.google.cloud.dataproc.v1"; |
| |
| // The JobController provides methods to manage jobs. |
| service JobController { |
| option (google.api.default_host) = "dataproc.googleapis.com"; |
| option (google.api.oauth_scopes) = |
| "https://www.googleapis.com/auth/cloud-platform"; |
| |
| // Submits a job to a cluster. |
| rpc SubmitJob(SubmitJobRequest) returns (Job) { |
| option (google.api.http) = { |
| post: "/v1/projects/{project_id}/regions/{region}/jobs:submit" |
| body: "*" |
| }; |
| option (google.api.method_signature) = "project_id,region,job"; |
| } |
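| |
| // Illustrative only (not part of the API definition): a minimal SubmitJob |
| // REST call for the method above might look like the sketch below. The |
| // project, region, cluster, and file names are placeholders. |
| // |
| // POST /v1/projects/my-project/regions/us-central1/jobs:submit |
| // |
| // { |
| // "job": { |
| // "placement": { |
| // "clusterName": "my-cluster" |
| // }, |
| // "pysparkJob": { |
| // "mainPythonFileUri": "gs://my-bucket/jobs/wordcount.py" |
| // } |
| // } |
| // } |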
| |
| // Submits a job to a cluster. This method returns a |
| // [google.longrunning.Operation][google.longrunning.Operation] whose |
| // `response` is the submitted [Job][google.cloud.dataproc.v1.Job] and whose |
| // `metadata` is [JobMetadata][google.cloud.dataproc.v1.JobMetadata]. |
| rpc SubmitJobAsOperation(SubmitJobRequest) |
| returns (google.longrunning.Operation) { |
| option (google.api.http) = { |
| post: "/v1/projects/{project_id}/regions/{region}/jobs:submitAsOperation" |
| body: "*" |
| }; |
| option (google.api.method_signature) = "project_id, region, job"; |
| option (google.longrunning.operation_info) = { |
| response_type: "Job" |
| metadata_type: "JobMetadata" |
| }; |
| } |
| |
| // Gets the resource representation for a job in a project. |
| rpc GetJob(GetJobRequest) returns (Job) { |
| option (google.api.http) = { |
| get: "/v1/projects/{project_id}/regions/{region}/jobs/{job_id}" |
| }; |
| option (google.api.method_signature) = "project_id,region,job_id"; |
| } |
| |
| // Lists regions/{region}/jobs in a project. |
| rpc ListJobs(ListJobsRequest) returns (ListJobsResponse) { |
| option (google.api.http) = { |
| get: "/v1/projects/{project_id}/regions/{region}/jobs" |
| }; |
| option (google.api.method_signature) = "project_id,region"; |
| option (google.api.method_signature) = "project_id,region,filter"; |
| } |
| |
| // Updates a job in a project. |
| rpc UpdateJob(UpdateJobRequest) returns (Job) { |
| option (google.api.http) = { |
| patch: "/v1/projects/{project_id}/regions/{region}/jobs/{job_id}" |
| body: "job" |
| }; |
| } |
| |
| // Starts a job cancellation request. To access the job resource |
| // after cancellation, call |
| // [regions/{region}/jobs.list](https://cloud.google.com/dataproc/docs/reference/rest/v1/projects.regions.jobs/list) |
| // or |
| // [regions/{region}/jobs.get](https://cloud.google.com/dataproc/docs/reference/rest/v1/projects.regions.jobs/get). |
| rpc CancelJob(CancelJobRequest) returns (Job) { |
| option (google.api.http) = { |
| post: "/v1/projects/{project_id}/regions/{region}/jobs/{job_id}:cancel" |
| body: "*" |
| }; |
| option (google.api.method_signature) = "project_id,region,job_id"; |
| } |
| |
| // Deletes the job from the project. If the job is active, the delete fails, |
| // and the response returns `FAILED_PRECONDITION`. |
| rpc DeleteJob(DeleteJobRequest) returns (google.protobuf.Empty) { |
| option (google.api.http) = { |
| delete: "/v1/projects/{project_id}/regions/{region}/jobs/{job_id}" |
| }; |
| option (google.api.method_signature) = "project_id,region,job_id"; |
| } |
| } |
| |
| // The runtime logging config of the job. |
| message LoggingConfig { |
| // The Log4j level for job execution. When running an |
| // [Apache Hive](https://hive.apache.org/) job, Cloud |
| // Dataproc configures the Hive client to an equivalent verbosity level. |
| enum Level { |
| // Level is unspecified. Use default level for log4j. |
| LEVEL_UNSPECIFIED = 0; |
| |
| // Use ALL level for log4j. |
| ALL = 1; |
| |
| // Use TRACE level for log4j. |
| TRACE = 2; |
| |
| // Use DEBUG level for log4j. |
| DEBUG = 3; |
| |
| // Use INFO level for log4j. |
| INFO = 4; |
| |
| // Use WARN level for log4j. |
| WARN = 5; |
| |
| // Use ERROR level for log4j. |
| ERROR = 6; |
| |
| // Use FATAL level for log4j. |
| FATAL = 7; |
| |
| // Turn off log4j. |
| OFF = 8; |
| } |
| |
| // The per-package log levels for the driver. This can include the |
| // "root" package name to configure the root logger (rootLogger). |
| // Examples: |
| // 'com.google = FATAL', 'root = INFO', 'org.apache = DEBUG' |
| map<string, Level> driver_log_levels = 2; |
| } |
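| |
| // Illustrative only: in a REST request, the `driver_log_levels` map above |
| // might be expressed as follows (the package names are placeholders): |
| // |
| // "loggingConfig": { |
| // "driverLogLevels": { |
| // "root": "INFO", |
| // "org.apache.spark": "DEBUG" |
| // } |
| // } |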
| |
| // A Dataproc job for running |
| // [Apache Hadoop |
| // MapReduce](https://hadoop.apache.org/docs/current/hadoop-mapreduce-client/hadoop-mapreduce-client-core/MapReduceTutorial.html) |
| // jobs on [Apache Hadoop |
| // YARN](https://hadoop.apache.org/docs/r2.7.1/hadoop-yarn/hadoop-yarn-site/YARN.html). |
| message HadoopJob { |
| // Required. Indicates the location of the driver's main class. Specify |
| // either the jar file that contains the main class or the main class name. |
| // To specify both, add the jar file to `jar_file_uris`, and then specify |
| // the main class name in this property. |
| oneof driver { |
| // The HCFS URI of the jar file containing the main class. |
| // Examples: |
| // 'gs://foo-bucket/analytics-binaries/extract-useful-metrics-mr.jar' |
| // 'hdfs:/tmp/test-samples/custom-wordcount.jar' |
| // 'file:///home/usr/lib/hadoop-mapreduce/hadoop-mapreduce-examples.jar' |
| string main_jar_file_uri = 1; |
| |
| // The name of the driver's main class. The jar file containing the class |
| // must be in the default CLASSPATH or specified in `jar_file_uris`. |
| string main_class = 2; |
| } |
| |
| // Optional. The arguments to pass to the driver. Do not |
| // include arguments, such as `-libjars` or `-Dfoo=bar`, that can be set as |
| // job properties, since a collision may occur that causes an incorrect job |
| // submission. |
| repeated string args = 3 [(google.api.field_behavior) = OPTIONAL]; |
| |
| // Optional. Jar file URIs to add to the CLASSPATHs of the |
| // Hadoop driver and tasks. |
| repeated string jar_file_uris = 4 [(google.api.field_behavior) = OPTIONAL]; |
| |
| // Optional. HCFS (Hadoop Compatible Filesystem) URIs of files to be copied |
| // to the working directory of Hadoop drivers and distributed tasks. Useful |
| // for naively parallel tasks. |
| repeated string file_uris = 5 [(google.api.field_behavior) = OPTIONAL]; |
| |
| // Optional. HCFS URIs of archives to be extracted in the working directory of |
| // Hadoop drivers and tasks. Supported file types: |
| // .jar, .tar, .tar.gz, .tgz, or .zip. |
| repeated string archive_uris = 6 [(google.api.field_behavior) = OPTIONAL]; |
| |
| // Optional. A mapping of property names to values, used to configure Hadoop. |
| // Properties that conflict with values set by the Dataproc API may be |
| // overwritten. Can include properties set in /etc/hadoop/conf/*-site and |
| // classes in user code. |
| map<string, string> properties = 7 [(google.api.field_behavior) = OPTIONAL]; |
| |
| // Optional. The runtime log config for job execution. |
| LoggingConfig logging_config = 8 [(google.api.field_behavior) = OPTIONAL]; |
| } |
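| |
| // A sketch, for illustration only, of the `HadoopJob` message above in |
| // REST/JSON form. It runs the word-count example from the jar referenced in |
| // the `main_jar_file_uri` documentation; the bucket paths are placeholders. |
| // |
| // "hadoopJob": { |
| // "mainJarFileUri": "file:///usr/lib/hadoop-mapreduce/hadoop-mapreduce-examples.jar", |
| // "args": ["wordcount", "gs://my-bucket/input/", "gs://my-bucket/output/"], |
| // "properties": { |
| // "mapreduce.job.reduces": "2" |
| // } |
| // } |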
| |
| // A Dataproc job for running [Apache Spark](https://spark.apache.org/) |
| // applications on YARN. |
| message SparkJob { |
| // Required. The specification of the main method to call to drive the job. |
| // Specify either the jar file that contains the main class or the main class |
| // name. To pass both a main jar and a main class in that jar, add the jar to |
| // `CommonJob.jar_file_uris`, and then specify the main class name in |
| // `main_class`. |
| oneof driver { |
| // The HCFS URI of the jar file that contains the main class. |
| string main_jar_file_uri = 1; |
| |
| // The name of the driver's main class. The jar file that contains the class |
| // must be in the default CLASSPATH or specified in `jar_file_uris`. |
| string main_class = 2; |
| } |
| |
| // Optional. The arguments to pass to the driver. Do not include arguments, |
| // such as `--conf`, that can be set as job properties, since a collision may |
| // occur that causes an incorrect job submission. |
| repeated string args = 3 [(google.api.field_behavior) = OPTIONAL]; |
| |
| // Optional. HCFS URIs of jar files to add to the CLASSPATHs of the |
| // Spark driver and tasks. |
| repeated string jar_file_uris = 4 [(google.api.field_behavior) = OPTIONAL]; |
| |
| // Optional. HCFS URIs of files to be placed in the working directory of |
| // each executor. Useful for naively parallel tasks. |
| repeated string file_uris = 5 [(google.api.field_behavior) = OPTIONAL]; |
| |
| // Optional. HCFS URIs of archives to be extracted into the working directory |
| // of each executor. Supported file types: |
| // .jar, .tar, .tar.gz, .tgz, and .zip. |
| repeated string archive_uris = 6 [(google.api.field_behavior) = OPTIONAL]; |
| |
| // Optional. A mapping of property names to values, used to configure Spark. |
| // Properties that conflict with values set by the Dataproc API may be |
| // overwritten. Can include properties set in |
| // /etc/spark/conf/spark-defaults.conf and classes in user code. |
| map<string, string> properties = 7 [(google.api.field_behavior) = OPTIONAL]; |
| |
| // Optional. The runtime log config for job execution. |
| LoggingConfig logging_config = 8 [(google.api.field_behavior) = OPTIONAL]; |
| } |
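| |
| // A sketch, for illustration only, of the `SparkJob` message above using the |
| // `main_class` variant of the driver oneof; the class, jar path, and |
| // property values are placeholders. |
| // |
| // "sparkJob": { |
| // "mainClass": "org.apache.spark.examples.SparkPi", |
| // "jarFileUris": ["file:///usr/lib/spark/examples/jars/spark-examples.jar"], |
| // "args": ["1000"], |
| // "properties": { |
| // "spark.executor.memory": "2g" |
| // } |
| // } |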
| |
| // A Dataproc job for running |
| // [Apache |
| // PySpark](https://spark.apache.org/docs/0.9.0/python-programming-guide.html) |
| // applications on YARN. |
| message PySparkJob { |
| // Required. The HCFS URI of the main Python file to use as the driver. Must |
| // be a .py file. |
| string main_python_file_uri = 1 [(google.api.field_behavior) = REQUIRED]; |
| |
| // Optional. The arguments to pass to the driver. Do not include arguments, |
| // such as `--conf`, that can be set as job properties, since a collision may |
| // occur that causes an incorrect job submission. |
| repeated string args = 2 [(google.api.field_behavior) = OPTIONAL]; |
| |
| // Optional. HCFS file URIs of Python files to pass to the PySpark |
| // framework. Supported file types: .py, .egg, and .zip. |
| repeated string python_file_uris = 3 [(google.api.field_behavior) = OPTIONAL]; |
| |
| // Optional. HCFS URIs of jar files to add to the CLASSPATHs of the |
| // Python driver and tasks. |
| repeated string jar_file_uris = 4 [(google.api.field_behavior) = OPTIONAL]; |
| |
| // Optional. HCFS URIs of files to be placed in the working directory of |
| // each executor. Useful for naively parallel tasks. |
| repeated string file_uris = 5 [(google.api.field_behavior) = OPTIONAL]; |
| |
| // Optional. HCFS URIs of archives to be extracted into the working directory |
| // of each executor. Supported file types: |
| // .jar, .tar, .tar.gz, .tgz, and .zip. |
| repeated string archive_uris = 6 [(google.api.field_behavior) = OPTIONAL]; |
| |
| // Optional. A mapping of property names to values, used to configure PySpark. |
| // Properties that conflict with values set by the Dataproc API may be |
| // overwritten. Can include properties set in |
| // /etc/spark/conf/spark-defaults.conf and classes in user code. |
| map<string, string> properties = 7 [(google.api.field_behavior) = OPTIONAL]; |
| |
| // Optional. The runtime log config for job execution. |
| LoggingConfig logging_config = 8 [(google.api.field_behavior) = OPTIONAL]; |
| } |
| |
| // A list of queries to run on a cluster. |
| message QueryList { |
| // Required. The queries to execute. You do not need to end a query expression |
| // with a semicolon. Multiple queries can be specified in one |
| // string by separating each with a semicolon. Here is an example of a |
| // Dataproc API snippet that uses a QueryList to specify a HiveJob: |
| // |
| // "hiveJob": { |
| // "queryList": { |
| // "queries": [ |
| // "query1", |
| // "query2", |
| // "query3;query4", |
| // ] |
| // } |
| // } |
| repeated string queries = 1 [(google.api.field_behavior) = REQUIRED]; |
| } |
| |
| // A Dataproc job for running [Apache Hive](https://hive.apache.org/) |
| // queries on YARN. |
| message HiveJob { |
| // Required. The sequence of Hive queries to execute, specified as either |
| // an HCFS file URI or a list of queries. |
| oneof queries { |
| // The HCFS URI of the script that contains Hive queries. |
| string query_file_uri = 1; |
| |
| // A list of queries. |
| QueryList query_list = 2; |
| } |
| |
| // Optional. Whether to continue executing queries if a query fails. |
| // The default value is `false`. Setting to `true` can be useful when |
| // executing independent parallel queries. |
| bool continue_on_failure = 3 [(google.api.field_behavior) = OPTIONAL]; |
| |
| // Optional. Mapping of query variable names to values (equivalent to the |
| // Hive command: `SET name="value";`). |
| map<string, string> script_variables = 4 |
| [(google.api.field_behavior) = OPTIONAL]; |
| |
| // Optional. A mapping of property names and values, used to configure Hive. |
| // Properties that conflict with values set by the Dataproc API may be |
| // overwritten. Can include properties set in /etc/hadoop/conf/*-site.xml, |
| // /etc/hive/conf/hive-site.xml, and classes in user code. |
| map<string, string> properties = 5 [(google.api.field_behavior) = OPTIONAL]; |
| |
| // Optional. HCFS URIs of jar files to add to the CLASSPATH of the |
| // Hive server and Hadoop MapReduce (MR) tasks. Can contain Hive SerDes |
| // and UDFs. |
| repeated string jar_file_uris = 6 [(google.api.field_behavior) = OPTIONAL]; |
| } |
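| |
| // A sketch, for illustration only, of the `HiveJob` message above using the |
| // `query_file_uri` variant together with `script_variables`; the bucket path |
| // and variable values are placeholders. |
| // |
| // "hiveJob": { |
| // "queryFileUri": "gs://my-bucket/queries/report.hql", |
| // "scriptVariables": { |
| // "run_date": "2022-01-01" |
| // }, |
| // "continueOnFailure": false |
| // } |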
| |
| // A Dataproc job for running [Apache Spark |
| // SQL](https://spark.apache.org/sql/) queries. |
| message SparkSqlJob { |
| // Required. The sequence of Spark SQL queries to execute, specified as |
| // either an HCFS file URI or as a list of queries. |
| oneof queries { |
| // The HCFS URI of the script that contains SQL queries. |
| string query_file_uri = 1; |
| |
| // A list of queries. |
| QueryList query_list = 2; |
| } |
| |
| // Optional. Mapping of query variable names to values (equivalent to the |
| // Spark SQL command: `SET name="value";`). |
| map<string, string> script_variables = 3 |
| [(google.api.field_behavior) = OPTIONAL]; |
| |
| // Optional. A mapping of property names to values, used to configure |
| // Spark SQL's SparkConf. Properties that conflict with values set by the |
| // Dataproc API may be overwritten. |
| map<string, string> properties = 4 [(google.api.field_behavior) = OPTIONAL]; |
| |
| // Optional. HCFS URIs of jar files to be added to the Spark CLASSPATH. |
| repeated string jar_file_uris = 56 [(google.api.field_behavior) = OPTIONAL]; |
| |
| // Optional. The runtime log config for job execution. |
| LoggingConfig logging_config = 6 [(google.api.field_behavior) = OPTIONAL]; |
| } |
| |
| // A Dataproc job for running [Apache Pig](https://pig.apache.org/) |
| // queries on YARN. |
| message PigJob { |
| // Required. The sequence of Pig queries to execute, specified as an HCFS |
| // file URI or a list of queries. |
| oneof queries { |
| // The HCFS URI of the script that contains the Pig queries. |
| string query_file_uri = 1; |
| |
| // A list of queries. |
| QueryList query_list = 2; |
| } |
| |
| // Optional. Whether to continue executing queries if a query fails. |
| // The default value is `false`. Setting to `true` can be useful when |
| // executing independent parallel queries. |
| bool continue_on_failure = 3 [(google.api.field_behavior) = OPTIONAL]; |
| |
| // Optional. Mapping of query variable names to values (equivalent to the Pig |
| // command: `name=[value]`). |
| map<string, string> script_variables = 4 |
| [(google.api.field_behavior) = OPTIONAL]; |
| |
| // Optional. A mapping of property names to values, used to configure Pig. |
| // Properties that conflict with values set by the Dataproc API may be |
| // overwritten. Can include properties set in /etc/hadoop/conf/*-site.xml, |
| // /etc/pig/conf/pig.properties, and classes in user code. |
| map<string, string> properties = 5 [(google.api.field_behavior) = OPTIONAL]; |
| |
| // Optional. HCFS URIs of jar files to add to the CLASSPATH of |
| // the Pig Client and Hadoop MapReduce (MR) tasks. Can contain Pig UDFs. |
| repeated string jar_file_uris = 6 [(google.api.field_behavior) = OPTIONAL]; |
| |
| // Optional. The runtime log config for job execution. |
| LoggingConfig logging_config = 7 [(google.api.field_behavior) = OPTIONAL]; |
| } |
| |
| // A Dataproc job for running |
| // [Apache SparkR](https://spark.apache.org/docs/latest/sparkr.html) |
| // applications on YARN. |
| message SparkRJob { |
| // Required. The HCFS URI of the main R file to use as the driver. |
| // Must be a .R file. |
| string main_r_file_uri = 1 [(google.api.field_behavior) = REQUIRED]; |
| |
| // Optional. The arguments to pass to the driver. Do not include arguments, |
| // such as `--conf`, that can be set as job properties, since a collision may |
| // occur that causes an incorrect job submission. |
| repeated string args = 2 [(google.api.field_behavior) = OPTIONAL]; |
| |
| // Optional. HCFS URIs of files to be placed in the working directory of |
| // each executor. Useful for naively parallel tasks. |
| repeated string file_uris = 3 [(google.api.field_behavior) = OPTIONAL]; |
| |
| // Optional. HCFS URIs of archives to be extracted into the working directory |
| // of each executor. Supported file types: |
| // .jar, .tar, .tar.gz, .tgz, and .zip. |
| repeated string archive_uris = 4 [(google.api.field_behavior) = OPTIONAL]; |
| |
| // Optional. A mapping of property names to values, used to configure SparkR. |
| // Properties that conflict with values set by the Dataproc API may be |
| // overwritten. Can include properties set in |
| // /etc/spark/conf/spark-defaults.conf and classes in user code. |
| map<string, string> properties = 5 [(google.api.field_behavior) = OPTIONAL]; |
| |
| // Optional. The runtime log config for job execution. |
| LoggingConfig logging_config = 6 [(google.api.field_behavior) = OPTIONAL]; |
| } |
| |
| // A Dataproc job for running [Presto](https://prestosql.io/) queries. |
| // **IMPORTANT**: The [Dataproc Presto Optional |
| // Component](https://cloud.google.com/dataproc/docs/concepts/components/presto) |
| // must be enabled when the cluster is created to submit a Presto job to the |
| // cluster. |
| message PrestoJob { |
| // Required. The sequence of Presto queries to execute, specified as |
| // either an HCFS file URI or as a list of queries. |
| oneof queries { |
| // The HCFS URI of the script that contains SQL queries. |
| string query_file_uri = 1; |
| |
| // A list of queries. |
| QueryList query_list = 2; |
| } |
| |
| // Optional. Whether to continue executing queries if a query fails. |
| // The default value is `false`. Setting to `true` can be useful when |
| // executing independent parallel queries. |
| bool continue_on_failure = 3 [(google.api.field_behavior) = OPTIONAL]; |
| |
| // Optional. The format in which query output will be displayed. See the |
| // Presto documentation for supported output formats. |
| string output_format = 4 [(google.api.field_behavior) = OPTIONAL]; |
| |
| // Optional. Presto client tags to attach to this query. |
| repeated string client_tags = 5 [(google.api.field_behavior) = OPTIONAL]; |
| |
| // Optional. A mapping of property names to values. Used to set Presto |
| // [session properties](https://prestodb.io/docs/current/sql/set-session.html). |
| // Equivalent to using the `--session` flag in the Presto CLI. |
| map<string, string> properties = 6 [(google.api.field_behavior) = OPTIONAL]; |
| |
| // Optional. The runtime log config for job execution. |
| LoggingConfig logging_config = 7 [(google.api.field_behavior) = OPTIONAL]; |
| } |
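| |
| // A sketch, for illustration only, of the `PrestoJob` message above; the |
| // table name, output format, and client tag are placeholders (consult the |
| // Presto documentation for the output formats it actually supports). |
| // |
| // "prestoJob": { |
| // "queryList": { |
| // "queries": ["SELECT COUNT(*) FROM my_catalog.my_schema.my_table"] |
| // }, |
| // "outputFormat": "CSV", |
| // "clientTags": ["nightly-report"] |
| // } |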
| |
| // Dataproc job placement configuration. |
| message JobPlacement { |
| // Required. The name of the cluster where the job will be submitted. |
| string cluster_name = 1 [(google.api.field_behavior) = REQUIRED]; |
| |
| // Output only. A cluster UUID generated by the Dataproc service when |
| // the job is submitted. |
| string cluster_uuid = 2 [(google.api.field_behavior) = OUTPUT_ONLY]; |
| |
| // Optional. Cluster labels to identify a cluster where the job will be |
| // submitted. |
| map<string, string> cluster_labels = 3 |
| [(google.api.field_behavior) = OPTIONAL]; |
| } |
| |
| // Dataproc job status. |
| message JobStatus { |
| // The job state. |
| enum State { |
| // The job state is unknown. |
| STATE_UNSPECIFIED = 0; |
| |
| // The job is pending; it has been submitted, but is not yet running. |
| PENDING = 1; |
| |
| // Job has been received by the service and completed initial setup; |
| // it will soon be submitted to the cluster. |
| SETUP_DONE = 8; |
| |
| // The job is running on the cluster. |
| RUNNING = 2; |
| |
| // A CancelJob request has been received, but is pending. |
| CANCEL_PENDING = 3; |
| |
| // Transient in-flight resources have been canceled, and the request to |
| // cancel the running job has been issued to the cluster. |
| CANCEL_STARTED = 7; |
| |
| // The job cancellation was successful. |
| CANCELLED = 4; |
| |
| // The job has completed successfully. |
| DONE = 5; |
| |
| // The job has completed, but encountered an error. |
| ERROR = 6; |
| |
| // Job attempt has failed. The detail field contains failure details for |
| // this attempt. |
| // |
| // Applies to restartable jobs only. |
| ATTEMPT_FAILURE = 9; |
| } |
| |
| // The job substate. |
| enum Substate { |
| // The job substate is unknown. |
| UNSPECIFIED = 0; |
| |
| // The Job is submitted to the agent. |
| // |
| // Applies to RUNNING state. |
| SUBMITTED = 1; |
| |
| // The Job has been received and is awaiting execution (it may be waiting |
| // for a condition to be met). See the "details" field for the reason for |
| // the delay. |
| // |
| // Applies to RUNNING state. |
| QUEUED = 2; |
| |
| // The agent-reported status is out of date, which may be caused by a |
| // loss of communication between the agent and Dataproc. If the |
| // agent does not send a timely update, the job will fail. |
| // |
| // Applies to RUNNING state. |
| STALE_STATUS = 3; |
| } |
| |
| // Output only. A state message specifying the overall job state. |
| State state = 1 [(google.api.field_behavior) = OUTPUT_ONLY]; |
| |
| // Optional. Output only. Job state details, such as an error |
| // description if the state is <code>ERROR</code>. |
| string details = 2 [ |
| (google.api.field_behavior) = OUTPUT_ONLY, |
| (google.api.field_behavior) = OPTIONAL |
| ]; |
| |
| // Output only. The time when this state was entered. |
| google.protobuf.Timestamp state_start_time = 6 |
| [(google.api.field_behavior) = OUTPUT_ONLY]; |
| |
| // Output only. Additional state information, which includes |
| // status reported by the agent. |
| Substate substate = 7 [(google.api.field_behavior) = OUTPUT_ONLY]; |
| } |
| |
| // Encapsulates the full scoping used to reference a job. |
| message JobReference { |
| // Optional. The ID of the Google Cloud Platform project that the job belongs |
| // to. If specified, must match the request project ID. |
| string project_id = 1 [(google.api.field_behavior) = OPTIONAL]; |
| |
| // Optional. The job ID, which must be unique within the project. |
| // |
| // The ID must contain only letters (a-z, A-Z), numbers (0-9), |
| // underscores (_), or hyphens (-). The maximum length is 100 characters. |
| // |
| // If not specified by the caller, the job ID will be provided by the server. |
| string job_id = 2 [(google.api.field_behavior) = OPTIONAL]; |
| } |
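| |
| // Illustrative only: a caller-supplied `JobReference` that follows the ID |
| // rules above (letters, numbers, underscores, hyphens; at most 100 |
| // characters). The project and job IDs are placeholders. |
| // |
| // "reference": { |
| // "projectId": "my-project", |
| // "jobId": "daily-wordcount-2022-01-01" |
| // } |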
| |
| // A YARN application created by a job. Application information is a subset of |
| // <code>org.apache.hadoop.yarn.proto.YarnProtos.ApplicationReportProto</code>. |
| // |
| // **Beta Feature**: This report is available for testing purposes only. It may |
| // be changed before final release. |
| message YarnApplication { |
| // The application state, corresponding to |
| // <code>YarnProtos.YarnApplicationStateProto</code>. |
| enum State { |
| // Status is unspecified. |
| STATE_UNSPECIFIED = 0; |
| |
| // Status is NEW. |
| NEW = 1; |
| |
| // Status is NEW_SAVING. |
| NEW_SAVING = 2; |
| |
| // Status is SUBMITTED. |
| SUBMITTED = 3; |
| |
| // Status is ACCEPTED. |
| ACCEPTED = 4; |
| |
| // Status is RUNNING. |
| RUNNING = 5; |
| |
| // Status is FINISHED. |
| FINISHED = 6; |
| |
| // Status is FAILED. |
| FAILED = 7; |
| |
| // Status is KILLED. |
| KILLED = 8; |
| } |
| |
| // Required. The application name. |
| string name = 1 [(google.api.field_behavior) = REQUIRED]; |
| |
| // Required. The application state. |
| State state = 2 [(google.api.field_behavior) = REQUIRED]; |
| |
| // Required. The numerical progress of the application, from 1 to 100. |
| float progress = 3 [(google.api.field_behavior) = REQUIRED]; |
| |
| // Optional. The HTTP URL of the ApplicationMaster, HistoryServer, or |
| // TimelineServer that provides application-specific information. The URL uses |
| // the internal hostname, and requires a proxy server for resolution and, |
| // possibly, access. |
| string tracking_url = 4 [(google.api.field_behavior) = OPTIONAL]; |
| } |
| |
| // A Dataproc job resource. |
| message Job { |
| // Optional. The fully qualified reference to the job, which can be used to |
| // obtain the equivalent REST path of the job resource. If this property |
| // is not specified when a job is created, the server generates a |
| // <code>job_id</code>. |
| JobReference reference = 1 [(google.api.field_behavior) = OPTIONAL]; |
| |
| // Required. Job information, including how, when, and where to |
| // run the job. |
| JobPlacement placement = 2 [(google.api.field_behavior) = REQUIRED]; |
| |
| // Required. The application/framework-specific portion of the job. |
| oneof type_job { |
| // Optional. Job is a Hadoop job. |
| HadoopJob hadoop_job = 3 [(google.api.field_behavior) = OPTIONAL]; |
| |
| // Optional. Job is a Spark job. |
| SparkJob spark_job = 4 [(google.api.field_behavior) = OPTIONAL]; |
| |
| // Optional. Job is a PySpark job. |
| PySparkJob pyspark_job = 5 [(google.api.field_behavior) = OPTIONAL]; |
| |
| // Optional. Job is a Hive job. |
| HiveJob hive_job = 6 [(google.api.field_behavior) = OPTIONAL]; |
| |
| // Optional. Job is a Pig job. |
| PigJob pig_job = 7 [(google.api.field_behavior) = OPTIONAL]; |
| |
| // Optional. Job is a SparkR job. |
| SparkRJob spark_r_job = 21 [(google.api.field_behavior) = OPTIONAL]; |
| |
| // Optional. Job is a SparkSql job. |
| SparkSqlJob spark_sql_job = 12 [(google.api.field_behavior) = OPTIONAL]; |
| |
| // Optional. Job is a Presto job. |
| PrestoJob presto_job = 23 [(google.api.field_behavior) = OPTIONAL]; |
| } |
| |
| // Output only. The job status. Additional application-specific |
| // status information may be contained in the <code>type_job</code> |
| // and <code>yarn_applications</code> fields. |
| JobStatus status = 8 [(google.api.field_behavior) = OUTPUT_ONLY]; |
| |
| // Output only. The previous job status. |
| repeated JobStatus status_history = 13 |
| [(google.api.field_behavior) = OUTPUT_ONLY]; |
| |
| // Output only. The collection of YARN applications spun up by this job. |
| // |
| // **Beta** Feature: This report is available for testing purposes only. It |
| // may be changed before final release. |
| repeated YarnApplication yarn_applications = 9 |
| [(google.api.field_behavior) = OUTPUT_ONLY]; |
| |
| // Output only. A URI pointing to the location of the stdout of the job's |
| // driver program. |
| string driver_output_resource_uri = 17 |
| [(google.api.field_behavior) = OUTPUT_ONLY]; |
| |
| // Output only. If present, the location of miscellaneous control files |
| // which may be used as part of job setup and handling. If not present, |
| // control files may be placed in the same location as |
| // `driver_output_resource_uri`. |
| string driver_control_files_uri = 15 |
| [(google.api.field_behavior) = OUTPUT_ONLY]; |
| |
| // Optional. The labels to associate with this job. |
| // Label **keys** must contain 1 to 63 characters, and must conform to |
| // [RFC 1035](https://www.ietf.org/rfc/rfc1035.txt). |
| // Label **values** may be empty, but, if present, must contain 1 to 63 |
| // characters, and must conform to [RFC |
| // 1035](https://www.ietf.org/rfc/rfc1035.txt). No more than 32 labels can be |
| // associated with a job. |
| map<string, string> labels = 18 [(google.api.field_behavior) = OPTIONAL]; |
| |
| // Optional. Job scheduling configuration. |
| JobScheduling scheduling = 20 [(google.api.field_behavior) = OPTIONAL]; |
| |
| // Output only. A UUID that uniquely identifies a job within the project |
| // over time. This is in contrast to a user-settable reference.job_id that |
| // may be reused over time. |
| string job_uuid = 22 [(google.api.field_behavior) = OUTPUT_ONLY]; |
| |
| // Output only. Indicates whether the job is completed. If the value is |
| // `false`, the job is still in progress. If `true`, the job is completed, and |
| // the `status.state` field will indicate whether it was successful, failed, |
| // or cancelled. |
| bool done = 24 [(google.api.field_behavior) = OUTPUT_ONLY]; |
| |
| // Optional. Driver scheduling configuration. |
| DriverSchedulingConfig driver_scheduling_config = 27 |
| [(google.api.field_behavior) = OPTIONAL]; |
| } |
| |
| // Driver scheduling configuration. |
| message DriverSchedulingConfig { |
| // Required. The amount of memory in MB the driver is requesting. |
| int32 memory_mb = 1 [(google.api.field_behavior) = REQUIRED]; |
| |
| // Required. The number of vCPUs the driver is requesting. |
| int32 vcores = 2 [(google.api.field_behavior) = REQUIRED]; |
| } |
| |
| // Job scheduling options. |
| message JobScheduling { |
| // Optional. Maximum number of times per hour a driver can be restarted as |
| // a result of the driver exiting with a non-zero code before the job is |
| // reported failed. |
| // |
| // A job may be reported as thrashing if the driver exits with a non-zero code |
| // four times within a 10-minute window. |
| // |
| // Maximum value is 10. |
| // |
| // **Note:** This restartable job option is not supported in Dataproc |
| // [workflow |
| // templates](https://cloud.google.com/dataproc/docs/concepts/workflows/using-workflows#adding_jobs_to_a_template). |
| int32 max_failures_per_hour = 1 [(google.api.field_behavior) = OPTIONAL]; |
| |
| // Optional. Maximum total number of times a driver may be restarted as a |
| // result of the driver exiting with a non-zero code. After the maximum number |
| // is reached, the job will be reported as failed. |
| // |
| // Maximum value is 240. |
| // |
| // **Note:** Currently, this restartable job option is |
| // not supported in Dataproc |
| // [workflow |
| // templates](https://cloud.google.com/dataproc/docs/concepts/workflows/using-workflows#adding_jobs_to_a_template). |
| int32 max_failures_total = 2 [(google.api.field_behavior) = OPTIONAL]; |
| } |
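| |
| // Illustrative only: a `JobScheduling` block that stays within the maxima |
| // documented above (10 per hour, 240 total); the specific numbers are |
| // placeholders. |
| // |
| // "scheduling": { |
| // "maxFailuresPerHour": 5, |
| // "maxFailuresTotal": 20 |
| // } |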
| |
| // A request to submit a job. |
| message SubmitJobRequest { |
| // Required. The ID of the Google Cloud Platform project that the job |
| // belongs to. |
| string project_id = 1 [(google.api.field_behavior) = REQUIRED]; |
| |
| // Required. The Dataproc region in which to handle the request. |
| string region = 3 [(google.api.field_behavior) = REQUIRED]; |
| |
| // Required. The job resource. |
| Job job = 2 [(google.api.field_behavior) = REQUIRED]; |
| |
| // Optional. A unique id used to identify the request. If the server |
| // receives two |
| // [SubmitJobRequest](https://cloud.google.com/dataproc/docs/reference/rpc/google.cloud.dataproc.v1#google.cloud.dataproc.v1.SubmitJobRequest)s |
| // with the same id, then the second request will be ignored and the |
| // first [Job][google.cloud.dataproc.v1.Job] created and stored in the backend |
| // is returned. |
| // |
| // It is recommended to always set this value to a |
| // [UUID](https://en.wikipedia.org/wiki/Universally_unique_identifier). |
| // |
| // The id must contain only letters (a-z, A-Z), numbers (0-9), |
| // underscores (_), and hyphens (-). The maximum length is 40 characters. |
| string request_id = 4 [(google.api.field_behavior) = OPTIONAL]; |
| } |
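| |
| // Illustrative only: a `SubmitJobRequest` body that opts into idempotent |
| // retries by setting `request_id` to a client-generated UUID, as recommended |
| // above. The UUID is a placeholder, and the job payload is elided. |
| // |
| // { |
| // "requestId": "b16b00b5-8f3c-4d6e-9abc-0123456789ab", |
| // "job": { ... } |
| // } |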
| |
| // Job Operation metadata. |
| message JobMetadata { |
| // Output only. The job id. |
| string job_id = 1 [(google.api.field_behavior) = OUTPUT_ONLY]; |
| |
| // Output only. Most recent job status. |
| JobStatus status = 2 [(google.api.field_behavior) = OUTPUT_ONLY]; |
| |
| // Output only. Operation type. |
| string operation_type = 3 [(google.api.field_behavior) = OUTPUT_ONLY]; |
| |
| // Output only. Job submission time. |
| google.protobuf.Timestamp start_time = 4 |
| [(google.api.field_behavior) = OUTPUT_ONLY]; |
| } |
| |
| // A request to get the resource representation for a job in a project. |
| message GetJobRequest { |
| // Required. The ID of the Google Cloud Platform project that the job |
| // belongs to. |
| string project_id = 1 [(google.api.field_behavior) = REQUIRED]; |
| |
| // Required. The Dataproc region in which to handle the request. |
| string region = 3 [(google.api.field_behavior) = REQUIRED]; |
| |
| // Required. The job ID. |
| string job_id = 2 [(google.api.field_behavior) = REQUIRED]; |
| } |
| |
| // A request to list jobs in a project. |
| message ListJobsRequest { |
| // A matcher that specifies categories of job states. |
| enum JobStateMatcher { |
| // Match all jobs, regardless of state. |
| ALL = 0; |
| |
| // Only match jobs in non-terminal states: PENDING, RUNNING, or |
| // CANCEL_PENDING. |
| ACTIVE = 1; |
| |
| // Only match jobs in terminal states: CANCELLED, DONE, or ERROR. |
| NON_ACTIVE = 2; |
| } |
| |
| // Required. The ID of the Google Cloud Platform project that the job |
| // belongs to. |
| string project_id = 1 [(google.api.field_behavior) = REQUIRED]; |
| |
| // Required. The Dataproc region in which to handle the request. |
| string region = 6 [(google.api.field_behavior) = REQUIRED]; |
| |
| // Optional. The number of results to return in each response. |
| int32 page_size = 2 [(google.api.field_behavior) = OPTIONAL]; |
| |
| // Optional. The page token, returned by a previous call, to request the |
| // next page of results. |
| string page_token = 3 [(google.api.field_behavior) = OPTIONAL]; |
| |
| // Optional. If set, the returned jobs list includes only jobs that were |
| // submitted to the named cluster. |
| string cluster_name = 4 [(google.api.field_behavior) = OPTIONAL]; |
| |
| // Optional. Specifies enumerated categories of jobs to list. |
| // (default = match ALL jobs). |
| // |
| // If `filter` is provided, `jobStateMatcher` will be ignored. |
| JobStateMatcher job_state_matcher = 5 |
| [(google.api.field_behavior) = OPTIONAL]; |
| |
| // Optional. A filter constraining the jobs to list. Filters are |
| // case-sensitive and have the following syntax: |
| // |
| // [field = value] AND [field [= value]] ... |
| // |
| // where **field** is `status.state` or `labels.[KEY]`, and `[KEY]` is a label |
| // key. **value** can be `*` to match all values. |
| // `status.state` can be either `ACTIVE` or `NON_ACTIVE`. |
| // Only the logical `AND` operator is supported; space-separated items are |
| // treated as having an implicit `AND` operator. |
| // |
| // Example filter: |
| // |
| // status.state = ACTIVE AND labels.env = staging AND labels.starred = * |
| string filter = 7 [(google.api.field_behavior) = OPTIONAL]; |
| } |
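| |
| // Illustrative only: the documented filter might appear on the wire as a |
| // URL-encoded query parameter of the ListJobs GET request; the project, |
| // region, and label values are placeholders. |
| // |
| // GET /v1/projects/my-project/regions/us-central1/jobs |
| // ?filter=status.state%20%3D%20ACTIVE%20AND%20labels.env%20%3D%20staging |
| // &pageSize=100 |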
| |
| // A request to update a job. |
| message UpdateJobRequest { |
| // Required. The ID of the Google Cloud Platform project that the job |
| // belongs to. |
| string project_id = 1 [(google.api.field_behavior) = REQUIRED]; |
| |
| // Required. The Dataproc region in which to handle the request. |
| string region = 2 [(google.api.field_behavior) = REQUIRED]; |
| |
| // Required. The job ID. |
| string job_id = 3 [(google.api.field_behavior) = REQUIRED]; |
| |
| // Required. The changes to the job. |
| Job job = 4 [(google.api.field_behavior) = REQUIRED]; |
| |
| // Required. Specifies the path, relative to <code>Job</code>, of |
| // the field to update. For example, to update the labels of a Job the |
| // <code>update_mask</code> parameter would be specified as |
| // <code>labels</code>, and the `PATCH` request body would specify the new |
| // value. <strong>Note:</strong> Currently, <code>labels</code> is the only |
| // field that can be updated. |
| google.protobuf.FieldMask update_mask = 5 |
| [(google.api.field_behavior) = REQUIRED]; |
| } |
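| |
| // Illustrative only: since `labels` is currently the only updatable field |
| // (per the `update_mask` documentation above), an update request might look |
| // like the sketch below; the project, region, job ID, and label values are |
| // placeholders. |
| // |
| // PATCH /v1/projects/my-project/regions/us-central1/jobs/job-1234?updateMask=labels |
| // |
| // { |
| // "labels": { |
| // "env": "production" |
| // } |
| // } |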
| |
| // A list of jobs in a project. |
| message ListJobsResponse { |
| // Output only. Jobs list. |
| repeated Job jobs = 1 [(google.api.field_behavior) = OUTPUT_ONLY]; |
| |
| // Optional. This token is included in the response if there are more results |
| // to fetch. To fetch additional results, provide this value as the |
| // `page_token` in a subsequent <code>ListJobsRequest</code>. |
| string next_page_token = 2 [(google.api.field_behavior) = OPTIONAL]; |
| } |
| |
| // A request to cancel a job. |
| message CancelJobRequest { |
| // Required. The ID of the Google Cloud Platform project that the job |
| // belongs to. |
| string project_id = 1 [(google.api.field_behavior) = REQUIRED]; |
| |
| // Required. The Dataproc region in which to handle the request. |
| string region = 3 [(google.api.field_behavior) = REQUIRED]; |
| |
| // Required. The job ID. |
| string job_id = 2 [(google.api.field_behavior) = REQUIRED]; |
| } |
| |
| // A request to delete a job. |
| message DeleteJobRequest { |
| // Required. The ID of the Google Cloud Platform project that the job |
| // belongs to. |
| string project_id = 1 [(google.api.field_behavior) = REQUIRED]; |
| |
| // Required. The Dataproc region in which to handle the request. |
| string region = 3 [(google.api.field_behavior) = REQUIRED]; |
| |
| // Required. The job ID. |
| string job_id = 2 [(google.api.field_behavior) = REQUIRED]; |
| } |