| // Copyright 2022 Google LLC |
| // |
| // Licensed under the Apache License, Version 2.0 (the "License"); |
| // you may not use this file except in compliance with the License. |
| // You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, software |
| // distributed under the License is distributed on an "AS IS" BASIS, |
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| // See the License for the specific language governing permissions and |
| // limitations under the License. |
| |
| syntax = "proto3"; |
| |
| package google.cloud.dataproc.v1; |
| |
| import "google/api/annotations.proto"; |
| import "google/api/client.proto"; |
| import "google/api/field_behavior.proto"; |
| import "google/longrunning/operations.proto"; |
| import "google/protobuf/empty.proto"; |
| import "google/protobuf/field_mask.proto"; |
| import "google/protobuf/timestamp.proto"; |
| |
| option go_package = "google.golang.org/genproto/googleapis/cloud/dataproc/v1;dataproc"; |
| option java_multiple_files = true; |
| option java_outer_classname = "JobsProto"; |
| option java_package = "com.google.cloud.dataproc.v1"; |
| |
| // The JobController provides methods to manage jobs. |
| service JobController { |
| option (google.api.default_host) = "dataproc.googleapis.com"; |
| option (google.api.oauth_scopes) = |
| "https://www.googleapis.com/auth/cloud-platform"; |
| |
| // Submits a job to a cluster. |
| rpc SubmitJob(SubmitJobRequest) returns (Job) { |
| option (google.api.http) = { |
| post: "/v1/projects/{project_id}/regions/{region}/jobs:submit" |
| body: "*" |
| }; |
| option (google.api.method_signature) = "project_id,region,job"; |
| } |
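| |
| // Illustrative only (not part of the API definition): a minimal SubmitJob |
| // REST call for the method above might look like the sketch below. The |
| // project, region, cluster, and file names are placeholders. |
| // |
| // POST /v1/projects/my-project/regions/us-central1/jobs:submit |
| // |
| // { |
| // "job": { |
| // "placement": { |
| // "clusterName": "my-cluster" |
| // }, |
| // "pysparkJob": { |
| // "mainPythonFileUri": "gs://my-bucket/jobs/wordcount.py" |
| // } |
| // } |
| // } |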
| |
| // Submits a job to a cluster. This method returns a |
| // [google.longrunning.Operation][google.longrunning.Operation] whose |
| // `response` is the submitted [Job][google.cloud.dataproc.v1.Job] and whose |
| // `metadata` is [JobMetadata][google.cloud.dataproc.v1.JobMetadata]. |
| rpc SubmitJobAsOperation(SubmitJobRequest) |
| returns (google.longrunning.Operation) { |
| option (google.api.http) = { |
| post: "/v1/projects/{project_id}/regions/{region}/jobs:submitAsOperation" |
| body: "*" |
| }; |
| option (google.api.method_signature) = "project_id, region, job"; |
| option (google.longrunning.operation_info) = { |
| response_type: "Job" |
| metadata_type: "JobMetadata" |
| }; |
| } |
| |
| // Gets the resource representation for a job in a project. |
| rpc GetJob(GetJobRequest) returns (Job) { |
| option (google.api.http) = { |
| get: "/v1/projects/{project_id}/regions/{region}/jobs/{job_id}" |
| }; |
| option (google.api.method_signature) = "project_id,region,job_id"; |
| } |
| |
| // Lists regions/{region}/jobs in a project. |
| rpc ListJobs(ListJobsRequest) returns (ListJobsResponse) { |
| option (google.api.http) = { |
| get: "/v1/projects/{project_id}/regions/{region}/jobs" |
| }; |
| option (google.api.method_signature) = "project_id,region"; |
| option (google.api.method_signature) = "project_id,region,filter"; |
| } |
| |
| // Updates a job in a project. |
| rpc UpdateJob(UpdateJobRequest) returns (Job) { |
| option (google.api.http) = { |
| patch: "/v1/projects/{project_id}/regions/{region}/jobs/{job_id}" |
| body: "job" |
| }; |
| } |
| |
| // Starts a job cancellation request. To access the job resource |
| // after cancellation, call |
| // [regions/{region}/jobs.list](https://cloud.google.com/dataproc/docs/reference/rest/v1/projects.regions.jobs/list) |
| // or |
| // [regions/{region}/jobs.get](https://cloud.google.com/dataproc/docs/reference/rest/v1/projects.regions.jobs/get). |
| rpc CancelJob(CancelJobRequest) returns (Job) { |
| option (google.api.http) = { |
| post: "/v1/projects/{project_id}/regions/{region}/jobs/{job_id}:cancel" |
| body: "*" |
| }; |
| option (google.api.method_signature) = "project_id,region,job_id"; |
| } |
| |
| // Deletes the job from the project. If the job is active, the delete fails, |
| // and the response returns `FAILED_PRECONDITION`. |
| rpc DeleteJob(DeleteJobRequest) returns (google.protobuf.Empty) { |
| option (google.api.http) = { |
| delete: "/v1/projects/{project_id}/regions/{region}/jobs/{job_id}" |
| }; |
| option (google.api.method_signature) = "project_id,region,job_id"; |
| } |
| } |
| |
| // The runtime logging config of the job. |
| message LoggingConfig { |
| // The Log4j level for job execution. When running an |
| // [Apache Hive](https://hive.apache.org/) job, Cloud |
| // Dataproc configures the Hive client to an equivalent verbosity level. |
| enum Level { |
| // Level is unspecified. Use default level for log4j. |
| LEVEL_UNSPECIFIED = 0; |
| |
| // Use ALL level for log4j. |
| ALL = 1; |
| |
| // Use TRACE level for log4j. |
| TRACE = 2; |
| |
| // Use DEBUG level for log4j. |
| DEBUG = 3; |
| |
| // Use INFO level for log4j. |
| INFO = 4; |
| |
| // Use WARN level for log4j. |
| WARN = 5; |
| |
| // Use ERROR level for log4j. |
| ERROR = 6; |
| |
| // Use FATAL level for log4j. |
| FATAL = 7; |
| |
| // Turn off log4j. |
| OFF = 8; |
| } |
| |
| // The per-package log levels for the driver. This can include the |
| // "root" package name to configure the root logger (rootLogger). |
| // Examples: |
| // 'com.google = FATAL', 'root = INFO', 'org.apache = DEBUG' |
| map<string, Level> driver_log_levels = 2; |
| } |
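| |
| // Illustrative only: in a REST request, the `driver_log_levels` map above |
| // might be expressed as follows (the package names are placeholders): |
| // |
| // "loggingConfig": { |
| // "driverLogLevels": { |
| // "root": "INFO", |
| // "org.apache.spark": "DEBUG" |
| // } |
| // } |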
| |
| // A Dataproc job for running |
| // [Apache Hadoop |
| // MapReduce](https://hadoop.apache.org/docs/current/hadoop-mapreduce-client/hadoop-mapreduce-client-core/MapReduceTutorial.html) |
| // jobs on [Apache Hadoop |
| // YARN](https://hadoop.apache.org/docs/r2.7.1/hadoop-yarn/hadoop-yarn-site/YARN.html). |
| message HadoopJob { |
| // Required. Indicates the location of the driver's main class. Specify |
| // either the jar file that contains the main class or the main class name. |
| // To specify both, add the jar file to `jar_file_uris`, and then specify |
| // the main class name in this property. |
| oneof driver { |
| // The HCFS URI of the jar file containing the main class. |
| // Examples: |
| // 'gs://foo-bucket/analytics-binaries/extract-useful-metrics-mr.jar' |
| // 'hdfs:/tmp/test-samples/custom-wordcount.jar' |
| // 'file:///home/usr/lib/hadoop-mapreduce/hadoop-mapreduce-examples.jar' |
| string main_jar_file_uri = 1; |
| |
| // The name of the driver's main class. The jar file containing the class |
| // must be in the default CLASSPATH or specified in `jar_file_uris`. |
| string main_class = 2; |
| } |
| |
| // Optional. The arguments to pass to the driver. Do not |
| // include arguments, such as `-libjars` or `-Dfoo=bar`, that can be set as |
| // job properties, since a collision may occur that causes an incorrect job |
| // submission. |
| repeated string args = 3 [(google.api.field_behavior) = OPTIONAL]; |
| |
| // Optional. Jar file URIs to add to the CLASSPATHs of the |
| // Hadoop driver and tasks. |
| repeated string jar_file_uris = 4 [(google.api.field_behavior) = OPTIONAL]; |
| |
| // Optional. HCFS (Hadoop Compatible Filesystem) URIs of files to be copied |
| // to the working directory of Hadoop drivers and distributed tasks. Useful |
| // for naively parallel tasks. |
| repeated string file_uris = 5 [(google.api.field_behavior) = OPTIONAL]; |
| |
| // Optional. HCFS URIs of archives to be extracted in the working directory of |
| // Hadoop drivers and tasks. Supported file types: |
| // .jar, .tar, .tar.gz, .tgz, or .zip. |
| repeated string archive_uris = 6 [(google.api.field_behavior) = OPTIONAL]; |
| |
| // Optional. A mapping of property names to values, used to configure Hadoop. |
| // Properties that conflict with values set by the Dataproc API may be |
| // overwritten. Can include properties set in /etc/hadoop/conf/*-site and |
| // classes in user code. |
| map<string, string> properties = 7 [(google.api.field_behavior) = OPTIONAL]; |
| |
| // Optional. The runtime log config for job execution. |
| LoggingConfig logging_config = 8 [(google.api.field_behavior) = OPTIONAL]; |
| } |
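| |
| // A sketch, for illustration only, of the `HadoopJob` message above in |
| // REST/JSON form. It runs the word-count example from the jar referenced in |
| // the `main_jar_file_uri` documentation; the bucket paths are placeholders. |
| // |
| // "hadoopJob": { |
| // "mainJarFileUri": "file:///usr/lib/hadoop-mapreduce/hadoop-mapreduce-examples.jar", |
| // "args": ["wordcount", "gs://my-bucket/input/", "gs://my-bucket/output/"], |
| // "properties": { |
| // "mapreduce.job.reduces": "2" |
| // } |
| // } |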
| |
| // A Dataproc job for running [Apache Spark](https://spark.apache.org/) |
| // applications on YARN. |
| message SparkJob { |
| // Required. The specification of the main method to call to drive the job. |
| // Specify either the jar file that contains the main class or the main class |
| // name. To pass both a main jar and a main class in that jar, add the jar to |
| // `CommonJob.jar_file_uris`, and then specify the main class name in |
| // `main_class`. |
| oneof driver { |
| // The HCFS URI of the jar file that contains the main class. |
| string main_jar_file_uri = 1; |
| |
| // The name of the driver's main class. The jar file that contains the class |
| // must be in the default CLASSPATH or specified in `jar_file_uris`. |
| string main_class = 2; |
| } |
| |
| // Optional. The arguments to pass to the driver. Do not include arguments, |
| // such as `--conf`, that can be set as job properties, since a collision may |
| // occur that causes an incorrect job submission. |
| repeated string args = 3 [(google.api.field_behavior) = OPTIONAL]; |
| |
| // Optional. HCFS URIs of jar files to add to the CLASSPATHs of the |
| // Spark driver and tasks. |
| repeated string jar_file_uris = 4 [(google.api.field_behavior) = OPTIONAL]; |
| |
| // Optional. HCFS URIs of files to be placed in the working directory of |
| // each executor. Useful for naively parallel tasks. |
| repeated string file_uris = 5 [(google.api.field_behavior) = OPTIONAL]; |
| |
| // Optional. HCFS URIs of archives to be extracted into the working directory |
| // of each executor. Supported file types: |
| // .jar, .tar, .tar.gz, .tgz, and .zip. |
| repeated string archive_uris = 6 [(google.api.field_behavior) = OPTIONAL]; |
| |
| // Optional. A mapping of property names to values, used to configure Spark. |
| // Properties that conflict with values set by the Dataproc API may be |
| // overwritten. Can include properties set in |
| // /etc/spark/conf/spark-defaults.conf and classes in user code. |
| map<string, string> properties = 7 [(google.api.field_behavior) = OPTIONAL]; |
| |
| // Optional. The runtime log config for job execution. |
| LoggingConfig logging_config = 8 [(google.api.field_behavior) = OPTIONAL]; |
| } |
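| |
| // A sketch, for illustration only, of the `SparkJob` message above using the |
| // `main_class` variant of the driver oneof; the class, jar path, and |
| // property values are placeholders. |
| // |
| // "sparkJob": { |
| // "mainClass": "org.apache.spark.examples.SparkPi", |
| // "jarFileUris": ["file:///usr/lib/spark/examples/jars/spark-examples.jar"], |
| // "args": ["1000"], |
| // "properties": { |
| // "spark.executor.memory": "2g" |
| // } |
| // } |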
| |
| // A Dataproc job for running |
| // [Apache |
| // PySpark](https://spark.apache.org/docs/0.9.0/python-programming-guide.html) |
| // applications on YARN. |
| message PySparkJob { |
| // Required. The HCFS URI of the main Python file to use as the driver. Must |
| // be a .py file. |
| string main_python_file_uri = 1 [(google.api.field_behavior) = REQUIRED]; |
| |
| // Optional. The arguments to pass to the driver. Do not include arguments, |
| // such as `--conf`, that can be set as job properties, since a collision may |
| // occur that causes an incorrect job submission. |
| repeated string args = 2 [(google.api.field_behavior) = OPTIONAL]; |
| |
| // Optional. HCFS file URIs of Python files to pass to the PySpark |
| // framework. Supported file types: .py, .egg, and .zip. |
| repeated string python_file_uris = 3 [(google.api.field_behavior) = OPTIONAL]; |
| |
| // Optional. HCFS URIs of jar files to add to the CLASSPATHs of the |
| // Python driver and tasks. |
| repeated string jar_file_uris = 4 [(google.api.field_behavior) = OPTIONAL]; |
| |
| // Optional. HCFS URIs of files to be placed in the working directory of |
| // each executor. Useful for naively parallel tasks. |
| repeated string file_uris = 5 [(google.api.field_behavior) = OPTIONAL]; |
| |
| // Optional. HCFS URIs of archives to be extracted into the working directory |
| // of each executor. Supported file types: |
| // .jar, .tar, .tar.gz, .tgz, and .zip. |
| repeated string archive_uris = 6 [(google.api.field_behavior) = OPTIONAL]; |
| |
| // Optional. A mapping of property names to values, used to configure PySpark. |
| // Properties that conflict with values set by the Dataproc API may be |
| // overwritten. Can include properties set in |
| // /etc/spark/conf/spark-defaults.conf and classes in user code. |
| map<string, string> properties = 7 [(google.api.field_behavior) = OPTIONAL]; |
| |
| // Optional. The runtime log config for job execution. |
| LoggingConfig logging_config = 8 [(google.api.field_behavior) = OPTIONAL]; |
| } |
| |
| // A list of queries to run on a cluster. |
| message QueryList { |
| // Required. The queries to execute. You do not need to end a query expression |
| // with a semicolon. Multiple queries can be specified in one |
| // string by separating each with a semicolon. Here is an example of a |
| // Dataproc API snippet that uses a QueryList to specify a HiveJob: |
| // |
| // "hiveJob": { |
| // "queryList": { |
| // "queries": [ |
| // "query1", |
| // "query2", |
| // "query3;query4", |
| // ] |
| // } |
| // } |
| repeated string queries = 1 [(google.api.field_behavior) = REQUIRED]; |
| } |
| |
| // A Dataproc job for running [Apache Hive](https://hive.apache.org/) |
| // queries on YARN. |
| message HiveJob { |
| // Required. The sequence of Hive queries to execute, specified as either |
| // an HCFS file URI or a list of queries. |
| oneof queries { |
| // The HCFS URI of the script that contains Hive queries. |
| string query_file_uri = 1; |
| |
| // A list of queries. |
| QueryList query_list = 2; |
| } |
| |
| // Optional. Whether to continue executing queries if a query fails. |
| // The default value is `false`. Setting to `true` can be useful when |
| // executing independent parallel queries. |
| bool continue_on_failure = 3 [(google.api.field_behavior) = OPTIONAL]; |
| |
| // Optional. Mapping of query variable names to values (equivalent to the |
| // Hive command: `SET name="value";`). |
| map<string, string> script_variables = 4 |
| [(google.api.field_behavior) = OPTIONAL]; |
| |
| // Optional. A mapping of property names and values, used to configure Hive. |
| // Properties that conflict with values set by the Dataproc API may be |
| // overwritten. Can include properties set in /etc/hadoop/conf/*-site.xml, |
| // /etc/hive/conf/hive-site.xml, and classes in user code. |
| map<string, string> properties = 5 [(google.api.field_behavior) = OPTIONAL]; |
| |
| // Optional. HCFS URIs of jar files to add to the CLASSPATH of the |
| // Hive server and Hadoop MapReduce (MR) tasks. Can contain Hive SerDes |
| // and UDFs. |
| repeated string jar_file_uris = 6 [(google.api.field_behavior) = OPTIONAL]; |
| } |
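| |
| // A sketch, for illustration only, of the `HiveJob` message above using the |
| // `query_file_uri` variant together with `script_variables`; the bucket path |
| // and variable values are placeholders. |
| // |
| // "hiveJob": { |
| // "queryFileUri": "gs://my-bucket/queries/report.hql", |
| // "scriptVariables": { |
| // "run_date": "2022-01-01" |
| // }, |
| // "continueOnFailure": false |
| // } |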
| |
| // A Dataproc job for running [Apache Spark |
| // SQL](https://spark.apache.org/sql/) queries. |
| message SparkSqlJob { |
| // Required. The sequence of Spark SQL queries to execute, specified as |
| // either an HCFS file URI or as a list of queries. |
| oneof queries { |
| // The HCFS URI of the script that contains SQL queries. |
| string query_file_uri = 1; |
| |
| // A list of queries. |
| QueryList query_list = 2; |
| } |
| |
| // Optional. Mapping of query variable names to values (equivalent to the |
| // Spark SQL command: `SET name="value";`). |
| map<string, string> script_variables = 3 |
| [(google.api.field_behavior) = OPTIONAL]; |
| |
| // Optional. A mapping of property names to values, used to configure |
| // Spark SQL's SparkConf. Properties that conflict with values set by the |
| // Dataproc API may be overwritten. |
| map<string, string> properties = 4 [(google.api.field_behavior) = OPTIONAL]; |
| |
| // Optional. HCFS URIs of jar files to be added to the Spark CLASSPATH. |
| repeated string jar_file_uris = 56 [(google.api.field_behavior) = OPTIONAL]; |
| |
| // Optional. The runtime log config for job execution. |
| LoggingConfig logging_config = 6 [(google.api.field_behavior) = OPTIONAL]; |
| } |
| |
| // A Dataproc job for running [Apache Pig](https://pig.apache.org/) |
| // queries on YARN. |
| message PigJob { |
| // Required. The sequence of Pig queries to execute, specified as an HCFS |
| // file URI or a list of queries. |
| oneof queries { |
| // The HCFS URI of the script that contains the Pig queries. |
| string query_file_uri = 1; |
| |
| // A list of queries. |
| QueryList query_list = 2; |
| } |
| |
| // Optional. Whether to continue executing queries if a query fails. |
| // The default value is `false`. Setting to `true` can be useful when |
| // executing independent parallel queries. |
| bool continue_on_failure = 3 [(google.api.field_behavior) = OPTIONAL]; |
| |
| // Optional. Mapping of query variable names to values (equivalent to the Pig |
| // command: `name=[value]`). |
| map<string, string> script_variables = 4 |
| [(google.api.field_behavior) = OPTIONAL]; |
| |
| // Optional. A mapping of property names to values, used to configure Pig. |
| // Properties that conflict with values set by the Dataproc API may be |
| // overwritten. Can include properties set in /etc/hadoop/conf/*-site.xml, |
| // /etc/pig/conf/pig.properties, and classes in user code. |
| map<string, string> properties = 5 [(google.api.field_behavior) = OPTIONAL]; |
| |
| // Optional. HCFS URIs of jar files to add to the CLASSPATH of |
| // the Pig Client and Hadoop MapReduce (MR) tasks. Can contain Pig UDFs. |
| repeated string jar_file_uris = 6 [(google.api.field_behavior) = OPTIONAL]; |
| |
| // Optional. The runtime log config for job execution. |
| LoggingConfig logging_config = 7 [(google.api.field_behavior) = OPTIONAL]; |
| } |
| |
| // A Dataproc job for running |
| // [Apache SparkR](https://spark.apache.org/docs/latest/sparkr.html) |
| // applications on YARN. |
| message SparkRJob { |
| // Required. The HCFS URI of the main R file to use as the driver. |
| // Must be a .R file. |
| string main_r_file_uri = 1 [(google.api.field_behavior) = REQUIRED]; |
| |
| // Optional. The arguments to pass to the driver. Do not include arguments, |
| // such as `--conf`, that can be set as job properties, since a collision may |
| // occur that causes an incorrect job submission. |
| repeated string args = 2 [(google.api.field_behavior) = OPTIONAL]; |
| |
| // Optional. HCFS URIs of files to be placed in the working directory of |
| // each executor. Useful for naively parallel tasks. |
| repeated string file_uris = 3 [(google.api.field_behavior) = OPTIONAL]; |
| |
| // Optional. HCFS URIs of archives to be extracted into the working directory |
| // of each executor. Supported file types: |
| // .jar, .tar, .tar.gz, .tgz, and .zip. |
| repeated string archive_uris = 4 [(google.api.field_behavior) = OPTIONAL]; |
| |
| // Optional. A mapping of property names to values, used to configure SparkR. |
| // Properties that conflict with values set by the Dataproc API may be |
| // overwritten. Can include properties set in |
| // /etc/spark/conf/spark-defaults.conf and classes in user code. |
| map<string, string> properties = 5 [(google.api.field_behavior) = OPTIONAL]; |
| |
| // Optional. The runtime log config for job execution. |
| LoggingConfig logging_config = 6 [(google.api.field_behavior) = OPTIONAL]; |
| } |
| |
| // A Dataproc job for running [Presto](https://prestosql.io/) queries. |
| // **IMPORTANT**: The [Dataproc Presto Optional |
| // Component](https://cloud.google.com/dataproc/docs/concepts/components/presto) |
| // must be enabled when the cluster is created to submit a Presto job to the |
| // cluster. |
| message PrestoJob { |
| // Required. The sequence of Presto queries to execute, specified as |
| // either an HCFS file URI or as a list of queries. |
| oneof queries { |
| // The HCFS URI of the script that contains SQL queries. |
| string query_file_uri = 1; |
| |
| // A list of queries. |
| QueryList query_list = 2; |
| } |
| |
| // Optional. Whether to continue executing queries if a query fails. |
| // The default value is `false`. Setting to `true` can be useful when |
| // executing independent parallel queries. |
| bool continue_on_failure = 3 [(google.api.field_behavior) = OPTIONAL]; |
| |
| // Optional. The format in which query output will be displayed. See the |
| // Presto documentation for supported output formats. |
| string output_format = 4 [(google.api.field_behavior) = OPTIONAL]; |
| |
| // Optional. Presto client tags to attach to this query. |
| repeated string client_tags = 5 [(google.api.field_behavior) = OPTIONAL]; |
| |
| // Optional. A mapping of property names to values. Used to set Presto |
| // [session properties](https://prestodb.io/docs/current/sql/set-session.html). |
| // Equivalent to using the `--session` flag in the Presto CLI. |
| map<string, string> properties = 6 [(google.api.field_behavior) = OPTIONAL]; |
| |
| // Optional. The runtime log config for job execution. |
| LoggingConfig logging_config = 7 [(google.api.field_behavior) = OPTIONAL]; |
| } |
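| |
| // A sketch, for illustration only, of the `PrestoJob` message above; the |
| // table name, output format, and client tag are placeholders (consult the |
| // Presto documentation for the output formats it actually supports). |
| // |
| // "prestoJob": { |
| // "queryList": { |
| // "queries": ["SELECT COUNT(*) FROM my_catalog.my_schema.my_table"] |
| // }, |
| // "outputFormat": "CSV", |
| // "clientTags": ["nightly-report"] |
| // } |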
| |
| // Dataproc job placement configuration. |
| message JobPlacement { |
| // Required. The name of the cluster where the job will be submitted. |
| string cluster_name = 1 [(google.api.field_behavior) = REQUIRED]; |
| |
| // Output only. A cluster UUID generated by the Dataproc service when |
| // the job is submitted. |
| string cluster_uuid = 2 [(google.api.field_behavior) = OUTPUT_ONLY]; |
| |
| // Optional. Cluster labels to identify a cluster where the job will be |
| // submitted. |
| map<string, string> cluster_labels = 3 |
| [(google.api.field_behavior) = OPTIONAL]; |
| } |
| |
| // Dataproc job status. |
| message JobStatus { |
| // The job state. |
| enum State { |
| // The job state is unknown. |
| STATE_UNSPECIFIED = 0; |
| |
| // The job is pending; it has been submitted, but is not yet running. |
| PENDING = 1; |
| |
| // Job has been received by the service and completed initial setup; |
| // it will soon be submitted to the cluster. |
| SETUP_DONE = 8; |
| |
| // The job is running on the cluster. |
| RUNNING = 2; |
| |
| // A CancelJob request has been received, but is pending. |
| CANCEL_PENDING = 3; |
| |
| // Transient in-flight resources have been canceled, and the request to |
| // cancel the running job has been issued to the cluster. |
| CANCEL_STARTED = 7; |
| |
| // The job cancellation was successful. |
| CANCELLED = 4; |
| |
| // The job has completed successfully. |
| DONE = 5; |
| |
| // The job has completed, but encountered an error. |
| ERROR = 6; |
| |
| // Job attempt has failed. The detail field contains failure details for |
| // this attempt. |
| // |
| // Applies to restartable jobs only. |
| ATTEMPT_FAILURE = 9; |
| } |
| |
| // The job substate. |
| enum Substate { |
| // The job substate is unknown. |
| UNSPECIFIED = 0; |
| |
| // The Job is submitted to the agent. |
| // |
| // Applies to RUNNING state. |
| SUBMITTED = 1; |
| |
| // The Job has been received and is awaiting execution (it may be waiting |
| // for a condition to be met). See the "details" field for the reason for |
| // the delay. |
| // |
| // Applies to RUNNING state. |
| QUEUED = 2; |
| |
| // The agent-reported status is out of date, which may be caused by a |
| // loss of communication between the agent and Dataproc. If the |
| // agent does not send a timely update, the job will fail. |
| // |
| // Applies to RUNNING state. |
| STALE_STATUS = 3; |
| } |
| |
| // Output only. A state message specifying the overall job state. |
| State state = 1 [(google.api.field_behavior) = OUTPUT_ONLY]; |
| |
| // Optional. Output only. Job state details, such as an error |
| // description if the state is <code>ERROR</code>. |
| string details = 2 [ |
| (google.api.field_behavior) = OUTPUT_ONLY, |
| (google.api.field_behavior) = OPTIONAL |
| ]; |
| |
| // Output only. The time when this state was entered. |
| google.protobuf.Timestamp state_start_time = 6 |
| [(google.api.field_behavior) = OUTPUT_ONLY]; |
| |
| // Output only. Additional state information, which includes |
| // status reported by the agent. |
| Substate substate = 7 [(google.api.field_behavior) = OUTPUT_ONLY]; |
| } |
| |
| // Encapsulates the full scoping used to reference a job. |
| message JobReference { |
| // Optional. The ID of the Google Cloud Platform project that the job belongs |
| // to. If specified, must match the request project ID. |
| string project_id = 1 [(google.api.field_behavior) = OPTIONAL]; |
| |
| // Optional. The job ID, which must be unique within the project. |
| // |
| // The ID must contain only letters (a-z, A-Z), numbers (0-9), |
| // underscores (_), or hyphens (-). The maximum length is 100 characters. |
| // |
| // If not specified by the caller, the job ID will be provided by the server. |
| string job_id = 2 [(google.api.field_behavior) = OPTIONAL]; |
| } |
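| |
| // Illustrative only: a caller-supplied `JobReference` that follows the ID |
| // rules above (letters, numbers, underscores, hyphens; at most 100 |
| // characters). The project and job IDs are placeholders. |
| // |
| // "reference": { |
| // "projectId": "my-project", |
| // "jobId": "daily-wordcount-2022-01-01" |
| // } |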
| |
| // A YARN application created by a job. Application information is a subset of |
| // <code>org.apache.hadoop.yarn.proto.YarnProtos.ApplicationReportProto</code>. |
| // |
| // **Beta Feature**: This report is available for testing purposes only. It may |
| // be changed before final release. |
| message YarnApplication { |
| // The application state, corresponding to |
| // <code>YarnProtos.YarnApplicationStateProto</code>. |
| enum State { |
| // Status is unspecified. |
| STATE_UNSPECIFIED = 0; |
| |
| // Status is NEW. |
| NEW = 1; |
| |
| // Status is NEW_SAVING. |
| NEW_SAVING = 2; |
| |
| // Status is SUBMITTED. |
| SUBMITTED = 3; |
| |
| // Status is ACCEPTED. |
| ACCEPTED = 4; |
| |
| // Status is RUNNING. |
| RUNNING = 5; |
| |
| // Status is FINISHED. |
| FINISHED = 6; |
| |
| // Status is FAILED. |
| FAILED = 7; |
| |
| // Status is KILLED. |
| KILLED = 8; |
| } |
| |
| // Required. The application name. |
| string name = 1 [(google.api.field_behavior) = REQUIRED]; |
| |
| // Required. The application state. |
| State state = 2 [(google.api.field_behavior) = REQUIRED]; |
| |
| // Required. The numerical progress of the application, from 1 to 100. |
| float progress = 3 [(google.api.field_behavior) = REQUIRED]; |
| |
| // Optional. The HTTP URL of the ApplicationMaster, HistoryServer, or |
| // TimelineServer that provides application-specific information. The URL uses |
| // the internal hostname, and requires a proxy server for resolution and, |
| // possibly, access. |
| string tracking_url = 4 [(google.api.field_behavior) = OPTIONAL]; |
| } |
| |
| // A Dataproc job resource. |
| message Job { |
| // Optional. The fully qualified reference to the job, which can be used to |
| // obtain the equivalent REST path of the job resource. If this property |
| // is not specified when a job is created, the server generates a |
| // <code>job_id</code>. |
| JobReference reference = 1 [(google.api.field_behavior) = OPTIONAL]; |
| |
| // Required. Job information, including how, when, and where to |
| // run the job. |
| JobPlacement placement = 2 [(google.api.field_behavior) = REQUIRED]; |
| |
| // Required. The application/framework-specific portion of the job. |
| oneof type_job { |
| // Optional. Job is a Hadoop job. |
| HadoopJob hadoop_job = 3 [(google.api.field_behavior) = OPTIONAL]; |
| |
| // Optional. Job is a Spark job. |
| SparkJob spark_job = 4 [(google.api.field_behavior) = OPTIONAL]; |
| |
| // Optional. Job is a PySpark job. |
| PySparkJob pyspark_job = 5 [(google.api.field_behavior) = OPTIONAL]; |
| |
| // Optional. Job is a Hive job. |
| HiveJob hive_job = 6 [(google.api.field_behavior) = OPTIONAL]; |
| |
| // Optional. Job is a Pig job. |
| PigJob pig_job = 7 [(google.api.field_behavior) = OPTIONAL]; |
| |
| // Optional. Job is a SparkR job. |
| SparkRJob spark_r_job = 21 [(google.api.field_behavior) = OPTIONAL]; |
| |
| // Optional. Job is a SparkSql job. |
| SparkSqlJob spark_sql_job = 12 [(google.api.field_behavior) = OPTIONAL]; |
| |
| // Optional. Job is a Presto job. |
| PrestoJob presto_job = 23 [(google.api.field_behavior) = OPTIONAL]; |
| } |
| |
| // Output only. The job status. Additional application-specific |
| // status information may be contained in the <code>type_job</code> |
| // and <code>yarn_applications</code> fields. |
| JobStatus status = 8 [(google.api.field_behavior) = OUTPUT_ONLY]; |
| |
| // Output only. The previous job status. |
| repeated JobStatus status_history = 13 |
| [(google.api.field_behavior) = OUTPUT_ONLY]; |
| |
| // Output only. The collection of YARN applications spun up by this job. |
| // |
| // **Beta** Feature: This report is available for testing purposes only. It |
| // may be changed before final release. |
| repeated YarnApplication yarn_applications = 9 |
| [(google.api.field_behavior) = OUTPUT_ONLY]; |
| |
| // Output only. A URI pointing to the location of the stdout of the job's |
| // driver program. |
| string driver_output_resource_uri = 17 |
| [(google.api.field_behavior) = OUTPUT_ONLY]; |
| |
| // Output only. If present, the location of miscellaneous control files |
| // which may be used as part of job setup and handling. If not present, |
| // control files may be placed in the same location as |
| // `driver_output_resource_uri`. |
| string driver_control_files_uri = 15 |
| [(google.api.field_behavior) = OUTPUT_ONLY]; |
| |
| // Optional. The labels to associate with this job. |
| // Label **keys** must contain 1 to 63 characters, and must conform to |
| // [RFC 1035](https://www.ietf.org/rfc/rfc1035.txt). |
| // Label **values** may be empty, but, if present, must contain 1 to 63 |
| // characters, and must conform to [RFC |
| // 1035](https://www.ietf.org/rfc/rfc1035.txt). No more than 32 labels can be |
| // associated with a job. |
| map<string, string> labels = 18 [(google.api.field_behavior) = OPTIONAL]; |
| |
| // Optional. Job scheduling configuration. |
| JobScheduling scheduling = 20 [(google.api.field_behavior) = OPTIONAL]; |
| |
| // Output only. A UUID that uniquely identifies a job within the project |
| // over time. This is in contrast to a user-settable reference.job_id that |
| // may be reused over time. |
| string job_uuid = 22 [(google.api.field_behavior) = OUTPUT_ONLY]; |
| |
| // Output only. Indicates whether the job is completed. If the value is |
| // `false`, the job is still in progress. If `true`, the job is completed, and |
| // the `status.state` field will indicate whether it was successful, failed, |
| // or cancelled. |
| bool done = 24 [(google.api.field_behavior) = OUTPUT_ONLY]; |
| |
| // Optional. Driver scheduling configuration. |
| DriverSchedulingConfig driver_scheduling_config = 27 |
| [(google.api.field_behavior) = OPTIONAL]; |
| } |
| |
| // Driver scheduling configuration. |
| message DriverSchedulingConfig { |
| // Required. The amount of memory in MB the driver is requesting. |
| int32 memory_mb = 1 [(google.api.field_behavior) = REQUIRED]; |
| |
| // Required. The number of vCPUs the driver is requesting. |
| int32 vcores = 2 [(google.api.field_behavior) = REQUIRED]; |
| } |
| |
| // Job scheduling options. |
| message JobScheduling { |
| // Optional. Maximum number of times per hour a driver can be restarted as |
| // a result of the driver exiting with a non-zero code before the job is |
| // reported failed. |
| // |
| // A job may be reported as thrashing if the driver exits with a non-zero code |
| // four times within a 10-minute window. |
| // |
| // Maximum value is 10. |
| // |
| // **Note:** This restartable job option is not supported in Dataproc |
| // [workflow |
| // templates](https://cloud.google.com/dataproc/docs/concepts/workflows/using-workflows#adding_jobs_to_a_template). |
| int32 max_failures_per_hour = 1 [(google.api.field_behavior) = OPTIONAL]; |
| |
| // Optional. Maximum total number of times a driver may be restarted as a |
| // result of the driver exiting with a non-zero code. After the maximum number |
| // is reached, the job will be reported as failed. |
| // |
| // Maximum value is 240. |
| // |
| // **Note:** Currently, this restartable job option is |
| // not supported in Dataproc |
| // [workflow |
| // templates](https://cloud.google.com/dataproc/docs/concepts/workflows/using-workflows#adding_jobs_to_a_template). |
| int32 max_failures_total = 2 [(google.api.field_behavior) = OPTIONAL]; |
| } |
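| |
| // Illustrative only: a `JobScheduling` block that stays within the maxima |
| // documented above (10 per hour, 240 total); the specific numbers are |
| // placeholders. |
| // |
| // "scheduling": { |
| // "maxFailuresPerHour": 5, |
| // "maxFailuresTotal": 20 |
| // } |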
| |
| // A request to submit a job. |
| message SubmitJobRequest { |
| // Required. The ID of the Google Cloud Platform project that the job |
| // belongs to. |
| string project_id = 1 [(google.api.field_behavior) = REQUIRED]; |
| |
| // Required. The Dataproc region in which to handle the request. |
| string region = 3 [(google.api.field_behavior) = REQUIRED]; |
| |
| // Required. The job resource. |
| Job job = 2 [(google.api.field_behavior) = REQUIRED]; |
| |
| // Optional. A unique id used to identify the request. If the server |
| // receives two |
| // [SubmitJobRequest](https://cloud.google.com/dataproc/docs/reference/rpc/google.cloud.dataproc.v1#google.cloud.dataproc.v1.SubmitJobRequest)s |
| // with the same id, then the second request will be ignored and the |
| // first [Job][google.cloud.dataproc.v1.Job] created and stored in the backend |
| // is returned. |
| // |
| // It is recommended to always set this value to a |
| // [UUID](https://en.wikipedia.org/wiki/Universally_unique_identifier). |
| // |
| // The id must contain only letters (a-z, A-Z), numbers (0-9), |
| // underscores (_), and hyphens (-). The maximum length is 40 characters. |
| string request_id = 4 [(google.api.field_behavior) = OPTIONAL]; |
| } |
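| |
| // Illustrative only: a `SubmitJobRequest` body that opts into idempotent |
| // retries by setting `request_id` to a client-generated UUID, as recommended |
| // above. The UUID is a placeholder, and the job payload is elided. |
| // |
| // { |
| // "requestId": "b16b00b5-8f3c-4d6e-9abc-0123456789ab", |
| // "job": { ... } |
| // } |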
| |
| // Job Operation metadata. |
| message JobMetadata { |
| // Output only. The job id. |
| string job_id = 1 [(google.api.field_behavior) = OUTPUT_ONLY]; |
| |
| // Output only. Most recent job status. |
| JobStatus status = 2 [(google.api.field_behavior) = OUTPUT_ONLY]; |
| |
| // Output only. Operation type. |
| string operation_type = 3 [(google.api.field_behavior) = OUTPUT_ONLY]; |
| |
| // Output only. Job submission time. |
| google.protobuf.Timestamp start_time = 4 |
| [(google.api.field_behavior) = OUTPUT_ONLY]; |
| } |
| |
| // A request to get the resource representation for a job in a project. |
| message GetJobRequest { |
| // Required. The ID of the Google Cloud Platform project that the job |
| // belongs to. |
| string project_id = 1 [(google.api.field_behavior) = REQUIRED]; |
| |
| // Required. The Dataproc region in which to handle the request. |
| string region = 3 [(google.api.field_behavior) = REQUIRED]; |
| |
| // Required. The job ID. |
| string job_id = 2 [(google.api.field_behavior) = REQUIRED]; |
| } |
| |
| // A request to list jobs in a project. |
| message ListJobsRequest { |
| // A matcher that specifies categories of job states. |
| enum JobStateMatcher { |
| // Match all jobs, regardless of state. |
| ALL = 0; |
| |
| // Only match jobs in non-terminal states: PENDING, RUNNING, or |
| // CANCEL_PENDING. |
| ACTIVE = 1; |
| |
| // Only match jobs in terminal states: CANCELLED, DONE, or ERROR. |
| NON_ACTIVE = 2; |
| } |
| |
| // Required. The ID of the Google Cloud Platform project that the job |
| // belongs to. |
| string project_id = 1 [(google.api.field_behavior) = REQUIRED]; |
| |
| // Required. The Dataproc region in which to handle the request. |
| string region = 6 [(google.api.field_behavior) = REQUIRED]; |
| |
| // Optional. The number of results to return in each response. |
| int32 page_size = 2 [(google.api.field_behavior) = OPTIONAL]; |
| |
| // Optional. The page token, returned by a previous call, to request the |
| // next page of results. |
| string page_token = 3 [(google.api.field_behavior) = OPTIONAL]; |
| |
| // Optional. If set, the returned jobs list includes only jobs that were |
| // submitted to the named cluster. |
| string cluster_name = 4 [(google.api.field_behavior) = OPTIONAL]; |
| |
| // Optional. Specifies enumerated categories of jobs to list. |
| // (default = match ALL jobs). |
| // |
| // If `filter` is provided, `jobStateMatcher` will be ignored. |
| JobStateMatcher job_state_matcher = 5 |
| [(google.api.field_behavior) = OPTIONAL]; |
| |
| // Optional. A filter constraining the jobs to list. Filters are |
| // case-sensitive and have the following syntax: |
| // |
| // [field = value] AND [field [= value]] ... |
| // |
| // where **field** is `status.state` or `labels.[KEY]`, and `[KEY]` is a label |
| // key. **value** can be `*` to match all values. |
| // `status.state` can be either `ACTIVE` or `NON_ACTIVE`. |
| // Only the logical `AND` operator is supported; space-separated items are |
| // treated as having an implicit `AND` operator. |
| // |
| // Example filter: |
| // |
| // status.state = ACTIVE AND labels.env = staging AND labels.starred = * |
| string filter = 7 [(google.api.field_behavior) = OPTIONAL]; |
| } |
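| |
| // Illustrative only: the documented filter might appear on the wire as a |
| // URL-encoded query parameter of the ListJobs GET request; the project, |
| // region, and label values are placeholders. |
| // |
| // GET /v1/projects/my-project/regions/us-central1/jobs |
| // ?filter=status.state%20%3D%20ACTIVE%20AND%20labels.env%20%3D%20staging |
| // &pageSize=100 |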
| |
| // A request to update a job. |
| message UpdateJobRequest { |
| // Required. The ID of the Google Cloud Platform project that the job |
| // belongs to. |
| string project_id = 1 [(google.api.field_behavior) = REQUIRED]; |
| |
| // Required. The Dataproc region in which to handle the request. |
| string region = 2 [(google.api.field_behavior) = REQUIRED]; |
| |
| // Required. The job ID. |
| string job_id = 3 [(google.api.field_behavior) = REQUIRED]; |
| |
| // Required. The changes to the job. |
| Job job = 4 [(google.api.field_behavior) = REQUIRED]; |
| |
| // Required. Specifies the path, relative to <code>Job</code>, of |
| // the field to update. For example, to update the labels of a Job the |
| // <code>update_mask</code> parameter would be specified as |
| // <code>labels</code>, and the `PATCH` request body would specify the new |
| // value. <strong>Note:</strong> Currently, <code>labels</code> is the only |
| // field that can be updated. |
| google.protobuf.FieldMask update_mask = 5 |
| [(google.api.field_behavior) = REQUIRED]; |
| } |
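| |
| // Illustrative only: since `labels` is currently the only updatable field |
| // (per the `update_mask` documentation above), an update request might look |
| // like the sketch below; the project, region, job ID, and label values are |
| // placeholders. |
| // |
| // PATCH /v1/projects/my-project/regions/us-central1/jobs/job-1234?updateMask=labels |
| // |
| // { |
| // "labels": { |
| // "env": "production" |
| // } |
| // } |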
| |
| // A list of jobs in a project. |
| message ListJobsResponse { |
| // Output only. Jobs list. |
| repeated Job jobs = 1 [(google.api.field_behavior) = OUTPUT_ONLY]; |
| |
| // Optional. This token is included in the response if there are more results |
| // to fetch. To fetch additional results, provide this value as the |
| // `page_token` in a subsequent <code>ListJobsRequest</code>. |
| string next_page_token = 2 [(google.api.field_behavior) = OPTIONAL]; |
| } |
| |
| // A request to cancel a job. |
| message CancelJobRequest { |
| // Required. The ID of the Google Cloud Platform project that the job |
| // belongs to. |
| string project_id = 1 [(google.api.field_behavior) = REQUIRED]; |
| |
| // Required. The Dataproc region in which to handle the request. |
| string region = 3 [(google.api.field_behavior) = REQUIRED]; |
| |
| // Required. The job ID. |
| string job_id = 2 [(google.api.field_behavior) = REQUIRED]; |
| } |
| |
| // A request to delete a job. |
| message DeleteJobRequest { |
| // Required. The ID of the Google Cloud Platform project that the job |
| // belongs to. |
| string project_id = 1 [(google.api.field_behavior) = REQUIRED]; |
| |
| // Required. The Dataproc region in which to handle the request. |
| string region = 3 [(google.api.field_behavior) = REQUIRED]; |
| |
| // Required. The job ID. |
| string job_id = 2 [(google.api.field_behavior) = REQUIRED]; |
| } |