Add support for grpc-dotnet in GKE benchmarks (#28975)

* GKE benchmarks: add support for benchmarking grpc-dotnet

* add dotnet to loadtest basic templates

* print full path for generated examples

* add grpc-dotnet scenario to loadtest_examples.sh generator

* add grpc-dotnet to the experimental kokoro job

* yapf format code
diff --git a/tools/internal_ci/linux/grpc_e2e_performance_gke_experiment.sh b/tools/internal_ci/linux/grpc_e2e_performance_gke_experiment.sh
index 93d6e62..c558340 100755
--- a/tools/internal_ci/linux/grpc_e2e_performance_gke_experiment.sh
+++ b/tools/internal_ci/linux/grpc_e2e_performance_gke_experiment.sh
@@ -50,6 +50,7 @@
 else
     GRPC_CORE_GITREF="$(git ls-remote https://github.com/grpc/grpc.git master | cut -f1)"
 fi
+GRPC_DOTNET_GITREF="$(git ls-remote https://github.com/grpc/grpc-dotnet.git master | cut -f1)"
 GRPC_GO_GITREF="$(git ls-remote https://github.com/grpc/grpc-go.git master | cut -f1)"
 GRPC_JAVA_GITREF="$(git ls-remote https://github.com/grpc/grpc-java.git master | cut -f1)"
 # Kokoro jobs run on dedicated pools.
@@ -97,8 +98,8 @@
         -o "loadtest_with_prebuilt_workers_${pool}.yaml"
 }
 
-buildConfigs "${WORKER_POOL_8CORE}" "${BIGQUERY_TABLE_8CORE}" -l c++ -l csharp -l go -l java -l php7 -l php7_protobuf_c -l python -l ruby
-buildConfigs "${WORKER_POOL_32CORE}" "${BIGQUERY_TABLE_32CORE}" -l c++ -l csharp -l go -l java
+buildConfigs "${WORKER_POOL_8CORE}" "${BIGQUERY_TABLE_8CORE}" -l c++ -l csharp -l dotnet -l go -l java -l php7 -l php7_protobuf_c -l python -l ruby
+buildConfigs "${WORKER_POOL_32CORE}" "${BIGQUERY_TABLE_32CORE}" -l c++ -l csharp -l dotnet -l go -l java
 
 # Delete prebuilt images on exit.
 deleteImages() {
@@ -113,6 +114,7 @@
 time ../test-infra/bin/prepare_prebuilt_workers \
     -l "cxx:${GRPC_CORE_GITREF}" \
     -l "csharp:${GRPC_CORE_GITREF}" \
+    -l "dotnet:${GRPC_DOTNET_GITREF}" \
     -l "go:${GRPC_GO_GITREF}" \
     -l "java:${GRPC_JAVA_GITREF}" \
     -l "php7:${GRPC_CORE_GITREF}" \
diff --git a/tools/run_tests/performance/loadtest_examples.sh b/tools/run_tests/performance/loadtest_examples.sh
index c7714b3..d544f9c 100755
--- a/tools/run_tests/performance/loadtest_examples.sh
+++ b/tools/run_tests/performance/loadtest_examples.sh
@@ -69,6 +69,7 @@
 scenarios=(
     "cpp_generic_async_streaming_ping_pong_secure"
     "csharp_protobuf_async_unary_ping_pong"
+    "dotnet_protobuf_async_unary_ping_pong"
     "go_generic_sync_streaming_ping_pong_secure"
     "java_generic_async_streaming_ping_pong_secure"
     "node_to_node_generic_async_streaming_ping_pong_secure"
@@ -95,7 +96,7 @@
         --allow_client_language=c++ --allow_server_language=c++ \
         --allow_server_language=node \
         -o "${outputdir}/${outputfile}"
-    echo "Created example: ${outputfile}"
+    echo "Created example: ${outputdir}/${outputfile}"
 }
 
 # Prebuilt examples contain substitution keys, so must be processed before
@@ -117,7 +118,7 @@
         --allow_client_language=c++ --allow_server_language=c++ \
         --allow_server_language=node \
         -o "${outputdir}/${outputfile}"
-    echo "Created example: ${outputfile}"
+    echo "Created example: ${outputdir}/${outputfile}"
 }
 
 for scenario in "${scenarios[@]}"; do
diff --git a/tools/run_tests/performance/scenario_config.py b/tools/run_tests/performance/scenario_config.py
index 21338c4..cc0b873 100644
--- a/tools/run_tests/performance/scenario_config.py
+++ b/tools/run_tests/performance/scenario_config.py
@@ -602,6 +602,7 @@
 
 
 class CSharpLanguage(Language):
+    """The legacy Grpc.Core implementation from grpc/grpc."""
 
     def worker_cmdline(self):
         return ['tools/run_tests/performance/run_worker_csharp.sh']
@@ -726,6 +727,134 @@
         return 'csharp'
 
 
+class DotnetLanguage(Language):
+    """The pure C# implementation from grpc/grpc-dotnet."""
+
+    def worker_cmdline(self):
+        # Placeholder: only the legacy run_performance_tests.py calls this, and
+        # grpc-dotnet workers run only on the GKE-based OSS benchmark framework.
+        return ['grpc_dotnet_not_supported_by_legacy_performance_runner.sh']
+
+    def worker_port_offset(self):
+        return 1100
+
+    def scenarios(self):
+        yield _ping_pong_scenario('dotnet_generic_async_streaming_ping_pong',
+                                  rpc_type='STREAMING',
+                                  client_type='ASYNC_CLIENT',
+                                  server_type='ASYNC_GENERIC_SERVER',
+                                  use_generic_payload=True,
+                                  categories=[SMOKETEST, SCALABLE])
+
+        yield _ping_pong_scenario(
+            'dotnet_generic_async_streaming_ping_pong_insecure_1MB',
+            rpc_type='STREAMING',
+            client_type='ASYNC_CLIENT',
+            server_type='ASYNC_GENERIC_SERVER',
+            req_size=1024 * 1024,
+            resp_size=1024 * 1024,
+            use_generic_payload=True,
+            secure=False,
+            categories=[SMOKETEST, SCALABLE])
+
+        yield _ping_pong_scenario(
+            'dotnet_generic_async_streaming_qps_unconstrained_insecure',
+            rpc_type='STREAMING',
+            client_type='ASYNC_CLIENT',
+            server_type='ASYNC_GENERIC_SERVER',
+            unconstrained_client='async',
+            use_generic_payload=True,
+            secure=False,
+            categories=[SMOKETEST, SCALABLE])
+
+        yield _ping_pong_scenario('dotnet_protobuf_async_streaming_ping_pong',
+                                  rpc_type='STREAMING',
+                                  client_type='ASYNC_CLIENT',
+                                  server_type='ASYNC_SERVER')
+
+        yield _ping_pong_scenario('dotnet_protobuf_async_unary_ping_pong',
+                                  rpc_type='UNARY',
+                                  client_type='ASYNC_CLIENT',
+                                  server_type='ASYNC_SERVER',
+                                  categories=[SMOKETEST, SCALABLE])
+
+        yield _ping_pong_scenario(
+            'dotnet_protobuf_sync_to_async_unary_ping_pong',
+            rpc_type='UNARY',
+            client_type='SYNC_CLIENT',
+            server_type='ASYNC_SERVER')
+
+        yield _ping_pong_scenario(
+            'dotnet_protobuf_async_unary_qps_unconstrained',
+            rpc_type='UNARY',
+            client_type='ASYNC_CLIENT',
+            server_type='ASYNC_SERVER',
+            unconstrained_client='async',
+            categories=[SMOKETEST, SCALABLE])
+
+        yield _ping_pong_scenario(
+            'dotnet_protobuf_async_streaming_qps_unconstrained',
+            rpc_type='STREAMING',
+            client_type='ASYNC_CLIENT',
+            server_type='ASYNC_SERVER',
+            unconstrained_client='async',
+            categories=[SCALABLE])
+
+        yield _ping_pong_scenario('dotnet_to_cpp_protobuf_sync_unary_ping_pong',
+                                  rpc_type='UNARY',
+                                  client_type='SYNC_CLIENT',
+                                  server_type='SYNC_SERVER',
+                                  server_language='c++',
+                                  async_server_threads=1,
+                                  categories=[SMOKETEST, SCALABLE])
+
+        yield _ping_pong_scenario(
+            'dotnet_to_cpp_protobuf_async_streaming_ping_pong',
+            rpc_type='STREAMING',
+            client_type='ASYNC_CLIENT',
+            server_type='ASYNC_SERVER',
+            server_language='c++',
+            async_server_threads=1)
+
+        yield _ping_pong_scenario(
+            'dotnet_to_cpp_protobuf_async_unary_qps_unconstrained',
+            rpc_type='UNARY',
+            client_type='ASYNC_CLIENT',
+            server_type='ASYNC_SERVER',
+            unconstrained_client='async',
+            server_language='c++',
+            categories=[SCALABLE])
+
+        yield _ping_pong_scenario(
+            'dotnet_to_cpp_protobuf_sync_to_async_unary_qps_unconstrained',
+            rpc_type='UNARY',
+            client_type='SYNC_CLIENT',
+            server_type='ASYNC_SERVER',
+            unconstrained_client='sync',
+            server_language='c++',
+            categories=[SCALABLE])
+
+        yield _ping_pong_scenario(
+            'cpp_to_dotnet_protobuf_async_unary_qps_unconstrained',
+            rpc_type='UNARY',
+            client_type='ASYNC_CLIENT',
+            server_type='ASYNC_SERVER',
+            unconstrained_client='async',
+            client_language='c++',
+            categories=[SCALABLE])
+
+        yield _ping_pong_scenario('dotnet_protobuf_async_unary_ping_pong_1MB',
+                                  rpc_type='UNARY',
+                                  client_type='ASYNC_CLIENT',
+                                  server_type='ASYNC_SERVER',
+                                  req_size=1024 * 1024,
+                                  resp_size=1024 * 1024,
+                                  categories=[SMOKETEST, SCALABLE])
+
+    def __str__(self):
+        return 'dotnet'
+
+
 class PythonLanguage(Language):
 
     def worker_cmdline(self):
@@ -1359,6 +1488,7 @@
 LANGUAGES = {
     'c++': CXXLanguage(),
     'csharp': CSharpLanguage(),
+    'dotnet': DotnetLanguage(),
     'ruby': RubyLanguage(),
     'php7': Php7Language(),
     'php7_protobuf_c': Php7Language(php7_protobuf_c=True),
diff --git a/tools/run_tests/performance/templates/loadtest_template_basic_all_languages.yaml b/tools/run_tests/performance/templates/loadtest_template_basic_all_languages.yaml
index dbc9133..bf804e7 100644
--- a/tools/run_tests/performance/templates/loadtest_template_basic_all_languages.yaml
+++ b/tools/run_tests/performance/templates/loadtest_template_basic_all_languages.yaml
@@ -34,6 +34,25 @@
       - bash
       name: main
   - build:
+      command:
+      - bash
+      - /build_scripts/build_qps_worker.sh
+    clone:
+      gitRef: master
+      repo: https://github.com/grpc/grpc-dotnet.git
+    language: dotnet
+    pool: ${client_pool}
+    run:
+    - args:
+      - -c
+      # TODO(jtattermusch): why is the extra "--" in cmdline needed?
+      - |
+        timeout --kill-after="${KILL_AFTER}" "${POD_TIMEOUT}" \
+            qps_worker/QpsWorker -- --driver_port="${DRIVER_PORT}"
+      command:
+      - bash
+      name: main
+  - build:
       args:
       - build
       - --config
@@ -243,6 +262,24 @@
       - bash
       name: main
   - build:
+      command:
+      - bash
+      - /build_scripts/build_qps_worker.sh
+    clone:
+      gitRef: master
+      repo: https://github.com/grpc/grpc-dotnet.git
+    language: dotnet
+    pool: ${server_pool}
+    run:
+    - args:
+      - -c
+      - |
+        timeout --kill-after="${KILL_AFTER}" "${POD_TIMEOUT}" \
+            qps_worker/QpsWorker -- --driver_port="${DRIVER_PORT}"
+      command:
+      - bash
+      name: main
+  - build:
       args:
       - build
       - --config
diff --git a/tools/run_tests/performance/templates/loadtest_template_prebuilt_all_languages.yaml b/tools/run_tests/performance/templates/loadtest_template_prebuilt_all_languages.yaml
index 7238f78..fcc8170 100644
--- a/tools/run_tests/performance/templates/loadtest_template_prebuilt_all_languages.yaml
+++ b/tools/run_tests/performance/templates/loadtest_template_prebuilt_all_languages.yaml
@@ -27,6 +27,19 @@
       - bash
       image: ${prebuilt_image_prefix}/csharp:${prebuilt_image_tag}
       name: main
+  - language: dotnet
+    pool: ${client_pool}
+    run:
+    - args:
+      - -c
+      # TODO(jtattermusch): why is the extra "--" in cmdline needed?
+      - |
+        timeout --kill-after="${KILL_AFTER}" "${POD_TIMEOUT}" \
+            /execute/qps_worker/QpsWorker -- --driver_port="${DRIVER_PORT}"
+      command:
+      - bash
+      image: ${prebuilt_image_prefix}/dotnet:${prebuilt_image_tag}
+      name: main
   - language: cxx
     pool: ${client_pool}
     run:
@@ -163,6 +176,18 @@
       - bash
       image: ${prebuilt_image_prefix}/csharp:${prebuilt_image_tag}
       name: main
+  - language: dotnet
+    pool: ${server_pool}
+    run:
+    - args:
+      - -c
+      - |
+        timeout --kill-after="${KILL_AFTER}" "${POD_TIMEOUT}" \
+            /execute/qps_worker/QpsWorker -- --driver_port="${DRIVER_PORT}"
+      command:
+      - bash
+      image: ${prebuilt_image_prefix}/dotnet:${prebuilt_image_tag}
+      name: main
   - language: cxx
     pool: ${server_pool}
     run: