tools/buildgen/extract_metadata_from_bazel_xml.py - platform/external/grpc-grpc - Git at Google

 #!/usr/bin/env python
 # Copyright 2020 The gRPC Authors
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.

 # Script to extract build metadata from bazel BUILD.
 # To avoid having two sources of truth for the build metadata (build
 # targets, source files, header files etc.), this script analyzes the contents
 # of bazel BUILD files and generates a YAML file (currently called
 # build_autogenerated.yaml). The format and semantics of the generated YAML files
 # is chosen to match the format of a "build.yaml" file, which used
 # to be build the source of truth for gRPC build before bazel became
 # the primary build system.
 # A good basic overview of the "build.yaml" format is available here:
 # https://github.com/grpc/grpc/blob/master/templates/README.md. Note that
 # while useful as an overview, the doc does not act as formal spec
 # (formal spec does not exist in fact) and the doc can be incomplete,
 # inaccurate or slightly out of date.
 # TODO(jtattermusch): In the future we want to get rid of the legacy build.yaml
 # format entirely or simplify it to a point where it becomes self-explanatory
 # and doesn't need any detailed documentation.

 import subprocess
 import yaml
 import xml.etree.ElementTree as ET
 import os
 import sys
 import build_cleaner

 _ROOT = os.path.abspath(os.path.join(os.path.dirname(sys.argv[0]), '../..'))
 os.chdir(_ROOT)


 def _bazel_query_xml_tree(query):
     """Get xml output of bazel query invocation, parsed as XML tree"""
     output = subprocess.check_output(
         ['tools/bazel', 'query', '--noimplicit_deps', '--output', 'xml', query])
     return ET.fromstring(output)


 def _rule_dict_from_xml_node(rule_xml_node):
     """Converts XML node representing a rule (obtained from "bazel query --output xml") to a dictionary that contains all the metadata we will need."""
     result = {
         'class': rule_xml_node.attrib.get('class'),
         'name': rule_xml_node.attrib.get('name'),
         'srcs': [],
         'hdrs': [],
         'deps': [],
         'data': [],
         'tags': [],
         'args': [],
         'generator_function': None,
         'size': None,
         'flaky': False,
     }
     for child in rule_xml_node:
         # all the metadata we want is stored under "list" tags
         if child.tag == 'list':
             list_name = child.attrib['name']
             if list_name in ['srcs', 'hdrs', 'deps', 'data', 'tags', 'args']:
                 result[list_name] += [item.attrib['value'] for item in child]
         if child.tag == 'string':
             string_name = child.attrib['name']
             if string_name in ['generator_function', 'size']:
                 result[string_name] = child.attrib['value']
         if child.tag == 'boolean':
             bool_name = child.attrib['name']
             if bool_name in ['flaky']:
                 result[bool_name] = child.attrib['value'] == 'true'
     return result


 def _extract_rules_from_bazel_xml(xml_tree):
     """Extract bazel rules from an XML tree node obtained from "bazel query --output xml" command."""
     result = {}
     for child in xml_tree:
         if child.tag == 'rule':
             rule_dict = _rule_dict_from_xml_node(child)
             rule_clazz = rule_dict['class']
             rule_name = rule_dict['name']
             if rule_clazz in [
                     'cc_library', 'cc_binary', 'cc_test', 'cc_proto_library',
                     'proto_library'
             ]:
                 if rule_name in result:
                     raise Exception('Rule %s already present' % rule_name)
                 result[rule_name] = rule_dict
     return result


 def _get_bazel_label(target_name):
     if ':' in target_name:
         return '//%s' % target_name
     else:
         return '//:%s' % target_name


 def _extract_source_file_path(label):
     """Gets relative path to source file from bazel deps listing"""
     if label.startswith('//'):
         label = label[len('//'):]
     # labels in form //:src/core/lib/surface/call_test_only.h
     if label.startswith(':'):
         label = label[len(':'):]
     # labels in form //test/core/util:port.cc
     label = label.replace(':', '/')
     return label


 def _extract_public_headers(bazel_rule):
     """Gets list of public headers from a bazel rule"""
     result = []
     for dep in bazel_rule['hdrs']:
         if dep.startswith('//:include/') and dep.endswith('.h'):
             result.append(_extract_source_file_path(dep))
     return list(sorted(result))


 def _extract_nonpublic_headers(bazel_rule):
     """Gets list of non-public headers from a bazel rule"""
     result = []
     for dep in bazel_rule['hdrs']:
         if dep.startswith('//') and not dep.startswith(
                 '//:include/') and dep.endswith('.h'):
             result.append(_extract_source_file_path(dep))
     return list(sorted(result))


 def _extract_sources(bazel_rule):
     """Gets list of source files from a bazel rule"""
     result = []
     for dep in bazel_rule['srcs']:
         if dep.startswith('//') and (dep.endswith('.cc') or dep.endswith('.c')
                                      or dep.endswith('.proto')):
             result.append(_extract_source_file_path(dep))
     return list(sorted(result))


 def _extract_deps(bazel_rule):
     """Gets list of deps from from a bazel rule"""
     return list(sorted(bazel_rule['deps']))


 def _create_target_from_bazel_rule(target_name, bazel_rules):
     """Create build.yaml-like target definition from bazel metadata"""
     bazel_rule = bazel_rules[_get_bazel_label(target_name)]

     # Create a template for our target from the bazel rule. Initially we only
     # populate some "private" fields with the original info we got from bazel
     # and only later we will populate the public fields (once we do some extra
     # postprocessing).
     result = {
         'name': target_name,
         '_PUBLIC_HEADERS_BAZEL': _extract_public_headers(bazel_rule),
         '_HEADERS_BAZEL': _extract_nonpublic_headers(bazel_rule),
         '_SRC_BAZEL': _extract_sources(bazel_rule),
         '_DEPS_BAZEL': _extract_deps(bazel_rule),
     }
     return result


 def _sort_by_build_order(lib_names, lib_dict, deps_key_name, verbose=False):
     """Sort library names to form correct build order. Use metadata from lib_dict"""
     # we find correct build order by performing a topological sort
     # expected output: if library B depends on A, A should be listed first

     # all libs that are not in the dictionary are considered external.
     external_deps = list(
         sorted([lib_name for lib_name in lib_names if lib_name not in lib_dict
                ]))
     if verbose:
         print('topo_ordering ' + str(lib_names))
         print('    external_deps ' + str(external_deps))

     result = list(external_deps)  # external deps will be listed first
     while len(result) < len(lib_names):
         more_results = []
         for lib in lib_names:
             if lib not in result:
                 dep_set = set(lib_dict[lib].get(deps_key_name, []))
                 dep_set = dep_set.intersection(lib_names)
                 # if lib only depends on what's already built, add it to the results
                 if not dep_set.difference(set(result)):
                     more_results.append(lib)
         if not more_results:
             raise Exception(
                 'Cannot sort topologically, there seems to be a cyclic dependency'
             )
         if verbose:
             print('    adding ' + str(more_results))
         result = result + list(
             sorted(more_results
                   ))  # when build order doesn't matter, sort lexicographically
     return result


 # TODO(jtattermusch): deduplicate with transitive_dependencies.py (which has a slightly different logic)
 def _populate_transitive_deps(bazel_rules):
     """Add 'transitive_deps' field for each of the rules"""
     transitive_deps = {}
     for rule_name in bazel_rules.keys():
         transitive_deps[rule_name] = set(bazel_rules[rule_name]['deps'])

     while True:
         deps_added = 0
         for rule_name in bazel_rules.keys():
             old_deps = transitive_deps[rule_name]
             new_deps = set(old_deps)
             for dep_name in old_deps:
                 new_deps.update(transitive_deps.get(dep_name, set()))
             deps_added += len(new_deps) - len(old_deps)
             transitive_deps[rule_name] = new_deps
         # if none of the transitive dep sets has changed, we're done
         if deps_added == 0:
             break

     for rule_name, bazel_rule in bazel_rules.items():
         bazel_rule['transitive_deps'] = list(sorted(transitive_deps[rule_name]))


 def _external_dep_name_from_bazel_dependency(bazel_dep):
     """Returns name of dependency if external bazel dependency is provided or None"""
     if bazel_dep.startswith('@com_google_absl//'):
         # special case for add dependency on one of the absl libraries (there is not just one absl library)
         prefixlen = len('@com_google_absl//')
         return bazel_dep[prefixlen:]
     elif bazel_dep == '//external:upb_lib':
         return 'upb'
     elif bazel_dep == '//external:benchmark':
         return 'benchmark'
     else:
         # all the other external deps such as protobuf, cares, zlib
         # don't need to be listed explicitly, they are handled automatically
         # by the build system (make, cmake)
         return None


 def _expand_intermediate_deps(target_dict, public_dep_names, bazel_rules):
     # Some of the libraries defined by bazel won't be exposed in build.yaml
     # We call these "intermediate" dependencies. This method expands
     # the intermediate deps for given target (populates library's
     # headers, sources and dicts as if the intermediate dependency never existed)

     # use this dictionary to translate from bazel labels to dep names
     bazel_label_to_dep_name = {}
     for dep_name in public_dep_names:
         bazel_label_to_dep_name[_get_bazel_label(dep_name)] = dep_name

     target_name = target_dict['name']
     bazel_deps = target_dict['_DEPS_BAZEL']

     # initial values
     public_headers = set(target_dict['_PUBLIC_HEADERS_BAZEL'])
     headers = set(target_dict['_HEADERS_BAZEL'])
     src = set(target_dict['_SRC_BAZEL'])
     deps = set()

     expansion_blocklist = set()
     to_expand = set(bazel_deps)
     while to_expand:

         # start with the last dependency to be built
         build_order = _sort_by_build_order(list(to_expand), bazel_rules,
                                            'transitive_deps')

         bazel_dep = build_order[-1]
         to_expand.remove(bazel_dep)

         is_public = bazel_dep in bazel_label_to_dep_name
         external_dep_name_maybe = _external_dep_name_from_bazel_dependency(
             bazel_dep)

         if is_public:
             # this is not an intermediate dependency we so we add it
             # to the list of public dependencies to the list, in the right format
             deps.add(bazel_label_to_dep_name[bazel_dep])

             # we do not want to expand any intermediate libraries that are already included
             # by the dependency we just added
             expansion_blocklist.update(
                 bazel_rules[bazel_dep]['transitive_deps'])

         elif external_dep_name_maybe:
             deps.add(external_dep_name_maybe)

         elif bazel_dep.startswith(
                 '//external:') or not bazel_dep.startswith('//'):
             # all the other external deps can be skipped
             pass

         elif bazel_dep in expansion_blocklist:
             # do not expand if a public dependency that depends on this has already been expanded
             pass

         else:
             if bazel_dep in bazel_rules:
                 # this is an intermediate library, expand it
                 public_headers.update(
                     _extract_public_headers(bazel_rules[bazel_dep]))
                 headers.update(
                     _extract_nonpublic_headers(bazel_rules[bazel_dep]))
                 src.update(_extract_sources(bazel_rules[bazel_dep]))

                 new_deps = _extract_deps(bazel_rules[bazel_dep])
                 to_expand.update(new_deps)
             else:
                 raise Exception(bazel_dep + ' not in bazel_rules')

     # make the 'deps' field transitive, but only list non-intermediate deps and selected external deps
     bazel_transitive_deps = bazel_rules[_get_bazel_label(
         target_name)]['transitive_deps']
     for transitive_bazel_dep in bazel_transitive_deps:
         public_name = bazel_label_to_dep_name.get(transitive_bazel_dep, None)
         if public_name:
             deps.add(public_name)
         external_dep_name_maybe = _external_dep_name_from_bazel_dependency(
             transitive_bazel_dep)
         if external_dep_name_maybe:
             # expanding all absl libraries is technically correct but creates too much noise
             if not external_dep_name_maybe.startswith('absl'):
                 deps.add(external_dep_name_maybe)

     target_dict['public_headers'] = list(sorted(public_headers))
     target_dict['headers'] = list(sorted(headers))
     target_dict['src'] = list(sorted(src))
     target_dict['deps'] = list(sorted(deps))


 def _generate_build_metadata(build_extra_metadata, bazel_rules):
     """Generate build metadata in build.yaml-like format bazel build metadata and build.yaml-specific "extra metadata"."""
     lib_names = list(build_extra_metadata.keys())
     result = {}

     for lib_name in lib_names:
         lib_dict = _create_target_from_bazel_rule(lib_name, bazel_rules)

         # Figure out the final list of headers and sources for given target.
         # While this is mostly based on bazel build metadata, build.yaml does
         # not necessarily expose all the targets that are present in bazel build.
         # These "intermediate dependencies" might get flattened.
         # TODO(jtattermusch): This is done to avoid introducing too many intermediate
         # libraries into the build.yaml-based builds (which might in cause issues
         # building language-specific artifacts) and also because the libraries
         # in build.yaml-based build are generally considered units of distributions
         # (= public libraries that are visible to the user and are installable),
         # while in bazel builds it is customary to define larger number of smaller
         # "sublibraries". The need for elision (and expansion)
         # of intermediate libraries can be re-evaluated in the future.
         _expand_intermediate_deps(lib_dict, lib_names, bazel_rules)

         # populate extra properties from the build.yaml-specific "extra metadata"
         lib_dict.update(build_extra_metadata.get(lib_name, {}))

         # store to results
         result[lib_name] = lib_dict

     # Rename targets marked with "_RENAME" extra metadata.
     # This is mostly a cosmetic change to ensure that we end up with build.yaml target
     # names we're used to from the past (and also to avoid too long target names).
     # The rename step needs to be made after we're done with most of processing logic
     # otherwise the already-renamed libraries will have different names than expected
     for lib_name in lib_names:
         to_name = build_extra_metadata.get(lib_name, {}).get('_RENAME', None)
         if to_name:
             # store lib under the new name and also change its 'name' property
             if to_name in result:
                 raise Exception('Cannot rename target ' + lib_name + ', ' +
                                 to_name + ' already exists.')
             lib_dict = result.pop(lib_name)
             lib_dict['name'] = to_name
             result[to_name] = lib_dict

             # dep names need to be updated as well
             for lib_dict_to_update in result.values():
                 lib_dict_to_update['deps'] = list([
                     to_name if dep == lib_name else dep
                     for dep in lib_dict_to_update['deps']
                 ])

     # make sure deps are listed in reverse topological order (e.g. "grpc gpr" and not "gpr grpc")
     for lib_dict in result.values():
         lib_dict['deps'] = list(
             reversed(_sort_by_build_order(lib_dict['deps'], result, 'deps')))

     return result


 def _convert_to_build_yaml_like(lib_dict):
     lib_names = [
         lib_name for lib_name in list(lib_dict.keys())
         if lib_dict[lib_name].get('_TYPE', 'library') == 'library'
     ]
     target_names = [
         lib_name for lib_name in list(lib_dict.keys())
         if lib_dict[lib_name].get('_TYPE', 'library') == 'target'
     ]
     test_names = [
         lib_name for lib_name in list(lib_dict.keys())
         if lib_dict[lib_name].get('_TYPE', 'library') == 'test'
     ]

     # list libraries and targets in predefined order
     lib_list = [lib_dict[lib_name] for lib_name in lib_names]
     target_list = [lib_dict[lib_name] for lib_name in target_names]
     test_list = [lib_dict[lib_name] for lib_name in test_names]

     # get rid of temporary private fields prefixed with "_" and some other useless fields
     for lib in lib_list:
         for field_to_remove in [k for k in lib.keys() if k.startswith('_')]:
             lib.pop(field_to_remove, None)
     for target in target_list:
         for field_to_remove in [k for k in target.keys() if k.startswith('_')]:
             target.pop(field_to_remove, None)
         target.pop('public_headers',
                    None)  # public headers make no sense for targets
     for test in test_list:
         for field_to_remove in [k for k in test.keys() if k.startswith('_')]:
             test.pop(field_to_remove, None)
         test.pop('public_headers',
                  None)  # public headers make no sense for tests

     build_yaml_like = {
         'libs': lib_list,
         'filegroups': [],
         'targets': target_list,
         'tests': test_list,
     }
     return build_yaml_like


 def _extract_cc_tests(bazel_rules):
     """Gets list of cc_test tests from bazel rules"""
     result = []
     for bazel_rule in bazel_rules.values():
         if bazel_rule['class'] == 'cc_test':
             test_name = bazel_rule['name']
             if test_name.startswith('//'):
                 prefixlen = len('//')
                 result.append(test_name[prefixlen:])
     return list(sorted(result))


 def _exclude_unwanted_cc_tests(tests):
     """Filters out bazel tests that we don't want to run with other build systems or we cannot build them reasonably"""

     # most qps tests are autogenerated, we are fine without them
     tests = [test for test in tests if not test.startswith('test/cpp/qps:')]

     # we have trouble with census dependency outside of bazel
     tests = [
         test for test in tests
         if not test.startswith('test/cpp/ext/filters/census:')
     ]
     tests = [
         test for test in tests
         if not test.startswith('test/cpp/microbenchmarks:bm_opencensus_plugin')
     ]

     # missing opencensus/stats/stats.h
     tests = [
         test for test in tests if not test.startswith(
             'test/cpp/end2end:server_load_reporting_end2end_test')
     ]
     tests = [
         test for test in tests if not test.startswith(
             'test/cpp/server/load_reporter:lb_load_reporter_test')
     ]

     # The test uses --running_under_bazel cmdline argument
     # To avoid the trouble needing to adjust it, we just skip the test
     tests = [
         test for test in tests if not test.startswith(
             'test/cpp/naming:resolver_component_tests_runner_invoker')
     ]

     # the test requires 'client_crash_test_server' to be built
     tests = [
         test for test in tests
         if not test.startswith('test/cpp/end2end:time_change_test')
     ]

     # the test requires 'client_crash_test_server' to be built
     tests = [
         test for test in tests
         if not test.startswith('test/cpp/end2end:client_crash_test')
     ]

     # the test requires 'server_crash_test_client' to be built
     tests = [
         test for test in tests
         if not test.startswith('test/cpp/end2end:server_crash_test')
     ]

     # test never existed under build.yaml and it fails -> skip it
     tests = [
         test for test in tests
         if not test.startswith('test/core/tsi:ssl_session_cache_test')
     ]

     # the binary of this test does not get built with cmake
     tests = [
         test for test in tests
         if not test.startswith('test/cpp/util:channelz_sampler_test')
     ]

     return tests


 def _generate_build_extra_metadata_for_tests(tests, bazel_rules):
     """For given tests, generate the "extra metadata" that we need for our "build.yaml"-like output. The extra metadata is generated from the bazel rule metadata by using a bunch of heuristics."""
     test_metadata = {}
     for test in tests:
         test_dict = {'build': 'test', '_TYPE': 'target'}

         bazel_rule = bazel_rules[_get_bazel_label(test)]

         bazel_tags = bazel_rule['tags']
         if 'manual' in bazel_tags:
             # don't run the tests marked as "manual"
             test_dict['run'] = False

         if bazel_rule['flaky']:
             # don't run tests that are marked as "flaky" under bazel
             # because that would only add noise for the run_tests.py tests
             # and seeing more failures for tests that we already know are flaky
             # doesn't really help anything
             test_dict['run'] = False

         if 'no_uses_polling' in bazel_tags:
             test_dict['uses_polling'] = False

         if 'grpc_fuzzer' == bazel_rule['generator_function']:
             # currently we hand-list fuzzers instead of generating them automatically
             # because there's no way to obtain maxlen property from bazel BUILD file.
             print('skipping fuzzer ' + test)
             continue

         # if any tags that restrict platform compatibility are present,
         # generate the "platforms" field accordingly
         # TODO(jtattermusch): there is also a "no_linux" tag, but we cannot take
         # it into account as it is applied by grpc_cc_test when poller expansion
         # is made (for tests where uses_polling=True). So for now, we just
         # assume all tests are compatible with linux and ignore the "no_linux" tag
         # completely.
         known_platform_tags = set(['no_windows', 'no_mac'])
         if set(bazel_tags).intersection(known_platform_tags):
             platforms = []
             # assume all tests are compatible with linux and posix
             platforms.append('linux')
             platforms.append(
                 'posix')  # there is no posix-specific tag in bazel BUILD
             if not 'no_mac' in bazel_tags:
                 platforms.append('mac')
             if not 'no_windows' in bazel_tags:
                 platforms.append('windows')
             test_dict['platforms'] = platforms

         if '//external:benchmark' in bazel_rule['transitive_deps']:
             test_dict['benchmark'] = True
             test_dict['defaults'] = 'benchmark'

         cmdline_args = bazel_rule['args']
         if cmdline_args:
             test_dict['args'] = list(cmdline_args)

         uses_gtest = '//external:gtest' in bazel_rule['transitive_deps']
         if uses_gtest:
             test_dict['gtest'] = True

         if test.startswith('test/cpp') or uses_gtest:
             test_dict['language'] = 'c++'

         elif test.startswith('test/core'):
             test_dict['language'] = 'c'
         else:
             raise Exception('wrong test' + test)

         # short test name without the path.
         # There can be name collisions, but we will resolve them later
         simple_test_name = os.path.basename(_extract_source_file_path(test))
         test_dict['_RENAME'] = simple_test_name

         test_metadata[test] = test_dict

     # detect duplicate test names
     tests_by_simple_name = {}
     for test_name, test_dict in test_metadata.items():
         simple_test_name = test_dict['_RENAME']
         if not simple_test_name in tests_by_simple_name:
             tests_by_simple_name[simple_test_name] = []
         tests_by_simple_name[simple_test_name].append(test_name)

     # choose alternative names for tests with a name collision
     for collision_list in tests_by_simple_name.values():
         if len(collision_list) > 1:
             for test_name in collision_list:
                 long_name = test_name.replace('/', '_').replace(':', '_')
                 print(
                     'short name of "%s" collides with another test, renaming to %s'
                     % (test_name, long_name))
                 test_metadata[test_name]['_RENAME'] = long_name

     return test_metadata


 def _detect_and_print_issues(build_yaml_like):
     """Try detecting some unusual situations and warn about them."""
     for tgt in build_yaml_like['targets']:
         if tgt['build'] == 'test':
             for src in tgt['src']:
                 if src.startswith('src/') and not src.endswith('.proto'):
                     print('source file from under "src/" tree used in test ' +
                           tgt['name'] + ': ' + src)


 # extra metadata that will be used to construct build.yaml
 # there are mostly extra properties that we weren't able to obtain from the bazel build
 # _TYPE: whether this is library, target or test
 # _RENAME: whether this target should be renamed to a different name (to match expectations of make and cmake builds)
 # NOTE: secure is 'check' by default, so setting secure = False below does matter
 _BUILD_EXTRA_METADATA = {
     'third_party/address_sorting:address_sorting': {
         'language': 'c',
         'build': 'all',
         'secure': False,
         '_RENAME': 'address_sorting'
     },
     'gpr': {
         'language': 'c',
         'build': 'all',
         'secure': False
     },
     'grpc': {
         'language': 'c',
         'build': 'all',
         'baselib': True,
         'secure': True,
         'generate_plugin_registry': True
     },
     'grpc++': {
         'language': 'c++',
         'build': 'all',
         'baselib': True,
     },
     'grpc++_alts': {
         'language': 'c++',
         'build': 'all',
         'baselib': True
     },
     'grpc++_error_details': {
         'language': 'c++',
         'build': 'all'
     },
     'grpc++_reflection': {
         'language': 'c++',
         'build': 'all'
     },
     'grpc++_unsecure': {
         'language': 'c++',
         'build': 'all',
         'baselib': True,
         'secure': False,
     },
     # TODO(jtattermusch): do we need to set grpc_csharp_ext's LDFLAGS for wrapping memcpy in the same way as in build.yaml?
     'grpc_csharp_ext': {
         'language': 'c',
         'build': 'all',
     },
     'grpc_unsecure': {
         'language': 'c',
         'build': 'all',
         'baselib': True,
         'secure': False,
         'generate_plugin_registry': True
     },
     'grpcpp_channelz': {
         'language': 'c++',
         'build': 'all'
     },
     'grpc++_test': {
         'language': 'c++',
         'build': 'private',
     },
     'src/compiler:grpc_plugin_support': {
         'language': 'c++',
         'build': 'protoc',
         'secure': False,
         '_RENAME': 'grpc_plugin_support'
     },
     'src/compiler:grpc_cpp_plugin': {
         'language': 'c++',
         'build': 'protoc',
         'secure': False,
         '_TYPE': 'target',
         '_RENAME': 'grpc_cpp_plugin'
     },
     'src/compiler:grpc_csharp_plugin': {
         'language': 'c++',
         'build': 'protoc',
         'secure': False,
         '_TYPE': 'target',
         '_RENAME': 'grpc_csharp_plugin'
     },
     'src/compiler:grpc_node_plugin': {
         'language': 'c++',
         'build': 'protoc',
         'secure': False,
         '_TYPE': 'target',
         '_RENAME': 'grpc_node_plugin'
     },
     'src/compiler:grpc_objective_c_plugin': {
         'language': 'c++',
         'build': 'protoc',
         'secure': False,
         '_TYPE': 'target',
         '_RENAME': 'grpc_objective_c_plugin'
     },
     'src/compiler:grpc_php_plugin': {
         'language': 'c++',
         'build': 'protoc',
         'secure': False,
         '_TYPE': 'target',
         '_RENAME': 'grpc_php_plugin'
     },
     'src/compiler:grpc_python_plugin': {
         'language': 'c++',
         'build': 'protoc',
         'secure': False,
         '_TYPE': 'target',
         '_RENAME': 'grpc_python_plugin'
     },
     'src/compiler:grpc_ruby_plugin': {
         'language': 'c++',
         'build': 'protoc',
         'secure': False,
         '_TYPE': 'target',
         '_RENAME': 'grpc_ruby_plugin'
     },

     # TODO(jtattermusch): consider adding grpc++_core_stats

     # test support libraries
     'test/core/util:grpc_test_util': {
         'language': 'c',
         'build': 'private',
         '_RENAME': 'grpc_test_util'
     },
     'test/core/util:grpc_test_util_unsecure': {
         'language': 'c',
         'build': 'private',
         'secure': False,
         '_RENAME': 'grpc_test_util_unsecure'
     },
     # TODO(jtattermusch): consider adding grpc++_test_util_unsecure - it doesn't seem to be used by bazel build (don't forget to set secure: False)
     'test/cpp/util:test_config': {
         'language': 'c++',
         'build': 'private',
         '_RENAME': 'grpc++_test_config'
     },
     'test/cpp/util:test_util': {
         'language': 'c++',
         'build': 'private',
         '_RENAME': 'grpc++_test_util'
     },

     # end2end test support libraries
     'test/core/end2end:end2end_tests': {
         'language': 'c',
         'build': 'private',
         'secure': True,
         '_RENAME': 'end2end_tests'
     },
     'test/core/end2end:end2end_nosec_tests': {
         'language': 'c',
         'build': 'private',
         'secure': False,
         '_RENAME': 'end2end_nosec_tests'
     },

     # benchmark support libraries
     'test/cpp/microbenchmarks:helpers': {
         'language': 'c++',
         'build': 'test',
         'defaults': 'benchmark',
         '_RENAME': 'benchmark_helpers'
     },
     'test/cpp/interop:interop_client': {
         'language': 'c++',
         'build': 'test',
         'run': False,
         '_TYPE': 'target',
         '_RENAME': 'interop_client'
     },
     'test/cpp/interop:interop_server': {
         'language': 'c++',
         'build': 'test',
         'run': False,
         '_TYPE': 'target',
         '_RENAME': 'interop_server'
     },
     'test/cpp/interop:xds_interop_client': {
         'language': 'c++',
         'build': 'test',
         'run': False,
         '_TYPE': 'target',
         '_RENAME': 'xds_interop_client'
     },
     'test/cpp/interop:xds_interop_server': {
         'language': 'c++',
         'build': 'test',
         'run': False,
         '_TYPE': 'target',
         '_RENAME': 'xds_interop_server'
     },
     'test/cpp/interop:http2_client': {
         'language': 'c++',
         'build': 'test',
         'run': False,
         '_TYPE': 'target',
         '_RENAME': 'http2_client'
     },
     'test/cpp/qps:qps_json_driver': {
         'language': 'c++',
         'build': 'test',
         'run': False,
         '_TYPE': 'target',
         '_RENAME': 'qps_json_driver'
     },
     'test/cpp/qps:qps_worker': {
         'language': 'c++',
         'build': 'test',
         'run': False,
         '_TYPE': 'target',
         '_RENAME': 'qps_worker'
     },
     'test/cpp/util:grpc_cli': {
         'language': 'c++',
         'build': 'test',
         'run': False,
         '_TYPE': 'target',
         '_RENAME': 'grpc_cli'
     },

     # TODO(jtattermusch): create_jwt and verify_jwt breaks distribtests because it depends on grpc_test_utils and thus requires tests to be built
     # For now it's ok to disable them as these binaries aren't very useful anyway.
     #'test/core/security:create_jwt': { 'language': 'c', 'build': 'tool', '_TYPE': 'target', '_RENAME': 'grpc_create_jwt' },
     #'test/core/security:verify_jwt': { 'language': 'c', 'build': 'tool', '_TYPE': 'target', '_RENAME': 'grpc_verify_jwt' },

     # TODO(jtattermusch): add remaining tools such as grpc_print_google_default_creds_token (they are not used by bazel build)

     # Fuzzers
     'test/core/security:alts_credentials_fuzzer': {
         'language': 'c++',
         'build': 'fuzzer',
         'corpus_dirs': ['test/core/security/corpus/alts_credentials_corpus'],
         'maxlen': 2048,
         '_TYPE': 'target',
         '_RENAME': 'alts_credentials_fuzzer'
     },
     'test/core/end2end/fuzzers:client_fuzzer': {
         'language': 'c++',
         'build': 'fuzzer',
         'corpus_dirs': ['test/core/end2end/fuzzers/client_fuzzer_corpus'],
         'maxlen': 2048,
         'dict': 'test/core/end2end/fuzzers/hpack.dictionary',
         '_TYPE': 'target',
         '_RENAME': 'client_fuzzer'
     },
     'test/core/transport/chttp2:hpack_parser_fuzzer': {
         'language': 'c++',
         'build': 'fuzzer',
         'corpus_dirs': ['test/core/transport/chttp2/hpack_parser_corpus'],
         'maxlen': 512,
         'dict': 'test/core/end2end/fuzzers/hpack.dictionary',
         '_TYPE': 'target',
         '_RENAME': 'hpack_parser_fuzzer_test'
     },
     'test/core/http:request_fuzzer': {
         'language': 'c++',
         'build': 'fuzzer',
         'corpus_dirs': ['test/core/http/request_corpus'],
         'maxlen': 2048,
         '_TYPE': 'target',
         '_RENAME': 'http_request_fuzzer_test'
     },
     'test/core/http:response_fuzzer': {
         'language': 'c++',
         'build': 'fuzzer',
         'corpus_dirs': ['test/core/http/response_corpus'],
         'maxlen': 2048,
         '_TYPE': 'target',
         '_RENAME': 'http_response_fuzzer_test'
     },
     'test/core/json:json_fuzzer': {
         'language': 'c++',
         'build': 'fuzzer',
         'corpus_dirs': ['test/core/json/corpus'],
         'maxlen': 512,
         '_TYPE': 'target',
         '_RENAME': 'json_fuzzer_test'
     },
     'test/core/nanopb:fuzzer_response': {
         'language': 'c++',
         'build': 'fuzzer',
         'corpus_dirs': ['test/core/nanopb/corpus_response'],
         'maxlen': 128,
         '_TYPE': 'target',
         '_RENAME': 'nanopb_fuzzer_response_test'
     },
     'test/core/nanopb:fuzzer_serverlist': {
         'language': 'c++',
         'build': 'fuzzer',
         'corpus_dirs': ['test/core/nanopb/corpus_serverlist'],
         'maxlen': 128,
         '_TYPE': 'target',
         '_RENAME': 'nanopb_fuzzer_serverlist_test'
     },
     'test/core/slice:percent_decode_fuzzer': {
         'language': 'c++',
         'build': 'fuzzer',
         'corpus_dirs': ['test/core/slice/percent_decode_corpus'],
         'maxlen': 32,
         '_TYPE': 'target',
         '_RENAME': 'percent_decode_fuzzer'
     },
     'test/core/slice:percent_encode_fuzzer': {
         'language': 'c++',
         'build': 'fuzzer',
         'corpus_dirs': ['test/core/slice/percent_encode_corpus'],
         'maxlen': 32,
         '_TYPE': 'target',
         '_RENAME': 'percent_encode_fuzzer'
     },
     'test/core/end2end/fuzzers:server_fuzzer': {
         'language': 'c++',
         'build': 'fuzzer',
         'corpus_dirs': ['test/core/end2end/fuzzers/server_fuzzer_corpus'],
         'maxlen': 2048,
         'dict': 'test/core/end2end/fuzzers/hpack.dictionary',
         '_TYPE': 'target',
         '_RENAME': 'server_fuzzer'
     },
     'test/core/security:ssl_server_fuzzer': {
         'language': 'c++',
         'build': 'fuzzer',
         'corpus_dirs': ['test/core/security/corpus/ssl_server_corpus'],
         'maxlen': 2048,
         '_TYPE': 'target',
         '_RENAME': 'ssl_server_fuzzer'
     },
     'test/core/uri:uri_fuzzer_test': {
         'language': 'c++',
         'build': 'fuzzer',
         'corpus_dirs': ['test/core/uri/uri_corpus'],
         'maxlen': 128,
         '_TYPE': 'target',
         '_RENAME': 'uri_fuzzer_test'
     },

     # TODO(jtattermusch): these fuzzers had no build.yaml equivalent
     # test/core/compression:message_compress_fuzzer
     # test/core/compression:message_decompress_fuzzer
     # test/core/compression:stream_compression_fuzzer
     # test/core/compression:stream_decompression_fuzzer
     # test/core/slice:b64_decode_fuzzer
     # test/core/slice:b64_encode_fuzzer
 }

 # We need a complete picture of all the targets and dependencies we're interested in
 # so we run multiple bazel queries and merge the results.
 _BAZEL_DEPS_QUERIES = [
     'deps("//test/...")',
     'deps("//:all")',
     'deps("//src/compiler/...")',
     'deps("//src/proto/...")',
 ]

 # Step 1: run a bunch of "bazel query --output xml" queries to collect
 # the raw build metadata from the bazel build.
 # At the end of this step we will have a dictionary of bazel rules
 # that are interesting to us (libraries, binaries, etc.) along
 # with their most important metadata (sources, headers, dependencies)
 #
 # Example of a single bazel rule after being populated:
 # '//:grpc' : { 'class': 'cc_library',
 #               'hdrs': ['//:include/grpc/byte_buffer.h', ... ],
 #               'srcs': ['//:src/core/lib/surface/init.cc', ... ],
 #               'deps': ['//:grpc_common', ...],
 #               ... }
 bazel_rules = {}
 for query in _BAZEL_DEPS_QUERIES:
     bazel_rules.update(
         _extract_rules_from_bazel_xml(_bazel_query_xml_tree(query)))

 # Step 1a: Knowing the transitive closure of dependencies will make
 # the postprocessing simpler, so compute the info for all our rules.
 #
 # Example:
 # '//:grpc' : { ...,
 #               'transitive_deps': ['//:gpr_base', ...] }
 _populate_transitive_deps(bazel_rules)

 # Step 2: Extract the known bazel cc_test tests. While most tests
 # will be buildable with other build systems just fine, some of these tests
 # would be too difficult to build and run with other build systems,
 # so we simply exclude the ones we don't want.
 # Note that while making tests buildable with other build systems
 # than just bazel is extra effort, we still need to do that for these
 # reasons:
 # - If our cmake build doesn't have any tests at all, it's hard to make
 #   sure that what it built actually works (we need at least some "smoke tests").
 #   This is quite important because the build flags between bazel / non-bazel flag might differ
 #   (sometimes it's for interesting reasons that are not easy to overcome)
 #   which makes it even more important to have at least some tests for cmake/make
 # - Our portability suite actually runs cmake tests and migration of portability
 #   suite fully towards bazel might be intricate (e.g. it's unclear whether it's
 #   possible to get a good enough coverage of different compilers / distros etc.
 #   with bazel)
 # - some things that are considered "tests" in build.yaml-based builds are actually binaries
 #   we'd want to be able to build anyway (qps_json_worker, interop_client, interop_server, grpc_cli)
 #   so it's unclear how much make/cmake simplification we would gain by removing just some (but not all) test
 # TODO(jtattermusch): Investigate feasibility of running portability suite with bazel.
 tests = _exclude_unwanted_cc_tests(_extract_cc_tests(bazel_rules))

 # Step 3: Generate the "extra metadata" for all our build targets.
 # While the bazel rules give us most of the information we need,
 # the legacy "build.yaml" format requires some additional fields that
 # we cannot get just from bazel alone (we call that "extra metadata").
 # In this step, we basically analyze the build metadata we have from bazel
 # and use heuristics to determine (and sometimes guess) the right
 # extra metadata to use for each target.
 #
 # - For some targets (such as the public libraries, helper libraries
 #   and executables) determining the right extra metadata is hard to do
 #   automatically. For these targets, the extra metadata is supplied "manually"
 #   in form of the _BUILD_EXTRA_METADATA dictionary. That allows us to match
 #   the semantics of the legacy "build.yaml" as closely as possible.
 #
 # - For test binaries, it is possible to generate the "extra metadata" mostly
 #   automatically using a rule-based heuristic approach because most tests
 #   look and behave alike from the build's perspective.
 #
 # TODO(jtattermusch): Of course neither "_BUILD_EXTRA_METADATA" or
 # the heuristic approach used for tests are ideal and they cannot be made
 # to cover all possible situations (and are tailored to work with the way
 # the grpc build currently works), but the idea was to start with something
 # reasonably simple that matches the "build.yaml"-like semantics as closely
 # as possible (to avoid changing too many things at once) and gradually get
 # rid of the legacy "build.yaml"-specific fields one by one. Once that is done,
 # only very little "extra metadata" would be needed and/or it would be trivial
 # to generate it automatically.
 all_extra_metadata = {}
 all_extra_metadata.update(_BUILD_EXTRA_METADATA)
 all_extra_metadata.update(
     _generate_build_extra_metadata_for_tests(tests, bazel_rules))

 # Step 4: Generate the final metadata for all the targets.
 # This is done by combining the bazel build metadata and the "extra metadata"
 # we obtained in the previous step.
 # In this step, we also perform some interesting massaging of the target metadata
 # to end up with a result that is as similar to the legacy build.yaml data
 # as possible.
 # - Some targets get renamed (to match the legacy build.yaml target names)
 # - Some intermediate libraries get elided ("expanded") to better match the set
 #   of targets provided by the legacy build.yaml build
 #
 # Originally the target renaming was introduced to address these concerns:
 # - avoid changing too many things at the same time and avoid people getting
 #   confused by some well know targets suddenly being missing
 # - Makefile/cmake and also language-specific generators rely on some build
 #   targets being called exactly the way they they are. Some of our testing
 #   scrips also invoke executables (e.g. "qps_json_driver") by their name.
 # - The autogenerated test name from bazel includes the package path
 #   (e.g. "test_cpp_TEST_NAME"). Without renaming, the target names would
 #   end up pretty ugly (e.g. test_cpp_qps_qps_json_driver).
 # TODO(jtattermusch): reevaluate the need for target renaming in the future.
 #
 # Example of a single generated target:
 # 'grpc' : { 'language': 'c',
 #            'public_headers': ['include/grpc/byte_buffer.h', ... ],
 #            'headers': ['src/core/ext/filters/client_channel/client_channel.h', ... ],
 #            'src': ['src/core/lib/surface/init.cc', ... ],
 #            'deps': ['gpr', 'address_sorting', ...],
 #            ... }
 all_targets_dict = _generate_build_metadata(all_extra_metadata, bazel_rules)

 # Step 5: convert the dictionary with all the targets to a dict that has
 # the desired "build.yaml"-like layout.
 # TODO(jtattermusch): We use the custom "build.yaml"-like layout because
 # currently all other build systems use that format as their source of truth.
 # In the future, we can get rid of this custom & legacy format entirely,
 # but we would need to update the generators for other build systems
 # at the same time.
 #
 # Layout of the result:
 # { 'libs': { TARGET_DICT_FOR_LIB_XYZ, ... },
 #   'targets': { TARGET_DICT_FOR_BIN_XYZ, ... },
 #   'tests': { TARGET_DICT_FOR_TEST_XYZ, ...} }
 build_yaml_like = _convert_to_build_yaml_like(all_targets_dict)

 # detect and report some suspicious situations we've seen before
 _detect_and_print_issues(build_yaml_like)

 # Step 6: Store the build_autogenerated.yaml in a deterministic (=sorted)
 # and cleaned-up form.
 # A basic overview of the resulting "build.yaml"-like format is here:
 # https://github.com/grpc/grpc/blob/master/templates/README.md
 # TODO(jtattermusch): The "cleanup" function is taken from the legacy
 # build system (which used build.yaml) and can be eventually removed.
 build_yaml_string = build_cleaner.cleaned_build_yaml_dict_as_string(
     build_yaml_like)
 with open('build_autogenerated.yaml', 'w') as file:
     file.write(build_yaml_string)